OpenCV HOG PeopleDetector：从训练到检测

现在的位置: 首页 > 综合 > 正文

OpenCV HOG PeopleDetector：从训练到检测

2014年09月05日 ⁄ 综合 ⁄ 共 18672字 ⁄ 字号小中大 ⁄ 评论关闭

【原文：http://blog.youtueye.com/work/opencv-hog-peopledetector-trainning.html】

opencv2.0 以上版本提供了行人检测的方法，以opencv2.2为例，该示例为opencv安装目录下的：/samples/cpp/peopledetect.cpp，测试效果如下：

我们先看看示例代码的内容：

#include

 "opencv2/imgproc/imgproc.hpp"

#include

 "opencv2/objdetect/objdetect.hpp"

#include

 "opencv2/highgui/highgui.hpp"

#include

 <stdio.h>

#include

 <string.h>

#include

 <ctype.h>

using

namespace 

cv;

using

namespace 

std;

/**
 * Prints a short description of the demo and its command-line usage to
 * standard output.
 *
 * The text is emitted with a single printf call built from adjacent string
 * literals; each literal must stay on one source line to compile.
 */
void help()
{
    printf(
            "\nDemonstrate the use of the HoG descriptor using\n"
            "  HOGDescriptor::hog.setSVMDetector(HOGDescriptor::getDefaultPeopleDetector());\n"
            "Usage:\n"
            "./peopledetect (<image_filename> | <image_list>.txt)\n\n");
}

int

main(int

argc, char**

 argv)

{

    Mat

 img;

    FILE*

 f = 0;

    char

_filename[1024];

    if(

 argc == 1 )

    {

        printf("Usage:

 peopledetect (<image_filename> | <image_list>.txt)\n");

        return

0;

    }

    img

 = imread(argv[1]);

    if(

 img.data )

    {

        strcpy(_filename,

 argv[1]);

    }

    else

    {

        f

 = fopen(argv[1],

"rt");

        if(!f)

        {

            fprintf(

 stderr, "ERROR:

 the specified file could not be loaded\n");

            return

-1;

        }

    }

    HOGDescriptor

 hog;

    hog.setSVMDetector(HOGDescriptor::getDefaultPeopleDetector());

    namedWindow("people

 detector",

 1);

    for(;;)

    {

        char*

 filename = _filename;

        if(f)

        {

            if(!fgets(filename,

 (int)sizeof(_filename)-2,

 f))

                break;

            //while(*filename

 && isspace(*filename))

            // 

 ++filename;

            if(filename[0]

 == '#')

                continue;

            int

l = strlen(filename);

            while(l

 > 0 && isspace(filename[l-1]))

                --l;

            filename[l]

 = '\0';

            img

 = imread(filename);

        }

        printf("%s:\n",

 filename);

        if(!img.data)

            continue;

        fflush(stdout);

        vector<Rect>

 found, found_filtered;

        double

t = (double)getTickCount();

        //

 run the detector with default parameters. to get a higher hit-rate

        //

 (and more false alarms, respectively), decrease the hitThreshold and

        //

 groupThreshold (set groupThreshold to 0 to turn off the grouping completely).

        hog.detectMultiScale(img,

 found, 0, Size(8,8), Size(32,32), 1.05, 2);

        t

 = (double)getTickCount()

 - t;

        printf("tdetection

 time = %gms\n",

 t*1000./cv::getTickFrequency());

        size_t

i, j;

        for(

 i = 0; i < found.size(); i++ )

        {

            Rect

 r = found[i];

            for(

 j = 0; j < found.size(); j++ )

                if(

 j != i && (r & found[j]) == r)

                    break;

            if(

 j == found.size() )

                found_filtered.push_back(r);

        }

        for(

 i = 0; i < found_filtered.size(); i++ )

        {

            Rect

 r = found_filtered[i];

            //

 the HOG detector returns slightly larger rectangles than the real objects.

            //

 so we slightly shrink the rectangles to get a nicer output.

            r.x

 += cvRound(r.width*0.1);

            r.width

 = cvRound(r.width*0.8);

            r.y

 += cvRound(r.height*0.07);

            r.height

 = cvRound(r.height*0.8);

            rectangle(img,

 r.tl(), r.br(), cv::Scalar(0,255,0), 3);

        }

        imshow("people

 detector",

 img);

        int

c = waitKey(0) & 255;

        if(

 c == 'q'

|| c == 'Q'

|| !f)

            break;

    }

    if(f)

        fclose(f);

    return

0;

}

你会发现代码中没有类似人脸检测时的分类器（模型）的载入过程，而是用

hog.setSVMDetector(HOGDescriptor::getDefaultPeopleDetector());

载入默认模型。找到hog的源码，在/modules/objdetect/hog.cpp，从第907行开始的代码如下：

vector<float>

 HOGDescriptor::getDefaultPeopleDetector()

{

    static

const 

float 

detector[] = {

       0.05359386f,

 -0.14721455f, -0.05532170f, 0.05077307f,

       0.11547081f,

 -0.04268804f, 0.04635834f, -0.05468199f, 0.08232084f,

       0.10424068f,

 -0.02294518f, 0.01108519f, 0.01378693f, 0.11193510f,

       0.01268418f,

 0.08528346f, -0.06309239f, 0.13054633f, 0.08100729f,

       -0.05209739f,

 -0.04315529f, 0.09341384f, 0.11035026f, -0.07596218f,

       -0.05517511f,

 -0.04465296f, 0.02947334f, 0.04555536f,

       -3.55954492e-003f,

 0.07818956f, 0.07730991f, 0.07890715f, 0.06222893f,

       0.09001380f,

 -0.03574381f, 0.03414327f, 0.05677258f, -0.04773581f,

       0.03746637f,

 -0.03521175f, 0.06955440f, -0.03849038f, 0.01052293f,

       0.01736112f,

 0.10867710f, 0.08748853f, 3.29739624e-003f, 0.10907028f,

       0.07913758f,

 0.10393070f, 0.02091867f, 0.11594022f, 0.13182420f,

       0.09879354f,

 0.05362710f, -0.06745391f, -7.01260753e-003f,....

显然，行人检测所用到的模型数据被作为常量写在源代码中，但这些模型数据是如何得来的呢？如果我要用自己的样本进行训练（training），然后用得到的 model 去 detect，该如何操作呢？别急，我们从opencv行人检测所用到的算法说起。

opencv行人检测所用到的算法源自Navneet Dalal和Bill
Triggs在2005's CVPR上的文章Histograms of Oriented Gradients for Human
Detection.这里是英文和中文的介绍。作者所在的研究机构(INRIA：French
National Institute for Research in Computer Science and Control,法国国家计算机技术和控制研究所)发布了这套算法的源码：INRIA
Object Detection and Localization Toolkit.

那么，opencv中的PeopleDetector是否就原封不动地使用了这套算法呢，为了求证这一问题，笔者首先比较了两者的模型数据：下载OLTbinaries.zip，找到/HOG/model_4BiSVMLight.alt文件（二进制形式的数据），用代码将其中数据保存为文本格式（这个要根据上面的源码learcode.zip），其内容如下：

0.05359386

 -0.14721455 -0.05532170 0.05077307 0.11547081 -0.04268804 0.04635834 -0.05468199 0.08232084 0.10424068

-0.02294518

 0.01108519 0.01378693 0.11193510 0.01268418 0.08528346 -0.06309239 0.13054632 0.08100729 -0.05209739

-0.04315529

 0.09341384 0.11035026 -0.07596217 -0.05517511 -0.04465296 0.02947334 0.04555536 -0.00355954 0.07818956

0.07730991

 0.07890716 0.06222893 0.09001381 -0.03574381 0.03414327 0.05677258 -0.04773581 0.03746637 -0.03521175

0.06955440

 -0.03849038 0.01052293 0.01736112 0.10867710 0.08748853 0.00329740 0.10907028 0.07913758 0.10393069

0.02091867

 0.11594022 0.13182420 0.09879354 0.05362710 -0.06745391 -0.00701261 0.00524702 0.03236255 0.01407916

0.02207983

 0.02537322 0.04547948 0.07200756 0.03129894 -0.06274468 0.02107014 0.06035208 0.08636236 0.00453164

0.02193363

 0.02309801 0.05568166 -0.02645093 0.04448695 0.02837519 0.08975695 0.04461517 0.08975355 0.07514391

0.02306982

 0.10410084 0.06368385 0.05943465 0.00458421 0.05220337 0.06675851 0.08358569 0.06712102 0.06559004

-0.03930481

 -0.00915937 -0.05897915 0.02816453 0.05032348 0.06780671 0.03377650 -0.00060942 -0.01795146 -0.03083684

-0.01302475

 -0.02972313 0.00788707 -0.03525961 -0.00250398 0.05245084 0.11791293 -0.02167498 0.05299332 0.06640524

0.05190265

 -0.00827317 0.03033127 0.05842173 -0.00401050 -0.00625106 0.05862958 -0.02465461 0.05546781 -0.08228195 ....

数值居然同HOGDescriptor::getDefaultPeopleDetector()中的一样（个别数值末位相差1，应是单精度/双精度输出时的舍入所致）！那么，你就可以用上面的OLT去训练（training）你的样本，然后把得到的model数据替换getDefaultPeopleDetector()中的数据，便可以进行你想要的目标检测啦！

为了验证这一想法的正确性和可行性，笔者做了些实验，在Ubuntu10.4 g++4.4.5环境中，步骤如下：

下载release版的程序：OLTbinaries.zip
下载样本：INRIAPerson.tar
在目录OLTbinaries/下建立两个文件夹test, train. 将INRIAperson/Test/neg拷贝到test/下，INRIAperson/Train/neg拷贝到train/下;将INRIAperson/test_64x128_H96拷贝到test/下重命名为pos，将INRIAperson/train_64x128_H96拷贝到train/下重命名为pos;

将test/neg , test/pos各自文件夹中的所有图片文件名分别输出到neg.list, pos.list, 并放置在test/下; 同样地操作在train/。

<pre class="brush: shell; gutter: true">amadeuzou@ubuntu:~/Projects/opencv/OLTbinaries/test$ ls ./neg &gt; neg.list
amadeuzou@ubuntu:~/Projects/opencv/OLTbinaries/test$ ls ./pos &gt; pos.list</pre>

到这里，样本数据便准备好了。接下来只要修改OLTbinaries/runall.sh中的相关参数，然后执行此脚本，大约一小时后，便会在OLTbinaries/HOG/下产生一个model_4BiSVMLight.alt文件，你的模型数据便保存在这里面。到这里，你便成功训练（training）出了一个model。

注意事项：

runall.sh中第5行，按你的正负样本数目修改：

1

HardOption=" --poscases 2416 --negcases 1218 "
runall.sh中第21行，按你的样本文件夹所在(InDir)及输出文件所在(OutDir)修改：

1

2

3

4

OutDir=./HOG

InDir=./

OutFile=$OutDir/record

CMDLINE=$OutDir/record
训练（training）过程中会在OutDir（=./HOG）中产生约两个G的临时文件，所以要确保硬盘空间足够；这些临时文件在结束时会被删除，只留下model_4BiSVMLight.alt。
整个训练过程分4步，有4条屏幕输出信息；最可能出现的错误是样本文件路径不对，可在pos.list、neg.list中改用图像文件的绝对路径。

1

2

3

4

First iteration complete

Hard examples created

Doing second learning

Second iteration complete
如果你用的是自己的样本，注意修改其他参数（待究），如正样本的大小：

1

2

WIDTH=64; export WIDTH

HEIGHT=128; export HEIGHT

有了模型，怎么去做目标检测呢？你可以做以下的试验：

使用bin在OLTbinaries/bin/中classify_rhog： classify_rhog [待检测图片] [目标位置数据结果保存的文本文件] [模型文件] -i [位置画在图像文件]
```
amadeuzou@ubuntu:~/Projects/opencv/OLTbinaries/bin$ ./classify_rhog  person-1.jpg  result.txt model_4BiSVMLight.alt -i result.jpg
```
结果：

使用lib在OLTbinaries/lib/中：

/*

 *

 =============================================

 *

 *      

 Filename:  lib-detector.cpp

 *

 *   

 Description:  Code to detect object

 *

 *      

 Compiler:  gcc

 *

 *        

 Author:  Amadeu zou

 *        

 URL:  www.youtueye.com

 *

 *

 =============================================

 */

#include

 <cv.h>

#include

 <highgui.h>

#include

 <string>

#include

 <iostream>

#include

 <algorithm>

#include

 <iterator>

#include

 <X11/Xlib.h>

#include

 <Imlib2.h>

#include

 <lear/interface/windetect.h>// change this path as appropriate.

using

namespace 

std;

std::list<DetectedRegion>

 detector(char*

 imgf, WinDetectClassify windetect, LinearClassify* classifier)

{

    std::list<DetectedRegion>

 detections;

    //

 read image

    Imlib_Image

 image = imlib_load_image(imgf);

    //

 if the load was successful

    if

(image)

    {//

 set the image we loaded as the current context image to work on

        imlib_context_set_image(image);

    }

else

{

        //std::cerr

 << "Unable to read image: " << argv[1] << std::endl;

        return

detections;

    }

    int

width  = imlib_image_get_width(),

        height

 = imlib_image_get_height();

    typedef

unsigned char

uchar;

    DATA32*

 data = imlib_image_get_data_for_reading_only();

    uchar*

 imagedata = new

uchar[3*width*height*sizeof(uchar)];

    for

(int

j= 0; j< height; ++j)

    for

(int

i= 0; i< width; ++i) {

        uchar*

 pixel = imagedata+(i+j*width)*3;

        int

argb = data[i+j*width];

        pixel[0]

 = static_cast<uchar>((argb

 & 0x00FF0000)>>16);

        pixel[1]

 = static_cast<uchar>((argb

 & 0x0000FF00)>> 8);

        pixel[2]

 = static_cast<uchar>((argb

 & 0x000000FF)    );

    }

    imlib_free_image();

    //

 now get detections

    windetect.test(*classifier,

 detections, imagedata, width, height);

    delete[]

 imagedata;

    return

detections;

}

int

main(int

argc, char**

 argv) {

    if

(argc != 4) {

        std::cout

 << "Error"

<< std::endl;

        return

0;

    }

    char

modelpath[256];

    strcpy(modelpath,argv[2]);

    string

 model_file(modelpath) ;

    //

 initialize the person detector. All default parameters are set for person detector.

    WinDetectClassify

 windetect;//

 use default person detector.

    RHOGDenseParam

 desc;

    LinearClassify*

 classifier = NULL;//

 initialize it to 64x128 person detector.

    classifier

 = new

LinearClassify(model_file, 0);

    windetect.init(&desc);

//

 initialize the descriptor computation

    std::list<DetectedRegion>

 detections;

    detections

 = detector(argv[1], windetect, classifier);

    //draw

 region in image

    IplImage*

 img = cvLoadImage(argv[1],1);

    for(list<DetectedRegion>::iterator

 itor=detections.begin();itor!=detections.end();++itor)

    {

         cvRectangle(img,cvPoint(itor->x,itor->y),cvPoint(itor->x+itor->width,itor->y+itor->height),cvScalar(0,0,255),2);

    }

    cvSaveImage(argv[3],img);

    cvReleaseImage(&img);

    //print

 detections

    std::copy(detections.begin(),

 detections.end(), std::ostream_iterator<DetectedRegion>(std::cout, "\n"));

    return

0;

}

编译：

g++ `pkg-config --cflags --libs opencv` -O3 -o lib-detector lib-detector.cpp -I. -I/usr/include \
    -L. -lcmdline -lcvip -lclassifier -llearutil -lboost_filesystem-gcc -lblitz -L/usr/lib \
    -lImlib2 -lfreetype -lz -L/usr/X11R6/lib \
    -lX11 -lXext -ldl -lm

结果：

amadeuzou@ubuntu:~/Projects/opencv/OLTbinaries/lib$

 ./lib-detector

person-1.jpg   model_4BiSVMLight.alt  result.jpg

   298   

 215    145    290    2.2674 1.10256009e-01

    13     

 9    237    475   3.71704 1.31164089e-01

   234    

 -7    230    460   3.59693 1.35627717e-01

使用源码在learcode/app/中classify_rhog.cpp：配置及使用见learcode/README

将模型数据copy到opencv的getDefaultPeopleDetector()，但要重新编译objdetect链接，没必要这么搞。从model中读取数据如下（共classifier->length() = 3780 + 1数据，1为classifier->linearbias）：

int

main(int

argc, char**

 argv) {

    string

 model_file = "model_4BiSVMLight.alt"

;

    LinearClassify*

 classifier = NULL;

    classifier

 = new

LinearClassify(model_file, 0);

    cout<<"classifier->length()

 : "<<classifier->length()<<endl;

    FILE*

 fptr = fopen("hog.txt","w");

        for(int

i = 0;i<classifier->length();i++){

            fprintf(fptr,"%1.8f

 ",classifier->linearwt[i]);

            if(0

 == (i+1) % 10) fprintf(fptr,"%s","\n");

        }

    fprintf(fptr,"%1.8f

 ",classifier->linearbias);

    fclose(fptr);

    return

0;

}

opencv直接载入model: 将peopledetect.cpp第49行hog.setSVMDetector(HOGDescriptor::getDefaultPeopleDetector())里面的vector换成你的模型数据，模型数据从model_4BiSVMLight.alt中载入，方法参照learcode/lib/windetect.cpp第1175行处LinearClassify::LinearClassify(std::string& modelfile, const int verbose)：

/*

 *

 =====================================================================================

 *

 *      

 Filename:  model-detector.cpp

 *

 *   

 Description:  Code to detect object

 *

 *      

 Compiler:  gcc

 *

 *        

 Author:  Amadeu zou

 *        

 URL:  www.youtueye.com

 *

 *

 =====================================================================================

 */

#include

 <fstream>

#include

 <iostream>

#include

 <vector>

#include

 <stdio.h>

#include

 <string.h>

#include

 <ctype.h>

#include

 "opencv2/imgproc/imgproc.hpp"

#include

 "opencv2/objdetect/objdetect.hpp"

#include

 "opencv2/highgui/highgui.hpp"

using

namespace 

std;

using

namespace 

cv;

/* Reads exactly `count` items of `item_size` bytes each into `dst`.
 * Returns false on a short read (end of file or I/O error). */
static bool lear_read_items(FILE* fp, void* dst, size_t item_size, size_t count)
{
    return fread(dst, item_size, count, fp) == count;
}

/* Consumes `count` bytes from `fp` without keeping them.
 * Returns false when fewer than `count` bytes are available. */
static bool lear_skip_bytes(FILE* fp, long count)
{
    char scratch[256];
    while (count > 0)
    {
        const size_t chunk = (count < static_cast<long>(sizeof(scratch)))
                                 ? static_cast<size_t>(count)
                                 : sizeof(scratch);
        if (fread(scratch, 1, chunk, fp) != chunk)
            return false;
        count -= static_cast<long>(chunk);
    }
    return true;
}

/* Parses an already opened model file and stores the detector vector in
 * `detector`. Returns false (after printing a message) on any failure;
 * `detector` is only modified on success. */
static bool lear_parse_model(FILE* fp, std::vector<float>& detector)
{
    char version_buffer[10];
    if (!lear_read_items(fp, version_buffer, sizeof(char), sizeof(version_buffer)))
    {
        std::cout << "Unable to read version" << std::endl;
        return false;
    }
    // Guarantee termination so strcmp cannot run past the buffer.
    version_buffer[sizeof(version_buffer) - 1] = '\0';
    if (strcmp(version_buffer, "V6.01") != 0)
    {
        std::cout << "Version of model-file does not match version of svm_classify!" << std::endl;
        return false;
    }

    // read version number
    int version = 0;
    if (!lear_read_items(fp, &version, sizeof(int), 1))
    {
        std::cout << "Unable to read version number" << std::endl;
        return false;
    }
    if (version < 200)
    {
        std::cout << "Does not support model file compiled for light version" << std::endl;
        return false;
    }

    // NOTE(review): the header fields are read with the native sizeof(long).
    // A model written by a build with a different long width would not parse;
    // confirm against the tool that produced the file.
    long kernel_type = 0;
    long poly_degree = 0;     // read and ignored
    double rbf_gamma = 0.0;   // read and ignored
    double coef_lin = 0.0;    // read and ignored
    double coef_const = 0.0;  // read and ignored
    long custom_len = 0;      // length of a custom byte string that is skipped
    long totwords = 0;        // number of weights used for the detector
    long totdoc = 0;          // read and ignored
    long sv_num = 0;          // read and ignored
    double linearbias = 0.0;

    const bool header_ok =
        lear_read_items(fp, &kernel_type, sizeof(long),   1) &&
        lear_read_items(fp, &poly_degree, sizeof(long),   1) &&
        lear_read_items(fp, &rbf_gamma,   sizeof(double), 1) &&
        lear_read_items(fp, &coef_lin,    sizeof(double), 1) &&
        lear_read_items(fp, &coef_const,  sizeof(double), 1) &&
        lear_read_items(fp, &custom_len,  sizeof(long),   1) &&
        custom_len >= 0 &&
        lear_skip_bytes(fp, custom_len) &&
        lear_read_items(fp, &totwords,    sizeof(long),   1) &&
        lear_read_items(fp, &totdoc,      sizeof(long),   1) &&
        lear_read_items(fp, &sv_num,      sizeof(long),   1) &&
        lear_read_items(fp, &linearbias,  sizeof(double), 1) &&
        totwords >= 0;
    if (!header_ok)
    {
        std::cout << "Unable to read model parameters" << std::endl;
        return false;
    }

    if (kernel_type != 0)
    {
        std::cout << "Only supports linear SVM model files" << std::endl;
        return false;
    }

    /* linear kernel: the first totwords stored weights become the detector.
     * They are read one at a time so that a corrupt totwords can never
     * trigger a huge allocation; the read simply fails at end of file. */
    std::vector<float> weights;
    for (long i = 0; i < totwords; ++i)
    {
        double w = 0.0;
        if (!lear_read_items(fp, &w, sizeof(double), 1))
        {
            std::cout << "Unable to read model weights" << std::endl;
            return false;
        }
        weights.push_back(static_cast<float>(w));
    }
    // The last detector element is the negated bias.
    weights.push_back(static_cast<float>(-linearbias));

    detector.swap(weights);
    return true;
}

/**
 * Loads a binary linear-SVM model file and converts it into the float vector
 * passed to HOGDescriptor::setSVMDetector.
 *
 * Layout read from the file, in order: a 10-byte version string ("V6.01"),
 * an int version number (must be >= 200), kernel type, several kernel
 * parameters and a custom byte string (all ignored), totwords, two further
 * counters (ignored), the bias, and the weights.
 *
 * @param model_file  path of the model file; a null pointer is treated like
 *                    an unopenable file.
 * @return totwords weights followed by -bias, or an empty vector when the
 *         file cannot be opened, is truncated, has an unexpected version, or
 *         does not hold a linear kernel (kernel type 0). A message is printed
 *         to standard output in every failure case.
 */
std::vector<float> load_lear_model(const char* model_file)
{
    std::vector<float> detector;

    FILE* modelfl = model_file ? fopen(model_file, "rb") : NULL;
    if (modelfl == NULL)
    {
        std::cout << "Unable to open the modelfile" << std::endl;
        return detector;
    }

    // Single exit point below: the file is closed on every path.
    if (!lear_parse_model(modelfl, detector))
        detector.clear();

    fclose(modelfl);
    return detector;
}

/**
 * Prints a short description of this demo and its command-line usage to
 * standard output.
 *
 * Unlike the stock peopledetect sample, this program takes a second argument:
 * the model file whose weights are installed with setSVMDetector.
 */
void help()
{
    printf(
            "\nDemonstrate the use of the HoG descriptor using a detector loaded from a model file:\n"
            "  hog.setSVMDetector(load_lear_model(<model_file>));\n"
            "Usage:\n"
            "./model-detect (<image_filename> | <image_list>.txt) <model_file>\n\n");
}

int

main(int

argc, char**

 argv)

{

    Mat

 img;

    FILE*

 f = 0;

    char

_filename[1024];

    if(

 argc != 3 )

    {

        cout<<"ERROR"<<endl;

        return

0;

    }

    img

 = imread(argv[1]);

    if(

 img.data )

    {

        strcpy(_filename,

 argv[1]);

    }

    else

    {

        f

 = fopen(argv[1],

"rt");

        if(!f)

        {

            fprintf(

 stderr, "ERROR:

 the specified file could not be loaded\n");

            return

-1;

        }

    }

    HOGDescriptor

 hog;

    //hog.setSVMDetector(HOGDescriptor::getDefaultPeopleDetector());

    vector<float>

 detector = load_lear_model(argv[2]);

    hog.setSVMDetector(detector);

    namedWindow("people

 detector",

 1);

    for(;;)

    {

        char*

 filename = _filename;

        if(f)

        {

            if(!fgets(filename,

 (int)sizeof(_filename)-2,

 f))

                break;

            //while(*filename

 && isspace(*filename))

            // 

 ++filename;

            if(filename[0]

 == '#')

                continue;

            int

l = strlen(filename);

            while(l

 > 0 && isspace(filename[l-1]))

                --l;

            filename[l]

 = '\0';

            img

 = imread(filename);

        }

        printf("%s:\n",

 filename);

        if(!img.data)

            continue;

        fflush(stdout);

        vector<Rect>

 found, found_filtered;

        double

t = (double)getTickCount();

        //

 run the detector with default parameters. to get a higher hit-rate

        //

 (and more false alarms, respectively), decrease the hitThreshold and

        //

 groupThreshold (set groupThreshold to 0 to turn off the grouping completely).

        hog.detectMultiScale(img,

 found, 0, Size(8,8), Size(32,32), 1.05, 2);

        t

 = (double)getTickCount()

 - t;

        printf("tdetection

 time = %gms\n",

 t*1000./cv::getTickFrequency());

        size_t

i, j;

        for(

 i = 0; i < found.size(); i++ )

        {

            Rect

 r = found[i];

            for(

 j = 0; j < found.size(); j++ )

                if(

 j != i && (r & found[j]) == r)

                    break;

            if(

 j == found.size() )

                found_filtered.push_back(r);

        }

        for(

 i = 0; i < found_filtered.size(); i++ )

        {

            Rect

 r = found_filtered[i];

            //

 the HOG detector returns slightly larger rectangles than the real objects.

            //

 so we slightly shrink the rectangles to get a nicer output.

            r.x

 += cvRound(r.width*0.1);

            r.width

 = cvRound(r.width*0.8);

            r.y

 += cvRound(r.height*0.07);

            r.height

 = cvRound(r.height*0.8);

            rectangle(img,

 r.tl(), r.br(), cv::Scalar(0,255,0), 3);

        }

        imshow("people

 detector",

 img);

        int

c = waitKey(0) & 255;

        if(

 c == 'q'

|| c == 'Q'

|| !f)

            break;

    }

    if(f)

        fclose(f);

    return

0;

}

结果：

amadeuzou@ubuntu:~/Projects/opencv/OLTbinaries/lib$

 ./model-detect 

person-1.jpg model_4BiSVMLight.alt

到这里，opencv peopledetect从模型训练到目标检测的过程便完成了，训练过程用的是算法作者的程序，不知opencv是否已将这一过程融入。
下一篇我将介绍opencv latent svm detect从模型训练到目标检测的过程。

【上篇】Python练手程序——文件备份工具
【下篇】【原理】HOG+SVM

作者: rove

该日志由 rove 于10年前发表在综合分类下，最后更新于 2014年09月05日.
转载请注明: OpenCV HOG PeopleDetector：从训练到检测 | 学步园 +复制链接

抱歉!评论已关闭.

学步园

OpenCV HOG PeopleDetector：从训练到检测

作者: rove

书签

最新文章New

本站推荐

返回首页