此验证码识别技术有预处理、特征提取、神经网络识别几部分,参考过车牌识别技术。程序实现分样本下载器、用户交互程序、MLP建构程序三块,使用OpenCV,喜欢的可以留意下AnnMLP的使用方法,国内这方面的资料还是比较少的,欢迎交流QQ:13895354。
预处理技术是专门针对某种特定验证码设计的,该验证码的特点有:英文+数字、彩色、倾斜、无粘连、有不规则噪声、有删除线。至于具体是哪家网站的验证码,我就不方便透露了。
(用户交互程序)
/*
 * Interactive recognition program.
 *
 * Loads a trained network from "mpl.xml", then walks the sample images
 * .//sample//1.jpg ... .//sample//100.jpg. Each image is binarized, its
 * outer border is painted white, it is dilated and inverted, and its
 * connected components are extracted. Every component at least 10 pixels
 * tall is rendered on its own, resized to 16x16, turned into a 256-element
 * 0/1 feature vector and fed to the network; the predicted character and
 * the component's x position are printed. A key press advances to the
 * next image.
 *
 * The "#if 0" branch is the (disabled) labeling mode: instead of
 * predicting, it shows the glyph, reads the label from the keyboard and
 * appends one record (36 floats one-hot label + 256 floats features) to
 * the file "batch".
 *
 * Images that cannot be loaded are reported and skipped.
 * Always returns 0.
 */
int main( int argc, char** argv )
{
    const int kSide = 16;               // normalized glyph is kSide x kSide pixels
    const int kInputs = kSide * kSide;  // length of the feature vector
    const int kClasses = 36;            // classes 0-9 -> '0'-'9', 10-35 -> 'A'-'Z'

    cvNamedWindow("1");
    cvNamedWindow("2");
    IplConvKernel* se = cvCreateStructuringElementEx(2, 2, 1, 1, CV_SHAPE_CROSS);
    CvANN_MLP mlp;
    mlp.load( "mpl.xml" );
    for(int i=1; i<=100; i++)
    {
        // Build the file name of the i-th sample.
        char fn[1024];
        sprintf(fn, ".//sample//%d.jpg", i);
        // Load as single-channel grayscale; skip samples that are missing
        // or unreadable instead of dereferencing a NULL image below.
        IplImage* gray = cvLoadImage(fn, CV_LOAD_IMAGE_GRAYSCALE);
        if( !gray )
        {
            cerr<<"cannot load "<<fn<<endl;
            continue;
        }
        // Binarize. NOTE(review): with CV_THRESH_OTSU the explicit threshold
        // (180) is expected to be replaced by Otsu's estimate - see OpenCV docs.
        cvThreshold(gray, gray, 180, 255, CV_THRESH_BINARY | CV_THRESH_OTSU);
        // Remove the frame: paint the outermost pixel ring white.
        cvRectangle(gray, cvPoint(0, 0), cvPoint(gray->width-1, gray->height-1), CV_RGB(255, 255, 255));
        // Denoise with a small cross-shaped dilation.
        cvDilate(gray, gray, se);
        /*
        // Disabled: skew correction (horizontal shear by tan(10 degrees)).
        cvShowImage("1", gray);
        IplImage* rote = cvCreateImage( cvGetSize(gray), IPL_DEPTH_8U, 1 );
        double t = tan(10.0 / 180.0 * CV_PI);
        int w = gray->width;
        int h = gray->height;
        for(int i = 0; i<h; i++)
        {
            unsigned char* lineGray = (unsigned char*)gray->imageData + gray->widthStep * i;
            unsigned char* lineRote = (unsigned char*)rote->imageData + rote->widthStep * i;
            for(int j = 0; j<w; j++)
            {
                int j2 = j - ((int)(i*t+0.5));
                if (j2<0)
                    j2+=w;
                *(lineRote+j) = *(lineGray+j2);
            }
        }
        cvCopy(rote, gray);
        cvReleaseImage(&rote);
        */
        // Invert the image, then extract connected components as contours.
        cvXorS(gray, cvScalarAll(255), gray, 0);
        cvShowImage("1", gray);
        CvMemStorage* storage = cvCreateMemStorage();
        CvSeq* contour = NULL;
        cvFindContours(gray, storage, &contour, sizeof(CvContour), CV_RETR_CCOMP, CV_CHAIN_APPROX_SIMPLE);
        // Examine every top-level contour.
        for(CvSeq* p = contour; p; p = p->h_next)
        {
            CvRect rect = cvBoundingRect(p, 0);
            if( rect.height<10 )// a character must be at least 10 pixels tall
                continue;

            // Render this component alone into an image of its bounding-box size.
            // NOTE(review): gray is not read again in this function after
            // cvFindContours, so this clear looks redundant - confirm before removing.
            cvZero(gray);
            IplImage* character = cvCreateImage(cvSize(rect.width, rect.height), IPL_DEPTH_8U, 1);
            cvZero(character);
            cvDrawContours(character, p, CV_RGB(255, 255, 255), CV_RGB(0, 0, 0), -1, -1, 8, cvPoint(-rect.x, -rect.y));
            // Normalize to kSide x kSide and re-binarize the interpolated result.
            IplImage* normal = cvCreateImage(cvSize(kSide, kSide), IPL_DEPTH_8U, 1);
            cvResize(character, normal, CV_INTER_AREA);
            cvThreshold(normal, normal, 0, 255, CV_THRESH_BINARY | CV_THRESH_OTSU);
            // Feature vector: 1.0 for white (255) pixels, 0.0 otherwise.
            // Pixels are read as unsigned bytes so the test does not depend on
            // whether plain char is signed, and rows are addressed through
            // widthStep rather than assuming the rows are tightly packed.
            float input[kInputs];
            for(int row=0; row<kSide; row++)
            {
                const unsigned char* line = (const unsigned char*)normal->imageData + normal->widthStep * row;
                for(int col=0; col<kSide; col++)
                    input[row*kSide+col] = (line[col]==255) ? 1.0f : 0.0f;
            }
#if 0
            // Labeling mode: the user types the character shown in window "2".
            cvShowImage("2", normal);
            char c = cvWaitKey(0);
            if(c==27)
                return 0;
            // Class encoding: '0'-'9' -> 0-9, letters (either case) -> 10-35.
            unsigned char cc = 255;
            if(c>='A'&&c<='Z')
                cc=c-'A'+10;
            else if(c>='a'&&c<='z')
                cc=c-'a'+10;
            else if(c>='0'&&c<='9')
                cc=c-'0';
            if(cc!=255)
            {
                // One-hot output vector for the typed class.
                float output[kClasses];
                for(int k=0; k<kClasses; k++)
                    output[k] = 0.0f;
                output[cc] = 1.0f;
                // Append the record (label first, then features) to the batch file.
                static FILE* batch = fopen("batch", "wb");
                if(batch)
                {
                    fwrite(output, sizeof(output), 1, batch);
                    fwrite(input, sizeof(input), 1, batch);
                }
                static int count = 0;
                cout<<count++<<endl;
            }
#else
            // Recognition: the class with the largest network output wins.
            CvMat* output = cvCreateMat( 1, kClasses, CV_32F );
            CvMat inputMat = cvMat( 1, kInputs, CV_32F, input);
            mlp.predict( &inputMat, output );
            CvPoint max_loc = {0,0};
            cvMinMaxLoc( output, NULL, NULL, NULL, &max_loc, NULL );
            int best = max_loc.x;// predicted class index
            char c = (char)( best<10 ? '0'+best : 'A'+best-10 );
            cout<<c<<"("<<rect.x<<")"<<" ";
            cvReleaseMat( &output );
#endif
            cvReleaseImage(&character);
            cvReleaseImage(&normal);
        }
        cout<<endl;
        cvWaitKey(0);
        cvReleaseMemStorage(&storage);
        cvReleaseImage(&gray);
    }
    cvReleaseStructuringElement(&se);
    cvDestroyAllWindows();
    return 0;
}
事实上更有价值的是OpenCV中的AnnMLP(CvANN_MLP),其技术很成熟;下面的程序参考了OpenCV自带的letter_recog范例程序。
(MLP建构程序)
/*
 * Debug helper: prints the leading rows of a matrix to stdout.
 *
 * At most the first 4 rows are printed (fewer when the matrix has fewer
 * rows, so a short matrix is never read out of bounds). The values of a
 * row are written back to back with no separator, and each row is
 * followed by a blank line.
 *
 * mat - matrix to print; elements are read through data.fl, so it is
 *       assumed to hold 32-bit floats (CV_32F) - TODO confirm at call sites.
 */
void print_mat(CvMat& mat)
{
    const int kMaxRows = 4;
    const int rows = mat.rows < kMaxRows ? mat.rows : kMaxRows;
    // step is in bytes; convert it to a stride in float elements.
    const int stride = mat.step / (int)sizeof(float);
    for(int r=0; r<rows; r++)
    {
        const float* line = mat.data.fl + r*stride;
        for(int c=0; c<mat.cols; c++)
        {
            cout<<line[c];
        }
        cout<<endl<<endl;
    }
}
int main( int argc, char *argv[] )
{
// 读入结果responses 特征data
FILE* f = fopen( "batch", "rb" );
fseek(f, 0l, SEEK_END);
long size = ftell(f);
fseek(f, 0l, SEEK_SET);
int count = size/4/(36+256);
CvMat* batch = cvCreateMat( count, 36+256, CV_32F );
fread(batch->data.fl, size-1, 1, f);
CvMat outputs, inputs;
cvGetCols(batch, &outputs, 0, 36);
cvGetCols(batch, &inputs, 36, 36+256);
// 新建MPL
CvANN_MLP mlp;
int layer_sz[] = { 256, 20, 36 };
CvMat layer_sizes = cvMat( 1, 3, CV_32S, layer_sz );
mlp.create( &layer_sizes );
// 训练
system( "time" );
mlp.train( &inputs, &outputs, NULL, NULL,
CvANN_MLP_TrainParams(cvTermCriteria(CV_TERMCRIT_ITER,300,0.01), CvANN_MLP_TrainParams::RPROP, 0.01)
);
system( "time" );
// 存储MPL
mlp.save( "mpl.xml" );
// 测试
int right = 0;
CvMat* output = cvCreateMat( 1, 36, CV_32F );
for(int i=0; i<count; i++)
{
CvMat input;
cvGetRow( &inputs, &input, i );
mlp.predict( &input, output );
CvPoint max_loc = {0,0};
cvMinMaxLoc( output, NULL, NULL, NULL, &max_loc, NULL );
int best = max_loc.x;// 识别结果
int ans = -1;// 实际结果
for(int j=0; j<36; j++)
{
if( outputs.data.fl[i*(outputs.step/4)+j] == 1.0f )
{
ans = j;
break;
}
}
cout<<(char)( best<10 ? '0'+best : 'A'+best-10 );
cout<<(char)( ans<10 ? '0'+ans : 'A'+ans-10 );
if( best==ans )
{
cout<<"+";
right++;
}
//cin.get();
cout<<endl;
}
cvReleaseMat( &output );
cout<<endl<<right<<"/"<<count<<endl;
cvReleaseMat( &batch );
system( "pause" );
return 0;
}
上面的代码都是vc6控制台程序,OpenCV入门的都懂的吧。