//
#include "stdafx.h"
#include <iostream>
#include <sstream>
#include <string>
#include <regex>
#include <Windows.h>
using namespace std;
/// Extracts every segment of `src` enclosed by `extFlagBegin` ... `extFlagEnd`
/// using a single linear scan (no regular expressions involved).
///
/// Scanning rules:
///  - A segment opens at the first `extFlagBegin` met while no segment is open.
///  - It closes at the next `extFlagEnd`; everything in between belongs to the
///    segment, including further `extFlagBegin` characters.
///  - A segment still open when `src` ends is discarded.
///
/// @tparam LT  Container type offering `push_back(std::basic_string<E>)`.
/// @tparam E   Character type of the text and of both delimiters.
/// @param src            Text to scan. Taken by const reference so the caller's
///                       string is not copied on every call.
/// @param extFlagBegin   Character that opens a segment.
/// @param extFlagEnd     Character that closes a segment.
/// @param extData        Receives the segments in order of appearance; it is
///                       appended to and never cleared here.
/// @param isIncludeFlag  When true, stored segments keep both delimiters.
/// @return Number of segments appended to `extData`.
template<typename LT, typename E>
int Extraction(const std::basic_string<E>& src, E extFlagBegin, E extFlagEnd, LT& extData, bool isIncludeFlag=false)
{
    int extracted = 0;
    std::basic_string<E> segment;
    bool inSegment = false;

    for (const E ch : src)
    {
        if (!inSegment)
        {
            // Outside a segment only the opening delimiter matters.
            if (ch == extFlagBegin)
            {
                inSegment = true;
                if (isIncludeFlag)
                {
                    segment.push_back(ch);
                }
            }
            continue;
        }

        // Inside a segment: the closing delimiter is tested first, so a
        // delimiter pair made of the same character still terminates.
        if (ch != extFlagEnd)
        {
            segment.push_back(ch);
            continue;
        }

        if (isIncludeFlag)
        {
            segment.push_back(ch);
        }
        extData.push_back(segment);
        segment.clear();   // keeps its capacity for the next segment
        inSegment = false;
        ++extracted;
    }
    return extracted;
}
/// Regex-based counterpart of Extraction(): collects every segment of `src`
/// that starts with `extFlagBegin`, continues with any characters other than
/// `extFlagEnd`, and ends with `extFlagEnd`.
///
/// Both delimiters are matched literally. Characters that carry a meaning in
/// the ECMAScript regex grammar (for example '(' , ')' , '[' , ']' , '.') are
/// backslash-escaped before the pattern is built, so any punctuation pair can
/// be used as delimiters.
///
/// The regex is rebuilt and recompiled on every call, which dominates the cost
/// of this function for short inputs.
///
/// @tparam LT  Container type offering `push_back(std::basic_string<E>)`.
/// @tparam E   Character type; must be one `std::basic_regex` supports
///             (`char` or `wchar_t`).
/// @param src            Text to scan (not copied).
/// @param extFlagBegin   Character that opens a segment.
/// @param extFlagEnd     Character that closes a segment.
/// @param extData        Receives the segments in order of appearance; it is
///                       appended to and never cleared here.
/// @param isIncludeFlag  When true, stored segments keep both delimiters.
/// @return Number of segments appended to `extData`.
/// @throws std::regex_error if the generated pattern cannot be compiled.
template<typename LT, typename E>
int ExtractionRegex(const std::basic_string<E>& src, E extFlagBegin, E extFlagEnd, LT& extData, bool isIncludeFlag=false)
{
    typedef std::basic_string<E> string_type;
    typedef std::regex_token_iterator<typename string_type::const_iterator> token_iterator;

    // Appends `ch` to `pattern` as a literal, escaping regex metacharacters.
    // Only punctuation is escaped: a backslash in front of a letter or digit
    // would turn it into a class escape or back-reference instead.
    auto appendLiteral = [](string_type& pattern, E ch)
    {
        const char* special = "\\^$.|?*+()[]{}-";
        for (const char* p = special; *p != '\0'; ++p)
        {
            if (ch == static_cast<E>(*p))
            {
                pattern.push_back(static_cast<E>('\\'));
                break;
            }
        }
        pattern.push_back(ch);
    };

    // Pattern shape: BEGIN [^END]* END
    string_type patternText;
    appendLiteral(patternText, extFlagBegin);
    patternText.push_back(static_cast<E>('['));
    patternText.push_back(static_cast<E>('^'));
    appendLiteral(patternText, extFlagEnd);
    patternText.push_back(static_cast<E>(']'));
    patternText.push_back(static_cast<E>('*'));
    appendLiteral(patternText, extFlagEnd);

    const std::basic_regex<E> pattern(patternText);

    int count = 0;
    const token_iterator last;
    for (token_iterator it(src.begin(), src.end(), pattern); it != last; ++it)
    {
        string_type segment = it->str();
        if (!isIncludeFlag)
        {
            // Every match holds both delimiters, so its length is at least 2.
            segment = segment.substr(1, segment.size() - 2);
        }
        extData.push_back(segment);
        ++count;
    }
    return count;
}
// Demo and micro-benchmark driver: runs the regex-based and the hand-written
// extractor over one sample string, prints what each of them finds (without
// and with the delimiters), then times 1000 rounds of each and prints the
// elapsed CPU seconds before pausing the console.
int _tmain(int argc, _TCHAR* argv[])
{
    std::string sample = "jay@so<hj>u<>t<hr>idg<evnnnnnni av>deo@com";
    std::vector<std::string> found;

    // Writes every collected segment on its own line.
    auto dump = [&found]()
    {
        for (std::string& item : found)
        {
            std::cout << item << std::endl;
        }
    };

    // Regex extractor: delimiters stripped, then delimiters kept.
    ExtractionRegex(sample, '<', '>', found);
    dump();
    found.clear();
    ExtractionRegex(sample, '<', '>', found, true);
    dump();

    // Hand-written extractor: same two variants.
    found.clear();
    Extraction(sample, '<', '>', found);
    dump();
    found.clear();
    Extraction(sample, '<', '>', found, true);
    dump();

    const int rounds = 1000;

    // Timing of the regex extractor (both variants per round).
    clock_t begun = clock();
    for (int round = 0; round < rounds; ++round)
    {
        found.clear();
        ExtractionRegex(sample, '<', '>', found);
        found.clear();
        ExtractionRegex(sample, '<', '>', found, true);
    }
    clock_t ended = clock();
    std::cout << static_cast<double>(ended - begun) / CLOCKS_PER_SEC << std::endl;

    // Timing of the hand-written extractor (both variants per round).
    found.clear();
    begun = clock();
    for (int round = 0; round < rounds; ++round)
    {
        found.clear();
        Extraction(sample, '<', '>', found);
        found.clear();
        Extraction(sample, '<', '>', found, true);
    }
    ended = clock();
    std::cout << static_cast<double>(ended - begun) / CLOCKS_PER_SEC << std::endl;

    system("pause");
    return 0;
}
-
baoming9999:回复 森哥先森 :互相讨论吧。用户的每一个操作,花费的时间不应该超过1.5秒,最好在0.8秒内。1000条数据的处理还是很常见的,而且还要有别的处理,所以在处理多条数据时,每一个函数都要斟酌一下。
-
//
#include <iostream>
#include <sstream>
#include <string>
#include <regex>
#include "pcre.h"
#pragma comment (lib, "pcre.lib")
#pragma comment (lib, "pcreposix.lib")
{
WXRegex() : re(nullptr)
{
if(re != nullptr)
{
free(re);
}
}
~WXRegex()
{
}
void Compile(const char* pattern="(?<=<)[^>]*(?=>)")
{
int erroffset;
const char *error;
if(re != nullptr)
{
free(re);
}
if (re == NULL)
{
throw exception(error);
}
}
int Extraction(const char* src, std::vector<string>& out)
{
int count= 0;
int rc=0;
int ovector[OVECCOUNT];
const char *captured_string;
rc = pcre_exec(re, NULL, src, strlen(src), exec_offset, 0, ovector, OVECCOUNT);
if (rc < 0)
{
break;
}
for (int i = 0; i < rc; i++)
{
pcre_get_substring( src, ovector, rc, 0, &captured_string );
out.push_back(captured_string);
}
}
};
// Demo / timing driver for WXRegex.
// NOTE(review): the function header that should precede this brace is not
// present in this listing, and `c`, `start` and `finish` are used below
// without a visible declaration - presumably those lines were lost; confirm
// against the original file before building.
{
// Sample text with bracketed segments; multi-byte characters appear both
// inside and outside the brackets.
std::string str = "<sd><jay@>so<hj>ut<h在r>i在工城某工dg<evnnn厅nnni av>deo@com";
WXRegex wxr;
// First pattern: a bracketed segment, brackets included in the match.
wxr.Compile("<[^>]*>");
std::vector<string> out;
for_each(out.begin(),out.end(), [](string& c){ std::cout << c <<endl;});
// NOTE(review): the next line ends with a block-comment terminator although
// no opener is visible above; it looks like the start of a commented-out
// region was lost - confirm.
out.clear();*/
c = wxr.Extraction(str.c_str(), out);
for_each(out.begin(),out.end(), [](string& c){ std::cout << c <<endl;});
start = clock();
// 1000 extractions with the bracket-inclusive pattern.
for(int i=0;i<1000;i++)
{
out.clear();
c = wxr.Extraction(str.c_str(), out);
}
// Second pattern: look-behind / look-ahead keep the brackets out of the match.
wxr.Compile("(?<=<)[^>]*(?=>)");
// 1000 extractions with the bracket-exclusive pattern.
for(int i=0;i<1000;i++)
{
out.clear();
c = wxr.Extraction(str.c_str(), out);
}
out.clear();
c = wxr.Extraction(str.c_str(), out);
for_each(out.begin(),out.end(), [](string& c){ std::cout << c <<endl;});
// Prints (finish - start) in seconds.
// NOTE(review): `finish` is never assigned in the visible code, and `start`
// was taken before the first loop, so it is unclear which span this is meant
// to report - verify.
std::cout << (double)(finish - start) / CLOCKS_PER_SEC << endl;
}