鉴于MFC的CStringList过于简单,且无法高效处理文件, 所以结合快速排序、二分查找、内存映射在CStringList的基础之上写了CStringListEx 。 该类的效率很高, 同样读取100多M的文件, BCB的TStringList大概要20多秒(包括处理),CStringListEx只要3~4秒。 各位可以应用在自己的项目中试试该类的效果。 Copyright : wxy3064one##163.com
直接给出源码:
//.h文件:
#ifndef _Q_STRING_LIST_EX_H
#define _Q_STRING_LIST_EX_H
#if _MSC_VER > 1000
#pragma once
#endif // _MSC_VER > 1000
#define USE_WIN_MEMORY_MAP
enum FILE_LINE_TYPE{FLT_WINDOW = 0,FLT_UNIX = 1,FLT_APPLE = 2};
class CStringListEx : public CStringList
{
bool m_bSorted;
bool CustomFind(CString value,int& index);
public:
CStringListEx(int nBlockSize = 10);
void Sort();
void Clear();
int IndexOf(CString value);
void Add(const CString& NewStr);
CString operator []( int Index );
void LoadFromFile(LPCTSTR lpszFileName);
void SaveToFile(LPCTSTR lpszFileName);
};
#endif
//.cpp文件
#include "StringListEx.h"
#ifndef USE_WIN_MEMORY_MAP
#include <fstream>
using namespace std;
#endif
/*
二分法查找
*/
template <typename T>
bool BinSearch(T* AList,int Low,int High,T value,int (*compare_func)(T,T),int &index)
{
bool ret = false;
int I,C,L = Low,R = High;
while( L <= R )
{
I = (L + R) >> 1;
C = (*compare_func)((T)(*(AList+I)),value);
if (C < 0) L = I + 1;
else{
R = I - 1;
if (C == 0)
{
ret = true;
L = I;
break;
}
}
}
index = L;
return ret;
}
/*
插入排序:
有N - 1趟排序组成,对于P = 1趟到P = N - 1趟,插入排序保证从位置0到位置P的元素为已排序状态.
类型:内部排序
复杂度: o(N(2))
*/
//通过交换相邻元素进行排序的任何算法平均需要o(N(2))时间
template <typename T>
void InsertionSort(T A[],int N)
{
int j,p;
T tmp;
for(p = 1; p < N; p++)
{
tmp = A[p];
for(j = p;j > 0 && A[j - 1] >tmp;j--)
{
A[j] = A[j - 1];
}
A[j] = tmp;
}
}
/*
快速排序:
算法由四步组成:
1. 如果S中元素个数是0或1,则返回
2. 取S中任一元素v,称之为枢纽元
3. 将S-{v}分成两个不相交的集合:S1 = {x∈S - {v},x≦v}和S2 = {x∈S - {v},x≧v}
4. 返回quicksort(S1)后,继随v,继而quicksort(S2)
一种安全的方针是随机选取枢纽元.但随机数的生成非常昂贵,根本减少不了算法其余部分的平均运行时间
推荐使用三数中值分割法
可以构造快速选择方法: 先备份数组,增加一个序号参数k, k<=i时,分割左侧快速排序,否则右侧快速排序.完成之后,直接取数组序号为
k的值即可.
最好:o(NLogN) 平均:o(NLogN) 最差o(N(2))
类型:内部排序
复杂度: o(NLogN)
*/
template <typename T>
void swap_t(T& a,T& b)
{
T c;
c = a;
a = b;
b = c;
}
template <typename T>
int Partion(T A[],int Left,int Right)
{
int i,j = Left - 1;
T s = A[--Right];
for(i = Left;i<Right;i++)
{
if (A[i] < s)
{
j++;
swap_t(A[i],A[j]);
}
}
j++;
A[Right] = A[j];
A[j] = s;
return j;
}
template <typename T>
int RandomPartion(T A[],int Left,int Right)
{
int i = Left + rand() % (Right - Left - 1);
swap_t(A[i],A[Right -1]);
return Partion(A,Left,Right);
}
template <typename T>
T Median3(T A[],int Left,int Right)
{
int Center = (Left + Right) / 2;
if (A[Left] > A[Center])
swap_t(A[Left],A[Center]);
if (A[Left] > A[Right])
swap_t(A[Left],A[Right]);
if (A[Center] > A[Right])
swap_t(A[Center],A[Right]);
swap_t(A[Center],A[Right - 1]);/*Hide pivot*/
return A[Right - 1];
}
template <typename T>
void QuickSort(T A[],int Left,int Right,bool busedef=false)
{
int i,j;
T Pivot;
if (!busedef)
{
const int Cutoff = 3;
if (Left + Cutoff <= Right)
{
Pivot = Median3(A,Left,Right);
i = Left;
j = Right - 1;
while (1)
{
while(A[++i] < Pivot){;}
while(A[--j] > Pivot){;}
if (i < j)
swap_t(A[i],A[j]);
else
break;
}
swap_t(A[i],A[Right - 1]); /*Restore pivot*/
QuickSort(A,Left,i - 1);
QuickSort(A,i + 1,Right);
}else{
InsertionSort(A+Left,Right - Left + 1);
}
}else{
if (Left < Right - 1)
{
i = RandomPartion(A,Left,Right);
QuickSort(A,Left,i);
QuickSort(A,i+1,Right);
}
}
}
#ifdef USE_WIN_MEMORY_MAP
HANDLE hThread;
DWORD dwThreadId;
struct MemoryMapReadStruct
{
CStringListEx* pList;
LPVOID Buf;
int FileLineType;
};
void SetString(CString &S,char* Buf,int Len)
{
if ( Buf != NULL)
{
char* P = new char[Len+1];
memset(P,0,Len+1);
memcpy(P,Buf,Len-1);
memcpy(P+Len-1,Buf+Len-1,1);
S = P;
delete[] P;
}
}
int get_file_delim_type(const char* filename)
{
int ret = FLT_WINDOW;
char tmpchar;
char buf[2] = {0};
int count = sizeof(buf);
int nCount = 0;
FILE* fp = fopen(filename,"r");
if (fp)
{
fread(buf,count,1,fp);
tmpchar = buf[0];
nCount++;
while( fread(buf,count,1,fp) && nCount <4095)
{
if (tmpchar == 0x0D)
{
if (buf[0] == 0x0A) //Window 类型 //0x0D 0x0A
ret = FLT_WINDOW;
else //UNIX 类型 //0x0D
ret = FLT_UNIX;
break;
}else{
if (tmpchar == 0x0A)//Apple 类型 //0x0A
{
ret = FLT_APPLE;
break;
}
}
tmpchar = buf[0];
nCount++;
}
fclose(fp);
}
return ret;
}
DWORD WINAPI LoadStrings(LPVOID lpParam)
{
CString S;
char* P,*Start;
MemoryMapReadStruct* pReadParam = (MemoryMapReadStruct* )lpParam;
P = (char*)pReadParam->Buf;
switch (pReadParam->FileLineType)
{
case FLT_APPLE:
case FLT_UNIX:
case FLT_WINDOW:
{
if (P!=NULL)
{
while ( *P != char(0) )
{
Start = P;
while (*P!=char(0) && *P!=char(0x0D) && *P!=char(0x0A))
P++;
if (P - Start >0)
{
SetString(S,Start,P-Start);
pReadParam->pList->Add(S);
}
if (*P == char(0x0D))
P++;
if (*P == char(0x0A))
P++;
}
}
}break;
default:break;
}
return 0;
}
#endif
CStringListEx::CStringListEx(int nBlockSize):CStringList(nBlockSize)
{
m_bSorted = false;
}
CString CStringListEx::operator []( int Index )
{
return GetAt(FindIndex(Index));
}
bool CStringListEx::CustomFind(CString value,int& index)
{
if (!m_bSorted)
{
index = 0;
POSITION pos = GetHeadPosition();
while (pos != NULL)
{
if (strcmp((LPCTSTR)GetAt(pos),(LPCTSTR)value) == 0)
{
return true;
}
GetNext(pos);
index++;
}
index = -1;
return false;
}else{
int nCount = GetCount();
POSITION pos = GetHeadPosition();
CString * ptrArray = new CString[nCount];
for(int i = 0;i<nCount;i++)
{
ptrArray[i] = GetNext(pos).GetBuffer(0);
}
bool ret = BinSearch<const char*>((const char**)ptrArray,0,nCount-1,(const char*)value,strcmp,index);
delete[] ptrArray;
return ret;
}
}
int CStringListEx::IndexOf(CString value)
{
int index = 0;
if (CustomFind(value,index))
return index;
return -1;
}
void CStringListEx::Add(const CString& NewStr)
{
AddTail(NewStr);
m_bSorted = false;
}
void CStringListEx::Sort()
{
int i;
int nCount = GetCount();
POSITION pos = GetHeadPosition();
CString* ptrArray = new CString[nCount];
for(i = 0;i<nCount;i++)
{
ptrArray[i] = GetNext(pos).GetBuffer(0);
}
QuickSort<CString>(ptrArray,0,nCount-1);
RemoveAll();
for(i = 0;i<nCount;i++)
{
Add(ptrArray[i]);
}
delete[] ptrArray;
m_bSorted = true;
}
void CStringListEx::Clear()
{
RemoveAll();
}
void CStringListEx::SaveToFile(LPCTSTR lpszFileName)
{
FILE *file;
if((file=fopen((LPCTSTR)lpszFileName,"w+"))==NULL)
{
return;
}
POSITION pos = GetHeadPosition();
while(pos != NULL)
{
CString str = GetNext(pos).GetBuffer(0);
fwrite((LPCTSTR)str,str.GetLength(),1,file);
}
fclose(file);
}
void CStringListEx::LoadFromFile(LPCTSTR lpszFileName)
{
#ifdef USE_WIN_MEMORY_MAP
HANDLE hFile = CreateFile(lpszFileName,GENERIC_READ|GENERIC_WRITE,
FILE_SHARE_READ,
NULL,
OPEN_EXISTING,
FILE_ATTRIBUTE_NORMAL,
NULL);
if (hFile == INVALID_HANDLE_VALUE)
{
return ;
}
HANDLE hFileMapping = CreateFileMapping(hFile,NULL,PAGE_READWRITE,0,0,NULL);
if (hFileMapping == NULL)
{
CloseHandle(hFile);
return ;
}
int FileLineType = get_file_delim_type(lpszFileName);
SYSTEM_INFO si;
GetSystemInfo(&si);
DWORD dwBytesInBlock = 1000*si.dwAllocationGranularity;
__int64 dwFileOffset = 0;
DWORD dwFileSizeHigh;
__int64 dwFileSize = GetFileSize(hFile,&dwFileSizeHigh);
dwFileSize |= (((__int64)dwFileSizeHigh)<<32);
CloseHandle(hFile);
hFile = INVALID_HANDLE_VALUE;
bool bPMEnough = false;
if (dwFileSize<dwBytesInBlock)
{
dwBytesInBlock = dwFileSize;
bPMEnough = true;
}
if (!bPMEnough)
{
dwBytesInBlock = dwFileSize;
bPMEnough = true;
}
if ( bPMEnough )
{
PBYTE pbFile = (PBYTE)MapViewOfFile(hFileMapping,FILE_MAP_ALL_ACCESS,
(DWORD)(dwFileOffset>>32),
(DWORD)(dwFileOffset & 0xFFFFFFFF),
dwBytesInBlock);
MemoryMapReadStruct thread_param;
thread_param.Buf = pbFile;
thread_param.pList = this;
thread_param.FileLineType = FileLineType;
hThread = CreateThread(NULL,0,LoadStrings,(LPVOID)&thread_param,0,&dwThreadId);
while( 1 == 1 )
{
DWORD dwRet;
MSG msg ;
HANDLE ObjArray[1];
ObjArray[0]=hThread;
dwRet = ::MsgWaitForMultipleObjects(1,ObjArray,FALSE, INFINITE, QS_ALLINPUT);
if (dwRet == WAIT_OBJECT_0)
{
break;
}
else
{
::PeekMessage(&msg, NULL, 0, 0, PM_REMOVE);
::DispatchMessage(&msg);
}
}
if (pbFile!=NULL)
UnmapViewOfFile(pbFile);
}
if ( hFileMapping != NULL)
{
CloseHandle(hFileMapping);
hFileMapping = NULL;
}
#else
//删除的这些代码在VS2008下是错误的,主要式peek函数
fstream infile;
char buf[0x400];
locale::global(locale(""));
infile.open(lpszFileName,ios_base::in);
if (infile.is_open())
{
while (infile.peek() != -1)
{
infile.getline(buf,0x400);
Add(buf);
}
}
infile.close();
locale::global(locale("C"));
//下面的代码为更改的代码
char tmpchar;
char buf[2] = {0};
int count = sizeof(buf);
int nCount = 0;
FILE* fp = fopen(lpszFileName,"r");
if (fp)
{
fread(buf,count,1,fp);
tmpchar = buf[0];
nCount++;
while( fread(buf,count,1,fp) && nCount <4095)
{
if (tmpchar == 0x0D)
{
break;
}else{
if (tmpchar == 0x0A)//Apple 类型 //0x0A
break;
}
tmpchar = buf[0];
nCount++;
}
fclose(fp);
if (tmpchar == 0x0D || tmpchar == 0x0A)
{
fstream infile;
char buf[0x400];
locale::global(locale(""));
infile.open(lpszFileName,ios_base::in);
if (infile.is_open())
{
while (infile.getline(buf,0x400,tmpchar))
{
Add(buf);
}
}
infile.close();
locale::global(locale("C"));
}
}
#endif
}
后续:
本人提供这个类的目的是为了在MFC的开发程序中增加比BORLAND TStringList更高效的类。上述代码中的内存映射处理是可以改进的,可以用分块读取。
作者声明:
本人ddddfw888享有版权,转载请注明出处。