现在的位置: 首页 > 综合 > 正文

比BCB更快的CStringListEx的实现(已更新)

2013年09月04日 ⁄ 综合 ⁄ 共 9374字 ⁄ 字号 评论关闭

鉴于MFC的CStringList过于简单,且无法高效处理文件, 所以结合快速排序、二分查找、内存映射在CStringList的基础之上写了CStringListEx 。 该类的效率很高, 同样读取100多M的文件, BCB的TStringList大概要20多秒(包括处理),CStringListEx只要3~4秒。   各位可以应用在自己的项目中试试该类的效果。 Copyright : wxy3064one##163.com

直接给出源码:

//.h文件:

 

#ifndef _Q_STRING_LIST_EX_H
#define _Q_STRING_LIST_EX_H

#if _MSC_VER > 1000
#pragma once
#endif // _MSC_VER > 1000

#define USE_WIN_MEMORY_MAP

 

enum FILE_LINE_TYPE{FLT_WINDOW = 0,FLT_UNIX = 1,FLT_APPLE = 2};

 

class CStringListEx : public CStringList
{
 bool m_bSorted;
 bool CustomFind(CString value,int& index);
public:
 CStringListEx(int nBlockSize = 10);
 void Sort();
 void Clear();
 int  IndexOf(CString value);
 void Add(const CString& NewStr);
 CString operator []( int Index );
 void LoadFromFile(LPCTSTR lpszFileName);
 void SaveToFile(LPCTSTR lpszFileName);
};

#endif

 

//.cpp文件

#include "StringListEx.h"
#ifndef USE_WIN_MEMORY_MAP
 #include <fstream>
 using namespace std;
#endif

 

 

/*
   二分法查找
*/

template <typename T>
bool BinSearch(T* AList,int Low,int High,T value,int (*compare_func)(T,T),int &index)
{
 bool ret = false;
 int I,C,L = Low,R = High;
 while( L <= R )
 {
  I = (L + R) >> 1;
  C = (*compare_func)((T)(*(AList+I)),value);
  if (C < 0) L = I + 1;
  else{
   R = I - 1;
   if (C == 0)
   {
    ret = true;
    L = I;
    break;
   }
  }
 }
 index = L;
 return ret;
}

/*
  插入排序:
   有N - 1趟排序组成,对于P = 1趟到P = N - 1趟,插入排序保证从位置0到位置P的元素为已排序状态.
  类型:内部排序
  复杂度: o(N(2))
*/
//通过交换相邻元素进行排序的任何算法平均需要o(N(2))时间
template <typename T>
void InsertionSort(T A[],int N)
{
 int j,p;
 T   tmp;
 for(p = 1; p < N; p++)
 {
  tmp = A[p];
  for(j = p;j > 0 && A[j - 1] >tmp;j--)
  {
   A[j] = A[j - 1];
  }
  A[j] = tmp;
 }
}
/*
  快速排序:
  算法由四步组成:
  1. 如果S中元素个数是0或1,则返回
  2. 取S中任一元素v,称之为枢纽元
  3. 将S-{v}分成两个不相交的集合:S1 = {x∈S - {v},x≦v}和S2 = {x∈S - {v},x≧v}
  4. 返回quicksort(S1)后,继随v,继而quicksort(S2)

  一种安全的方针是随机选取枢纽元.但随机数的生成非常昂贵,根本减少不了算法其余部分的平均运行时间
  推荐使用三数中值分割法
  可以构造快速选择方法: 先备份数组,增加一个序号参数k, k<=i时,分割左侧快速排序,否则右侧快速排序.完成之后,直接取数组序号为
                     k的值即可.
  最好:o(NLogN)  平均:o(NLogN)  最差o(N(2))
  类型:内部排序
  复杂度: o(NLogN)
*/
template <typename T>
void swap_t(T& a,T& b)
{
 T c;
 c = a;
 a = b;
 b = c;
}

template <typename T>
int Partion(T A[],int Left,int Right)
{
 int i,j = Left - 1;
 T s = A[--Right];
 for(i = Left;i<Right;i++)
 {
  if (A[i] < s)
  {
   j++;
   swap_t(A[i],A[j]);
  }
 }
 j++;
 A[Right] = A[j];
 A[j] = s;
 return j;
}

template <typename T>
int RandomPartion(T A[],int Left,int Right)
{
 int i = Left + rand() % (Right - Left - 1);
 swap_t(A[i],A[Right -1]);
 return Partion(A,Left,Right);
}

template <typename T>
T Median3(T A[],int Left,int Right)
{
 int Center = (Left + Right) / 2;

 if (A[Left] > A[Center])
  swap_t(A[Left],A[Center]);
 if (A[Left] > A[Right])
  swap_t(A[Left],A[Right]);
 if (A[Center] > A[Right])
  swap_t(A[Center],A[Right]);

 swap_t(A[Center],A[Right - 1]);/*Hide pivot*/
 return A[Right - 1];
}

template <typename T>
void QuickSort(T A[],int Left,int Right,bool busedef=false)
{
 int i,j;
 T Pivot;
 if (!busedef)
 {
  const int Cutoff = 3;
  if (Left + Cutoff <= Right)
  {
   Pivot = Median3(A,Left,Right);
   i = Left;
   j = Right - 1;
   while (1)
   {
    while(A[++i] < Pivot){;}
    while(A[--j] > Pivot){;}
    if (i < j)
     swap_t(A[i],A[j]);
    else
     break;
   }
   swap_t(A[i],A[Right - 1]); /*Restore pivot*/
   QuickSort(A,Left,i - 1);
   QuickSort(A,i + 1,Right);
  }else{
   InsertionSort(A+Left,Right - Left + 1);
  }
 }else{
  if (Left < Right - 1)
  {
   i = RandomPartion(A,Left,Right);
   QuickSort(A,Left,i);
   QuickSort(A,i+1,Right);
  }
 }
}

#ifdef USE_WIN_MEMORY_MAP
 HANDLE hThread;
 DWORD dwThreadId;

 struct  MemoryMapReadStruct
 {
  CStringListEx* pList;
  LPVOID         Buf;
  int            FileLineType;
 };

 void SetString(CString &S,char* Buf,int Len)
 {
  if ( Buf != NULL)
  {
   char* P = new char[Len+1];
   memset(P,0,Len+1);
   memcpy(P,Buf,Len-1);
   memcpy(P+Len-1,Buf+Len-1,1);
   S = P;
   delete[] P;
  }
 }

 int get_file_delim_type(const char* filename)
 {
  int ret = FLT_WINDOW;
  char tmpchar;
  char buf[2] = {0};
  int  count = sizeof(buf);
  int  nCount = 0;
  FILE* fp = fopen(filename,"r");
  if (fp)
  {
   fread(buf,count,1,fp);
   tmpchar = buf[0];
   nCount++;

   while( fread(buf,count,1,fp) && nCount <4095)
   {
    if (tmpchar == 0x0D)
    {
     if (buf[0] == 0x0A) //Window 类型  //0x0D 0x0A
      ret = FLT_WINDOW;
     else                //UNIX   类型  //0x0D
      ret = FLT_UNIX;
     break;
    }else{
     if (tmpchar == 0x0A)//Apple  类型  //0x0A
     {
      ret = FLT_APPLE;
      break;
     }
    }
    tmpchar = buf[0];
    nCount++;
   }
   fclose(fp);
  }
  return ret;
 }

 DWORD WINAPI LoadStrings(LPVOID lpParam)
 {
  CString S;
  char* P,*Start;
  MemoryMapReadStruct* pReadParam = (MemoryMapReadStruct* )lpParam;

  P = (char*)pReadParam->Buf;
  switch (pReadParam->FileLineType)
  {
   case FLT_APPLE:
   case FLT_UNIX:
   case FLT_WINDOW:
   {
    if (P!=NULL)
    {
     while ( *P != char(0) )
     {
      Start = P;
      while (*P!=char(0) &&  *P!=char(0x0D) && *P!=char(0x0A))
       P++;
      if (P - Start >0) 
      {
       SetString(S,Start,P-Start);
       pReadParam->pList->Add(S);
      }
      if (*P == char(0x0D))
       P++;
      if (*P == char(0x0A))
       P++;
     }
    }
   }break;
   default:break;
  }
  
  return 0;
 }
#endif

CStringListEx::CStringListEx(int nBlockSize):CStringList(nBlockSize)
{
 m_bSorted = false;
}

CString CStringListEx::operator []( int Index )
{
 return GetAt(FindIndex(Index));
}

bool CStringListEx::CustomFind(CString value,int& index)
{
 if (!m_bSorted)
 {
  index = 0;
  POSITION pos = GetHeadPosition();
  while (pos != NULL)
  {
   if (strcmp((LPCTSTR)GetAt(pos),(LPCTSTR)value) == 0)
   {
    return true;
   }
   GetNext(pos);
   index++;
  }
  index = -1;
  return false;
 }else{   
  int nCount = GetCount();
  POSITION pos = GetHeadPosition();
  CString * ptrArray = new CString[nCount];
  for(int i = 0;i<nCount;i++)
  {
   ptrArray[i] = GetNext(pos).GetBuffer(0);
  }
  bool ret = BinSearch<const char*>((const char**)ptrArray,0,nCount-1,(const char*)value,strcmp,index);
  delete[] ptrArray;
  return ret;
 }
}
int CStringListEx::IndexOf(CString value)
{
 int index = 0;
 if (CustomFind(value,index))
  return index;
 return -1;
}

void CStringListEx::Add(const CString& NewStr)
{
 AddTail(NewStr);
 m_bSorted = false;
}

void CStringListEx::Sort()
{
 int i;
 int nCount = GetCount();
 POSITION pos = GetHeadPosition();
 CString* ptrArray = new CString[nCount];
 for(i = 0;i<nCount;i++)
 {
  ptrArray[i] = GetNext(pos).GetBuffer(0);
 }
 QuickSort<CString>(ptrArray,0,nCount-1);
 RemoveAll();
 for(i = 0;i<nCount;i++)
 {
  Add(ptrArray[i]);
 }
 delete[] ptrArray;
 m_bSorted = true;
}

void CStringListEx::Clear()
{
 RemoveAll();
}

void CStringListEx::SaveToFile(LPCTSTR lpszFileName)
{
    FILE   *file;  
    if((file=fopen((LPCTSTR)lpszFileName,"w+"))==NULL)  
 {    
  return;  
 }  
 POSITION pos = GetHeadPosition();
 while(pos != NULL)
 {
  CString str = GetNext(pos).GetBuffer(0);
  fwrite((LPCTSTR)str,str.GetLength(),1,file);
 }
    fclose(file);
}
void CStringListEx::LoadFromFile(LPCTSTR lpszFileName)
{
#ifdef USE_WIN_MEMORY_MAP
    HANDLE hFile =  CreateFile(lpszFileName,GENERIC_READ|GENERIC_WRITE,
                             FILE_SHARE_READ,
                             NULL,
                             OPEN_EXISTING,
                             FILE_ATTRIBUTE_NORMAL,
                             NULL);
    if (hFile == INVALID_HANDLE_VALUE)
    {
        return ;
    }
    HANDLE hFileMapping = CreateFileMapping(hFile,NULL,PAGE_READWRITE,0,0,NULL);
    if (hFileMapping == NULL)
    {
       CloseHandle(hFile);
       return ;
    }
    int FileLineType = get_file_delim_type(lpszFileName);
    SYSTEM_INFO si;
    GetSystemInfo(&si);
    DWORD dwBytesInBlock = 1000*si.dwAllocationGranularity;
    __int64 dwFileOffset = 0;
    DWORD dwFileSizeHigh;
    __int64 dwFileSize = GetFileSize(hFile,&dwFileSizeHigh);
    dwFileSize |= (((__int64)dwFileSizeHigh)<<32);
    CloseHandle(hFile);
    hFile =  INVALID_HANDLE_VALUE;
    bool bPMEnough = false;
    if (dwFileSize<dwBytesInBlock)
    {
       dwBytesInBlock = dwFileSize;
       bPMEnough = true;
    }
    if (!bPMEnough)
    {
         dwBytesInBlock = dwFileSize;
         bPMEnough = true;
    }
    if (  bPMEnough  )
 {
   PBYTE pbFile = (PBYTE)MapViewOfFile(hFileMapping,FILE_MAP_ALL_ACCESS,
            (DWORD)(dwFileOffset>>32),
            (DWORD)(dwFileOffset & 0xFFFFFFFF),
            dwBytesInBlock);
   MemoryMapReadStruct thread_param;
   thread_param.Buf     = pbFile;
   thread_param.pList   = this; 
   thread_param.FileLineType = FileLineType;
   hThread = CreateThread(NULL,0,LoadStrings,(LPVOID)&thread_param,0,&dwThreadId);
         while( 1 == 1 )
         {
              DWORD dwRet;
              MSG msg ;
              HANDLE ObjArray[1];
              ObjArray[0]=hThread;
              dwRet = ::MsgWaitForMultipleObjects(1,ObjArray,FALSE, INFINITE, QS_ALLINPUT);
              if (dwRet == WAIT_OBJECT_0)
              {
                  break;
              }
              else
              {
                  ::PeekMessage(&msg, NULL, 0, 0, PM_REMOVE);
                  ::DispatchMessage(&msg);
              }
         }
         if (pbFile!=NULL)
             UnmapViewOfFile(pbFile);
 }
 if ( hFileMapping != NULL)
 {
  CloseHandle(hFileMapping);
  hFileMapping = NULL;
 }
#else

//删除的这些代码在VS2008下是错误的,主要式peek函数
 fstream infile;
 char buf[0x400];
 locale::global(locale(""));
 infile.open(lpszFileName,ios_base::in);
 if (infile.is_open())
 {
  while (infile.peek() != -1)
  {
   infile.getline(buf,0x400);
   Add(buf);
  }
 }
 infile.close();
 locale::global(locale("C"));

 

//下面的代码为更改的代码

 char tmpchar;
 char buf[2] = {0};
 int  count = sizeof(buf);
 int  nCount = 0;
 FILE* fp = fopen(lpszFileName,"r");
 if (fp)
 {
  fread(buf,count,1,fp);
  tmpchar = buf[0];
  nCount++;

  while( fread(buf,count,1,fp) && nCount <4095)
  {
   if (tmpchar == 0x0D)
   {
    break;
   }else{
    if (tmpchar == 0x0A)//Apple  类型  //0x0A
     break;
   }
   tmpchar = buf[0];
   nCount++;
  }
  fclose(fp);
  if (tmpchar == 0x0D || tmpchar == 0x0A)
  {
   fstream infile;
   char buf[0x400];
   locale::global(locale(""));
   infile.open(lpszFileName,ios_base::in);
   if (infile.is_open())
   {
    while (infile.getline(buf,0x400,tmpchar))
    {
     Add(buf);
    }
   }
   infile.close();
   locale::global(locale("C"));
  }
 }

#endif

 

后续:

     本人提供这个类的目的是为了在MFC的开发程序中增加比BORLAND TStringList更高效的类。上述代码中的内存映射处理是可以改进的,可以用分块读取。

 

作者声明:
本人ddddfw888享有版权,转载请注明出处。

抱歉!评论已关闭.