C++语言: Windiff 原理初探(C++源码)
001 //"Windiff 原理初探(C++源码)
002 //详细说明文章参见:http://www.2maomao.com/blog/how-windiff-works-continued-1/
003
004 // mydiff.cpp
005 //
006 #include "stdafx.h"
007 #include <BaseTsd.h>
008 #include <iostream>
009 #include <fstream>
010 #include <string>
011 #include <map>
012 #include <utility>
013 #include <vector>
014 #include <functional>
015 #include <algorithm>
016 using namespace std;
017
018 bool MyPairCompFirst( pair<int, int> elem1, pair<int, int> elem2 )
019 {
020 return elem1.first < elem2.first;
021 }
022
023 bool MyPairCompSecond( pair<int, int> elem1, pair<int, int> elem2 )
024 {
025 return elem1.second < elem2.second;
026 }
027
028 int BiSearch(vector< pair<int, int> > &valPairs, int value)
029 {
030 int left = 0;
031 int right = valPairs.size() - 1;
032 int mid = 0;
033 while (left <= right)
034 {
035 mid = (left + right) / 2;
036 if (valPairs[mid].first == value)
037 return mid;
038
039 if (valPairs[mid].first < value)
040 left = mid + 1;
041 else
042 right = mid - 1;
043 }
044 return -1;
045 }
046
047 int _tmain(int argc, _TCHAR* argv[])
048 {
049 vector<int> valsNew;
050 vector<pair<int, int>> valsOld;
051 vector<pair<int, int>> valsOldBackup;
052
053 if (argc != 3)
054 {
055 printf(" Usage:/n");
056 printf(" mydiff fileNew fileOld/n");
057 return 0;
058 } else
059 {
060 //read the two input file build the hash table, turn string into values for compare
061 string line;
062 ifstream finNew(argv[1]);
063 if (!finNew)
064 {
065 cout << "failed to open file:" << argv[1] << endl;
066 exit(0);
067 }
068
069 map<string, int> diffLines;
070 char ch[10001];
071 while (finNew.getline(ch, 10000))
072 {
073 line = ch;
074 const char* p = line.c_str();
075 //chop off the leading and ending blank chars
076 int pos=0;
077 while (pos < (int)line.size() && (line[pos] == ' ' || line[pos] == '/t')) pos++;
078 int posLeft = pos;
079 pos = (int)line.size() - 1;
080 while (pos >= 0 && (line[pos] == ' ' || line[pos] == '/t')) pos--;
081 string temp = line.substr(posLeft, pos-posLeft);
082 //@@@@ if (temp.empty()) continue; //blank lines is not counted for this special case
083
084 if (diffLines.find(line) == diffLines.end())
085 diffLines[line] = diffLines.size(); //use size() as the unique value
086 valsNew.push_back(diffLines[line]);
087 }
088 finNew.close();
089
090 ifstream finOld(argv[2]);
091 if (!finOld)
092 {
093 cout << "failed to open file:" << argv[2] << endl;
094 exit(0);
095 }
096 while (finOld.getline(ch, 10000))
097 {
098 line = ch;
099 //chop off the leading and ending blank chars
100 int pos=0;
101 while (pos < line.size() && (line[pos] == ' ' || line[pos] == '/t')) pos++;
102 int posLeft = pos;
103 pos = line.size() - 1;
104 while (pos >= 0 && (line[pos] == ' ' || line[pos] == '/t')) pos--;
105 string temp = line.substr(posLeft, pos-posLeft);
106 //@@@@ if (temp.empty()) continue; //blank lines is not counted for this special case
107
108 if (diffLines.find(line) == diffLines.end())
109 diffLines[line] = diffLines.size(); //use size() as the unique value
110 valsOld.push_back(make_pair(diffLines[line], valsOld.size()));
111 }
112 finOld.close();
113 diffLines.clear();
114 } //Here diffLines and file handles should be released
115 valsOldBackup = valsOld;
116
117 //use the greedy method, each step we should find the most "RECENT" equal lines
118
119 //Sort the old file values, so we can use bi-search against it when we want to compare
120 sort(valsOld.begin(), valsOld.end(), MyPairCompFirst);
121
122 int posNew = 0;
123
124 //Search for the next
125 vector<int> genLinesNew;
126 vector<int> genLinesOld;
127 int searchedOldLineCount = 0;
128 while (posNew < valsNew.size() && valsOld.size() > 0)
129 {
002 //详细说明文章参见:http://www.2maomao.com/blog/how-windiff-works-continued-1/
003
004 // mydiff.cpp
005 //
006 #include "stdafx.h"
007 #include <BaseTsd.h>
008 #include <iostream>
009 #include <fstream>
010 #include <string>
011 #include <map>
012 #include <utility>
013 #include <vector>
014 #include <functional>
015 #include <algorithm>
016 using namespace std;
017
018 bool MyPairCompFirst( pair<int, int> elem1, pair<int, int> elem2 )
019 {
020 return elem1.first < elem2.first;
021 }
022
023 bool MyPairCompSecond( pair<int, int> elem1, pair<int, int> elem2 )
024 {
025 return elem1.second < elem2.second;
026 }
027
028 int BiSearch(vector< pair<int, int> > &valPairs, int value)
029 {
030 int left = 0;
031 int right = valPairs.size() - 1;
032 int mid = 0;
033 while (left <= right)
034 {
035 mid = (left + right) / 2;
036 if (valPairs[mid].first == value)
037 return mid;
038
039 if (valPairs[mid].first < value)
040 left = mid + 1;
041 else
042 right = mid - 1;
043 }
044 return -1;
045 }
046
047 int _tmain(int argc, _TCHAR* argv[])
048 {
049 vector<int> valsNew;
050 vector<pair<int, int>> valsOld;
051 vector<pair<int, int>> valsOldBackup;
052
053 if (argc != 3)
054 {
055 printf(" Usage:/n");
056 printf(" mydiff fileNew fileOld/n");
057 return 0;
058 } else
059 {
060 //read the two input file build the hash table, turn string into values for compare
061 string line;
062 ifstream finNew(argv[1]);
063 if (!finNew)
064 {
065 cout << "failed to open file:" << argv[1] << endl;
066 exit(0);
067 }
068
069 map<string, int> diffLines;
070 char ch[10001];
071 while (finNew.getline(ch, 10000))
072 {
073 line = ch;
074 const char* p = line.c_str();
075 //chop off the leading and ending blank chars
076 int pos=0;
077 while (pos < (int)line.size() && (line[pos] == ' ' || line[pos] == '/t')) pos++;
078 int posLeft = pos;
079 pos = (int)line.size() - 1;
080 while (pos >= 0 && (line[pos] == ' ' || line[pos] == '/t')) pos--;
081 string temp = line.substr(posLeft, pos-posLeft);
082 //@@@@ if (temp.empty()) continue; //blank lines is not counted for this special case
083
084 if (diffLines.find(line) == diffLines.end())
085 diffLines[line] = diffLines.size(); //use size() as the unique value
086 valsNew.push_back(diffLines[line]);
087 }
088 finNew.close();
089
090 ifstream finOld(argv[2]);
091 if (!finOld)
092 {
093 cout << "failed to open file:" << argv[2] << endl;
094 exit(0);
095 }
096 while (finOld.getline(ch, 10000))
097 {
098 line = ch;
099 //chop off the leading and ending blank chars
100 int pos=0;
101 while (pos < line.size() && (line[pos] == ' ' || line[pos] == '/t')) pos++;
102 int posLeft = pos;
103 pos = line.size() - 1;
104 while (pos >= 0 && (line[pos] == ' ' || line[pos] == '/t')) pos--;
105 string temp = line.substr(posLeft, pos-posLeft);
106 //@@@@ if (temp.empty()) continue; //blank lines is not counted for this special case
107
108 if (diffLines.find(line) == diffLines.end())
109 diffLines[line] = diffLines.size(); //use size() as the unique value
110 valsOld.push_back(make_pair(diffLines[line], valsOld.size()));
111 }
112 finOld.close();
113 diffLines.clear();
114 } //Here diffLines and file handles should be released
115 valsOldBackup = valsOld;
116
117 //use the greedy method, each step we should find the most "RECENT" equal lines
118
119 //Sort the old file values, so we can use bi-search against it when we want to compare
120 sort(valsOld.begin(), valsOld.end(), MyPairCompFirst);
121
122 int posNew = 0;
123
124 //Search for the next
125 vector<int> genLinesNew;
126 vector<int> genLinesOld;
127 int searchedOldLineCount = 0;
128 while (posNew < valsNew.size() && valsOld.size() > 0)
129 {