You are given a string, S, and a list of words, L, that are all of the same length. Find all starting indices of substring(s) in S that is a concatenation of each word in L exactly once and without any intervening characters.
For example, given:
S: "barfoothefoobarman"
L: ["foo", "bar"]
You should return the indices: [0,9]
.
(order does not matter).
第三版: 双重Rolling Hash
1. 使用Rolling hash,给每组字符计算hash值。
2. 使用Rolling hash, 给整组计算hash值。
3. 以整组为单位的hash值作比较,以确定整组是否匹配。
时间复杂度为O(n)。此算法在leetcode上实际执行时间为54ms。
class Solution { public: vector<int> findSubstring(string S, vector<string> &L) { vector<int> result; if (L.empty() || L[0].empty() ) return result; if (S.size() < L.size() * L[0].size()) return result; const int lsize = L.size(); const int wsize = L[0].size(); const int gsize = lsize * wsize; const int r = 31; const int q = 997; const int m = 3355439; int lhash = 0; for (int i = 0; i<lsize; i++) { int whash = 0; for (int j=0; j<wsize; j++) { whash = (whash * r + L[i][j]) % q; } lhash = (lhash + whash) % m; } vector<int > ghashes(wsize); vector<int> window(gsize); int whead = 0; int whash = 0; int i = 0; int weight = 1; for (; i<wsize-1; i++) { whash = (whash * r + S[i]) % q; weight = (weight * r) % q; } unordered_multiset<string> Lset(L.begin(), L.end()); for (; i<S.size(); i++) { int &ghash = ghashes[whead % wsize]; ghash = (ghash - window[whead] + m) % m; window[whead] = whash = (whash * r + S[i]) % q; ghash = (ghash + whash) % m; whead = ++whead % gsize; whash = (whash - (S[i-wsize+1] * weight) % q + q) % q; if (ghash == lhash && match(S, i-gsize+1, Lset)) result.push_back(i - gsize + 1); } return result; } bool match(const string &S, int pos, unordered_multiset<string> L) { if (pos < 0) return false; const int lsize = L.size(); const int wsize = L.begin()->size(); for (int i=0; i<lsize; i++) { auto iter = L.find(S.substr(pos, wsize)); if (iter == L.end()) return false; L.erase(iter); pos += wsize; } return true; } };
第二版: Hash查找,多窗口同时移动
此算法利用C++11的unordered_map,并进行hash函数和比较函数的自定义,以省去取子串的复制操作。
窗口移动过程中,只需要考虑子串查不到,和查到但次数超过预期的两种情况。
做成多窗口同时移动,以希望能更高的缓存命中率。在S足够长时,有优势。省去多次遍历。
与单窗口多次遍历相比,付出了更多的额外空间。
此算法在leetcode上的实际势行时间为124ms。
class Solution { public: struct EqualHash { EqualHash(int size): size_(size) {} bool operator() (const char *x, const char *y) const { return !strncmp(x, y, size_); } size_t operator() (const char *s) const noexcept { return std::_Hash_impl::hash(s, size_); } size_t size_; }; vector<int> findSubstring(string S, vector<string> &L) { vector<int> result; if (L.empty()) return result; if (L[0].empty()) return result; if (S.size() < L[0].size()) return result; const int itemSize = L[0].size(); vector<int> total(itemSize); vector<int> left(itemSize); vector<unordered_map<const char *, int, EqualHash, EqualHash> > actuals; unordered_map<const char *, int, EqualHash, EqualHash> expected(L.size(), EqualHash(itemSize), EqualHash(itemSize)); actuals.reserve(itemSize); for (int i=0; i<itemSize; i++) { actuals.push_back(expected); left[i] = i; } for (const string &item: L) expected[item.data()]++; const char *const s = S.data(); const char *h = s + itemSize - 2; int group = -1; while (*++h) { group = (group + 1) % itemSize; auto &actual = actuals[group]; const char *item = h - itemSize + 1; auto iter1 = expected.find(item); if (iter1 == expected.end()) { total[group] = 0; left[group] = h - s + 1; actual.clear(); continue; } auto &value = ++actual[item]; ++total[group]; while (value > iter1->second) { --actual[s + left[group]]; left[group] = left[group] + itemSize; --total[group]; } if (total[group] == L.size()) { result.push_back(left[group]); --actual[s + left[group]]; left[group] = left[group] + itemSize; --total[group]; } } return result; } };
第一版:利用滚动Hash,接合窗口移动,完成下面这个算法。
思想是,先计算出Hash值,用Hash值完成在map中的查找,最后用字符串精确匹配最验证。
并尽可能推迟并省掉字符串精确匹配。
这个算法虽然被leetcode AC了。 但是实际运行时间并预想的慢多了,高达1272ms。
class Solution { public: vector<int> findSubstring(string S, vector<string> &L) { vector<int> result; if (L.empty()) return result; const int itemSize = L[0].size(); const int wndSize = itemSize * L.size(); const int q = 3355439; const int r = 256; map<int, vector<int> > map1; for (int i=0; i<L.size(); i++) { int nHash = 0; for (int j=0; j<L[i].size(); j++) { nHash = ((nHash * r) % q + L[i][j]) % q; } map1[nHash].push_back(i); } vector<int> total(itemSize); vector<int> revMap(wndSize); vector<pair<int,bool> > map2(wndSize); int head2 = 0; int weight = 1; for (int i=1; i<itemSize; i++) weight = (weight * r) % q; int nHash = 0; for (int i=0; i<itemSize-1; i++) nHash = ((nHash * r) % q + S[i]) % q; for (int i=itemSize-1; i<S.size(); i++) { const int group = i % itemSize; const int offset = group * L.size(); if (map2[head2].first) { revMap[offset + map2[head2].first - 1] = 0; map2[head2].first = 0; map2[head2].second = false; --total[group]; } nHash = ((nHash * r) % q + S[i]) % q; map<int, vector<int> >::iterator iter = map1.find(nHash); if (iter != map1.end()) { int pos = -1; bool compare = true; const vector<int> &val = (*iter).second; if (val.size() == 1) { const int entry = revMap[offset + val[0]]; if (!entry || !S.compare(i-itemSize+1, itemSize, L[val[0]])) { pos = 0; compare = !!entry; } } else { compare = true; int min = i+1; for (int j=0; j<val.size(); j++) { if (!S.compare(i-itemSize+1, itemSize, L[val[j]])) { const int entry = revMap[offset + val[j]]; if (!entry) { pos = j; break; } else if (entry < min) { min = entry; pos = j; } } } } if (pos != -1) { const int entry = revMap[offset + val[pos]]; revMap[offset + val[pos]] = i + 1; map2[head2].first = val[pos] + 1; map2[head2].second= compare; total[group]++; if (entry) { const int entry2 = (head2 - i + entry - 1 + wndSize) % wndSize; map2[entry2].first = 0; map2[entry2].second= false; total[group]--; } if (total[group] == L.size()) { for (int j=0; j<L.size(); j++) { const int entry2 = (head2 - i + revMap[offset+j] - 1 + wndSize) % wndSize; if (!map2[entry2].second) { if (!S.compare(revMap[offset+j]-itemSize, itemSize, L[j])) { map2[entry2].second = true; } else { revMap[offset+j] = 0; map2[entry2].first = 0; map2[entry2].second= false; total[group]--; break; } } } if (total[group] == L.size()) result.push_back(i-wndSize+1); } } } nHash = ((nHash - (S[i-itemSize+1] * weight) % q) % q + q) % q; head2 = (head2 + 1) % wndSize; } return result; } };