Substring with Concatenation of All Words — leetcode

现在的位置: 首页 > 综合 > 正文

Substring with Concatenation of All Words — leetcode

2018年10月19日 ⁄ 综合 ⁄ 共 5321字 ⁄ 字号小中大 ⁄ 评论关闭

You are given a string, S, and a list of words, L, that are all of the same length. Find all starting indices of substring(s) in S that is a concatenation of each word in L exactly once and without any intervening characters.

For example, given:
S: "barfoothefoobarman"
L: ["foo", "bar"]

You should return the indices: [0,9].
(order does not matter).

第三版：双重Rolling Hash

1. 使用Rolling hash，给每组字符计算hash值。

2. 使用Rolling hash, 给整组计算hash值。

3. 以整组为单位的hash值作比较，以确定整组是否匹配。

时间复杂度为O(n)。此算法在leetcode上实际执行时间为54ms。

class Solution {
public:
    vector<int> findSubstring(string S, vector<string> &L) {
        vector<int> result;
        if (L.empty() || L[0].empty() ) return result;
        if (S.size() < L.size() * L[0].size()) return result;

        const int lsize = L.size();
        const int wsize = L[0].size();
        const int gsize = lsize * wsize;
        const int r = 31;
        const int q = 997;
        const int m = 3355439;

        int lhash = 0;
        for (int i = 0; i<lsize; i++) {
                int whash = 0;
                for (int j=0; j<wsize; j++) {
                        whash = (whash * r + L[i][j]) % q;
                }
                lhash = (lhash + whash) % m;
        }

        vector<int > ghashes(wsize);
        vector<int> window(gsize);
        int whead = 0;

        int whash = 0;
        int i = 0;
        int weight = 1;
        for (; i<wsize-1; i++) {
                whash = (whash * r + S[i]) % q;
                weight = (weight * r) % q;
        }

        unordered_multiset<string> Lset(L.begin(), L.end());
        for (; i<S.size(); i++) {
                int &ghash = ghashes[whead % wsize];
                ghash = (ghash - window[whead] + m) % m;
                window[whead] = whash = (whash * r + S[i]) % q;
                ghash = (ghash + whash) % m;
                whead = ++whead % gsize;
                whash = (whash - (S[i-wsize+1] * weight) % q + q) % q;

                if (ghash == lhash && match(S, i-gsize+1, Lset))
                        result.push_back(i - gsize + 1);
        }

        return result;
    }
    
    bool match(const string &S, int pos, unordered_multiset<string> L) {
        if (pos < 0) return false;

        const int lsize = L.size();
        const int wsize = L.begin()->size();
        for (int i=0; i<lsize; i++) {
                auto iter = L.find(S.substr(pos, wsize));
                if (iter == L.end()) return false;
                L.erase(iter);
                pos += wsize;
        }
        return true;
    }
};

第二版： Hash查找，多窗口同时移动

此算法利用C++11的unordered_map，并进行hash函数和比较函数的自定义，以省去取子串的复制操作。

窗口移动过程中，只需要考虑子串查不到，和查到但次数超过预期的两种情况。

做成多窗口同时移动，以希望能更高的缓存命中率。在S足够长时，有优势。省去多次遍历。

与单窗口多次遍历相比，付出了更多的额外空间。

此算法在leetcode上的实际势行时间为124ms。

class Solution {
public:

struct EqualHash {
        EqualHash(int size): size_(size) {}

        bool operator() (const char *x, const char *y) const
        { return !strncmp(x, y, size_); }

        size_t operator() (const char *s) const noexcept
        { return std::_Hash_impl::hash(s, size_); }

        size_t size_;
};

    vector<int> findSubstring(string S, vector<string> &L) {
        vector<int> result;
        if (L.empty()) return result;
        if (L[0].empty()) return result;
        if (S.size() < L[0].size()) return result;

        const int itemSize = L[0].size();
        vector<int> total(itemSize);
        vector<int> left(itemSize);
        vector<unordered_map<const char *, int, EqualHash, EqualHash> > actuals;
        unordered_map<const char *, int, EqualHash, EqualHash> expected(L.size(), EqualHash(itemSize), EqualHash(itemSize));

        actuals.reserve(itemSize);
        for (int i=0; i<itemSize; i++) {
                actuals.push_back(expected);
                left[i] = i;
        }

        for (const string &item: L)
                expected[item.data()]++;

        const char *const s = S.data();
        const char *h = s + itemSize - 2;
        int group = -1;
        while (*++h) {
                group = (group + 1) % itemSize;
                auto &actual = actuals[group];
                const char *item = h - itemSize + 1;

                auto iter1 = expected.find(item);
                if (iter1 == expected.end()) {
                        total[group] = 0;
                        left[group] = h - s + 1;
                        actual.clear();
                        continue;
                }

                auto &value = ++actual[item];
                ++total[group];

                while (value > iter1->second) {
                        --actual[s + left[group]];
                        left[group] = left[group] + itemSize;
                        --total[group];
                }

                if (total[group] == L.size()) {
                        result.push_back(left[group]);
                        --actual[s + left[group]];
                        left[group] = left[group] + itemSize;
                        --total[group];
                }
        }

        return result;
    }
};

第一版：利用滚动Hash，接合窗口移动，完成下面这个算法。

思想是，先计算出Hash值，用Hash值完成在map中的查找，最后用字符串精确匹配最验证。

并尽可能推迟并省掉字符串精确匹配。

这个算法虽然被leetcode AC了。但是实际运行时间并预想的慢多了，高达1272ms。

class Solution {
public:
    vector<int> findSubstring(string S, vector<string> &L) {
        vector<int> result;
        if (L.empty()) return result;
        const int itemSize = L[0].size();
        const int wndSize = itemSize * L.size();

        const int q = 3355439;
        const int r = 256;
        map<int, vector<int> > map1;
        for (int i=0; i<L.size(); i++) {
                int nHash = 0;
                for (int j=0; j<L[i].size(); j++) {
                        nHash = ((nHash * r) % q + L[i][j]) % q;
                }
                map1[nHash].push_back(i);
        }

        vector<int> total(itemSize);
        vector<int> revMap(wndSize);
        vector<pair<int,bool> > map2(wndSize);
        int head2 = 0;
        int weight = 1;

        for (int i=1; i<itemSize; i++)
                weight = (weight * r) % q;

        int nHash = 0;
        for (int i=0; i<itemSize-1; i++)
                nHash = ((nHash * r) % q + S[i]) % q;

        for (int i=itemSize-1; i<S.size(); i++) {
                const int group = i % itemSize;
                const int offset = group * L.size();

                if (map2[head2].first) {
                        revMap[offset + map2[head2].first - 1] = 0;
                        map2[head2].first = 0;
                        map2[head2].second = false;
                        --total[group];
                }

                nHash = ((nHash * r) % q + S[i]) % q;
                map<int, vector<int> >::iterator iter = map1.find(nHash);
                if (iter != map1.end()) {
                        int pos = -1;
                        bool compare = true;
                        const vector<int> &val = (*iter).second;
                        if (val.size() == 1) {
                                const int entry = revMap[offset + val[0]];
                                if (!entry ||
                                        !S.compare(i-itemSize+1, itemSize, L[val[0]])) {
                                        pos = 0;
                                        compare = !!entry;
                                }
                        }
                        else {
                                compare = true;
                                int min = i+1;
                                for (int j=0; j<val.size(); j++) {
                                        if (!S.compare(i-itemSize+1, itemSize, L[val[j]])) {
                                                const int entry = revMap[offset + val[j]];
                                                if (!entry) {
                                                        pos = j;
                                                        break;
                                                }
                                                else if (entry < min) {
                                                        min = entry;
                                                        pos = j;
                                                }
                                        }
                                }
                        }
                        
                        if (pos != -1) {
                                const int entry = revMap[offset + val[pos]];

                                revMap[offset + val[pos]] = i + 1;
                                map2[head2].first = val[pos] + 1;
                                map2[head2].second= compare;
                                total[group]++;

                                if (entry) {
                                        const int entry2 = (head2 - i + entry - 1 + wndSize) % wndSize;
                                        map2[entry2].first = 0;
                                        map2[entry2].second= false;
                                        total[group]--;
                                }

                                if (total[group] == L.size()) {
                                        for (int j=0; j<L.size(); j++) {
                                                const int entry2 = (head2 - i + revMap[offset+j] - 1 + wndSize) % wndSize;
                                                if (!map2[entry2].second) {
                                                        if (!S.compare(revMap[offset+j]-itemSize, itemSize, L[j])) {
                                                                map2[entry2].second = true;
                                                        }
                                                        else {
                                                                revMap[offset+j] = 0;
                                                                map2[entry2].first = 0;
                                                                map2[entry2].second= false;
                                                                total[group]--;
                                                                break;
                                                        }
                                                }
                                        }

                                        if (total[group] == L.size())
                                                result.push_back(i-wndSize+1);
                                }
                        }
                }
                
                nHash = ((nHash - (S[i-itemSize+1] * weight) % q) % q + q) % q;
                head2 = (head2 + 1) % wndSize;
        }

        return result;
    }
};

【上篇】Remove Nth Node From End of List — leetcode
【下篇】Longest Substring Without Repeating Characters — leetcode

作者: cripple

该日志由 cripple 于6年前发表在综合分类下，最后更新于 2018年10月19日.
转载请注明: Substring with Concatenation of All Words — leetcode | 学步园 +复制链接

抱歉!评论已关闭.

学步园

Substring with Concatenation of All Words — leetcode

作者: cripple

书签

最新文章New

本站推荐

返回首页