现在的位置: 首页 > 综合 > 正文

CI20.8–多模式串匹配问题

2018年02月23日 ⁄ 综合 ⁄ 共 3402字 ⁄ 字号 评论关闭

给定一个目标串T和若干个模式串P,设计一个算法去匹配每一个模式串。

思路:

多模式串匹配问题(设m为目标串的长度,k为模式串的个数,n为模式串的平均长度)。可以用后缀trie树,时间复杂度为O(m^2 + kn)。利用AC自动机的时间复杂度为O(m + kn + z)(其中z为模式串在T中出现的总次数)。还可以用后缀树,后缀树的方法比较复杂,这里不做介绍。

下面是后缀trie树的代码

数组形式:

#include <iostream>
#include <string>
#include <vector>
using namespace std;

// Suffix trie over the lowercase alphabet 'a'..'z', stored as a flat table.
//
// Node k owns the CLD consecutive entries trie[k * CLD] .. trie[k * CLD + CLD - 1].
// Entry c of a node holds the index of the child reached by letter 'a' + c,
// or -1 when that child does not exist. Node 0 is the root. A fresh row is
// appended whenever a node is created, so the table always holds exactly
// (size + 1) * CLD entries and every node index stored in it is valid.
class Trie
{
public:
	static const int CLD = 26;   // alphabet size: one child slot per lowercase letter
	int size;                    // number of nodes created so far, not counting the root
	std::vector<int> trie;       // flat child table, CLD entries per node

	// Builds the trie of all suffixes of s, so that Search(p) answers
	// "does p occur somewhere in s?".
	Trie(const std::string& s)
		: size(0), trie(CLD, -1)   // start with the root row only
	{
		// substr(i) yields the complete suffix starting at i, including the
		// last character of the text.
		for (std::string::size_type i = 0; i < s.size(); ++i)
			Insert(s.substr(i));
	}

	// Adds s to the trie, creating missing nodes along its path.
	//
	// The table has no slot for characters outside 'a'..'z', so insertion
	// stops at the first such character; the prefix before it stays stored.
	// For suffix insertion this is the desired result: no lowercase-only
	// pattern can extend across an unsupported character.
	void Insert(const std::string& s)
	{
		int index = 0;
		for (std::string::size_type i = 0; i < s.size(); ++i)
		{
			if (!IsLetter(s[i]))
				return;
			const std::vector<int>::size_type slot = Slot(index, s[i]);
			if (trie[slot] == -1)
			{
				trie[slot] = ++size;
				// Give the new node its own row of empty child slots.
				trie.resize(trie.size() + CLD, -1);
			}
			index = trie[slot];
		}
	}

	// Returns true when s is a prefix of some inserted string (for a trie
	// built by the constructor: when s is a substring of the text).
	// Returns false for the empty string and for any string containing a
	// character outside 'a'..'z'.
	bool Search(const std::string& s) const
	{
		if (s.empty())
			return false;
		int index = 0;
		for (std::string::size_type i = 0; i < s.size(); ++i)
		{
			if (!IsLetter(s[i]))
				return false;
			index = trie[Slot(index, s[i])];
			if (index == -1)
				return false;
		}
		return true;
	}

private:
	// True when c has a child slot in the table.
	static bool IsLetter(char c)
	{
		return c >= 'a' && c <= 'z';
	}

	// Position in the flat table of node's child slot for letter c.
	static std::vector<int>::size_type Slot(int node, char c)
	{
		return static_cast<std::vector<int>::size_type>(node) * CLD + (c - 'a');
	}
};

// Demo driver: builds the suffix trie of a sample text and prints, one per
// line, whether each sample pattern occurs in it (1) or not (0).
// Declared as int main() because the C++ standard requires main to return int.
int main()
{
	const std::string s("mississipi");
	std::cout << s.size() << std::endl;
	Trie trie(s);
	std::vector<std::string> svec;
	svec.push_back("is");
	svec.push_back("sip");
	svec.push_back("hi");
	svec.push_back("sis");
	svec.push_back("mississippa");
	// Iterate over the actual pattern count rather than a hard-coded 5.
	for (std::vector<std::string>::size_type i = 0; i < svec.size(); ++i)
		std::cout << trie.Search(svec[i]) << std::endl;
	return 0;
}

树的形式:

#include <iostream>
#include <string>
#include <vector>
#include <assert.h>
using namespace std;

// Number of child slots per trie node: one for each letter 'a'..'z'.
const int CLD = 26;

// One node of the pointer-based trie.
struct TNode
{
	// pcld[c] is the child reached by letter 'a' + c, or NULL if absent.
	std::vector<TNode*> pcld;

	// Creates a node without children: all CLD slots start out as NULL.
	TNode()
		: pcld(CLD)
	{
	}
};

// Adds the word s to the trie rooted at root, allocating any node that is
// missing along its path.
//
// root must be non-NULL and s non-empty (both checked with assert). Each
// character is mapped to a child slot as (character - 'a'), so s is expected
// to consist of the letters 'a'..'z' only.
void Insert(TNode*& root, const std::string& s)
{
	assert(root != NULL && s.size() > 0);
	TNode* node = root;
	for (std::string::const_iterator it = s.begin(); it != s.end(); ++it)
	{
		// Reference to the slot itself, so a missing child can be filled in place.
		TNode*& child = node->pcld[*it - 'a'];
		if (child == NULL)
			child = new TNode();
		node = child;
	}
}

// Reports whether s can be spelled by walking down from root, i.e. whether s
// is a prefix of some inserted word (for a trie holding all suffixes of a
// text: whether s is a substring of that text).
//
// root must be non-NULL and s non-empty (both checked with assert).
// Returns false as soon as a character has no matching child; a character
// outside 'a'..'z' has no child slot at all, so it also yields false.
bool Search(TNode* root, const std::string& s)
{
	assert(root != NULL && s.size() > 0);
	const TNode* node = root;
	for (std::string::size_type i = 0; i < s.size(); ++i)
	{
		const char c = s[i];
		// Guard the index: only 'a'..'z' map into pcld.
		if (c < 'a' || c > 'z')
			return false;
		node = node->pcld[c - 'a'];
		if (node == NULL)
			return false;
	}
	return true;
}

// Demo driver: inserts every suffix of a sample text into a pointer-based
// trie and prints, one per line, whether each sample pattern occurs in the
// text (1) or not (0).
// Declared as int main() because the C++ standard requires main to return int.
int main()
{
	const std::string s("mississipi");
	TNode* root = new TNode();
	for (std::string::size_type i = 0; i < s.size(); ++i)
	{
		std::string sub(s, i);   // suffix of s starting at position i
		Insert(root, sub);
	}
	std::vector<std::string> svec;
	svec.push_back("is");
	svec.push_back("sip");
	svec.push_back("hi");
	svec.push_back("sis");
	svec.push_back("mississippa");
	// Iterate over the actual pattern count rather than a hard-coded 5.
	for (std::vector<std::string>::size_type i = 0; i < svec.size(); ++i)
		std::cout << Search(root, svec[i]) << std::endl;
	return 0;
}

以下是AC自动机代码:

#include <iostream>
#include <string>
#include <vector>
#include <queue>
#include <assert.h>
using namespace std;

// Number of child slots per automaton node: one for each letter 'a'..'z'.
const int CLD = 26;

// One node of the Aho-Corasick automaton.
struct TNode
{
	// pcld[c] is the child reached by letter 'a' + c, or NULL if absent.
	std::vector<TNode*> pcld;
	// Failure link: the node to fall back to when no child matches.
	// Starts as NULL so it never holds an indeterminate pointer.
	TNode* fail;
	// True when the path from the root to this node spells a whole pattern.
	bool tag;

	// Creates a node with no children, no failure link and no pattern mark.
	TNode()
		: pcld(CLD), fail(NULL), tag(false)
	{
	}
};

// Adds the pattern s to the trie rooted at root, allocating any node that is
// missing along its path, and marks the final node as the end of a pattern.
//
// root must be non-NULL and s non-empty (both checked with assert). Each
// character is mapped to a child slot as (character - 'a'), so s is expected
// to consist of the letters 'a'..'z' only.
void Insert(TNode*& root, const std::string& s)
{
	assert(root != NULL && s.size() > 0);
	TNode* node = root;
	for (std::string::const_iterator it = s.begin(); it != s.end(); ++it)
	{
		// Reference to the slot itself, so a missing child can be filled in place.
		TNode*& child = node->pcld[*it - 'a'];
		if (child == NULL)
			child = new TNode();
		node = child;
	}
	node->tag = true;
}

// Computes the failure link of every node reachable from root.
//
// Nodes are visited level by level through a queue, so a node's own failure
// link is already set by the time its children are processed. For a child
// reached by letter c, the parent's failure chain is followed until a node
// with a c-child is found; that c-child becomes the failure target. When the
// chain runs out (NULL), the child falls back to root. The root's own
// failure link is NULL.
//
// root must be non-NULL (checked with assert).
void Build(TNode*& root)
{
	assert(root != NULL);
	root->fail = NULL;

	std::queue<TNode*> pending;
	pending.push(root);
	while (!pending.empty())
	{
		TNode* parent = pending.front();
		pending.pop();
		for (int c = 0; c < CLD; ++c)
		{
			TNode* child = parent->pcld[c];
			if (child != NULL)
			{
				// Walk up the failure chain until some node can consume letter c.
				TNode* candidate = parent->fail;
				while (candidate != NULL && candidate->pcld[c] == NULL)
					candidate = candidate->fail;
				child->fail = (candidate == NULL) ? root : candidate->pcld[c];
				pending.push(child);
			}
		}
	}
}

// Scans the text s with the automaton rooted at root and returns the total
// number of pattern occurrences found (a pattern that occurs twice is counted
// twice, and overlapping occurrences are all counted).
//
// root must be non-NULL and s non-empty (both checked with assert). The
// failure links are followed here, so Build(root) must have been called
// after the last Insert.
//
// Characters outside 'a'..'z' have no child slot. No pattern can contain
// one, so such a character simply resets the automaton to the root instead
// of indexing outside pcld.
int Search(TNode* root, const std::string& s)
{
	assert(root != NULL && s.size() > 0);
	TNode* state = root;
	int res = 0;
	for (std::string::size_type i = 0; i < s.size(); ++i)
	{
		const char c = s[i];
		if (c < 'a' || c > 'z')
		{
			state = root;
			continue;
		}
		const int j = c - 'a';
		// Fall back along failure links until letter j can be consumed
		// or the root is reached.
		while (state != root && state->pcld[j] == NULL)
			state = state->fail;
		state = state->pcld[j];
		if (state == NULL)
			state = root;
		// Every tagged node on the failure chain is a pattern ending at i.
		for (const TNode* p = state; p != root; p = p->fail)
		{
			if (p->tag)
				++res;
		}
	}

	return res;
}

// Demo driver: inserts the sample patterns into an Aho-Corasick automaton,
// builds its failure links and prints the number of pattern occurrences
// found in the sample text.
// Declared as int main() because the C++ standard requires main to return int.
int main()
{
	const std::string s("missisip");
	TNode* root = new TNode();
	std::vector<std::string> svec;
	svec.push_back("is");
	svec.push_back("sip");
	svec.push_back("ssis");
	svec.push_back("sis");
	svec.push_back("missisip");
	svec.push_back("ip");
	for (std::vector<std::string>::size_type i = 0; i < svec.size(); ++i)
		Insert(root, svec[i]);
	// Failure links must be computed after all patterns are inserted.
	Build(root);
	std::cout << Search(root, s) << std::endl;
	return 0;
}

抱歉!评论已关闭.