现在的位置: 首页 > 综合 > 正文

Poj 3294 Life Forms (字符串_后缀树组)

2013年10月19日 ⁄ 综合 ⁄ 共 2424字 ⁄ 字号 评论关闭

题目链接:http://poj.org/problem?id=3294

题目大意:给定n个字符串,找出至少出现在n/2个字符串中的最长子串。

解题思路:本题可用后缀数组解决。先用倍增算法算出sa数组、rank数组再算出height数组,最后二分枚举长度,看是否符合出现在n/2个字符串中。题目并不难,但比较麻烦,首先要记录最长的字符串,作为二分查找的上届,然后在二分的时候要对height数组进行分组,将大于mid的分一组,看着一组中的子串都在哪些字符串中,用vis标记出现过的字符串,以免重复。还有一个地方要注意的是中间的分隔尽量不要一样,我就用了96个字符进行分隔,这可大幅度减少错误。

测试数据:

3
aaa
aaa

aaa


4
abcd
efgh
muck

nstv


3
abc
bcd

cde


3
abcdefg
bcdefgh

cdefghi


3
xxx
yyy
zzz


代码:

#include <stdio.h>
#include <string.h>
#define MIN 110
#define MAX 1100
#define INF 110000


char str[MIN][MAX];
int  ans[MAX],tot,minlen,maxlen;
int  sa[INF],ra[INF],h[INF],len;
int  n,arr[INF],in[INF],vis[MIN];
int  wa[INF],wb[INF],wv[INF],wn[INF];


int cmp(int *r,int ra,int rb,int l){

	return r[ra] == r[rb] && r[ra+l] == r[rb+l];
}
void Da(int *r,int n,int m) {

	int i,j,k,p,*t;
	int *x = wa,*y = wb;


	for (i = 0; i < m; ++i) wn[i] = 0;
	for (i = 0; i < n; ++i) wn[x[i]=r[i]]++;
	for (i = 1; i < m; ++i) wn[i] += wn[i-1];
	for (i = n - 1; i >= 0; --i) sa[--wn[x[i]]] = i;


	for (j = 1,p = 1; p < n; j *= 2,m = p) {

		for (p = 0,i = n - j; i < n; ++i) y[p++] = i;
		for (i = 0; i < n; ++i) if (sa[i] >= j) y[p++] = sa[i] - j;


		for (i = 0; i < n; ++i) wv[i] = x[y[i]];
		for (i = 0; i < m; ++i) wn[i] = 0;
		for (i = 0; i < n; ++i) wn[wv[i]]++;
		for (i = 1; i < m; ++i) wn[i] += wn[i-1];
		for (i = n - 1; i >= 0; --i) sa[--wn[wv[i]]] = y[i];


		t = x,x = y,y = t,p = 1;
		for (x[sa[0]] = 0,i = 1; i < n; ++i)
			x[sa[i]] = cmp(y,sa[i-1],sa[i],j) ? p - 1 : p++;
	}
}
void CalHeight(int *r,int n) {

	int i,j,k = 0;
	for (i = 1; i <= n; ++i) ra[sa[i]] = i;
	for (i = 0; i < n; h[ra[i++]] = k)
		for (k ? k-- : 0,j = sa[ra[i]-1]; r[i+k] == r[j+k]; k++);
}

void Solve(int *r,int len,int *ans,int minlen) {

	int i,j,num,beg,cnt,flag;
	int low,high,mid,half;


	maxlen = 0;
	low = 1,high = minlen;
	half = n / 2.0;
	while (low <= high) {

		cnt = 0;
		mid = low + (high - low) / 2;
		for (i = 1; i <= len; ++i){

			if (h[i] < mid) {

				memset(vis,0,sizeof(vis));
				vis[in[sa[i]]] = 1,flag = 0;				//flag标记使得当前开始的答案只记录一次
				num = 1,beg = sa[i+1];					    //beg从哪个地方开始,num表示子串出现在几个字符串中
			}
			else{

				j = sa[i];
				if (vis[in[j]] == 0 && arr[j] != '$')
					num++,vis[in[j]] = 1;
				if (num > half && !flag) 
					ans[cnt++] = beg,flag = 1;
			}
		}
		if (cnt != 0) {
			//符合情况
			maxlen = mid;
			tot = cnt;
			low = mid + 1;
		}
		else high = mid - 1;
	}
}


int main()
{
	int i,j,k,c;

	
	scanf("%d",&n);
	do{

		minlen = 0,tot = 0,c = 0;
		memset(in,0,sizeof(in));
		memset(arr,0,sizeof(arr));
		memset(ra,0,sizeof(ra));
		for (i = 0; i < n; ++i) {

			scanf("%s",str[i]);
			if (strlen(str[i]) > minlen);
				 minlen = strlen(str[i]);
		}
		for (i = k = 0; i < n; ++i) {

			for (j = 0; str[i][j]; ++j)
				arr[k] = str[i][j],in[k++] = i;
			c = (c + 1) % 96,arr[k++] = c;
		}

			
		len = k - 1;
		Da(arr,len + 1,150);
		CalHeight(arr,len);
		
		
		Solve(arr,len,ans,minlen);
		if (n == 1) printf("%s\n",str[0]);
		else if (maxlen == 0) printf("?\n");
		else {
			
			for (i = 0; i < tot; ++i)
				for (j = ans[i]; j < ans[i] + maxlen; ++j)
					printf(j == (ans[i] + maxlen - 1) ? "%c\n":"%c",arr[j]);
		}


		scanf("%d",&n);
		if (n != 0) printf("\n");
	}while (n);
}

本文ZeroClock原创,但可以转载,因为我们是兄弟。

抱歉!评论已关闭.