现在的位置: 首页 > 综合 > 正文

POJ2778—DNA Sequence(AC自动机+矩阵)

2019年02月13日 ⁄ 综合 ⁄ 共 3398字 ⁄ 字号 评论关闭

Description
It’s well known that DNA Sequence is a sequence only contains A, C, T and G, and it’s very useful to analyze a segment of DNA Sequence,For example, if a animal’s DNA sequence contains segment ATC then it may mean that the animal may have a genetic disease. Until now scientists have found several those segments, the problem is how many kinds of DNA sequences of a species don’t contain those segments.

Suppose that DNA sequences of a species is a sequence that consist of A, C, T and G,and the length of sequences is a given integer n.

Input
First line contains two integer m (0 <= m <= 10), n (1 <= n <=2000000000). Here, m is the number of genetic disease segment, and n is the length of sequences.

Next m lines each line contain a DNA genetic disease segment, and length of these segments is not larger than 10.

Output
An integer, the number of DNA sequences, mod 100000.

Sample Input

4 3
AT
AC
AG
AA

Sample Output

36

Source
POJ Monthly–2006.03.26,dodo

比较简单的自动机dp,由于n很大,用矩阵来加快转移

/*************************************************************************
    > File Name: POJ2778.cpp
    > Author: ALex
    > Mail: zchao1995@gmail.com 
    > Created Time: 2015年03月10日 星期二 21时21分26秒
 ************************************************************************/

#include <map>
#include <set>
#include <queue>
#include <stack>
#include <vector>
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <algorithm>
using namespace std;

const double pi = acos(-1);
const int inf = 0x3f3f3f3f;
const double eps = 1e-15;
typedef long long LL;
typedef pair <int, int> PLL;

const int mod = 100000;
const int MAX_NODE = 110;
const int CHILD_NUM = 4;

struct MARTIX
{
    LL mat[MAX_NODE][MAX_NODE];
};

MARTIX mul(MARTIX a, MARTIX b, int L)
{
    MARTIX c;
    for (int i = 0; i < L; ++i)
    {
        for (int j = 0; j < L; ++j)
        {
            c.mat[i][j] = 0;
            for (int k = 0; k < L; ++k)
            {
                c.mat[i][j] += a.mat[i][k] * b.mat[k][j];
                c.mat[i][j] %= mod;
            }
        }
    }
    return c;
}

MARTIX fastpow(MARTIX ret, int n, int L)
{
    MARTIX ans;
    for (int i = 0; i < L; ++i)
    {
        for (int j = 0; j < L; ++j)
        {
            ans.mat[i][j] = (i == j);
        }
    }
    while (n)
    {
        if (n & 1)
        {
            ans = mul(ans, ret, L);
        }
        n >>= 1;
        ret = mul(ret, ret, L);
    }
    return ans;
}

struct AC_Automation
{
    int next[MAX_NODE][CHILD_NUM];
    int fail[MAX_NODE];
    int end[MAX_NODE];
    int root, L;

    int newnode()
    {
        for (int i = 0; i < CHILD_NUM; ++i)
        {
            next[L][i] = -1;
        }
        end[L++] = 0;
        return L - 1;
    }

    void init()
    {
        L = 0;
        root = newnode();
    }

    int ID(char c)
    {
        if (c == 'A')
        {
            return 0;
        }
        if (c == 'G')
        {
            return 1;
        }
        if (c == 'C')
        {
            return 2;
        }
        if (c == 'T')
        {
            return 3;
        }
    }

    void Build_Trie(char buf[])
    {
        int now = root;
        int len = strlen(buf);
        for (int i = 0; i < len; ++i)
        {
            if (next[now][ID(buf[i])] == -1)
            {
                next[now][ID(buf[i])] = newnode();
            }
            now = next[now][ID(buf[i])];
        }
        end[now] = 1;
    }

    void Build_AC()
    {
        queue <int> qu;
        fail[root] = root;
        for (int i = 0; i < CHILD_NUM; ++i)
        {
            if (next[root][i] == -1)
            {
                next[root][i] = root;
            }
            else
            {
                fail[next[root][i]] = root;
                qu.push(next[root][i]);
            }
        }
        while (!qu.empty())
        {
            int now = qu.front();
            qu.pop();
            if (end[fail[now]])
            {
                end[now] = 1;
            }
            for (int i = 0; i < CHILD_NUM; ++i)
            {
                if (next[now][i] == -1)
                {
                    next[now][i] = next[fail[now]][i];
                }
                else
                {
                    fail[next[now][i]] = next[fail[now]][i];
                    qu.push(next[now][i]);
                }
            }
        }
    }

    void solve(int n)
    {
        MARTIX c;
        for (int i = 0; i < L; ++i)
        {
            for (int j = 0; j < L; ++j)
            {
                c.mat[i][j] = 0;
            }
        }
        for (int i = 0; i < L; ++i)
        {
            if(end[i])
            {
                continue;
            }
            for (int j = 0; j < CHILD_NUM; ++j)
            {
                if (end[next[i][j]])
                {
                    continue;
                }
                ++c.mat[i][next[i][j]];
            }
        }
        MARTIX x = fastpow(c, n, L); 
        LL ans = 0;
        for (int i = 0; i < L; ++i)
        {
            if (!end[i])
            {
                ans += x.mat[0][i];
                ans %= mod;
            }
        }
        printf("%lld\n", ans);
    }
}AC;

char buf[20];

int main ()
{
    int m, n;
    while (~scanf("%d%d", &m, &n))
    {
        AC.init();
        for (int i = 1; i <= m; ++i)
        {
            scanf("%s", buf);
            AC.Build_Trie(buf);
        }
        AC.Build_AC();
        AC.solve(n);
    }
    return 0;
}

抱歉!评论已关闭.