[HNOI2006]最短母串问题——AC自动机+状压+bfs环形处理-白红宇

[HNOI2006]最短母串问题——AC自动机+状压+bfs环形处理

阅读量：7118 次

发布时间：2019-06-28

本文共 7108 字，大约阅读时间需要 23 分钟。

Description

给定n个字符串（S1,S2,„,Sn），要求找到一个最短的字符串T，使得这n个字符串（S1,S2,„,Sn）都是T的子串。

32MB

Input

第一行是一个正整数n（n<=12），表示给定的字符串的个数。

以下的n行，每行有一个全由大写字母组成的字符串。每个字符串的长度不超过50.

Output

只有一行，为找到的最短的字符串T。在保证最短的前提下，

如果有多个字符串都满足要求，那么必须输出按字典序排列的第一个。

Sample Input

ABCD

BCDABC

Sample Output

ABCDABC

Solution

一看是一个AC自动机。

一看是一个状压。

一看AC自动机节点再记录一个has包含的字符串集合。

一看要输出方案，肯定也要先考虑怎么弄出最短的长度。

f[i][(1<<n)-1]表示，匹配到AC自动机上的i点，包含的字符串集合为。。。的最短长度。

一看转移有环，然后无法再加入新的阶段，因为会MLE会TLE

所以要环形处理。

一看是一个取min的do

所以可以考虑最短路。

dij，spfa复杂度卡不过。

一看边权只有1……

BFS大法吼！

长度OK

怎么处理方案？

ywy_c_asm:

一遍bfs求出最短距离len，然后再一遍dfs找方案。

dfs时，相当于再把bfs的最短路怎么来的再访问一遍。如果dis[y]=dis[x]+1那么可以转移的，才可以访问。

还需要知道一个点到终点的最短路。

（反向多起点BFS???不行，或运算不可逆）

我们dfs时就可以实现的。类似树形dp

然后如果一个点到一个(1<<n)-1状态的点距离为juli的话，如果有dis[x]+juli[x]==len，那么，这次选择的这个y，所填的字符，就是最终答案的一个字符。

直接加入答案字符串。

char从A~Z枚举。保证第一次搜到的是字典序最小的。

而且一定是连续加入ans字符串。

dfs开头放上，如果tot==n return；

代码：

#include
      
       using namespace std;typedef long long ll;const int N=13;const int M=600;const int U=11*50*((1<<12)-1)+100;const int inf=0x3f3f3f3f;int n;char s[55];struct trie{    int fail[M],ch[M][26];    int has[M];    int cnt;    void ins(char *s,int l,int id){        int now=0;        for(int i=1;i<=l;i++){            int x=s[i]-'A';            if(!ch[now][x]) ch[now][x]=++cnt;            now=ch[now][x];        }        has[now]|=(1<<(id-1));    }    void build(){        queue
       
        q;        for(int i=0;i<=25;i++){            if(ch[0][i]) fail[ch[0][i]]=0,q.push(ch[0][i]);        }        while(!q.empty()){            int x=q.front();q.pop();            has[x]|=has[fail[x]];            for(int i=0;i<=25;i++){                if(ch[x][i]){                    fail[ch[x][i]]=ch[fail[x]][i];                    q.push(ch[x][i]);                }                else ch[x][i]=ch[fail[x]][i];            }        }    }}ac;int get(int ptr,int st){    return ptr*(1<
        
         q;void bfs(){        memset(dis,0x3f,sizeof dis);    int str=get(0,0);    dis[str]=0;    vis[str]=1;    node nn;nn.P=0,nn.S=0;    q.push(nn);    while(!q.empty()){        node lp=q.front();q.pop();        for(int i=0;i<=25;i++){            int to=ac.ch[lp.P][i];            int NS=lp.S|ac.has[ac.ch[lp.P][i]];            int NID=get(to,NS);            if(!vis[NID]){                dis[NID]=dis[get(lp.P,lp.S)]+1;                vis[NID]=1;                node kk;                kk.P=to;kk.S=NS;                q.push(kk);            }        }    }}int len;int tot;char ans[M];int juli[U];void dfs(int ptr,int st){        int now=get(ptr,st);    juli[now]=inf;        if(tot==len) return;        if(st==(1<
         
          =1;i--){        printf("%c",ans[i]);    }    return 0;}

但是不够优美。

为什么要bfs然后再dfs呢？

bfs也可以求前驱啊！！

bfs时，第一更新到的就是最短路。

如果我们char A~Z，那么更新到的char

也就叫from[y]，也就是到y这个点所形成的字典序最小字符串最后一个字符。

记录from，pre（也就是前驱）

bfs后，先找到len

再把所有f[i][(1<<n)-1]的字符找出来，cmp一下。

反正复杂度不超过600*600

代码：

#include
      
       using namespace std;typedef long long ll;const int N=13;const int M=600;const int U=11*50*((1<<12)-1)+100;const int inf=0x3f3f3f3f;int n;char s[55];struct trie{    int fail[M],ch[M][26];    int has[M];    int cnt;    void ins(char *s,int l,int id){        int now=0;        for(int i=1;i<=l;i++){            int x=s[i]-'A';            if(!ch[now][x]) ch[now][x]=++cnt;            now=ch[now][x];        }        has[now]|=(1<<(id-1));    }    void build(){        queue
       
        q;        for(int i=0;i<=25;i++){            if(ch[0][i]) fail[ch[0][i]]=0,q.push(ch[0][i]);        }        while(!q.empty()){            int x=q.front();q.pop();            has[x]|=has[fail[x]];            for(int i=0;i<=25;i++){                if(ch[x][i]){                    fail[ch[x][i]]=ch[fail[x]][i];                    q.push(ch[x][i]);                }                else ch[x][i]=ch[fail[x]][i];            }        }    }}ac;int get(int ptr,int st){    return ptr*(1<
        
         q;int pre[U];int from[U];void bfs(){    memset(dis,0x3f,sizeof dis);    int str=get(0,0);    dis[str]=0;    vis[str]=1;    pre[str]=-1;//warning!!    node nn;nn.P=0,nn.S=0;    q.push(nn);    while(!q.empty()){        node lp=q.front();q.pop();        for(int i=0;i<=25;i++){            int to=ac.ch[lp.P][i];            int NS=lp.S|ac.has[ac.ch[lp.P][i]];            int NID=get(to,NS);            if(!vis[NID]){                dis[NID]=dis[get(lp.P,lp.S)]+1;                vis[NID]=1;                from[NID]=i+1;//warning!!!!                pre[NID]=get(lp.P,lp.S);                node kk;                kk.P=to;kk.S=NS;                q.push(kk);            }        }    }}int len;int tot;char ans[M];char a[M];bool fl;bool cmp(char *a,char *b){
    //a better than b?    for(int i=1;i<=len;i++){        if(a[i]
         
          b[i]) return 0;    }}int main(){    scanf("%d",&n);    for(int i=1;i<=n;i++){        scanf("%s",s+1);        int l=strlen(s+1);        ac.ins(s,l,i);        }    ac.build();    bfs();    len=inf;    for(int i=0;i<=ac.cnt;i++){        int id=get(i,(1<

但是还不够优美！！

为什么bfs之后还要再比较一遍字符串呢？？

bfs中，第一次到达一个(1<<n)-1的点,

这个点就一定是最优解的最后一个节点！！！

因为，bfs分层图保证了最短。

for char A~Z保证了字典序最优。

直接输出即可。

代码：

#include
      
       using namespace std;typedef long long ll;const int N=13;const int M=600;const int U=11*50*((1<<12)-1)+100;const int inf=0x3f3f3f3f;int n;char s[55];struct trie{    int fail[M],ch[M][26];    int has[M];    int cnt;    void ins(char *s,int l,int id){        int now=0;        for(int i=1;i<=l;i++){            int x=s[i]-'A';            if(!ch[now][x]) ch[now][x]=++cnt;            now=ch[now][x];        }        has[now]|=(1<<(id-1));    }    void build(){        queue
       
        q;        for(int i=0;i<=25;i++){            if(ch[0][i]) fail[ch[0][i]]=0,q.push(ch[0][i]);        }        while(!q.empty()){            int x=q.front();q.pop();            has[x]|=has[fail[x]];            for(int i=0;i<=25;i++){                if(ch[x][i]){                    fail[ch[x][i]]=ch[fail[x]][i];                    q.push(ch[x][i]);                }                else ch[x][i]=ch[fail[x]][i];            }        }    }}ac;int get(int ptr,int st){    return ptr*(1<
        
         q;int pre[U];int from[U];int len;char ans[M];void bfs(){    memset(dis,0x3f,sizeof dis);    int str=get(0,0);    dis[str]=0;    vis[str]=1;    pre[str]=-1;//warning!!    node nn;nn.P=0,nn.S=0;    q.push(nn);    while(!q.empty()){        node lp=q.front();q.pop();        for(int i=0;i<=25;i++){            int to=ac.ch[lp.P][i];            int NS=lp.S|ac.has[ac.ch[lp.P][i]];            int NID=get(to,NS);            if(!vis[NID]){                dis[NID]=dis[get(lp.P,lp.S)]+1;                vis[NID]=1;                from[NID]=i+1;//warning!!!!                pre[NID]=get(lp.P,lp.S);                node kk;                kk.P=to;kk.S=NS;                q.push(kk);                if(NS==(1<
         
          =1;i--) printf("%c",ans[i]);    return 0;}