这章主要研究的是一个有限状态机(finite state machine),简称FSM。当其获得一个字符时,FSM将发生转换,即从当前状态转换到另一状态。有限状态机(FSM)是一种抽象的机制,它在任意时刻都处于有限状态集合中的某一个状态。
我们研究的一个FSM的转换规则是(不能上图,口述下): {1,'A',2},{1,'B',3},{1,'C',4},{1,'D',5},{2,'E',2},{2,'I',0},{3,'F',3},{3,'J',0},{3,'M',4},{4,'G',4},{4,'K',0},{5,'H',5},{5,'L',0},{5,'O',2},{5,'N',4},{0,0,0},即在开始状态1时:输入A到状态2,B到状态3,0为结束状态。程序的目的是判断一个字符串是否能在FSM下由开始状态走到结束状态。
程序1 最初的FSM程序
- #include <string.h>
- #include <stdio.h>
- struct parent
- {
- static char* expression;
- static int index;
- static int end_state;
- static int doom_state;
- parent(char* expr);
- virtual parent* transition() {}
- };
- parent::parent(char* expr)
- {
- expression = new char[strlen(expr)];
- strcpy(expression,expr);
- end_state = 0;
- doom_state = 0;
- index = 0;
- }
- struct state1:public parent
- {
- parent *ptr2,*ptr3,*ptr4,*ptr5;
- state1():parent(expression) {}
- parent* transition();
- };
- struct state2:public parent
- {
- parent *ptr2;
- state2():parent(expression) {}
- parent* transition();
- };
- struct state3:public parent
- {
- parent *ptr3,*ptr4;
- state3():parent(expression) {}
- parent* transition();
- };
- struct state4:public parent
- {
- parent *ptr4;
- state4():parent(expression) {}
- parent* transition();
- };
- struct state5:public parent
- {
- parent *ptr2,*ptr4,*ptr5;
- state5():parent(expression) {}
- parent* transition();
- };
- parent* state1::transition()
- {
- switch(expression[index++])
- {
- case 'A':
- return ptr2;
- case 'B':
- return ptr3;
- case 'C':
- return ptr4;
- case 'D':
- return ptr5;
- case '/0':
- doom_state = 1;
- default:
- doom_state = 1;
- }
- }
- parent* state2::transition()
- {
- switch(expression[index++])
- {
- case 'E':
- return ptr2;
- case 'I':
- end_state = 1;
- break;
- case '/0':
- doom_state = 1;
- default:
- doom_state = 1;
- }
- }
- parent* state3::transition()
- {
- switch(expression[index++])
- {
- case 'F':
- return ptr3;
- case 'M':
- return ptr4;
- case 'J':
- end_state = 1;
- break;
- case '/0':
- doom_state = 1;
- default:
- doom_state = 1;
- }
- }
- parent* state4::transition()
- {
- switch(expression[index++])
- {
- case 'G':
- return ptr4;
- case 'K':
- end_state = 1;
- break;
- case '/0':
- doom_state = 1;
- default:
- doom_state = 1;
- }
- }
- parent* state5::transition()
- {
- switch(expression[index++])
- {
- case 'O':
- return ptr2;
- case 'H':
- return ptr5;
- case 'L':
- end_state = 1;
- break;
- case 'N':
- return ptr4;
- case '/0':
- doom_state = 1;
- default:
- doom_state = 1;
- }
- }
- char* parent::expression = NULL;
- int parent::doom_state = 0;
- int parent::end_state = 0;
- int parent::index = 0;
- state1 s1;
- state2 s2;
- state3 s3;
- state4 s4;
- state5 s5;
- void build_state_machine()
- {
- s1.ptr2 = &s2;
- s1.ptr3 = &s3;
- s1.ptr4 = &s4;
- s1.ptr5 = &s5;
- s2.ptr2 = &s2;
- s3.ptr3 = &s3;
- s3.ptr4 = &s4;
- s4.ptr4 = &s4;
- s5.ptr2 = &s2;
- s5.ptr4 = &s4;
- s5.ptr5 = &s5;
- }
- int main()
- {
- build_state_machine();
- char input_string[80];
- printf("Enter input expression: ");
- scanf("%s",input_string);
- parent state_machine(input_string);
- parent *ptr;
- ptr = s1.transition();
- while(ptr->end_state !=1 && ptr->doom_state != 1)
- {
- ptr = ptr->transition();
- }
- if(ptr->end_state == 1)
- printf("/nValid input expression");
- else
- printf("/nInvalid input expression");
- return 0;
- }
程序分析:
- expression = new char[strlen(expr)];这句中没有为字符串末尾的结束符'\0'留出空间,分配的长度应加1,否则strcpy会越界写入。
- 依照执行顺序,最开始执行的是全局对象s1到s5的构造函数。s1的构造函数执行后expression将指向动态开辟的内存的地址,s2执行时,expression又指向另一新开辟内存,原来开辟的内存成为垃圾内存,形成内存泄露。
- s1构造完后,本身的地址值是不变的,因而可用&s1代替ptr1等等。
-
程序中对state_machine的使用仅仅是为了执行它的构造函数,将FSM重置为初始状态。而将FSM重置为初始状态并不是初始化。FSM是一组静态变量。我们应该使用静态成员函数将FSM重置为起始状态。原则:不要使用构造函数来初始化静态数据成员。
基于上面的分析,做如下改动:
- 用静态成员函数reset()来代替parent的构造函数。
- 改正缺1错误。
- 从main()中去掉state_machine,并直接调用parent::reset()。
- 去掉每个statej的构造函数。
- 去掉statej::ptri成员,并用&si代替。
- 去掉build_state_machine()。
程序2 用reset()来代替parent::parent()
- #include <string.h>
- #include <stdio.h>
- struct parent
- {
- static char* expression;
- static int index;
- static int end_state;
- static int doom_state;
- static void reset(char* expr);
- virtual parent* transition() {}
- };
- void parent::reset(char* expr)
- {
- expression = new char[strlen(expr) + 1];
- strcpy(expression,expr);
- end_state = 0;
- doom_state = 0;
- index = 0;
- }
- struct state1:public parent
- {
- parent* transition();
- };
- struct state2:public parent
- {
- parent* transition();
- };
- struct state3:public parent
- {
- parent* transition();
- };
- struct state4:public parent
- {
- parent* transition();
- };
- struct state5:public parent
- {
- parent* transition();
- };
- char* parent::expression = NULL;
- int parent::doom_state = 0;
- int parent::end_state = 0;
- int parent::index = 0;
- state1 s1;
- state2 s2;
- state3 s3;
- state4 s4;
- state5 s5;
- parent* state1::transition()
- {
- switch(expression[index++])
- {
- case 'A':
- return &s2;
- case 'B':
- return &s3;
- case 'C':
- return &s4;
- case 'D':
- return &s5;
- case '/0':
- doom_state = 1;
- default:
- doom_state = 1;
- }
- }
- parent* state2::transition()
- {
- switch(expression[index++])
- {
- case 'E':
- return &s2;
- case 'I':
- end_state = 1;
- break;
- case '/0':
- doom_state = 1;
- default:
- doom_state = 1;
- }
- }
- parent* state3::transition()
- {
- switch(expression[index++])
- {
- case 'F':
- return &s3;
- case 'M':
- return &s4;
- case 'J':
- end_state = 1;
- break;
- case '/0':
- doom_state = 1;
- default:
- doom_state = 1;
- }
- }
- parent* state4::transition()
- {
- switch(expression[index++])
- {
- case 'G':
- return &s4;
- case 'K':
- end_state = 1;
- break;
- case '/0':
- doom_state = 1;
- default:
- doom_state = 1;
- }
- }
- parent* state5::transition()
- {
- switch(expression[index++])
- {
- case 'O':
- return &s2;
- case 'H':
- return &s5;
- case 'L':
- end_state = 1;
- break;
- case 'N':
- return &s4;
- case '/0':
- doom_state = 1;
- default:
- doom_state = 1;
- }
- }
- int main()
- {
- char input_string[80];
- printf("Enter input expression: ");
- scanf("%s",input_string);
- parent::reset(input_string);
- parent *ptr;
- ptr = s1.transition();
- while(ptr->end_state !=1 && ptr->doom_state != 1)
- {
- ptr = ptr->transition();
- }
- if(ptr->end_state == 1)
- printf("/nValid input expression");
- else
- printf("/nInvalid input expression");
- return 0;
- }
程序分析和改进:
-
main()的变量Ptr中记录了FSM的当前状态,而FSM管理着输入的字符串,这些都是对方的职责。由此造成了main和parent之间的高度耦合。我们应做到让二者各司其责:把字符串管理分离出来放到main中,当前状态放到parent中。原则:降低耦合性--将类之间的交互最小化。
- 在parent中引入静态数据成员current指向当前状态:当FSM处于消亡状态时,current=null;当FSM处于结束状态时,current=&s6。这样可以去掉原来的数据成员end_state和doom_state。用end_state()和doom_state()来测试current。
程序3 对parent和main进行解耦
- #include <string.h>
- #include <stdio.h>
- class parent
- {
- private:
- static parent *current;
- protected:
- virtual parent* transition(char) { return NULL;}
- public:
- static void reset();
- static void advance(char);
- static int end_state();
- static int doom_state();
- };
- struct state1:public parent
- {
- parent* transition(char);
- };
- struct state2:public parent
- {
- parent* transition(char);
- };
- struct state3:public parent
- {
- parent* transition(char);
- };
- struct state4:public parent
- {
- parent* transition(char);
- };
- struct state5:public parent
- {
- parent* transition(char);
- };
- struct state6:public parent
- {
- parent* transition(char);
- };
- parent* parent::current = NULL;
- state1 s1;
- state2 s2;
- state3 s3;
- state4 s4;
- state5 s5;
- state6 s6;
- void parent::reset()
- {
- current = &s1;
- }
- void parent::advance(char x)
- {
- if(current)
- current = current->transition(x);
- }
- int parent::end_state()
- {
- return current == &s6;
- }
- int parent::doom_state()
- {
- return current == NULL;
- }
- parent* state1::transition(char x)
- {
- switch(x)
- {
- case 'A':
- return &s2;
- case 'B':
- return &s3;
- case 'C':
- return &s4;
- case 'D':
- return &s5;
- default:
- return NULL;
- }
- }
- parent* state2::transition(char x)
- {
- switch(x)
- {
- case 'E':
- return &s2;
- case 'I':
- return &s6;
- default:
- return NULL;
- }
- }
- parent* state3::transition(char x)
- {
- switch(x)
- {
- case 'F':
- return &s3;
- case 'M':
- return &s4;
- case 'J':
- return &s6;
- default:
- return NULL;
- }
- }
- parent* state4::transition(char x)
- {
- switch(x)
- {
- case 'G':
- return &s4;
- case 'K':
- return &s6;
- default:
- return NULL;
- }
- }
- parent* state5::transition(char x)
- {
- switch(x)
- {
- case 'O':
- return &s2;
- case 'H':
- return &s5;
- case 'L':
- return &s6;
- case 'N':
- return &s4;
- default:
- return NULL;
- }
- }
- parent* state6::transition(char)
- {
- return NULL;
- }
- int main()
- {
- char input_string[80];
- printf("Enter input expression: ");
- scanf("%s",input_string);
- parent::reset();
- int index = 0;
- parent::advance(input_string[index++]);
- while(!parent::end_state() && !parent::doom_state())
- {
- parent::advance(input_string[index++]);
- }
- if(parent::end_state())
- printf("/nValid input expression");
- else
- printf("/nInvalid input expression");
- return 0;
- }
程序的进一步分析和改进:考虑parent和statej结构之间的继承关系:parent是含有FSM状态statej的模块,它们之间不存在"is A"的关系,更确切的说,状态集合更应该是FSM的一部分,二者是一种"has A"关系。程序中parent试图去既表示某个状态,又表示FSM(一种规则),由于parent要实现两种功能,因此程序没有很好的内聚性。解决的办法是提取出两种抽象,并将parent拆分为两个不同的类:state和fsm,即状态和规则。state仅用于表示状态的抽象基类,而fsm仅用于表示FSM模型。
原则:每个类应该只有唯一的、内聚的功能。
程序4 parent被拆分为state和fsm
- #include <string.h>
- #include <stdio.h>
- struct state
- {
- virtual state* transition(char) = 0;
- };
- class fsm
- {
- private:
- static state *current;
- public:
- static void reset();
- static void advance(char);
- static int end_state();
- static int doom_state();
- };
- struct state1:public state
- {
- state* transition(char);
- };
- struct state2:public state
- {
- state* transition(char);
- };
- struct state3:public state
- {
- state* transition(char);
- };
- struct state4:public state
- {
- state* transition(char);
- };
- struct state5:public state
- {
- state* transition(char);
- };
- struct state6:public state
- {
- state* transition(char);
- };
- state* fsm::current = NULL;
- state1 s1;
- state2 s2;
- state3 s3;
- state4 s4;
- state5 s5;
- state6 s6;
- void fsm::reset()
- {
- current = &s1;
- }
- void fsm::advance(char x)
- {
- if(current)
- current = current->transition(x);
- }
- int fsm::end_state()
- {
- return current == &s6;
- }
- int fsm::doom_state()
- {
- return current == NULL;
- }
- state* state1::transition(char x)
- {
- switch(x)
- {
- case 'A':
- return &s2;
- case 'B':
- return &s3;
- case 'C':
- return &s4;
- case 'D':
- return &s5;
- default:
- return NULL;
- }
- }
- state* state2::transition(char x)
- {
- switch(x)
- {
- case 'E':
- return &s2;
- case 'I':
- return &s6;
- default:
- return NULL;
- }
- }
- state* state3::transition(char x)
- {
- switch(x)
- {
- case 'F':
- return &s3;
- case 'M':
- return &s4;
- case 'J':
- return &s6;
- default:
- return NULL;
- }
- }
- state* state4::transition(char x)
- {
- switch(x)
- {
- case 'G':
- return &s4;
- case 'K':
- return &s6;
- default:
- return NULL;
- }
- }
- state* state5::transition(char x)
- {
- switch(x)
- {
- case 'O':
- return &s2;
- case 'H':
- return &s5;
- case 'L':
- return &s6;
- case 'N':
- return &s4;
- default:
- return NULL;
- }
- }
- state* state6::transition(char)
- {
- return NULL;
- }
- int main()
- {
- char input_string[80];
- printf("Enter input expression: ");
- scanf("%s",input_string);
- fsm::reset();
- int index = 0;
- fsm::advance(input_string[index++]);
- while(!fsm::end_state() && !fsm::doom_state())
- {
- fsm::advance(input_string[index++]);
- }
- if(fsm::end_state())
- printf("/nValid input expression");
- else
- printf("/nInvalid input expression");
写到这里,你也许觉得完美了,其实不然:fsm仍然是一个模块类,它包含的是一组通过静态函数来操作的静态变量。如果我们可以很容易的创建一个抽象数据类型(ADT),那么我们就不应该将类设计为模块形式。并且使用了ADT后,我们可以在需要的时候才进行实例化。
原则:我们应该将类设计成抽象数据类型而不是模块类。
程序5 将fsm从模块类转化为ADT
- #include <string.h>
- #include <stdio.h>
- struct state
- {
- virtual state* transition(char) = 0;
- };
- class fsm
- {
- private:
- state *current;
- public:
- void reset();
- void a dvance(char);
- int end_state();
- int doom_state();
- fsm();
- };
- struct state1:public state
- {
- state* transition(char);
- };
- struct state2:public state
- {
- state* transition(char);
- };
- struct state3:public state
- {
- state* transition(char);
- };
- struct state4:public state
- {
- state* transition(char);
- };
- struct state5:public state
- {
- state* transition(char);
- };
- struct state6:public state
- {
- state* transition(char);
- };
- state1 s1;
- state2 s2;
- state3 s3;
- state4 s4;
- state5 s5;
- state6 s6;
- fsm::fsm()
- {
- current = NULL;
- }
- void fsm::reset()
- {
- current = &s1;
- }
- void fsm::advance(char x)
- {
- if(current)
- current = current->transition(x);
- }
- int fsm::end_state()
- {
- return current == &s6;
- }
- int fsm::doom_state()
- {
- return current == NULL;
- }
- state* state1::transition(char x)
- {
- switch(x)
- {
- case 'A':
- return &s2;
- case 'B':
- return &s3;
- case 'C':
- return &s4;
- case 'D':
- return &s5;
- default:
- return NULL;
- }
- }
- state* state2::transition(char x)
- {
- switch(x)
- {
- case 'E':
- return &s2;
- case 'I':
- return &s6;
- default:
- return NULL;
- }
- }
- state* state3::transition(char x)
- {
- switch(x)
- {
- case 'F':
- return &s3;
- case 'M':
- return &s4;
- case 'J':
- return &s6;
- default:
- return NULL;
- }
- }
- state* state4::transition(char x)
- {
- switch(x)
- {
- case 'G':
- return &s4;
- case 'K':
- return &s6;
- default:
- return NULL;
- }
- }
- state* state5::transition(char x)
- {
- switch(x)
- {
- case 'O':
- return &s2;
- case 'H':
- return &s5;
- case 'L':
- return &s6;
- case 'N':
- return &s4;
- default:
- return NULL;
- }
- }
- state* state6::transition(char)
- {
- return NULL;
- }
- int main()
- {
- char input_string[80];
- printf("Enter input expression: ");
- scanf("%s",input_string);
- fsm m;
- m.reset();
- int index = 0;
- m.advance(input_string[index++]);
- while(!m.end_state() && !m.doom_state())
- {
- m.advance(input_string[index++]);
- }
- if(m.end_state())
- printf("/nValid input expression");
- else
- printf("/nInvalid input expression");
- return 0;
- }
现在,程序的设计思路就变得非常清晰了:
- 状态转换图描述了FSM。
- 状态转换图中的每个节点都对应一个状态对象。
- 每个状态对象都通过一个输入字符转换到另一个状态上。
接着,考虑下状态:状态的确切含义是在每一个状态中都提供了一种从输入字符到后继状态的映射,除了映射的值不同外,所有的状态在每次执行映射时的操作都是相似的。在计算机程序中,有两种基本的方法表示映射:一张表或者一个算法,用c++的术语说就是被动数据或者可执行代码。在前面的程序中,我们选用了可执行代码的方法来表示映射,但与用被动数据的方法相比,这样的程序显得更为复杂,并且通用性也不好。
程序6 用数据表来表示转换映射
- #include <string.h>
- #include <limits.h>
- #include <stdio.h>
- const int range = CHAR_MAX + 1;//0...CHAR_MAX
- struct state
- {
- state* transition[range];
- state();
- };
- class fsm
- {
- private:
- state graph[6];
- state* current;
- public:
- void reset();
- void advance(char);
- int end_state();
- int doom_state();
- fsm();
- };
- state::state()
- {
- for(int i = 0;i < range;++i)
- transition[i] = NULL;
- }
- struct triple
- {
- int from;
- char input;
- int to;
- };
- fsm::fsm()
- {
- static triple edges[] ={
- {1,'A',2},{1,'B',3},{1,'C',4},{1,'D',5},
- {2,'E',2},{2,'I',0},
- {3,'F',3},{3,'J',0},{3,'M',4},
- {4,'G',4},{4,'K',0},
- {5,'H',5},{5,'L',0},{5,'O',2},{5,'N',4},
- {0,0,0}
- };
- for(triple* e = edges;e->from != 0;++e)
- graph[e->from].transition[e->input] = &graph[e->to];
- current = NULL;
- }
- void fsm::reset()
- {
- current = &graph[1];
- }
- void fsm::advance(char x)
- {
- if(current)
- current = current->transition[x];
- }
- int fsm::end_state()
- {
- return current == &graph[0];
- }
- int fsm::doom_state()
- {
- return current == NULL;
- }
- int main()
- {
- char input_string[80];
- printf("Enter input expression:");
- scanf("%s",input_string);
- fsm m;
- m.reset();
- int index = 0;
- m.advance(input_string[index++]);
- while (!m.end_state() && !m.doom_state())
- {
- m.advance(input_string[index++]);
- }
- if(m.end_state())
- printf("Valid input expression!/n");
- else
- printf("Invalid input expression!/n");
- return 0;
- }
在上面的程序中还存在较大缺陷:当我们创建多个fsm对象时,每个fsm对象都是同一个FSM的副本,即规则都一样。我们只需将fsm再抽象化,就可以对这个类进行泛化,以处理任意的状态转换图,这种做法是很有实际意义的。
原则:如果对某个类进行抽象化比较简单,我们最好不要只实现某种具体的情形。
程序7 从fsm中派生出sample
- #include <string.h>
- #include <limits.h>
- #include <stdio.h>
- const int range = CHAR_MAX + 1;//0..CHAR_MAX
- struct state
- {
- state* transition[range];
- state();
- };
- struct triple
- {
- int from;
- char input;
- int to;
- };
- class fsm
- {
- private:
- state* graph;
- state* current;
- public:
- void reset();
- void advance(char);
- int end_state();
- int doom_state();
- fsm(triple*);
- virtual ~fsm();//公有基类的析构函数应该被声明为虚函数
- };
- state::state()
- {
- for(int i = 0;i < range;++i)
- transition[i] = NULL;
- }
- fsm::fsm(triple* p)
- {
- int max_node = 0;
- for(triple* e = p;e->from;++e)
- {
- if(e->from > max_node)
- max_node = e->from;
- if(e->to > max_node)
- max_node = e->to;
- }
- graph = new state[max_node + 1];
- for(e = p;e->from ;++e)
- graph[e->from].transition[e->input] = &graph[e->to];
- current = NULL;
- }
- fsm::~fsm()
- {
- delete [] graph;
- }
- void fsm::reset()
- {
- current = &graph[1];
- }
- void fsm::advance(char x)
- {
- if(current)
- current = current->transition[x];
- }
- int fsm::end_state()
- {
- return current == &graph[0];
- }
- int fsm::doom_state()
- {
- return current == NULL;
- }
- class sample:public fsm
- {
- static triple edges[];
- public:
- sample();
- };
- triple sample::edges[] ={
- {1,'A',2},{1,'B',3},{1,'C',4},{1,'D',5},
- {2,'E',2},{2,'I',0},
- {3,'F',3},{3,'J',0},{3,'M',4},
- {4,'G',4},{4,'K',0},
- {5,'H',5},{5,'L',0},{5,'O',2},{5,'N',4},
- {0,0,0}
- };
- sample::sample():fsm(edges)
- {
- }
- int main()
- {
- char input_string[80];
- printf("Enter input expression:");
- scanf("%s",input_string);
- sample m;
- m.reset();
- int index = 0;
- m.advance(input_string[index++]);
- while (!m.end_state() && !m.doom_state())
- {
- m.advance(input_string[index++]);
- }
- if(m.end_state())
- printf("Valid input expression!/n");
- else
- printf("Invalid input expression!/n");
- return 0;
- }
备注:
- 有限状态机也叫做有限自动机(finite automaton),在许多关于编译器、形式语言、离散数学的书中都有正式的描述。
- 参考文献:《c++编程风格》(c++ programming style )作者 Tom Cargill 译者 聂雪军 机械工业出版社2007.1