现在的位置: 首页 > 综合 > 正文

五子棋AI设计——从门外到门内不得不说的事儿5

2017年12月12日 ⁄ 综合 ⁄ 共 6315字 ⁄ 字号 评论关闭

第五天——可以看得更远,可以思考的更快

看得远,胜算更大;看得远,产生的棋局越多,需要判断的状态就越多,AI的思考时间就越长。从游戏性角度出发,我们不仅希望AI能够胜利,更希望它的思考时间在可以容忍的范围内。


图1 前瞻1步的博弈树

现在的问题是——如何在数量无比庞大的状态中快速的找到最优的走子策略。在博弈搜索中,博弈树叶节点的值为当前状态的得分。如图1所示博弈树中,B节点的值为12,此时A节点的值应该小于等于12。C节点的值为6,小于12,那么A节点的值此时应该小于等于6。计算D节点时,d1节点的值为14,可以推知D节点的值大于等于14与A节点的值小于等于6矛盾,因此将D节点余下的子节点剪去。这就是alpha-beta剪枝。伪代码如下:

function MinMax-Decision(state) returns an action
	inputs: state, current state in game
	v←Min-Value(state, -∞, +∞, k)
	return the action in Successors(state) with value v

function Max-Value(state, α, β, k) return a utility vale
	inputs: state, current state in game
	        α, the value of the best alternative for Max along the path to state
	        β, the value of the best alternative for Min along the path to state
	        k, current depth in game path
	if k == 0 then return UTILITY(state)
	v←-∞
	for s in Successors(state) do
		v←MAX(v, Min-Value(s, α, β, k-1))
		if v≥β then return v
		α←MAX(α, v)
	return v

function Min-Value(state, α, β, k) return a utility vale
	inputs: state, current state in game
	        α, the value of the best alternative for Max along the path to state
	        β, the value of the best alternative for Min along the path to state
	        k, current depth in game path
	if k == 0 then return UTILITY(state)
	v←+∞
	for s in Successors(state) do
		v←MIN(v, Max-Value(s, α, β, k-1))
		if v≤α then return v
		β←MIN(β, v)
	return v

当前瞻d步、每步的合法走子有n种时,剪枝前的时间复杂度为O(n^d),剪枝后为O(n^(d/2))。在相同时间内,剪枝后前瞻的步数是原来的两倍;在相同的步数内,搜索到结果的时间是原来的平方根。刚好符合我们的需求。

现在,我们明确了前瞻步数和AI思考时间之间的关系,就可以根据AI的等级确定前瞻步数,同时约束思考时间。在讨论了如何搜索和如何提高搜索效率后,下面我们将集中讨论如何设计评估器,也就是博弈树中每个叶节点的数值是如何确定的。

--------------------分割线--------------------

void CWuZiQi::MaxMinDecision( const int points[16][16], bool first, int oldp[16][16], int newp[16][16], int *nx, int *ny, int k )
{
	int i, j, t;
	int x, y;
	int cntr;
	MyPoint v[KMAX];
	int numbq, numwq;
	int a1[16][16], a2[16][16];
	//int tiju[16][16][8][2];
	MyPoint b[225], w[225];

	//out = fopen( "mm.txt", "w" );

	for ( i = 1; i <= 15; i++ )
		for ( j = 1; j <= 15; j++ )
		{
			oldp[i][j] = points[i][j];
			newp[i][j] = points[i][j];
		}
	for ( i = 0; i < KMAX; i++ )
		{v[i].val = 0; v[i].x = 0; v[i].y = 0;}

	cntr = QuickCount( oldp );
	if ( cntr == 1 )
	{
		*nx = 7; *ny = 7;
		newp[7][7] = 2;
		return;
	}

	MinValue( oldp, first, -MAX, MAX, k, v );
	
	//
	/*
	FillMatrix( tiju, 0 );
	HowManyInLine( oldp, tiju );
	ValueTheChessboardNaive( oldp, tiju, a1, a2 );
	*/
	FillMatrix( a1, 0 );
	FillMatrix( a2, 0 );
	MaxMinValuationOnEmpty( oldp, a1, a2 );
	FillMatrix( b, a1 );
	FillMatrix( w, a2 );
	SortTheValuation( b, w, &numbq, &numwq );
	PrintMatrix( b, w, numbq, numwq );
	//
	int cur = b[0].val - w[0].val;
	int fur = v[k - 1].val;
	if ( first )
	{
		if ( cur < 0 && fur > 0 )  // both current and future are dangerous
		{
			if ( cur + fur <= 0 )
				{x = w[0].x; y = w[0].y;}  // current is worse
			else  // future is better, but it isn't sure
			{
				t = rand() % 1000;
				if ( t < 100 )
					{x = v[k - 1].x; y = v[k - 1].y;}  // future
				else
					{x = w[0].x; y = w[0].y;}  // current
			}
		}
		else if ( cur >= 0 && fur <= 0 )  // both current and future are ok
		{
			if ( cur + fur > 0 )  // current is better
				{x = b[0].x; y = b[0].y;}  
			else  // future is better, but it isn't sure
			{
				t = rand() % 1000;
				if ( t < 200 )
					{x = v[k - 1].x; y = v[k - 1].y;}  // defence
				else
					{x = b[0].x; y = b[0].y;}  // attack
			}
		}
		else if ( cur < 0 && fur <= 0 )  //current is bad, further is ok
		{
			if ( cur < fur )  // current is more dangerous than future
				{x = w[0].x; y = w[0].y;}
			else  // future is better, but it isn't sure
			{
				t = rand() % 1000;
				if ( t < 100 )
					{x = v[k - 1].x; y = v[k - 1].y;}  // future
				else
					{x = w[0].x; y = w[0].y;}  // current
			}
		}
		else  //current is ok, further is bad
		{
			if ( cur > fur )  // current is better
				{x = b[0].x; y = b[0].y;}
			else  // future is worse, but current is not that bad
			{
				t = rand() % 1000;
				if ( t < 400 )
					{x = v[k - 1].x; y = v[k - 1].y;}  // defence
				else
					{x = b[0].x; y = b[0].y;}  // attack
			}
		}
		newp[x][y] = 1;
	}
	else
	{
		if ( cur >= 0 && fur >= 0 )  // both current and future are dangerous
		{
			if ( cur > fur )
				{x = b[0].x; y = b[0].y;}  // current is worse
			else  // future is better, but it isn't sure
			{
				t = rand() % 1000;
				if ( t < 100 )
					{x = v[k - 1].x; y = v[k - 1].y;}  // future
				else
					{x = b[0].x; y = b[0].y;}  // current
			}
		}
		else if ( cur < 0 && fur < 0 )  // both current and future are ok
		{
			if ( cur < fur )  // current is better
				{x = w[0].x; y = w[0].y;}  
			else  // future is better, but it isn't sure
			{
				t = rand() % 1000;
				if ( t < 200 )
					{x = v[k - 1].x; y = v[k - 1].y;}  // defence
				else
					{x = w[0].x; y = w[0].y;}  // attack
			}
		}
		else if ( cur >= 0 && fur < 0 )  //current is bad, further is ok
		{
			if ( cur + fur > 0 )  // current is more dangerous than future
				{x = b[0].x; y = b[0].y;}
			else  // future is better, but it isn't sure
			{
				t = rand() % 1000;
				if ( t < 100 )
					{x = v[k - 1].x; y = v[k - 1].y;}  // future
				else
					{x = b[0].x; y = b[0].y;}  // current
			}
		}
		else  //current is ok, further is bad
		{
			if ( cur + fur < 0 )  // current is better
				{x = w[0].x; y = w[0].y;}
			else  // future is worse, but current is not that bad
			{
				t = rand() % 1000;
				if ( t < 400 )
					{x = v[k - 1].x; y = v[k - 1].y;}  // defence
				else
					{x = w[0].x; y = w[0].y;}  // attack
			}
		}
		newp[x][y] = 2;
	}
	//fclose( out );
	*nx = x; *ny = y;
}

void CWuZiQi::MaxValue( const int qp[16][16], bool first, int alpha, int beta, int k, MyPoint res[KMAX] )
{
	int i, j, val;
	int tqp[16][16], t[16][16];
	MyPoint v;
	//
	FillMatrix( t, 0 );
	for ( i = 1; i <= 15; i++ )
		for ( j = 1; j <= 15; j++ )
			t[i][j] = qp[i][j];
	//
	if ( k == 0 )
	{
		//PrintMatrix( qp );
		val = MaxMinValuationOnNonempty( t, first );
		res[k].val = val;
		//fprintf( out, "%d\t%d\t%d\t%d------(%d, %d)MAX0\n", k, res[k].x, res[k].y, res[k].val, alpha, beta );
		return;
	}
	v.val = -MAX;
	for ( i = 1; i <= 15; i++ )
		for ( j = 1; j <= 15; j++ )
		{
			
			if ( qp[i][j] != 0 )
				continue;
			FillMatrix( tqp, 0 );
			FillMatrix( tqp, t );
			if ( first )
				tqp[i][j] = 2;
			else
				tqp[i][j] = 1;
			res[k - 1].x = i; res[k - 1].y = j;
			MinValue( tqp, first, alpha, beta, k - 1, res );
			if ( v.val < res[k - 1].val )
				{v.val = res[k - 1].val; v.x = res[k - 1].x; v.y = res[k - 1].y;}
			if ( v.val >= beta )
			{
				res[k - 1].val = v.val; res[k - 1].x = v.x; res[k - 1].y = v.y;
				//fprintf( out, "%d\t%d\t%d\t%d------(%d, %d)MAX1\n", k, v.x, v.y, v.val, alpha, beta );
				return;
			}
			if ( v.val > alpha )
			{
				alpha = v.val;
				//fprintf( out, "%d\t%d\t%d\t%d------(%d, %d)MAX2\n", k, v.x, v.y, v.val, alpha, beta );
			}
		}
	if ( k == KMAX )
		{res[k - 1].val = v.val; res[k - 1].x = v.x; res[k - 1].y = v.y;}
	else
		{res[k].val = v.val; res[k].x = v.x; res[k].y = v.y;}
	//fprintf( out, "%d\t%d\t%d\t%d------(%d, %d)MAX3\n", k, v.x, v.y, v.val, alpha, beta );
}

void CWuZiQi::MinValue( const int qp[16][16], bool first, int alpha, int beta, int k, MyPoint res[KMAX] )
{
	int i, j, val;
	int tqp[16][16], t[16][16];
	MyPoint v;
	//
	FillMatrix( t, 0 );
	for ( i = 1; i <= 15; i++ )
		for ( j = 1; j <= 15; j++ )
			t[i][j] = qp[i][j];
	//
	if ( k == 0 )
	{
		//PrintMatrix( qp );
		val = MaxMinValuationOnNonempty( t, first );
		res[k].val = val;
		//fprintf( out, "%d\t%d\t%d\t%d------(%d, %d)MIN0\n", k, res[k].x, res[k].y, res[k].val, alpha, beta );
		return;
	}
	v.val = MAX;
	for ( i = 1; i <= 15; i++ )
		for ( j = 1; j <= 15; j++ )
		{
			if ( t[i][j] != 0 )
				continue;
			FillMatrix( tqp, 0 );
			FillMatrix( tqp, t );
			if ( first )
				tqp[i][j] = 1;
			else
				tqp[i][j] = 2;
			res[k - 1].x = i; res[k - 1].y = j;
			MaxValue( tqp, first, alpha, beta, k - 1, res );
			if ( v.val > res[k - 1].val )
				{v.val = res[k - 1].val; v.x = res[k - 1].x; v.y = res[k - 1].y;}
			if ( v.val <= alpha )
			{
				res[k - 1].val = v.val; res[k - 1].x = v.x; res[k - 1].y = v.y;
				//fprintf( out, "%d\t%d\t%d\t%d------(%d, %d)MIN1\n", k, v.x, v.y, v.val, alpha, beta );
				return;
			}
			if ( v.val < beta )
			{
				beta = v.val;
				//fprintf( out, "%d\t%d\t%d\t%d------(%d, %d)MIN2\n", k, v.x, v.y, v.val, alpha, beta );
			}
		}
	if ( k == KMAX )
		{res[k - 1].val = v.val; res[k - 1].x = v.x; res[k - 1].y = v.y;}
	else
		{res[k].val = v.val; res[k].x = v.x; res[k].y = v.y;}
	//fprintf( out, "%d\t%d\t%d\t%d------(%d, %d)MIN3\n", k, v.x, v.y, v.val, alpha, beta );
}

抱歉!评论已关闭.