代码之家  ›  专栏  ›  技术社区  ›  floatingCatsAndDogs

极小极大算法的意外行为

  •  0
  • floatingCatsAndDogs  · 技术社区  · 6 月前

    #include <stdbool.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #define BOARDSIZE 9
    
    const unsigned char WINNINGSTATES[8][3] = {{0, 1, 2}, {3, 4, 5}, {6, 7, 8},
                                               {0, 3, 6}, {1, 4, 7}, {2, 5, 8},
                                               {0, 4, 8}, {2, 4, 6}};
    
    enum XOE { EMPTY, X, O };
    struct Board {
      enum XOE current_player;
      enum XOE grid[BOARDSIZE];
    };
    
    struct legal_moves {
      int count;
      int moves[];
    };
    
    struct board_evaluation {
      _Bool x_won;
      _Bool o_won;
      _Bool draw;
      _Bool inplay;
    };
    
    struct Board empty_board() {
    
      struct Board newboard;
      newboard.current_player = X;
      memset(newboard.grid, 0, sizeof(newboard.grid));
    
      return newboard;
    }
    
    struct legal_moves *get_legal(struct Board board) {
    
      int count = 0;
      for (int i = 0; i < BOARDSIZE; i++) {
        if (board.grid[i] == EMPTY) {
          count += 1;
        }
      }
    
      if (count == 0) {
        return malloc(sizeof(struct legal_moves));
      }
    
      struct legal_moves *legal =
          malloc(sizeof(struct legal_moves) + sizeof(int) * count);
      legal->count = count;
    
      count = 0;
      for (int i = 0; i < BOARDSIZE; i++) {
        if (board.grid[i] == EMPTY) {
          legal->moves[count] = i;
          count += 1;
        }
      }
    
      return legal;
    }
    
    struct Board move(struct Board board, int index) {
      board.grid[index] = board.current_player;
    
      if (board.current_player == X) {
        board.current_player = O;
        return board;
      }
      board.current_player = X;
      return board;
    }
    
    struct board_evaluation evaluate_board(struct Board board) {
    
      enum XOE possible_winner;
      _Bool winner_found;
    
      for (int w = 0; w < 8; w++) {
        possible_winner = board.grid[WINNINGSTATES[w][0]];
        if (possible_winner == EMPTY) {
          continue;
        }
    
        winner_found = true;
        for (int s = 0; s < 3; s++) {
          if (board.grid[WINNINGSTATES[w][s]] != possible_winner) {
            winner_found = false;
            break;
          }
        }
        if (winner_found) {
          if (possible_winner == X) {
            return (struct board_evaluation){true, false, false, false};
          }
          return (struct board_evaluation){false, true, false, false};
        }
      }
    
      struct legal_moves *moves = get_legal(board);
    
      if (moves->count == 0) {
        free(moves);
        return (struct board_evaluation){false, false, true, false};
      }
      free(moves);
      return (struct board_evaluation){false, false, false, true};
    }
    
    void display_board(struct Board board) {
      for (int i = 0; i < 9; i++) {
        if (board.grid[i] == X) {
          printf("X");
        }
        if (board.grid[i] == O) {
          printf("O");
        }
        if (board.grid[i] == EMPTY) {
          printf("-");
        }
      }
      printf("\n");
    }
    
    int get_score(struct board_evaluation evaluation) {
      if (evaluation.x_won) {
        return 1;
      }
      if (evaluation.draw) {
        return 0;
      }
      if (evaluation.o_won) {
        return -1;
      }
      return 0;
    }
    
    int minimax(struct Board board, int depth) {
      struct board_evaluation test = evaluate_board(board);
      _Bool ismaximizing = board.current_player == X;
    
      if (!test.inplay) {
        return get_score(test);
      }
      if (depth > 3) {
        return get_score(evaluate_board(board));
      }
    
      struct legal_moves *possible_moves = get_legal(board);
      struct Board new_board;
    
      int current_score = 0;
      int best_score = ismaximizing ? -10 : 10;
    
      for (int i = 0; i < possible_moves->count; i++) {
        new_board = move(board, possible_moves->moves[i]);
        current_score = minimax(new_board, depth + 1);
        if (ismaximizing) {
          if (current_score >= best_score) {
            best_score = current_score;
          }
          continue;
        }
    
        if (current_score <= best_score) {
          best_score = current_score;
        }
      }
      free(possible_moves);
      return best_score;
    }
    
    int get_best(struct Board board) {
      struct legal_moves *possible_moves = get_legal(board);
      struct Board new_board;
      _Bool ismaximizing = board.current_player == X;
    
      int current_score = 0;
      int best_score = ismaximizing ? -10 : 10;
      int best_move = possible_moves->moves[0];
    
      for (int i = 0; i < possible_moves->count; i++) {
        new_board = move(board, possible_moves->moves[i]);
        current_score = minimax(new_board, 0);
        printf("%d -> %d\n", possible_moves->moves[i], current_score);
        if (ismaximizing) {
          if (current_score > best_score) {
            best_score = current_score;
            best_move = possible_moves->moves[i];
            display_board(new_board);
          }
          continue;
        }
    
        if (current_score < best_score) {
          best_score = current_score;
          best_move = possible_moves->moves[i];
        }
      }
      free(possible_moves);
      return best_move;
    }
    
    int main() {
      struct Board new_board = {
          X, {EMPTY, EMPTY, EMPTY, EMPTY, X, X, EMPTY, EMPTY, EMPTY}};
      printf("%d\n", get_best(new_board));
    }
    

    [ ][ ][ ]
    [ ][X][X]
    [ ][ ][ ]
    

    显然,输出应该是3,因为它会导致玩家X立即获胜,但代码输出0,这是由于最小最大函数对每个根移动返回1,我不知道为什么。非常感谢您的帮助!

    2 回复  |  直到 6 月前
        1
  •  5
  •   trincot    6 月前

    该算法可能会建议不会导致 最快的 赢,只是为了一定的胜利。

    在你的例子中,当X在0号位置比赛时,它仍然会赢,因为O无法阻止所有威胁。当你的算法找到多个评估为最佳值的移动时,它将选择第一个,这就是为什么在你的例子中,它建议点0,即使它已经确定点3的移动“同样好”。

    如果你想让算法建议一个能带来最快胜利的动作,那么一种可能的方法是 减小绝对值 你从递归极小极大调用中返回,这使得输赢在递归树中越深入,就越不有趣。要实现这一想法,请对代码进行以下更改:

    • get_score 分别返回-9和9,而不是-1和1,这样你以后就有一些空间来减小该值的大小。

    • minimax ,在递归调用之后,添加以下语句:

      current_score += (current_score < 0) - (current_score > 0);
      

    现在,该示例将输出3。

    其他备注

    要获得正确的游戏,您需要将搜索深度从3增加到4。例如,在深度为3的情况下,当第一个玩家在0点开始游戏时,第二个玩家将无法找到正确的移动。

    为了避免未定义的行为,请删除以下代码:

    if (count == 0) {
      return malloc(sizeof(struct legal_moves));
    }
    

    count 成员。

        2
  •  5
  •   chux    6 月前

    代码至少存在以下问题:

    未初始化数据

    代码返回一个指向未初始化数据的指针。

    if (count == 0) {
        return malloc(sizeof(struct legal_moves));
      }
    

    然而,调用代码会使用该指针指向未初始化的数据。

    struct legal_moves *moves = get_legal(board);
    if (moves->count == 0) {  // `moves->count` may be garbage.
    

    初始化数据(成员 .count )在返回之前。

    代码似乎可以正常执行 if (count == 0) { return malloc(sizeof(struct legal_moves)); } 淘汰。

    推荐文章