diff --git a/main.ipynb b/main.ipynb index d350229..c196bde 100644 --- a/main.ipynb +++ b/main.ipynb @@ -159,7 +159,10 @@ "BOARD_SIZE: Final[int] = 8 # defines the board side length as 8\n", "PLAYER: Final[int] = 1 # defines the number symbolising the player as 1\n", "ENEMY: Final[int] = -1 # defines the number symbolising the enemy as -1\n", - "EXAMPLE_STACK_SIZE: Final[int] = 1000 # defines the game stack size for examples" + "EXAMPLE_STACK_SIZE: Final[int] = 1000 # defines the game stack size for examples\n", + "IMPOSSIBLE: Final[np.ndarray] = np.array([-1, -1], dtype=int)\n", + "IMPOSSIBLE.setflags(write=False)\n", + "SIMULATE_TURNS: Final[int] = 70" ] }, { @@ -588,13 +591,71 @@ "assert move_possible(np.ones((8, 8)) * 0, np.array([-1, -1])) is True" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def moves_possible(boards: np.ndarray, moves: np.ndarray) -> np.ndarray:\n", + " \"\"\"Checks if a stack of moves can be executed on a stack of boards.\n", + "\n", + " Args:\n", + " boards: A stack of boards on which the next stones should be placed.\n", + " moves: A stack of stones to be placed. Each move is formatted as an array in the form of [x, y]; if no turn is possible the value [-1, -1] is expected.\n", + "\n", + " Returns:\n", + " A boolean array marking, for each game in the stack, whether its move can be executed.\n", + " \"\"\"\n", + " arr_moves_possible = np.zeros(boards.shape[0], dtype=bool)\n", + " for game in range(boards.shape[0]):\n", + " if np.all(\n", + " moves[game] == -1\n", + " ): # can be all or any.
All should be faster since most times neither value will be -1.\n", + " arr_moves_possible[game] = not np.any(\n", + " get_possible_turns(np.reshape(boards[game], (1, 8, 8)))\n", + " )\n", + " else:\n", + " arr_moves_possible[game] = any(\n", + " _recursive_steps(boards[game, :, :], direction, moves[game])\n", + " for direction in DIRECTIONS\n", + " )\n", + " return arr_moves_possible\n", + "\n", + "\n", + "np.testing.assert_array_equal(\n", + " moves_possible(np.ones((3, 8, 8)) * 1, np.array([[-1, -1]] * 3)),\n", + " np.array([True] * 3),\n", + ")\n", + "\n", + "np.testing.assert_array_equal(\n", + " moves_possible(get_new_games(3), np.array([[2, 3], [3, 2], [3, 2]])),\n", + " np.array([True] * 3),\n", + ")\n", + "np.testing.assert_array_equal(\n", + " moves_possible(get_new_games(3), np.array([[2, 2], [1, 1], [0, 0]])),\n", + " np.array([False] * 3),\n", + ")\n", + "np.testing.assert_array_equal(\n", + " moves_possible(np.ones((3, 8, 8)) * -1, np.array([[-1, -1]] * 3)),\n", + " np.array([True] * 3),\n", + ")\n", + "np.testing.assert_array_equal(\n", + " moves_possible(np.zeros((3, 8, 8)), np.array([[-1, -1]] * 3)),\n", + " np.array([True] * 3),\n", + ")" + ] + }, { "cell_type": "markdown", "source": [ "## Reword functions\n", "\n", - "For any kind of reinforcement learning is a reword function needed. For otello this would be the final score, the information who won or changes to the score. A combination of those three would also be possible.\n", - "It is probably not be possible to weight the current score to high in a reword function since that would be to close to a classic greedy algorithm. 
But some influce would increase learning behavior.\n", + "Any kind of reinforcement learning needs a reward function.\n", + "For Othello this would be the final score, the information about who won, or the changes to the score.\n", + "A combination of those three would also be possible.\n", + "It is probably not possible to weight the current score too high in a reward function, since that would be too close to a classic greedy algorithm.\n", + "But some direct influence would increase the learning speed.\n", "In the next section are all three reword functions implemented to be combined and weight later on as needed." ], "metadata": { @@ -674,48 +735,16 @@ } }, { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "cell_type": "markdown", "source": [ - "def moves_possible(boards: np.ndarray, moves: np.ndarray) -> np.ndarray:\n", - " arr_moves_possible = np.zeros(boards.shape[0], dtype=bool)\n", - " for game in range(boards.shape[0]):\n", - " if np.all(moves[game] == -1):\n", - " arr_moves_possible[game] = not np.any(\n", - " get_possible_turns(np.reshape(boards[game], (1, 8, 8)))\n", - " )\n", - " else:\n", - " arr_moves_possible[game] = any(\n", - " _recursive_steps(boards[game, :, :], direction, moves[game])\n", - " for direction in DIRECTIONS\n", - " )\n", - " return arr_moves_possible\n", + "## Execute a chosen action\n", "\n", - "\n", - "np.testing.assert_array_equal(\n", - " moves_possible(np.ones((3, 8, 8)) * 1, np.array([[-1, -1]] * 3)),\n", - " np.array([True] * 3),\n", - ")\n", - "\n", - "np.testing.assert_array_equal(\n", - " moves_possible(get_new_games(3), np.array([[2, 3], [3, 2], [3, 2]])),\n", - " np.array([True] * 3),\n", - ")\n", - "np.testing.assert_array_equal(\n", - " moves_possible(get_new_games(3), np.array([[2, 2], [1, 1], [0, 0]])),\n", - " np.array([False] * 3),\n", - ")\n", - "np.testing.assert_array_equal(\n", - " moves_possible(np.ones((3, 8, 8)) * -1, np.array([[-1, -1]] * 3)),\n", - " np.array([True] *
3),\n", - ")\n", - "np.testing.assert_array_equal(\n", - " moves_possible(np.zeros((3, 8, 8)), np.array([[-1, -1]] * 3)),\n", - " np.array([True] * 3),\n", - ")" - ] + "After evaluating which turns are possible, there needs to be a function that executes a turn.\n", + "The next section does that." + ], + "metadata": { + "collapsed": false + } }, { "cell_type": "code", @@ -724,13 +753,60 @@ "outputs": [], "source": [ "class InvalidTurn(ValueError):\n", - " pass\n", - "\n", - "\n", + " \"\"\"\n", + " This error is thrown if a given turn is not valid.\n", + " \"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "95.1 ms ± 3.5 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)\n" + ] + }, + { + "data": { + "text/plain": "
", + "image/png": "\n" + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ "def do_moves(boards: np.ndarray, moves: np.ndarray) -> np.ndarray:\n", + " \"\"\"Executes a single move on a stack of Othello boards.\n", + "\n", + " Args:\n", + " boards: A stack of Othello boards where the next stone should be placed.\n", + " moves: A stack of stone placement orders for the game. Formatted as coordinates in an array [x, y] of the place where the stone should be placed. Should contain [-1,-1] if no new placement is possible.\n", + "\n", + " Returns:\n", + " The new state of the stack of boards.\n", + " \"\"\"\n", + "\n", " def _do_directional_move(\n", " board: np.ndarray, rec_move: np.ndarray, rev_direction, step_one=True\n", " ) -> bool:\n", + " \"\"\"Changes the color of enemy stones in one direction.\n", + "\n", + " This function works recursively. The argument step_one should always be used in its default value.\n", + "\n", + " Args:\n", + " board: A board on which a stone was placed.\n", + " rec_move: The position on the board in x and y where this function is called from. Will be moved by recursive calls.\n", + " rev_direction: The direction in which stones are checked. It is added to rec_move to get the next position to check.\n", + " step_one: Set to true if this is the first step in the recursion. False later on.\n", + "\n", + " Returns:\n", + " True if a stone could be flipped.\n", + " All changes are made on the view of the numpy array and therefore not included in the return value.\n", + " \"\"\"\n", " rec_position = rec_move + rev_direction\n", " if np.any((rec_position >= 8) | (rec_position < 0)):\n", " return False\n", @@ -746,16 +822,32 @@ " return False\n", "\n", " def _do_move(_board: np.ndarray, move: np.ndarray) -> None:\n", + " \"\"\"Executes a turn on a board.\n", + "\n", + " Args:\n", + " _board: The game board on which to place a stone.\n", + " move: The coordinates of a stone that should be placed.
Should be formatted as an array of the form [x, y]. The value [-1, -1] is expected if no turn is possible.\n", + "\n", + " Returns:\n", + " All changes are made on the view of the numpy array.\n", + " \"\"\"\n", " if np.all(move == -1):\n", + " if not move_possible(_board, move):\n", + " raise InvalidTurn(\"An action should be taken. A turn is possible.\")\n", " return\n", + "\n", + " # noinspection PyTypeChecker\n", " if _board[tuple(move.tolist())] != 0:\n", - " raise InvalidTurn\n", + " raise InvalidTurn(\"This turn is not possible.\")\n", + "\n", " action = False\n", " for direction in DIRECTIONS:\n", " if _do_directional_move(_board, move, direction):\n", " action = True\n", " if not action:\n", - " raise InvalidTurn()\n", + " raise InvalidTurn(\"This turn is not possible.\")\n", + "\n", + " # noinspection PyTypeChecker\n", " _board[tuple(move.tolist())] = 1\n", "\n", " boards = boards.copy()\n", @@ -764,8 +856,28 @@ " return boards\n", "\n", "\n", - "do_moves(get_new_games(10), np.array([[2, 3]] * 10))[0]" - ] + "%timeit do_moves(get_new_games(EXAMPLE_STACK_SIZE), np.array([[2, 3]] * EXAMPLE_STACK_SIZE))[0]\n", + "plot_othello_board(\n", + " do_moves(\n", + " get_new_games(EXAMPLE_STACK_SIZE), np.array([[2, 3]] * EXAMPLE_STACK_SIZE)\n", + " )[0]\n", + ")" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "markdown", + "source": [ + "## An abstract reversi game policy\n", + "\n", + "For easy use of policies, an abstract class contains the policy generation and requests an action from an instance of an inheriting class.\n", + "This class filters the policy to only propose valid actions. Inheriting classes do not need to care about this." + ], + "metadata": { + "collapsed": false + } }, { "cell_type": "code", @@ -774,33 +886,66 @@ "outputs": [], "source": [ "class GamePolicy(ABC):\n", - "\n", - " IMPOSSIBLE: np.ndarray = np.array([-1, -1], dtype=int)\n", + " \"\"\"\n", + " A game policy.
Proposes where to place a stone next.\n", + " \"\"\"\n", "\n", " @property\n", " @abc.abstractmethod\n", " def policy_name(self) -> str:\n", + " \"\"\"The name of this policy\"\"\"\n", " raise NotImplementedError()\n", "\n", " @abc.abstractmethod\n", - " def internal_policy(self, boards: np.ndarray) -> np.ndarray:\n", + " def _internal_policy(self, boards: np.ndarray) -> np.ndarray:\n", + " \"\"\"The internal policy is an unfiltered policy. It should only be called from inside this class.\n", + "\n", + " Args:\n", + " boards: A stack of boards for which a policy should be calculated.\n", + "\n", + " Returns:\n", + " The policy for these boards. Should have the same shape as the boards array.\n", + " \"\"\"\n", " raise NotImplementedError()\n", "\n", - " def get_policy(self, boards: np.ndarray) -> np.ndarray:\n", - " policies = self.internal_policy(boards)\n", + " def get_policy(\n", + " self, boards: np.ndarray, epsilon: float = 1\n", + " ) -> tuple[np.ndarray, np.ndarray]:\n", + " assert len(boards.shape) == 3 # expects a stack of boards, one per game\n", + " assert boards.shape[1:] == (BOARD_SIZE, BOARD_SIZE)\n", + "\n", + " # todo possibly change this function to only validate the purpose turn and\n", + "\n", + " policies = self._internal_policy(boards)\n", + " raw_policy = policies.copy()\n", + " if epsilon < 1:\n", + " policies = policies + np.random.rand(*boards.shape)\n", + "\n", + " # todo talk to team about backpropagation epsilon for greedy factor\n", + "\n", + " possible_turns = get_possible_turns(boards)\n", + " policies[possible_turns == False] = -1.0\n", " max_indices = [\n", " np.unravel_index(policy.argmax(), policy.shape) for policy in policies\n", " ]\n", " policy_vector = np.array(max_indices)\n", - "\n", + " max_policy = policy_vector.copy() # copy, so that zeroing max_policy does not overwrite IMPOSSIBLE in policy_vector\n", " no_turn_possible = np.all(policy_vector == 0, 1) & (policies[:, 0, 0] == -1.0)\n", "\n", - " policy_vector[no_turn_possible] = GamePolicy.IMPOSSIBLE\n", - " return policy_vector" + " policy_vector[no_turn_possible] = IMPOSSIBLE\n", + " max_policy[no_turn_possible] =
0\n", + " return policy_vector, raw_policy" ] }, + { + "cell_type": "markdown", + "source": [ + "## A first policy" + ], + "metadata": { + "collapsed": false + } + }, { "cell_type": "code", "execution_count": null, @@ -854,7 +999,7 @@ "metadata": {}, "outputs": [], "source": [ - "SIMULATE_TURNS = 70\n", + "\n", "\n", "\n", "def simulate_game(\n",