From fc7f9dbb4d143dbd586f2a62865db6e216163609 Mon Sep 17 00:00:00 2001 From: Philipp Horstenkamp Date: Sun, 12 Feb 2023 18:54:28 +0100 Subject: [PATCH] Added a docstring to the function checking if a move is possible. --- main.ipynb | 166 +++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 129 insertions(+), 37 deletions(-) diff --git a/main.ipynb b/main.ipynb index 51c4e53..d350229 100644 --- a/main.ipynb +++ b/main.ipynb @@ -152,13 +152,14 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 22, "metadata": {}, "outputs": [], "source": [ "BOARD_SIZE: Final[int] = 8 # defines the board side length as 8\n", "PLAYER: Final[int] = 1 # defines the number symbolising the player as 1\n", - "ENEMY: Final[int] = -1 # defines the number symbolising the enemy as -1" + "ENEMY: Final[int] = -1 # defines the number symbolising the enemy as -1\n", + "EXAMPLE_STACK_SIZE: Final[int] = 1000 # defines the game stack size for examples" ] }, { @@ -450,22 +451,22 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 23, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "8.78 ms ± 868 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n", - "82.7 ms ± 585 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n" + "9.43 ms ± 1 ms per loop (mean ± std. dev. of 7 runs, 100 loops each)\n", + "1 s ± 179 ms per loop (mean ± std. dev. 
of 7 runs, 1 loop each)\n" ] }, { "data": { "text/plain": "array([[[False, False, False, False, False, False, False, False],\n [False, False, False, False, False, False, False, False],\n [False, False, False, True, False, False, False, False],\n [False, False, True, False, False, False, False, False],\n [False, False, False, False, False, True, False, False],\n [False, False, False, False, True, False, False, False],\n [False, False, False, False, False, False, False, False],\n [False, False, False, False, False, False, False, False]]])" }, - "execution_count": 16, + "execution_count": 23, "metadata": {}, "output_type": "execute_result" } @@ -504,8 +505,11 @@ " Returns:\n", " A stack of game boards containing boolean values showing where turns are possible for the player.\n", " \"\"\"\n", - " assert len(boards.shape) == 3\n", - " assert boards.shape[:2] == (BOARD_SIZE, BOARD_SIZE)\n", + " assert len(boards.shape) == 3, \"The number fo input dimensions does not fit.\"\n", + " assert boards.shape[1:] == (\n", + " BOARD_SIZE,\n", + " BOARD_SIZE,\n", + " ), \"The input dimensions do not fit.\"\n", "\n", " _poss_turns = boards == 0 # checks where fields are empty.\n", " _poss_turns &= binary_dilation(\n", @@ -523,40 +527,31 @@ " return _poss_turns\n", "\n", "\n", + "# some simple testing to ensure the function works after simple changes\n", + "# this testing is complete, its more of a smoke-test\n", + "test_array = get_new_games(3)\n", + "expected_result = np.zeros_like(test_array, dtype=bool)\n", + "expected_result[:, 4, 5] = expected_result[:, 2, 3] = True\n", + "expected_result[:, 5, 4] = expected_result[:, 3, 2] = True\n", + "np.testing.assert_equal(get_possible_turns(test_array), expected_result)\n", + "\n", + "\n", "%timeit get_possible_turns(get_new_games(10)) # checks turn possibility evaluation time for 10 initial games\n", - "%timeit get_possible_turns(get_new_games(100)) # check turn possibility evaluation time for 100 initial games\n", - 
"get_possible_turns(get_new_games(3))[:1] # shows a singe game" + "%timeit get_possible_turns(get_new_games(EXAMPLE_STACK_SIZE)) # check turn possibility evaluation time for EXAMPLE_STACK_SIZE initial games\n", + "\n", + "# shows a singe game\n", + "get_possible_turns(get_new_games(3))[:1]" ] }, { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "cell_type": "markdown", "source": [ - "def board_evaluation_final(boards: np.ndarray) -> tuple[np.ndarray, np.ndarray]:\n", - " score1, score2 = np.sum(boards == 1, axis=(1, 2)), np.sum(boards == -1, axis=(1, 2))\n", - " player_1_won = score1 > score2\n", - " player_2_won = score1 < score2\n", - " score1_final = 64 - score2[player_1_won]\n", - " score2_final = 64 - score1[player_2_won]\n", - " score1[player_1_won] = score1_final\n", - " score2[player_2_won] = score2_final\n", - " return score1, score2\n", - "\n", - "\n", - "def board_evaluation(boards: np.ndarray) -> tuple[np.ndarray, np.ndarray]:\n", - " score1, score2 = np.sum(boards == 1, axis=(1, 2)), np.sum(boards == -1, axis=(1, 2))\n", - " return score1, score2\n", - "\n", - "\n", - "def board_score(boards: np.ndarray) -> tuple[np.ndarray]:\n", - " return np.sign(np.sum(boards, axis=(1, 2)))\n", - "\n", - "\n", - "board_evaluation(get_new_games(3))\n", - "board_evaluation_final(get_new_games(3))" - ] + "Besides the ability to generate an array of possible turns there needs to be a functions that check if a given turn is possible.\n", + "On is needed for the action space validation. The other is for validating a players turn." + ], + "metadata": { + "collapsed": false + } }, { "cell_type": "code", @@ -565,6 +560,17 @@ "outputs": [], "source": [ "def move_possible(board: np.ndarray, move: np.ndarray) -> bool:\n", + " \"\"\"Checks if a turn is possible.\n", + "\n", + " Checks if a turn is possible. 
If no turn is possible, the input array [-1, -1] is expected.\n", + "\n", + " Args:\n", + " board: A board where it should be checked if a turn is possible.\n", + " move: The move that should be taken. Expected is the index of the field where a stone should be placed [x, y]. If no placement is possible [-1, -1] is expected as an input.\n", + "\n", + " Returns:\n", + " True if the move is possible\n", + " \"\"\"\n", " if np.all(move == -1):\n", " return not np.any(get_possible_turns(np.reshape(board, (1, 8, 8))))\n", " return any(\n", @@ -572,6 +578,7 @@ " )\n", "\n", "\n", + "# Some testing for this function and the underlying recursive functions that are called.\n", "assert move_possible(get_new_games(1)[0], np.array([2, 3])) is True\n", "assert move_possible(get_new_games(1)[0], np.array([3, 2])) is True\n", "assert move_possible(get_new_games(1)[0], np.array([2, 2])) is False\n", @@ -581,6 +588,91 @@ "assert move_possible(np.ones((8, 8)) * 0, np.array([-1, -1])) is True" ] }, + { + "cell_type": "markdown", + "source": [ + "## Reward functions\n", + "\n", + "Any kind of reinforcement learning needs a reward function. For Othello this would be the final score, the information who won, or changes to the score. A combination of those three would also be possible.\n", + "It is probably not possible to weight the current score too high in a reward function, since that would be too close to a classic greedy algorithm. But some influence would improve learning behavior.\n", + "In the next section all three reward functions are implemented, to be combined and weighted later on as needed." + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": 24, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "177 µs ± 3.97 µs per loop (mean ± std. dev. of 7 runs, 10,000 loops each)\n", + "29.7 µs ± 106 ns per loop (mean ± std. dev. of 7 runs, 10,000 loops each)\n", + "31.2 µs ± 269 ns per loop (mean ± std. dev. 
of 7 runs, 10,000 loops each)\n" + ] + } + ], + "source": [ + "def final_boards_evaluation(boards: np.ndarray) -> np.ndarray:\n", + " \"\"\"Evaluates the board at the end of the game.\n", + "\n", + " All unused fields are added to the score of the player that has more stones with his color up.\n", + " This score only applies to the end of the game.\n", + " Normally the score is represented by the number of stones each player has.\n", + " In this case the score was combined by building the difference.\n", + "\n", + " Args:\n", + " boards: A stack of game boards at the end of the game.\n", + "\n", + " Returns:\n", + " the combined score for both players.\n", + " \"\"\"\n", + " score1, score2 = np.sum(boards == 1, axis=(1, 2)), np.sum(boards == -1, axis=(1, 2))\n", + " player_1_won = score1 > score2\n", + " player_2_won = score1 < score2\n", + " score1_final = 64 - score2[player_1_won]\n", + " score2_final = 64 - score1[player_2_won]\n", + " score1[player_1_won] = score1_final\n", + " score2[player_2_won] = score2_final\n", + " return score1 - score2\n", + "\n", + "\n", + "def evaluate_boards(boards: np.ndarray) -> np.ndarray:\n", + " \"\"\"Counts the stones each player has on the board.\n", + "\n", + " Args:\n", + " boards: A stack of boards for evaluation.\n", + "\n", + " Returns:\n", + " the combined score for both players.\n", + " \"\"\"\n", + " return np.sum(boards, axis=(1, 2))\n", + "\n", + "\n", + "def evaluate_who_won(boards: np.ndarray) -> np.ndarray:\n", + " \"\"\"Checks who won or is winning a game.\n", + "\n", + " Args:\n", + " boards: A stack of boards for evaluation.\n", + "\n", + " Returns:\n", + " The information who won for both players. 1 means the player won, -1 means the opponent won. 
0 represents a draw.\n", + " \"\"\"\n", + " return np.sign(np.sum(boards, axis=(1, 2)))\n", + "\n", + "\n", + "_boards = get_new_games(EXAMPLE_STACK_SIZE)\n", + "%timeit final_boards_evaluation(_boards)\n", + "%timeit evaluate_boards(_boards)\n", + "%timeit evaluate_who_won(_boards)" + ], + "metadata": { + "collapsed": false + } + }, { "cell_type": "code", "execution_count": null,