Added a docstring to the function checking if a move is possible.

This commit is contained in:
Philipp Horstenkamp 2023-02-12 18:54:28 +01:00
parent a9e65564c4
commit fc7f9dbb4d
Signed by: Philipp
GPG Key ID: DD53EAC36AFB61B4

View File

@ -152,13 +152,14 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 22,
"metadata": {},
"outputs": [],
"source": [
"BOARD_SIZE: Final[int] = 8 # defines the board side length as 8\n",
"PLAYER: Final[int] = 1 # defines the number symbolising the player as 1\n",
"ENEMY: Final[int] = -1 # defines the number symbolising the enemy as -1"
"ENEMY: Final[int] = -1 # defines the number symbolising the enemy as -1\n",
"EXAMPLE_STACK_SIZE: Final[int] = 1000 # defines the game stack size for examples"
]
},
{
@ -450,22 +451,22 @@
},
{
"cell_type": "code",
"execution_count": 16,
"execution_count": 23,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"8.78 ms ± 868 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n",
"82.7 ms ± 585 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n"
"9.43 ms ± 1 ms per loop (mean ± std. dev. of 7 runs, 100 loops each)\n",
"1 s ± 179 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
]
},
{
"data": {
"text/plain": "array([[[False, False, False, False, False, False, False, False],\n [False, False, False, False, False, False, False, False],\n [False, False, False, True, False, False, False, False],\n [False, False, True, False, False, False, False, False],\n [False, False, False, False, False, True, False, False],\n [False, False, False, False, True, False, False, False],\n [False, False, False, False, False, False, False, False],\n [False, False, False, False, False, False, False, False]]])"
},
"execution_count": 16,
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
@ -504,8 +505,11 @@
" Returns:\n",
" A stack of game boards containing boolean values showing where turns are possible for the player.\n",
" \"\"\"\n",
" assert len(boards.shape) == 3\n",
" assert boards.shape[:2] == (BOARD_SIZE, BOARD_SIZE)\n",
" assert len(boards.shape) == 3, \"The number fo input dimensions does not fit.\"\n",
" assert boards.shape[1:] == (\n",
" BOARD_SIZE,\n",
" BOARD_SIZE,\n",
" ), \"The input dimensions do not fit.\"\n",
"\n",
" _poss_turns = boards == 0 # checks where fields are empty.\n",
" _poss_turns &= binary_dilation(\n",
@ -523,40 +527,31 @@
" return _poss_turns\n",
"\n",
"\n",
"# some simple testing to ensure the function works after simple changes\n",
"# this testing is complete, its more of a smoke-test\n",
"test_array = get_new_games(3)\n",
"expected_result = np.zeros_like(test_array, dtype=bool)\n",
"expected_result[:, 4, 5] = expected_result[:, 2, 3] = True\n",
"expected_result[:, 5, 4] = expected_result[:, 3, 2] = True\n",
"np.testing.assert_equal(get_possible_turns(test_array), expected_result)\n",
"\n",
"\n",
"%timeit get_possible_turns(get_new_games(10)) # checks turn possibility evaluation time for 10 initial games\n",
"%timeit get_possible_turns(get_new_games(100)) # check turn possibility evaluation time for 100 initial games\n",
"get_possible_turns(get_new_games(3))[:1] # shows a singe game"
"%timeit get_possible_turns(get_new_games(EXAMPLE_STACK_SIZE)) # check turn possibility evaluation time for EXAMPLE_STACK_SIZE initial games\n",
"\n",
"# shows a singe game\n",
"get_possible_turns(get_new_games(3))[:1]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"cell_type": "markdown",
"source": [
"def board_evaluation_final(boards: np.ndarray) -> tuple[np.ndarray, np.ndarray]:\n",
" score1, score2 = np.sum(boards == 1, axis=(1, 2)), np.sum(boards == -1, axis=(1, 2))\n",
" player_1_won = score1 > score2\n",
" player_2_won = score1 < score2\n",
" score1_final = 64 - score2[player_1_won]\n",
" score2_final = 64 - score1[player_2_won]\n",
" score1[player_1_won] = score1_final\n",
" score2[player_2_won] = score2_final\n",
" return score1, score2\n",
"\n",
"\n",
"def board_evaluation(boards: np.ndarray) -> tuple[np.ndarray, np.ndarray]:\n",
" score1, score2 = np.sum(boards == 1, axis=(1, 2)), np.sum(boards == -1, axis=(1, 2))\n",
" return score1, score2\n",
"\n",
"\n",
"def board_score(boards: np.ndarray) -> tuple[np.ndarray]:\n",
" return np.sign(np.sum(boards, axis=(1, 2)))\n",
"\n",
"\n",
"board_evaluation(get_new_games(3))\n",
"board_evaluation_final(get_new_games(3))"
]
"Besides the ability to generate an array of possible turns there needs to be a functions that check if a given turn is possible.\n",
"On is needed for the action space validation. The other is for validating a players turn."
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
@ -565,6 +560,17 @@
"outputs": [],
"source": [
"def move_possible(board: np.ndarray, move: np.ndarray) -> bool:\n",
" \"\"\"Checks if a turn is possible.\n",
"\n",
" Checks if a turn is possible. If no turn is possible to input array [-1, -1] is expected.\n",
"\n",
" Args:\n",
" board: A board where it should be checkt if a turn is possible.\n",
" move: The move that should be taken. Expected is the index of the filed where a stone should be placed [x, y]. If no placement is possible [-1, -1] is expected as an input.\n",
"\n",
" Returns:\n",
" True if the move is possible\n",
" \"\"\"\n",
" if np.all(move == -1):\n",
" return not np.any(get_possible_turns(np.reshape(board, (1, 8, 8))))\n",
" return any(\n",
@ -572,6 +578,7 @@
" )\n",
"\n",
"\n",
"# Some testing for this function and the underlying recursive functions that are called.\n",
"assert move_possible(get_new_games(1)[0], np.array([2, 3])) is True\n",
"assert move_possible(get_new_games(1)[0], np.array([3, 2])) is True\n",
"assert move_possible(get_new_games(1)[0], np.array([2, 2])) is False\n",
@ -581,6 +588,91 @@
"assert move_possible(np.ones((8, 8)) * 0, np.array([-1, -1])) is True"
]
},
{
"cell_type": "markdown",
"source": [
"## Reword functions\n",
"\n",
"For any kind of reinforcement learning is a reword function needed. For otello this would be the final score, the information who won or changes to the score. A combination of those three would also be possible.\n",
"It is probably not be possible to weight the current score to high in a reword function since that would be to close to a classic greedy algorithm. But some influce would increase learning behavior.\n",
"In the next section are all three reword functions implemented to be combined and weight later on as needed."
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": 24,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"177 µs ± 3.97 µs per loop (mean ± std. dev. of 7 runs, 10,000 loops each)\n",
"29.7 µs ± 106 ns per loop (mean ± std. dev. of 7 runs, 10,000 loops each)\n",
"31.2 µs ± 269 ns per loop (mean ± std. dev. of 7 runs, 10,000 loops each)\n"
]
}
],
"source": [
"def final_boards_evaluation(boards: np.ndarray) -> np.ndarray:\n",
" \"\"\"Evaluates the board at the end of the game.\n",
"\n",
" All unused fields are added to the score of the player that has more stones with his color up.\n",
" This score only applies to the end of the game.\n",
" Normally the score is represented by the number of stones each player has.\n",
" In this case the score was combined by building the difference.\n",
"\n",
" Args:\n",
" boards: A stack of game bords ot the end of the game.\n",
"\n",
" Returns:\n",
" the combined score for both player.\n",
" \"\"\"\n",
" score1, score2 = np.sum(boards == 1, axis=(1, 2)), np.sum(boards == -1, axis=(1, 2))\n",
" player_1_won = score1 > score2\n",
" player_2_won = score1 < score2\n",
" score1_final = 64 - score2[player_1_won]\n",
" score2_final = 64 - score1[player_2_won]\n",
" score1[player_1_won] = score1_final\n",
" score2[player_2_won] = score2_final\n",
" return score1 - score2\n",
"\n",
"\n",
"def evaluate_boards(boards: np.ndarray) -> np.ndarray:\n",
" \"\"\"Counts the stones each player has on the board.\n",
"\n",
" Args:\n",
" boards: A stack of boards for evaluation.\n",
"\n",
" Returns:\n",
" the combined score for both player.\n",
" \"\"\"\n",
" return np.sum(boards, axis=(1, 2))\n",
"\n",
"\n",
"def evaluate_who_won(boards: np.ndarray) -> np.ndarray:\n",
" \"\"\"Checks who won or is winning a game.\n",
"\n",
" Args:\n",
" boards: A stack of boards for evaluation.\n",
"\n",
" Returns:\n",
" The information who won for both player. 1 meaning the player won, -1 means the opponent lost. 0 represents a patt.\n",
" \"\"\"\n",
" return np.sign(np.sum(boards, axis=(1, 2)))\n",
"\n",
"\n",
"_boards = get_new_games(EXAMPLE_STACK_SIZE)\n",
"%timeit final_boards_evaluation(_boards)\n",
"%timeit evaluate_boards(_boards)\n",
"%timeit evaluate_who_won(_boards)"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": null,