Added a docstring to the function checking if a move is possible.
This commit is contained in:
parent
a9e65564c4
commit
fc7f9dbb4d
166
main.ipynb
166
main.ipynb
@ -152,13 +152,14 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 3,
|
"execution_count": 22,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"BOARD_SIZE: Final[int] = 8 # defines the board side length as 8\n",
|
"BOARD_SIZE: Final[int] = 8 # defines the board side length as 8\n",
|
||||||
"PLAYER: Final[int] = 1 # defines the number symbolising the player as 1\n",
|
"PLAYER: Final[int] = 1 # defines the number symbolising the player as 1\n",
|
||||||
"ENEMY: Final[int] = -1 # defines the number symbolising the enemy as -1"
|
"ENEMY: Final[int] = -1 # defines the number symbolising the enemy as -1\n",
|
||||||
|
"EXAMPLE_STACK_SIZE: Final[int] = 1000 # defines the game stack size for examples"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -450,22 +451,22 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 16,
|
"execution_count": 23,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
"name": "stdout",
|
"name": "stdout",
|
||||||
"output_type": "stream",
|
"output_type": "stream",
|
||||||
"text": [
|
"text": [
|
||||||
"8.78 ms ± 868 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n",
|
"9.43 ms ± 1 ms per loop (mean ± std. dev. of 7 runs, 100 loops each)\n",
|
||||||
"82.7 ms ± 585 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n"
|
"1 s ± 179 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"data": {
|
"data": {
|
||||||
"text/plain": "array([[[False, False, False, False, False, False, False, False],\n [False, False, False, False, False, False, False, False],\n [False, False, False, True, False, False, False, False],\n [False, False, True, False, False, False, False, False],\n [False, False, False, False, False, True, False, False],\n [False, False, False, False, True, False, False, False],\n [False, False, False, False, False, False, False, False],\n [False, False, False, False, False, False, False, False]]])"
|
"text/plain": "array([[[False, False, False, False, False, False, False, False],\n [False, False, False, False, False, False, False, False],\n [False, False, False, True, False, False, False, False],\n [False, False, True, False, False, False, False, False],\n [False, False, False, False, False, True, False, False],\n [False, False, False, False, True, False, False, False],\n [False, False, False, False, False, False, False, False],\n [False, False, False, False, False, False, False, False]]])"
|
||||||
},
|
},
|
||||||
"execution_count": 16,
|
"execution_count": 23,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"output_type": "execute_result"
|
"output_type": "execute_result"
|
||||||
}
|
}
|
||||||
@ -504,8 +505,11 @@
|
|||||||
" Returns:\n",
|
" Returns:\n",
|
||||||
" A stack of game boards containing boolean values showing where turns are possible for the player.\n",
|
" A stack of game boards containing boolean values showing where turns are possible for the player.\n",
|
||||||
" \"\"\"\n",
|
" \"\"\"\n",
|
||||||
" assert len(boards.shape) == 3\n",
|
" assert len(boards.shape) == 3, \"The number fo input dimensions does not fit.\"\n",
|
||||||
" assert boards.shape[:2] == (BOARD_SIZE, BOARD_SIZE)\n",
|
" assert boards.shape[1:] == (\n",
|
||||||
|
" BOARD_SIZE,\n",
|
||||||
|
" BOARD_SIZE,\n",
|
||||||
|
" ), \"The input dimensions do not fit.\"\n",
|
||||||
"\n",
|
"\n",
|
||||||
" _poss_turns = boards == 0 # checks where fields are empty.\n",
|
" _poss_turns = boards == 0 # checks where fields are empty.\n",
|
||||||
" _poss_turns &= binary_dilation(\n",
|
" _poss_turns &= binary_dilation(\n",
|
||||||
@ -523,40 +527,31 @@
|
|||||||
" return _poss_turns\n",
|
" return _poss_turns\n",
|
||||||
"\n",
|
"\n",
|
||||||
"\n",
|
"\n",
|
||||||
|
"# some simple testing to ensure the function works after simple changes\n",
|
||||||
|
"# this testing is not complete, it's more of a smoke test\n",
|
||||||
|
"test_array = get_new_games(3)\n",
|
||||||
|
"expected_result = np.zeros_like(test_array, dtype=bool)\n",
|
||||||
|
"expected_result[:, 4, 5] = expected_result[:, 2, 3] = True\n",
|
||||||
|
"expected_result[:, 5, 4] = expected_result[:, 3, 2] = True\n",
|
||||||
|
"np.testing.assert_equal(get_possible_turns(test_array), expected_result)\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
"%timeit get_possible_turns(get_new_games(10)) # checks turn possibility evaluation time for 10 initial games\n",
|
"%timeit get_possible_turns(get_new_games(10)) # checks turn possibility evaluation time for 10 initial games\n",
|
||||||
"%timeit get_possible_turns(get_new_games(100)) # check turn possibility evaluation time for 100 initial games\n",
|
"%timeit get_possible_turns(get_new_games(EXAMPLE_STACK_SIZE)) # check turn possibility evaluation time for EXAMPLE_STACK_SIZE initial games\n",
|
||||||
"get_possible_turns(get_new_games(3))[:1] # shows a singe game"
|
"\n",
|
||||||
|
"# shows a single game\n",
|
||||||
|
"get_possible_turns(get_new_games(3))[:1]"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "markdown",
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
"source": [
|
||||||
"def board_evaluation_final(boards: np.ndarray) -> tuple[np.ndarray, np.ndarray]:\n",
|
"Besides the ability to generate an array of possible turns, there need to be functions that check if a given turn is possible.\n",
|
||||||
" score1, score2 = np.sum(boards == 1, axis=(1, 2)), np.sum(boards == -1, axis=(1, 2))\n",
|
"One is needed for the action space validation. The other is for validating a player's turn."
|
||||||
" player_1_won = score1 > score2\n",
|
],
|
||||||
" player_2_won = score1 < score2\n",
|
"metadata": {
|
||||||
" score1_final = 64 - score2[player_1_won]\n",
|
"collapsed": false
|
||||||
" score2_final = 64 - score1[player_2_won]\n",
|
}
|
||||||
" score1[player_1_won] = score1_final\n",
|
|
||||||
" score2[player_2_won] = score2_final\n",
|
|
||||||
" return score1, score2\n",
|
|
||||||
"\n",
|
|
||||||
"\n",
|
|
||||||
"def board_evaluation(boards: np.ndarray) -> tuple[np.ndarray, np.ndarray]:\n",
|
|
||||||
" score1, score2 = np.sum(boards == 1, axis=(1, 2)), np.sum(boards == -1, axis=(1, 2))\n",
|
|
||||||
" return score1, score2\n",
|
|
||||||
"\n",
|
|
||||||
"\n",
|
|
||||||
"def board_score(boards: np.ndarray) -> tuple[np.ndarray]:\n",
|
|
||||||
" return np.sign(np.sum(boards, axis=(1, 2)))\n",
|
|
||||||
"\n",
|
|
||||||
"\n",
|
|
||||||
"board_evaluation(get_new_games(3))\n",
|
|
||||||
"board_evaluation_final(get_new_games(3))"
|
|
||||||
]
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
@ -565,6 +560,17 @@
|
|||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"def move_possible(board: np.ndarray, move: np.ndarray) -> bool:\n",
|
"def move_possible(board: np.ndarray, move: np.ndarray) -> bool:\n",
|
||||||
|
" \"\"\"Checks if a turn is possible.\n",
|
||||||
|
"\n",
|
||||||
|
" Checks if a turn is possible. If no turn is possible, the input array [-1, -1] is expected.\n",
|
||||||
|
"\n",
|
||||||
|
" Args:\n",
|
||||||
|
" board: A board where it should be checked if a turn is possible.\n",
|
||||||
|
" move: The move that should be taken. Expected is the index of the field where a stone should be placed [x, y]. If no placement is possible, [-1, -1] is expected as an input.\n",
|
||||||
|
"\n",
|
||||||
|
" Returns:\n",
|
||||||
|
" True if the move is possible\n",
|
||||||
|
" \"\"\"\n",
|
||||||
" if np.all(move == -1):\n",
|
" if np.all(move == -1):\n",
|
||||||
" return not np.any(get_possible_turns(np.reshape(board, (1, 8, 8))))\n",
|
" return not np.any(get_possible_turns(np.reshape(board, (1, 8, 8))))\n",
|
||||||
" return any(\n",
|
" return any(\n",
|
||||||
@ -572,6 +578,7 @@
|
|||||||
" )\n",
|
" )\n",
|
||||||
"\n",
|
"\n",
|
||||||
"\n",
|
"\n",
|
||||||
|
"# Some testing for this function and the underlying recursive functions that are called.\n",
|
||||||
"assert move_possible(get_new_games(1)[0], np.array([2, 3])) is True\n",
|
"assert move_possible(get_new_games(1)[0], np.array([2, 3])) is True\n",
|
||||||
"assert move_possible(get_new_games(1)[0], np.array([3, 2])) is True\n",
|
"assert move_possible(get_new_games(1)[0], np.array([3, 2])) is True\n",
|
||||||
"assert move_possible(get_new_games(1)[0], np.array([2, 2])) is False\n",
|
"assert move_possible(get_new_games(1)[0], np.array([2, 2])) is False\n",
|
||||||
@ -581,6 +588,91 @@
|
|||||||
"assert move_possible(np.ones((8, 8)) * 0, np.array([-1, -1])) is True"
|
"assert move_possible(np.ones((8, 8)) * 0, np.array([-1, -1])) is True"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"source": [
|
||||||
|
"## Reward functions\n",
|
||||||
|
"\n",
|
||||||
|
"For any kind of reinforcement learning, a reward function is needed. For Othello this would be the final score, the information who won, or changes to the score. A combination of those three would also be possible.\n",
|
||||||
|
"It is probably not possible to weight the current score too high in a reward function, since that would be too close to a classic greedy algorithm. But some influence would improve learning behavior.\n",
|
||||||
|
"In the next section, all three reward functions are implemented so that they can be combined and weighted later on as needed."
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"collapsed": false
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 24,
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"177 µs ± 3.97 µs per loop (mean ± std. dev. of 7 runs, 10,000 loops each)\n",
|
||||||
|
"29.7 µs ± 106 ns per loop (mean ± std. dev. of 7 runs, 10,000 loops each)\n",
|
||||||
|
"31.2 µs ± 269 ns per loop (mean ± std. dev. of 7 runs, 10,000 loops each)\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"def final_boards_evaluation(boards: np.ndarray) -> np.ndarray:\n",
|
||||||
|
" \"\"\"Evaluates the board at the end of the game.\n",
|
||||||
|
"\n",
|
||||||
|
" All unused fields are added to the score of the player that has more stones with his color up.\n",
|
||||||
|
" This score only applies to the end of the game.\n",
|
||||||
|
" Normally the score is represented by the number of stones each player has.\n",
|
||||||
|
" In this case the score was combined by building the difference.\n",
|
||||||
|
"\n",
|
||||||
|
" Args:\n",
|
||||||
|
" boards: A stack of game boards at the end of the game.\n",
|
||||||
|
"\n",
|
||||||
|
" Returns:\n",
|
||||||
|
" the combined score for both players.\n",
|
||||||
|
" \"\"\"\n",
|
||||||
|
" score1, score2 = np.sum(boards == 1, axis=(1, 2)), np.sum(boards == -1, axis=(1, 2))\n",
|
||||||
|
" player_1_won = score1 > score2\n",
|
||||||
|
" player_2_won = score1 < score2\n",
|
||||||
|
" score1_final = 64 - score2[player_1_won]\n",
|
||||||
|
" score2_final = 64 - score1[player_2_won]\n",
|
||||||
|
" score1[player_1_won] = score1_final\n",
|
||||||
|
" score2[player_2_won] = score2_final\n",
|
||||||
|
" return score1 - score2\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"def evaluate_boards(boards: np.ndarray) -> np.ndarray:\n",
|
||||||
|
" \"\"\"Counts the stones each player has on the board.\n",
|
||||||
|
"\n",
|
||||||
|
" Args:\n",
|
||||||
|
" boards: A stack of boards for evaluation.\n",
|
||||||
|
"\n",
|
||||||
|
" Returns:\n",
|
||||||
|
" the combined score for both players.\n",
|
||||||
|
" \"\"\"\n",
|
||||||
|
" return np.sum(boards, axis=(1, 2))\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"def evaluate_who_won(boards: np.ndarray) -> np.ndarray:\n",
|
||||||
|
" \"\"\"Checks who won or is winning a game.\n",
|
||||||
|
"\n",
|
||||||
|
" Args:\n",
|
||||||
|
" boards: A stack of boards for evaluation.\n",
|
||||||
|
"\n",
|
||||||
|
" Returns:\n",
|
||||||
|
" The information who won for both players. 1 means the player won, -1 means the opponent won. 0 represents a draw.\n",
|
||||||
|
" \"\"\"\n",
|
||||||
|
" return np.sign(np.sum(boards, axis=(1, 2)))\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"_boards = get_new_games(EXAMPLE_STACK_SIZE)\n",
|
||||||
|
"%timeit final_boards_evaluation(_boards)\n",
|
||||||
|
"%timeit evaluate_boards(_boards)\n",
|
||||||
|
"%timeit evaluate_who_won(_boards)"
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"collapsed": false
|
||||||
|
}
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
|
Loading…
x
Reference in New Issue
Block a user