From fc7f9dbb4d143dbd586f2a62865db6e216163609 Mon Sep 17 00:00:00 2001
From: Philipp Horstenkamp <philipp@horstenkamp.de>
Date: Sun, 12 Feb 2023 18:54:28 +0100
Subject: [PATCH] Added a docstring to the function checking if a move is
 possible.

---
 main.ipynb | 166 +++++++++++++++++++++++++++++++++++++++++------------
 1 file changed, 129 insertions(+), 37 deletions(-)

diff --git a/main.ipynb b/main.ipynb
index 51c4e53..d350229 100644
--- a/main.ipynb
+++ b/main.ipynb
@@ -152,13 +152,14 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 22,
    "metadata": {},
    "outputs": [],
    "source": [
     "BOARD_SIZE: Final[int] = 8  # defines the board side length as 8\n",
     "PLAYER: Final[int] = 1  # defines the number symbolising the player as 1\n",
-    "ENEMY: Final[int] = -1  # defines the number symbolising the enemy as -1"
+    "ENEMY: Final[int] = -1  # defines the number symbolising the enemy as -1\n",
+    "EXAMPLE_STACK_SIZE: Final[int] = 1000  # defines the game stack size for examples"
    ]
   },
   {
@@ -450,22 +451,22 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 16,
+   "execution_count": 23,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "8.78 ms ± 868 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n",
-      "82.7 ms ± 585 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n"
+      "9.43 ms ± 1 ms per loop (mean ± std. dev. of 7 runs, 100 loops each)\n",
+      "1 s ± 179 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
      ]
     },
     {
      "data": {
       "text/plain": "array([[[False, False, False, False, False, False, False, False],\n        [False, False, False, False, False, False, False, False],\n        [False, False, False,  True, False, False, False, False],\n        [False, False,  True, False, False, False, False, False],\n        [False, False, False, False, False,  True, False, False],\n        [False, False, False, False,  True, False, False, False],\n        [False, False, False, False, False, False, False, False],\n        [False, False, False, False, False, False, False, False]]])"
      },
-     "execution_count": 16,
+     "execution_count": 23,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -504,8 +505,11 @@
     "    Returns:\n",
     "        A stack of game boards containing boolean values showing where turns are possible for the player.\n",
     "    \"\"\"\n",
-    "    assert len(boards.shape) == 3\n",
-    "    assert boards.shape[:2] == (BOARD_SIZE, BOARD_SIZE)\n",
+    "    assert len(boards.shape) == 3, \"The number fo input dimensions does not fit.\"\n",
+    "    assert boards.shape[1:] == (\n",
+    "        BOARD_SIZE,\n",
+    "        BOARD_SIZE,\n",
+    "    ), \"The input dimensions do not fit.\"\n",
     "\n",
     "    _poss_turns = boards == 0  # checks where fields are empty.\n",
     "    _poss_turns &= binary_dilation(\n",
@@ -523,40 +527,31 @@
     "    return _poss_turns\n",
     "\n",
     "\n",
+    "# some simple testing to ensure the function works after simple changes\n",
+    "# this testing is not complete, it's more of a smoke-test\n",
+    "test_array = get_new_games(3)\n",
+    "expected_result = np.zeros_like(test_array, dtype=bool)\n",
+    "expected_result[:, 4, 5] = expected_result[:, 2, 3] = True\n",
+    "expected_result[:, 5, 4] = expected_result[:, 3, 2] = True\n",
+    "np.testing.assert_equal(get_possible_turns(test_array), expected_result)\n",
+    "\n",
+    "\n",
     "%timeit get_possible_turns(get_new_games(10))  # checks turn possibility evaluation time for 10 initial games\n",
-    "%timeit get_possible_turns(get_new_games(100))  # check turn possibility evaluation time for 100 initial games\n",
-    "get_possible_turns(get_new_games(3))[:1]  # shows a singe game"
+    "%timeit get_possible_turns(get_new_games(EXAMPLE_STACK_SIZE))  # check turn possibility evaluation time for EXAMPLE_STACK_SIZE initial games\n",
+    "\n",
+    "# shows a single game\n",
+    "get_possible_turns(get_new_games(3))[:1]"
    ]
   },
   {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
+   "cell_type": "markdown",
    "source": [
-    "def board_evaluation_final(boards: np.ndarray) -> tuple[np.ndarray, np.ndarray]:\n",
-    "    score1, score2 = np.sum(boards == 1, axis=(1, 2)), np.sum(boards == -1, axis=(1, 2))\n",
-    "    player_1_won = score1 > score2\n",
-    "    player_2_won = score1 < score2\n",
-    "    score1_final = 64 - score2[player_1_won]\n",
-    "    score2_final = 64 - score1[player_2_won]\n",
-    "    score1[player_1_won] = score1_final\n",
-    "    score2[player_2_won] = score2_final\n",
-    "    return score1, score2\n",
-    "\n",
-    "\n",
-    "def board_evaluation(boards: np.ndarray) -> tuple[np.ndarray, np.ndarray]:\n",
-    "    score1, score2 = np.sum(boards == 1, axis=(1, 2)), np.sum(boards == -1, axis=(1, 2))\n",
-    "    return score1, score2\n",
-    "\n",
-    "\n",
-    "def board_score(boards: np.ndarray) -> tuple[np.ndarray]:\n",
-    "    return np.sign(np.sum(boards, axis=(1, 2)))\n",
-    "\n",
-    "\n",
-    "board_evaluation(get_new_games(3))\n",
-    "board_evaluation_final(get_new_games(3))"
-   ]
+    "Besides the ability to generate an array of possible turns, there need to be functions that check if a given turn is possible.\n",
+    "One is needed for the action space validation. The other is for validating a player's turn."
+   ],
+   "metadata": {
+    "collapsed": false
+   }
   },
   {
    "cell_type": "code",
@@ -565,6 +560,17 @@
    "outputs": [],
    "source": [
     "def move_possible(board: np.ndarray, move: np.ndarray) -> bool:\n",
+    "    \"\"\"Checks if a turn is possible.\n",
+    "\n",
+    "    Checks if a turn is possible. If no turn is possible, the input array [-1, -1] is expected.\n",
+    "\n",
+    "    Args:\n",
+    "        board: A board for which it should be checked if a turn is possible.\n",
+    "        move: The move that should be taken. Expected is the index of the field where a stone should be placed [x, y]. If no placement is possible, [-1, -1] is expected as an input.\n",
+    "\n",
+    "    Returns:\n",
+    "        True if the move is possible\n",
+    "    \"\"\"\n",
     "    if np.all(move == -1):\n",
     "        return not np.any(get_possible_turns(np.reshape(board, (1, 8, 8))))\n",
     "    return any(\n",
@@ -572,6 +578,7 @@
     "    )\n",
     "\n",
     "\n",
+    "# Some testing for this function and the underlying recursive functions that are called.\n",
     "assert move_possible(get_new_games(1)[0], np.array([2, 3])) is True\n",
     "assert move_possible(get_new_games(1)[0], np.array([3, 2])) is True\n",
     "assert move_possible(get_new_games(1)[0], np.array([2, 2])) is False\n",
@@ -581,6 +588,91 @@
     "assert move_possible(np.ones((8, 8)) * 0, np.array([-1, -1])) is True"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "source": [
+    "## Reward functions\n",
+    "\n",
+    "For any kind of reinforcement learning a reward function is needed. For Othello this would be the final score, the information who won, or changes to the score. A combination of those three would also be possible.\n",
+    "It is probably not possible to weight the current score too high in a reward function, since that would be too close to a classic greedy algorithm. But some influence would improve learning behavior.\n",
+    "In the next section all three reward functions are implemented, to be combined and weighted later on as needed."
+   ],
+   "metadata": {
+    "collapsed": false
+   }
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 24,
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "177 µs ± 3.97 µs per loop (mean ± std. dev. of 7 runs, 10,000 loops each)\n",
+      "29.7 µs ± 106 ns per loop (mean ± std. dev. of 7 runs, 10,000 loops each)\n",
+      "31.2 µs ± 269 ns per loop (mean ± std. dev. of 7 runs, 10,000 loops each)\n"
+     ]
+    }
+   ],
+   "source": [
+    "def final_boards_evaluation(boards: np.ndarray) -> np.ndarray:\n",
+    "    \"\"\"Evaluates the board at the end of the game.\n",
+    "\n",
+    "    All unused fields are added to the score of the player that has more stones with his color up.\n",
+    "    This score only applies to the end of the game.\n",
+    "    Normally the score is represented by the number of stones each player has.\n",
+    "    In this case the score was combined by building the difference.\n",
+    "\n",
+    "    Args:\n",
+    "        boards: A stack of game boards at the end of the game.\n",
+    "\n",
+    "    Returns:\n",
+    "        The combined score for both players.\n",
+    "    \"\"\"\n",
+    "    score1, score2 = np.sum(boards == 1, axis=(1, 2)), np.sum(boards == -1, axis=(1, 2))\n",
+    "    player_1_won = score1 > score2\n",
+    "    player_2_won = score1 < score2\n",
+    "    score1_final = 64 - score2[player_1_won]\n",
+    "    score2_final = 64 - score1[player_2_won]\n",
+    "    score1[player_1_won] = score1_final\n",
+    "    score2[player_2_won] = score2_final\n",
+    "    return score1 - score2\n",
+    "\n",
+    "\n",
+    "def evaluate_boards(boards: np.ndarray) -> np.ndarray:\n",
+    "    \"\"\"Counts the stones each player has on the board.\n",
+    "\n",
+    "    Args:\n",
+    "        boards: A stack of boards for evaluation.\n",
+    "\n",
+    "    Returns:\n",
+    "        The combined score for both players (player stones minus enemy stones).\n",
+    "    \"\"\"\n",
+    "    return np.sum(boards, axis=(1, 2))\n",
+    "\n",
+    "\n",
+    "def evaluate_who_won(boards: np.ndarray) -> np.ndarray:\n",
+    "    \"\"\"Checks who won or is winning a game.\n",
+    "\n",
+    "    Args:\n",
+    "        boards: A stack of boards for evaluation.\n",
+    "\n",
+    "    Returns:\n",
+    "        The information who won for both players. 1 means the player won, -1 means the opponent won. 0 represents a draw.\n",
+    "    \"\"\"\n",
+    "    return np.sign(np.sum(boards, axis=(1, 2)))\n",
+    "\n",
+    "\n",
+    "_boards = get_new_games(EXAMPLE_STACK_SIZE)\n",
+    "%timeit final_boards_evaluation(_boards)\n",
+    "%timeit evaluate_boards(_boards)\n",
+    "%timeit evaluate_who_won(_boards)"
+   ],
+   "metadata": {
+    "collapsed": false
+   }
+  },
   {
    "cell_type": "code",
    "execution_count": null,