aki_prj23_transparenzregister/Jupyter/connection-counter.ipynb

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 54,
   "metadata": {
    "collapsed": true,
    "ExecuteTime": {
     "end_time": "2023-06-03T01:36:32.345509400Z",
     "start_time": "2023-06-03T01:36:32.332130700Z"
    }
   },
   "outputs": [],
   "source": [
    "from typing import Final\n",
    "\n",
    "import numpy as np\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "outputs": [
    {
     "data": {
      "text/plain": "     Company 1  Connection Weight  Company 2\n0           21                 83         58\n1           37                 88         86\n2           40                  6         83\n3           60                 35          2\n4           11                 22         10\n..         ...                ...        ...\n695         62                 37         11\n696         10                 24         27\n697         97                 40         55\n698         14                 87         66\n699         50                 55         82\n\n[693 rows x 3 columns]",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>Company 1</th>\n      <th>Connection Weight</th>\n      <th>Company 2</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>21</td>\n      <td>83</td>\n      <td>58</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>37</td>\n      <td>88</td>\n      <td>86</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>40</td>\n      <td>6</td>\n      <td>83</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>60</td>\n      <td>35</td>\n      <td>2</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>11</td>\n      <td>22</td>\n      <td>10</td>\n    </tr>\n    <tr>\n      <th>...</th>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n    </tr>\n    <tr>\n      <th>695</th>\n      <td>62</td>\n      <td>37</td>\n      <td>11</td>\n    </tr>\n    <tr>\n      <th>696</th>\n      <td>10</td>\n      <td>24</td>\n      <td>27</td>\n    </tr>\n    <tr>\n      <th>697</th>\n      <td>97</td>\n      <td>40</td>\n      <td>55</td>\n    </tr>\n    <tr>\n      <th>698</th>\n      <td>14</td>\n      <td>87</td>\n      <td>66</td>\n    </tr>\n    <tr>\n      <th>699</th>\n      <td>50</td>\n      <td>55</td>\n      <td>82</td>\n    </tr>\n  </tbody>\n</table>\n<p>693 rows × 3 columns</p>\n</div>"
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from typing import Final\n",
    "\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "\n",
    "# Size of the synthetic network: number of companies and how many\n",
    "# connection slots each company contributes before sampling.\n",
    "number_of_entries = 100\n",
    "number_of_contacts = 10\n",
    "# Fixed seed so the random connection weights are identical on every run.\n",
    "WEIGHT_SEED: Final = 42\n",
    "ids: Final = list(range(number_of_entries))\n",
    "# One row per company, indexed by company_id, with no columns yet.\n",
    "companies = pd.DataFrame(columns=[], index=pd.Index(ids, name=\"company_id\"))\n",
    "\n",
    "# Each endpoint column repeats every id `number_of_contacts` times and keeps a\n",
    "# seeded 70 % sample; the two seeds differ so both endpoints are shuffled independently.\n",
    "id1 = (\n",
    "    pd.Series(ids * number_of_contacts, name=\"Company 1\")\n",
    "    .sample(frac=0.7, random_state=42)\n",
    "    .reset_index(drop=True)\n",
    ")\n",
    "id2 = (\n",
    "    pd.Series(ids * number_of_contacts, name=\"Company 2\")\n",
    "    .sample(frac=0.7, random_state=43)\n",
    "    .reset_index(drop=True)\n",
    ")\n",
    "# Integer weights in [0, 100), drawn from a seeded generator instead of the\n",
    "# unseeded global NumPy state.\n",
    "weights = pd.Series(\n",
    "    np.random.default_rng(WEIGHT_SEED).integers(\n",
    "        0, 100, size=max(len(id1), len(id2))\n",
    "    ),\n",
    "    name=\"Connection Weight\",\n",
    ")\n",
    "# Put the three series side by side on their shared positional index; rows\n",
    "# missing any value are dropped before the cast to int.\n",
    "connections = pd.concat([id1, weights, id2], axis=1).dropna().astype(int)\n",
    "# Drop rows where a company would be connected to itself.\n",
    "connections = connections.loc[connections[\"Company 1\"] != connections[\"Company 2\"]]\n",
    "connections"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2023-06-03T10:15:42.647508100Z",
     "start_time": "2023-06-03T10:15:40.656713900Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 69,
   "outputs": [
    {
     "data": {
      "text/plain": "     Company 1  Connection Weight  Company 2\n0           21                 36         58\n1           37                 59         86\n2           40                 26         83\n3           60                 21          2\n4           11                  2         10\n..         ...                ...        ...\n695         62                 45         11\n696         10                 64         27\n697         97                 24         55\n698         14                 51         66\n699         50                 93         82\n\n[693 rows x 3 columns]",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>Company 1</th>\n      <th>Connection Weight</th>\n      <th>Company 2</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>21</td>\n      <td>36</td>\n      <td>58</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>37</td>\n      <td>59</td>\n      <td>86</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>40</td>\n      <td>26</td>\n      <td>83</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>60</td>\n      <td>21</td>\n      <td>2</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>11</td>\n      <td>2</td>\n      <td>10</td>\n    </tr>\n    <tr>\n      <th>...</th>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n    </tr>\n    <tr>\n      <th>695</th>\n      <td>62</td>\n      <td>45</td>\n      <td>11</td>\n    </tr>\n    <tr>\n      <th>696</th>\n      <td>10</td>\n      <td>64</td>\n      <td>27</td>\n    </tr>\n    <tr>\n      <th>697</th>\n      <td>97</td>\n      <td>24</td>\n      <td>55</td>\n    </tr>\n    <tr>\n      <th>698</th>\n      <td>14</td>\n      <td>51</td>\n      <td>66</td>\n    </tr>\n    <tr>\n      <th>699</th>\n      <td>50</td>\n      <td>93</td>\n      <td>82</td>\n    </tr>\n  </tbody>\n</table>\n<p>693 rows × 3 columns</p>\n</div>"
     },
     "execution_count": 69,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# `id1`, `id2` and `connections` are already built by the setup cell above.\n",
    "# Rebuilding them here would draw fresh random weights and overwrite the frame,\n",
    "# so this cell only displays the existing edge list.\n",
    "connections"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2023-06-03T01:40:08.441882700Z",
     "start_time": "2023-06-03T01:40:08.406876900Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 73,
   "outputs": [
    {
     "data": {
      "text/plain": "           Company 2\nCompany 1           \n0                  6\n1                  6\n2                  5\n3                  9\n4                  7\n...              ...\n95                 7\n96                 8\n97                 7\n98                 6\n99                 8\n\n[100 rows x 1 columns]",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>Company 2</th>\n    </tr>\n    <tr>\n      <th>Company 1</th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>6</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>6</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>5</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>9</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>7</td>\n    </tr>\n    <tr>\n      <th>...</th>\n      <td>...</td>\n    </tr>\n    <tr>\n      <th>95</th>\n      <td>7</td>\n    </tr>\n    <tr>\n      <th>96</th>\n      <td>8</td>\n    </tr>\n    <tr>\n      <th>97</th>\n      <td>7</td>\n    </tr>\n    <tr>\n      <th>98</th>\n      <td>6</td>\n    </tr>\n    <tr>\n      <th>99</th>\n      <td>8</td>\n    </tr>\n  </tbody>\n</table>\n<p>100 rows × 1 columns</p>\n</div>"
     },
     "execution_count": 73,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# For every `Company 1` id, count its rows in the connections table\n",
    "# (the result keeps a single `Company 2` column holding the counts).\n",
    "connections.groupby(\"Company 1\")[[\"Company 2\"]].count()"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2023-06-03T01:44:23.433333600Z",
     "start_time": "2023-06-03T01:44:23.424841700Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 72,
   "outputs": [
    {
     "data": {
      "text/plain": "            Analysis-d0  Analysis-d1\ncompany_id                          \n0                     1            6\n1                     1            6\n2                     1            5\n3                     1            9\n4                     1            7\n...                 ...          ...\n95                    1            7\n96                    1            8\n97                    1            7\n98                    1            6\n99                    1            8\n\n[100 rows x 2 columns]",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>Analysis-d0</th>\n      <th>Analysis-d1</th>\n    </tr>\n    <tr>\n      <th>company_id</th>\n      <th></th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>1</td>\n      <td>6</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>1</td>\n      <td>6</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>1</td>\n      <td>5</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>1</td>\n      <td>9</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>1</td>\n      <td>7</td>\n    </tr>\n    <tr>\n      <th>...</th>\n      <td>...</td>\n      <td>...</td>\n    </tr>\n    <tr>\n      <th>95</th>\n      <td>1</td>\n      <td>7</td>\n    </tr>\n    <tr>\n      <th>96</th>\n      <td>1</td>\n      <td>8</td>\n    </tr>\n    <tr>\n      <th>97</th>\n      <td>1</td>\n      <td>7</td>\n    </tr>\n    <tr>\n      <th>98</th>\n      <td>1</td>\n      <td>6</td>\n    </tr>\n    <tr>\n      <th>99</th>\n      <td>1</td>\n      <td>8</td>\n    </tr>\n  </tbody>\n</table>\n<p>100 rows × 2 columns</p>\n</div>"
     },
     "execution_count": 72,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Tier 0 is set to 1 for every company (presumably: the company itself - TODO confirm).\n",
    "companies[\"Analysis-d0\"] = 1\n",
    "# Tier 1: number of connection rows in which the company appears as `Company 1`.\n",
    "# Reindexing on the companies index keeps companies without such a row at 0\n",
    "# instead of NaN and keeps the column integer-typed.\n",
    "companies[\"Analysis-d1\"] = (\n",
    "    connections.groupby(\"Company 1\")\n",
    "    .size()\n",
    "    .reindex(companies.index, fill_value=0)\n",
    ")\n",
    "# TODO: higher tiers (the draft looped over range(5)) need a self-join of\n",
    "# `connections`; the previous draft of that join had an empty `on=` argument,\n",
    "# which made the whole cell a syntax error, so it is left out until specified.\n",
    "companies"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2023-06-03T01:43:25.341850700Z",
     "start_time": "2023-06-03T01:43:25.318015500Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "# Display the current state of the `companies` frame.\n",
    "companies"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2023-06-03T01:36:32.382091200Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2023-06-03T01:36:32.385093700Z"
    }
   }
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}