refactor: Move Auditor dataclass to models

This commit is contained in:
TrisNol
2023-08-18 14:20:34 +02:00
parent 309755383e
commit eb0962e1be
2 changed files with 142 additions and 114 deletions

View File

@ -18,21 +18,9 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 4,
"metadata": {},
"outputs": [
{
"ename": "ModuleNotFoundError",
"evalue": "No module named 'deutschland'",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[1;32mIn[1], line 2\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[39mimport\u001b[39;00m \u001b[39mpandas\u001b[39;00m \u001b[39mas\u001b[39;00m \u001b[39mpd\u001b[39;00m\n\u001b[1;32m----> 2\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39mdeutschland\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39mbundesanzeiger\u001b[39;00m \u001b[39mimport\u001b[39;00m Bundesanzeiger\n",
"\u001b[1;31mModuleNotFoundError\u001b[0m: No module named 'deutschland'"
]
}
],
"outputs": [],
"source": [
"import pandas as pd\n",
"from deutschland.bundesanzeiger import Bundesanzeiger"
@ -40,14 +28,14 @@
},
{
"cell_type": "code",
"execution_count": 33,
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"dict_keys(['c1051233030a8e0232523052fd4a2310', '57d129e6fd7505d567fa13919e5e6bdd'])\n"
"dict_keys(['7e53c9211957c6a4c17264ab86946c3b', 'c1051233030a8e0232523052fd4a2310', '57d129e6fd7505d567fa13919e5e6bdd'])\n"
]
}
],
@ -61,7 +49,7 @@
},
{
"cell_type": "code",
"execution_count": 34,
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
@ -72,7 +60,7 @@
},
{
"cell_type": "code",
"execution_count": 35,
"execution_count": 7,
"metadata": {},
"outputs": [
{
@ -106,7 +94,7 @@
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2023-05-25</td>\n",
" <td>2023-07-11</td>\n",
" <td>Jahresabschluss zum Geschäftsjahr vom 01.01.20...</td>\n",
" <td>Volkswagen Economy Service Erdle Bernhard Erdl...</td>\n",
" <td>\\n\\n\\n\\n \\n\\n\\n\\n\\n\\n\\n\\nVolkswagen Economy Se...</td>\n",
@ -114,6 +102,14 @@
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2023-05-25</td>\n",
" <td>Jahresabschluss zum Geschäftsjahr vom 01.01.20...</td>\n",
" <td>Volkswagen Economy Service Erdle Bernhard Erdl...</td>\n",
" <td>\\n\\n\\n\\n \\n\\n\\n\\n\\n\\n\\n\\nVolkswagen Economy Se...</td>\n",
" <td>&lt;div class=\"publication_container\"&gt;\\n &lt;div cla...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2023-05-24</td>\n",
" <td>Jahresabschluss zum Geschäftsjahr vom 01.01.20...</td>\n",
" <td>Volkswagen Economy Service Erdle Bernhard Erdl...</td>\n",
@ -126,23 +122,27 @@
],
"text/plain": [
" date name \\\n",
"0 2023-05-25 Jahresabschluss zum Geschäftsjahr vom 01.01.20... \n",
"1 2023-05-24 Jahresabschluss zum Geschäftsjahr vom 01.01.20... \n",
"0 2023-07-11 Jahresabschluss zum Geschäftsjahr vom 01.01.20... \n",
"1 2023-05-25 Jahresabschluss zum Geschäftsjahr vom 01.01.20... \n",
"2 2023-05-24 Jahresabschluss zum Geschäftsjahr vom 01.01.20... \n",
"\n",
" company \\\n",
"0 Volkswagen Economy Service Erdle Bernhard Erdl... \n",
"1 Volkswagen Economy Service Erdle Bernhard Erdl... \n",
"2 Volkswagen Economy Service Erdle Bernhard Erdl... \n",
"\n",
" report \\\n",
"0 \\n\\n\\n\\n \\n\\n\\n\\n\\n\\n\\n\\nVolkswagen Economy Se... \n",
"1 \\n\\n\\n\\n \\n\\n\\n\\n\\n\\n\\n\\nVolkswagen Economy Se... \n",
"2 \\n\\n\\n\\n \\n\\n\\n\\n\\n\\n\\n\\nVolkswagen Economy Se... \n",
"\n",
" raw_report \n",
"0 <div class=\"publication_container\">\\n <div cla... \n",
"1 <div class=\"publication_container\">\\n <div cla... "
"1 <div class=\"publication_container\">\\n <div cla... \n",
"2 <div class=\"publication_container\">\\n <div cla... "
]
},
"execution_count": 35,
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
@ -154,7 +154,7 @@
},
{
"cell_type": "code",
"execution_count": 36,
"execution_count": 8,
"metadata": {},
"outputs": [
{
@ -189,7 +189,7 @@
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2023-05-25</td>\n",
" <td>2023-07-11</td>\n",
" <td>Jahresabschluss zum Geschäftsjahr vom 01.01.20...</td>\n",
" <td>Volkswagen Economy Service Erdle Bernhard Erdl...</td>\n",
" <td>\\n\\n\\n\\n \\n\\n\\n\\n\\n\\n\\n\\nVolkswagen Economy Se...</td>\n",
@ -198,6 +198,15 @@
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2023-05-25</td>\n",
" <td>Jahresabschluss zum Geschäftsjahr vom 01.01.20...</td>\n",
" <td>Volkswagen Economy Service Erdle Bernhard Erdl...</td>\n",
" <td>\\n\\n\\n\\n \\n\\n\\n\\n\\n\\n\\n\\nVolkswagen Economy Se...</td>\n",
" <td>&lt;div class=\"publication_container\"&gt;\\n &lt;div cla...</td>\n",
" <td>Jahresabschluss</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2023-05-24</td>\n",
" <td>Jahresabschluss zum Geschäftsjahr vom 01.01.20...</td>\n",
" <td>Volkswagen Economy Service Erdle Bernhard Erdl...</td>\n",
@ -211,23 +220,27 @@
],
"text/plain": [
" date name \\\n",
"0 2023-05-25 Jahresabschluss zum Geschäftsjahr vom 01.01.20... \n",
"1 2023-05-24 Jahresabschluss zum Geschäftsjahr vom 01.01.20... \n",
"0 2023-07-11 Jahresabschluss zum Geschäftsjahr vom 01.01.20... \n",
"1 2023-05-25 Jahresabschluss zum Geschäftsjahr vom 01.01.20... \n",
"2 2023-05-24 Jahresabschluss zum Geschäftsjahr vom 01.01.20... \n",
"\n",
" company \\\n",
"0 Volkswagen Economy Service Erdle Bernhard Erdl... \n",
"1 Volkswagen Economy Service Erdle Bernhard Erdl... \n",
"2 Volkswagen Economy Service Erdle Bernhard Erdl... \n",
"\n",
" report \\\n",
"0 \\n\\n\\n\\n \\n\\n\\n\\n\\n\\n\\n\\nVolkswagen Economy Se... \n",
"1 \\n\\n\\n\\n \\n\\n\\n\\n\\n\\n\\n\\nVolkswagen Economy Se... \n",
"2 \\n\\n\\n\\n \\n\\n\\n\\n\\n\\n\\n\\nVolkswagen Economy Se... \n",
"\n",
" raw_report type \n",
"0 <div class=\"publication_container\">\\n <div cla... Jahresabschluss \n",
"1 <div class=\"publication_container\">\\n <div cla... Jahresabschluss "
"1 <div class=\"publication_container\">\\n <div cla... Jahresabschluss \n",
"2 <div class=\"publication_container\">\\n <div cla... Jahresabschluss "
]
},
"execution_count": 36,
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
@ -239,7 +252,7 @@
},
{
"cell_type": "code",
"execution_count": 37,
"execution_count": 9,
"metadata": {},
"outputs": [
{
@ -272,13 +285,20 @@
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2023-07-11</td>\n",
" <td>Volkswagen Economy Service Erdle Bernhard Erdl...</td>\n",
" <td>&lt;div class=\"publication_container\"&gt;\\n &lt;div cla...</td>\n",
" <td>2021</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2023-05-25</td>\n",
" <td>Volkswagen Economy Service Erdle Bernhard Erdl...</td>\n",
" <td>&lt;div class=\"publication_container\"&gt;\\n &lt;div cla...</td>\n",
" <td>2020</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <th>2</th>\n",
" <td>2023-05-24</td>\n",
" <td>Volkswagen Economy Service Erdle Bernhard Erdl...</td>\n",
" <td>&lt;div class=\"publication_container\"&gt;\\n &lt;div cla...</td>\n",
@ -290,15 +310,17 @@
],
"text/plain": [
" date company \\\n",
"0 2023-05-25 Volkswagen Economy Service Erdle Bernhard Erdl... \n",
"1 2023-05-24 Volkswagen Economy Service Erdle Bernhard Erdl... \n",
"0 2023-07-11 Volkswagen Economy Service Erdle Bernhard Erdl... \n",
"1 2023-05-25 Volkswagen Economy Service Erdle Bernhard Erdl... \n",
"2 2023-05-24 Volkswagen Economy Service Erdle Bernhard Erdl... \n",
"\n",
" raw_report jahr \n",
"0 <div class=\"publication_container\">\\n <div cla... 2020 \n",
"1 <div class=\"publication_container\">\\n <div cla... 2019 "
"0 <div class=\"publication_container\">\\n <div cla... 2021 \n",
"1 <div class=\"publication_container\">\\n <div cla... 2020 \n",
"2 <div class=\"publication_container\">\\n <div cla... 2019 "
]
},
"execution_count": 37,
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
@ -322,7 +344,7 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
@ -332,7 +354,7 @@
},
{
"cell_type": "code",
"execution_count": 39,
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
@ -350,18 +372,12 @@
},
{
"cell_type": "code",
"execution_count": 40,
"execution_count": 18,
"metadata": {},
"outputs": [],
"source": [
"import re\n",
"from dataclasses import dataclass\n",
"\n",
"\n",
"@dataclass\n",
"class Auditor:\n",
" name: str\n",
" company: str\n",
"from aki_prj23_transparenzregister.models.auditor import Auditor\n",
"\n",
"\n",
"def extract_auditor_company(report: str) -> str:\n",
@ -386,7 +402,7 @@
},
{
"cell_type": "code",
"execution_count": 41,
"execution_count": 13,
"metadata": {},
"outputs": [
{
@ -395,7 +411,7 @@
"[]"
]
},
"execution_count": 41,
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
@ -430,16 +446,16 @@
},
{
"cell_type": "code",
"execution_count": 42,
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'net_income': 23484.67, 'equity': 65083.84, 'current_assets': 357613.61}"
"{'net_income': 100238.5, 'equity': 165322.34, 'current_assets': 435344.07}"
]
},
"execution_count": 42,
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
@ -514,7 +530,7 @@
},
{
"cell_type": "code",
"execution_count": 43,
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
@ -530,7 +546,7 @@
},
{
"cell_type": "code",
"execution_count": 46,
"execution_count": 16,
"metadata": {},
"outputs": [
{
@ -538,24 +554,30 @@
"output_type": "stream",
"text": [
"MultiIndex([('Aktiva', 'Unnamed: 0_level_1'),\n",
" ('Aktiva', '31.12.2020 EUR'),\n",
" ('Aktiva', '31.12.2019 EUR')],\n",
" ('Aktiva', '31.12.2021 EUR'),\n",
" ('Aktiva', '31.12.2020 EUR')],\n",
" )\n",
"Aktiva Unnamed: 0_level_1 object\n",
" 31.12.2021 EUR object\n",
" 31.12.2020 EUR object\n",
" 31.12.2019 EUR object\n",
"dtype: object\n",
"MultiIndex([('Passiva', 'Unnamed: 0_level_1'),\n",
" ('Passiva', '31.12.2020 EUR'),\n",
" ('Passiva', '31.12.2019 EUR')],\n",
" ('Passiva', '31.12.2021 EUR'),\n",
" ('Passiva', '31.12.2020 EUR')],\n",
" )\n",
"Passiva Unnamed: 0_level_1 object\n",
" 31.12.2021 EUR object\n",
" 31.12.2020 EUR object\n",
" 31.12.2019 EUR object\n",
"dtype: object\n",
"Index(['Angaben zur Identifikation der Gesellschaft laut Registergericht', 'Angaben zur Identifikation der Gesellschaft laut Registergericht.1'], dtype='object')\n",
"Angaben zur Identifikation der Gesellschaft laut Registergericht object\n",
"Angaben zur Identifikation der Gesellschaft laut Registergericht.1 object\n",
"dtype: object\n",
"MultiIndex([('Kreditentwicklung', 'Unnamed: 0_level_1'),\n",
" ( 'Betrag', 'EUR')],\n",
" )\n",
"Kreditentwicklung Unnamed: 0_level_1 object\n",
"Betrag EUR object\n",
"dtype: object\n"
]
},
@ -565,7 +587,7 @@
"{}"
]
},
"execution_count": 46,
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
@ -586,19 +608,46 @@
},
{
"cell_type": "code",
"execution_count": 45,
"execution_count": 22,
"metadata": {},
"outputs": [
{
"ename": "KeyError",
"evalue": "'Passiva'",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mKeyError\u001b[0m Traceback (most recent call last)",
"\u001b[1;32mc:\\Users\\trist\\Documents\\Code\\M.Sc\\aki_prj23_transparenzregister\\Jupyter\\API-tests\\Bundesanzeiger\\notebook.ipynb Cell 21\u001b[0m in \u001b[0;36m1\n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Users/trist/Documents/Code/M.Sc/aki_prj23_transparenzregister/Jupyter/API-tests/Bundesanzeiger/notebook.ipynb#X26sZmlsZQ%3D%3D?line=10'>11</a>\u001b[0m \u001b[39mreturn\u001b[39;00m result\n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Users/trist/Documents/Code/M.Sc/aki_prj23_transparenzregister/Jupyter/API-tests/Bundesanzeiger/notebook.ipynb#X26sZmlsZQ%3D%3D?line=13'>14</a>\u001b[0m bilanz \u001b[39m=\u001b[39m get_bilanz(sample_report)\n\u001b[1;32m---> <a href='vscode-notebook-cell:/c%3A/Users/trist/Documents/Code/M.Sc/aki_prj23_transparenzregister/Jupyter/API-tests/Bundesanzeiger/notebook.ipynb#X26sZmlsZQ%3D%3D?line=14'>15</a>\u001b[0m bilanz[\u001b[39m\"\u001b[39;49m\u001b[39mPassiva\u001b[39;49m\u001b[39m\"\u001b[39;49m]\u001b[39m.\u001b[39mhead()\n",
"\u001b[1;31mKeyError\u001b[0m: 'Passiva'"
]
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"Empty DataFrame\n",
"Columns: []\n",
"Index: []"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
@ -612,6 +661,8 @@
" StringIO(str(tag.findNext(\"table\", {\"class\": \"std_table\"})))\n",
" )[0]\n",
" result[pos] = pos_results\n",
" else:\n",
" result[pos] = pd.DataFrame([])\n",
" return result\n",
"\n",
"\n",
@ -621,58 +672,25 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 23,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Int64Index([0, 1], dtype='int64')\n",
"Index(['Unnamed: 0', 'Anhang', '31.12.2021 TEUR', 'Vorjahr TEUR'], dtype='object')\n",
"Index(['Unnamed: 0', 'Anhang', '2021 TEUR', 'Vorjahr TEUR'], dtype='object')\n",
"Index(['Aufgliederung nach Tätigkeitsbereichen', '2021 TEUR',\n",
" 'Vorjahr TEUR'],\n",
" dtype='object')\n",
"Index(['Aufgliederung nach Inland und Ausland', '2021 TEUR', 'Vorjahr TEUR'], dtype='object')\n",
"Index(['Unnamed: 0', '31.12.2021 TEUR', 'Vorjahr TEUR'], dtype='object')\n",
"Index(['Unnamed: 0', '31.12.2021 TEUR', 'Vorjahr TEUR'], dtype='object')\n",
"Index(['Unnamed: 0', '31.12.2021'], dtype='object')\n",
"Index(['Unnamed: 0', 'TEUR'], dtype='object')\n",
"Index(['Unnamed: 0', 'TEUR'], dtype='object')\n",
"Index(['Unnamed: 0', 'TEUR'], dtype='object')\n",
"Int64Index([0, 1, 2], dtype='int64')\n",
"Index(['Unnamed: 0', 'TEUR'], dtype='object')\n",
"Index(['Unnamed: 0', '31.12.2021 TEUR', 'Vorjahr TEUR'], dtype='object')\n",
"Index(['Unnamed: 0', '2021 Anzahl MA', 'Vorjahr Anzahl MA'], dtype='object')\n",
"MultiIndex([('Art des Geschäfts', 'Unnamed: 0_level_1'),\n",
" ('Art der Beziehung', 'Gesellschafterin TEUR'),\n",
" ('Art der Beziehung', 'Verbundene Unternehmen TEUR')],\n",
"MultiIndex([('Aktiva', 'Unnamed: 0_level_1'),\n",
" ('Aktiva', '31.12.2021 EUR'),\n",
" ('Aktiva', '31.12.2020 EUR')],\n",
" )\n",
"Int64Index([0, 1], dtype='int64')\n",
"MultiIndex([( 'Unnamed: 0_level_0', ...),\n",
" ('Anschaffungs- oder Herstellungskosten', ...),\n",
" ('Anschaffungs- oder Herstellungskosten', ...),\n",
" ('Anschaffungs- oder Herstellungskosten', ...),\n",
" ('Anschaffungs- oder Herstellungskosten', ...)],\n",
"MultiIndex([('Passiva', 'Unnamed: 0_level_1'),\n",
" ('Passiva', '31.12.2021 EUR'),\n",
" ('Passiva', '31.12.2020 EUR')],\n",
" )\n",
"MultiIndex([('Unnamed: 0_level_0', ...),\n",
" ( 'Abschreibungen', ...),\n",
" ( 'Abschreibungen', ...),\n",
" ( 'Abschreibungen', ...),\n",
" ( 'Abschreibungen', ...)],\n",
" )\n",
"MultiIndex([('Unnamed: 0_level_0', 'Unnamed: 0_level_1'),\n",
" ( 'Buchwerte', 'Stand 31.12.2021 EUR'),\n",
" ( 'Buchwerte', 'Stand 31.12.2020 EUR')],\n",
" )\n",
"Index(['Nichtfinanzieller Leistungsindikator', 'Unnamed: 1', '2021', '2020',\n",
" '2019'],\n",
" dtype='object')\n",
"Index(['Gewinn- und Verlustrechnung', '2021 TEUR', 'Vorjahr TEUR',\n",
" 'Veränderung TEUR'],\n",
" dtype='object')\n",
"Index(['Bilanz', '31.12.2021 TEUR', 'Vorjahr TEUR', 'Veränderung TEUR'], dtype='object')\n"
"Index(['Angaben zur Identifikation der Gesellschaft laut Registergericht', 'Angaben zur Identifikation der Gesellschaft laut Registergericht.1'], dtype='object')\n",
"MultiIndex([('Kreditentwicklung', 'Unnamed: 0_level_1'),\n",
" ( 'Betrag', 'EUR')],\n",
" )\n"
]
}
],