Feat/fetch financials (#79)

This commit is contained in:
Tristan Nolde 2023-09-09 17:28:35 +02:00 committed by GitHub
commit 2cd8def200
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
14 changed files with 6343 additions and 5129 deletions

View File

@ -18,216 +18,125 @@
},
{
"cell_type": "code",
"execution_count": 32,
"execution_count": 2,
"metadata": {},
"outputs": [],
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>date</th>\n",
" <th>company</th>\n",
" <th>raw_report</th>\n",
" <th>jahr</th>\n",
" <th>auditors</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2023-07-07</td>\n",
" <td>Atos IT-Dienstleistung und Beratung GmbH</td>\n",
" <td>&lt;div class=\"publication_container\"&gt;\\n &lt;div cla...</td>\n",
" <td>2021</td>\n",
" <td>[]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2023-05-10</td>\n",
" <td>Atos IT-Dienstleistung und Beratung GmbH</td>\n",
" <td>&lt;div class=\"publication_container\"&gt;\\n &lt;div cla...</td>\n",
" <td>2021</td>\n",
" <td>[Auditor(name='Eckhard Lewe', company='Grant T...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2022-03-25</td>\n",
" <td>Atos IT-Dienstleistung und Beratung GmbH</td>\n",
" <td>&lt;div class=\"publication_container\"&gt;\\n &lt;div cla...</td>\n",
" <td>2020</td>\n",
" <td>[Auditor(name='Eckhard Lewe', company='Warth &amp;...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>2021-03-11</td>\n",
" <td>Atos IT-Dienstleistung und Beratung GmbH</td>\n",
" <td>&lt;div class=\"publication_container\"&gt;\\n &lt;div cla...</td>\n",
" <td>2019</td>\n",
" <td>[Auditor(name='Eckhard Lewe', company='Warth &amp;...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>2020-03-24</td>\n",
" <td>Atos IT-Dienstleistung und Beratung GmbH</td>\n",
" <td>&lt;div class=\"publication_container\"&gt;\\n &lt;div cla...</td>\n",
" <td>2018</td>\n",
" <td>[Auditor(name='Ulrich Diersch', company='Warth...</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" date company \\\n",
"0 2023-07-07 Atos IT-Dienstleistung und Beratung GmbH \n",
"2 2023-05-10 Atos IT-Dienstleistung und Beratung GmbH \n",
"4 2022-03-25 Atos IT-Dienstleistung und Beratung GmbH \n",
"5 2021-03-11 Atos IT-Dienstleistung und Beratung GmbH \n",
"6 2020-03-24 Atos IT-Dienstleistung und Beratung GmbH \n",
"\n",
" raw_report jahr \\\n",
"0 <div class=\"publication_container\">\\n <div cla... 2021 \n",
"2 <div class=\"publication_container\">\\n <div cla... 2021 \n",
"4 <div class=\"publication_container\">\\n <div cla... 2020 \n",
"5 <div class=\"publication_container\">\\n <div cla... 2019 \n",
"6 <div class=\"publication_container\">\\n <div cla... 2018 \n",
"\n",
" auditors \n",
"0 [] \n",
"2 [Auditor(name='Eckhard Lewe', company='Grant T... \n",
"4 [Auditor(name='Eckhard Lewe', company='Warth &... \n",
"5 [Auditor(name='Eckhard Lewe', company='Warth &... \n",
"6 [Auditor(name='Ulrich Diersch', company='Warth... "
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"from deutschland.bundesanzeiger import Bundesanzeiger"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"dict_keys(['c1051233030a8e0232523052fd4a2310', '57d129e6fd7505d567fa13919e5e6bdd'])\n"
]
}
],
"source": [
"ba = Bundesanzeiger()\n",
"reports = ba.get_reports(\n",
" \"Volkswagen Economy Service Erdle Bernhard Erdle GmbH\"\n",
") # \"Atos IT-Dienstleistung und Beratung GmbH\")\n",
"print(reports.keys())"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {},
"outputs": [],
"source": [
"report_contents = []\n",
"for key in reports.keys():\n",
" report_contents.append(reports[key])"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>date</th>\n",
" <th>name</th>\n",
" <th>company</th>\n",
" <th>report</th>\n",
" <th>raw_report</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2023-05-25</td>\n",
" <td>Jahresabschluss zum Geschäftsjahr vom 01.01.20...</td>\n",
" <td>Volkswagen Economy Service Erdle Bernhard Erdl...</td>\n",
" <td>\\n\\n\\n\\n \\n\\n\\n\\n\\n\\n\\n\\nVolkswagen Economy Se...</td>\n",
" <td>&lt;div class=\"publication_container\"&gt;\\n &lt;div cla...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2023-05-24</td>\n",
" <td>Jahresabschluss zum Geschäftsjahr vom 01.01.20...</td>\n",
" <td>Volkswagen Economy Service Erdle Bernhard Erdl...</td>\n",
" <td>\\n\\n\\n\\n \\n\\n\\n\\n\\n\\n\\n\\nVolkswagen Economy Se...</td>\n",
" <td>&lt;div class=\"publication_container\"&gt;\\n &lt;div cla...</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" date name \\\n",
"0 2023-05-25 Jahresabschluss zum Geschäftsjahr vom 01.01.20... \n",
"1 2023-05-24 Jahresabschluss zum Geschäftsjahr vom 01.01.20... \n",
"\n",
" company \\\n",
"0 Volkswagen Economy Service Erdle Bernhard Erdl... \n",
"1 Volkswagen Economy Service Erdle Bernhard Erdl... \n",
"\n",
" report \\\n",
"0 \\n\\n\\n\\n \\n\\n\\n\\n\\n\\n\\n\\nVolkswagen Economy Se... \n",
"1 \\n\\n\\n\\n \\n\\n\\n\\n\\n\\n\\n\\nVolkswagen Economy Se... \n",
"\n",
" raw_report \n",
"0 <div class=\"publication_container\">\\n <div cla... \n",
"1 <div class=\"publication_container\">\\n <div cla... "
]
},
"execution_count": 35,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_reports = pd.DataFrame(report_contents)\n",
"\n",
"from aki_prj23_transparenzregister.utils.data_extraction.bundesanzeiger import (\n",
" Bundesanzeiger,\n",
")\n",
"\n",
"ba_wrapper = Bundesanzeiger()\n",
"df_reports = ba_wrapper.get_information(\"Atos IT-Dienstleistung und Beratung GmbH\")\n",
"df_reports.head()"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>date</th>\n",
" <th>name</th>\n",
" <th>company</th>\n",
" <th>report</th>\n",
" <th>raw_report</th>\n",
" <th>type</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2023-05-25</td>\n",
" <td>Jahresabschluss zum Geschäftsjahr vom 01.01.20...</td>\n",
" <td>Volkswagen Economy Service Erdle Bernhard Erdl...</td>\n",
" <td>\\n\\n\\n\\n \\n\\n\\n\\n\\n\\n\\n\\nVolkswagen Economy Se...</td>\n",
" <td>&lt;div class=\"publication_container\"&gt;\\n &lt;div cla...</td>\n",
" <td>Jahresabschluss</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2023-05-24</td>\n",
" <td>Jahresabschluss zum Geschäftsjahr vom 01.01.20...</td>\n",
" <td>Volkswagen Economy Service Erdle Bernhard Erdl...</td>\n",
" <td>\\n\\n\\n\\n \\n\\n\\n\\n\\n\\n\\n\\nVolkswagen Economy Se...</td>\n",
" <td>&lt;div class=\"publication_container\"&gt;\\n &lt;div cla...</td>\n",
" <td>Jahresabschluss</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" date name \\\n",
"0 2023-05-25 Jahresabschluss zum Geschäftsjahr vom 01.01.20... \n",
"1 2023-05-24 Jahresabschluss zum Geschäftsjahr vom 01.01.20... \n",
"\n",
" company \\\n",
"0 Volkswagen Economy Service Erdle Bernhard Erdl... \n",
"1 Volkswagen Economy Service Erdle Bernhard Erdl... \n",
"\n",
" report \\\n",
"0 \\n\\n\\n\\n \\n\\n\\n\\n\\n\\n\\n\\nVolkswagen Economy Se... \n",
"1 \\n\\n\\n\\n \\n\\n\\n\\n\\n\\n\\n\\nVolkswagen Economy Se... \n",
"\n",
" raw_report type \n",
"0 <div class=\"publication_container\">\\n <div cla... Jahresabschluss \n",
"1 <div class=\"publication_container\">\\n <div cla... Jahresabschluss "
]
},
"execution_count": 36,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_reports[\"type\"] = df_reports.name.apply(lambda name: name.split(\" \")[0])\n",
"df_reports.head()"
]
},
{
"cell_type": "code",
"execution_count": 37,
"execution_count": 9,
"metadata": {},
"outputs": [
{
@ -260,13 +169,20 @@
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2023-07-11</td>\n",
" <td>Volkswagen Economy Service Erdle Bernhard Erdl...</td>\n",
" <td>&lt;div class=\"publication_container\"&gt;\\n &lt;div cla...</td>\n",
" <td>2021</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2023-05-25</td>\n",
" <td>Volkswagen Economy Service Erdle Bernhard Erdl...</td>\n",
" <td>&lt;div class=\"publication_container\"&gt;\\n &lt;div cla...</td>\n",
" <td>2020</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <th>2</th>\n",
" <td>2023-05-24</td>\n",
" <td>Volkswagen Economy Service Erdle Bernhard Erdl...</td>\n",
" <td>&lt;div class=\"publication_container\"&gt;\\n &lt;div cla...</td>\n",
@ -278,15 +194,17 @@
],
"text/plain": [
" date company \\\n",
"0 2023-05-25 Volkswagen Economy Service Erdle Bernhard Erdl... \n",
"1 2023-05-24 Volkswagen Economy Service Erdle Bernhard Erdl... \n",
"0 2023-07-11 Volkswagen Economy Service Erdle Bernhard Erdl... \n",
"1 2023-05-25 Volkswagen Economy Service Erdle Bernhard Erdl... \n",
"2 2023-05-24 Volkswagen Economy Service Erdle Bernhard Erdl... \n",
"\n",
" raw_report jahr \n",
"0 <div class=\"publication_container\">\\n <div cla... 2020 \n",
"1 <div class=\"publication_container\">\\n <div cla... 2019 "
"0 <div class=\"publication_container\">\\n <div cla... 2021 \n",
"1 <div class=\"publication_container\">\\n <div cla... 2020 \n",
"2 <div class=\"publication_container\">\\n <div cla... 2019 "
]
},
"execution_count": 37,
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
@ -310,7 +228,7 @@
},
{
"cell_type": "code",
"execution_count": 38,
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
@ -320,7 +238,7 @@
},
{
"cell_type": "code",
"execution_count": 39,
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
@ -338,18 +256,12 @@
},
{
"cell_type": "code",
"execution_count": 40,
"execution_count": 18,
"metadata": {},
"outputs": [],
"source": [
"import re\n",
"from dataclasses import dataclass\n",
"\n",
"\n",
"@dataclass\n",
"class Auditor:\n",
" name: str\n",
" company: str\n",
"from aki_prj23_transparenzregister.models.auditor import Auditor\n",
"\n",
"\n",
"def extract_auditor_company(report: str) -> str:\n",
@ -374,7 +286,7 @@
},
{
"cell_type": "code",
"execution_count": 41,
"execution_count": 13,
"metadata": {},
"outputs": [
{
@ -383,7 +295,7 @@
"[]"
]
},
"execution_count": 41,
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
@ -418,16 +330,16 @@
},
{
"cell_type": "code",
"execution_count": 42,
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'net_income': 23484.67, 'equity': 65083.84, 'current_assets': 357613.61}"
"{'net_income': 100238.5, 'equity': 165322.34, 'current_assets': 435344.07}"
]
},
"execution_count": 42,
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
@ -502,7 +414,7 @@
},
{
"cell_type": "code",
"execution_count": 43,
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
@ -518,7 +430,7 @@
},
{
"cell_type": "code",
"execution_count": 46,
"execution_count": 16,
"metadata": {},
"outputs": [
{
@ -526,24 +438,30 @@
"output_type": "stream",
"text": [
"MultiIndex([('Aktiva', 'Unnamed: 0_level_1'),\n",
" ('Aktiva', '31.12.2020 EUR'),\n",
" ('Aktiva', '31.12.2019 EUR')],\n",
" ('Aktiva', '31.12.2021 EUR'),\n",
" ('Aktiva', '31.12.2020 EUR')],\n",
" )\n",
"Aktiva Unnamed: 0_level_1 object\n",
" 31.12.2021 EUR object\n",
" 31.12.2020 EUR object\n",
" 31.12.2019 EUR object\n",
"dtype: object\n",
"MultiIndex([('Passiva', 'Unnamed: 0_level_1'),\n",
" ('Passiva', '31.12.2020 EUR'),\n",
" ('Passiva', '31.12.2019 EUR')],\n",
" ('Passiva', '31.12.2021 EUR'),\n",
" ('Passiva', '31.12.2020 EUR')],\n",
" )\n",
"Passiva Unnamed: 0_level_1 object\n",
" 31.12.2021 EUR object\n",
" 31.12.2020 EUR object\n",
" 31.12.2019 EUR object\n",
"dtype: object\n",
"Index(['Angaben zur Identifikation der Gesellschaft laut Registergericht', 'Angaben zur Identifikation der Gesellschaft laut Registergericht.1'], dtype='object')\n",
"Angaben zur Identifikation der Gesellschaft laut Registergericht object\n",
"Angaben zur Identifikation der Gesellschaft laut Registergericht.1 object\n",
"dtype: object\n",
"MultiIndex([('Kreditentwicklung', 'Unnamed: 0_level_1'),\n",
" ( 'Betrag', 'EUR')],\n",
" )\n",
"Kreditentwicklung Unnamed: 0_level_1 object\n",
"Betrag EUR object\n",
"dtype: object\n"
]
},
@ -553,7 +471,7 @@
"{}"
]
},
"execution_count": 46,
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
@ -574,19 +492,46 @@
},
{
"cell_type": "code",
"execution_count": 45,
"execution_count": 22,
"metadata": {},
"outputs": [
{
"ename": "KeyError",
"evalue": "'Passiva'",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mKeyError\u001b[0m Traceback (most recent call last)",
"\u001b[1;32mc:\\Users\\trist\\Documents\\Code\\M.Sc\\aki_prj23_transparenzregister\\Jupyter\\API-tests\\Bundesanzeiger\\notebook.ipynb Cell 21\u001b[0m in \u001b[0;36m1\n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Users/trist/Documents/Code/M.Sc/aki_prj23_transparenzregister/Jupyter/API-tests/Bundesanzeiger/notebook.ipynb#X26sZmlsZQ%3D%3D?line=10'>11</a>\u001b[0m \u001b[39mreturn\u001b[39;00m result\n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Users/trist/Documents/Code/M.Sc/aki_prj23_transparenzregister/Jupyter/API-tests/Bundesanzeiger/notebook.ipynb#X26sZmlsZQ%3D%3D?line=13'>14</a>\u001b[0m bilanz \u001b[39m=\u001b[39m get_bilanz(sample_report)\n\u001b[1;32m---> <a href='vscode-notebook-cell:/c%3A/Users/trist/Documents/Code/M.Sc/aki_prj23_transparenzregister/Jupyter/API-tests/Bundesanzeiger/notebook.ipynb#X26sZmlsZQ%3D%3D?line=14'>15</a>\u001b[0m bilanz[\u001b[39m\"\u001b[39;49m\u001b[39mPassiva\u001b[39;49m\u001b[39m\"\u001b[39;49m]\u001b[39m.\u001b[39mhead()\n",
"\u001b[1;31mKeyError\u001b[0m: 'Passiva'"
]
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"Empty DataFrame\n",
"Columns: []\n",
"Index: []"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
@ -600,6 +545,8 @@
" StringIO(str(tag.findNext(\"table\", {\"class\": \"std_table\"})))\n",
" )[0]\n",
" result[pos] = pos_results\n",
" else:\n",
" result[pos] = pd.DataFrame([])\n",
" return result\n",
"\n",
"\n",
@ -609,58 +556,25 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 23,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Int64Index([0, 1], dtype='int64')\n",
"Index(['Unnamed: 0', 'Anhang', '31.12.2021 TEUR', 'Vorjahr TEUR'], dtype='object')\n",
"Index(['Unnamed: 0', 'Anhang', '2021 TEUR', 'Vorjahr TEUR'], dtype='object')\n",
"Index(['Aufgliederung nach Tätigkeitsbereichen', '2021 TEUR',\n",
" 'Vorjahr TEUR'],\n",
" dtype='object')\n",
"Index(['Aufgliederung nach Inland und Ausland', '2021 TEUR', 'Vorjahr TEUR'], dtype='object')\n",
"Index(['Unnamed: 0', '31.12.2021 TEUR', 'Vorjahr TEUR'], dtype='object')\n",
"Index(['Unnamed: 0', '31.12.2021 TEUR', 'Vorjahr TEUR'], dtype='object')\n",
"Index(['Unnamed: 0', '31.12.2021'], dtype='object')\n",
"Index(['Unnamed: 0', 'TEUR'], dtype='object')\n",
"Index(['Unnamed: 0', 'TEUR'], dtype='object')\n",
"Index(['Unnamed: 0', 'TEUR'], dtype='object')\n",
"Int64Index([0, 1, 2], dtype='int64')\n",
"Index(['Unnamed: 0', 'TEUR'], dtype='object')\n",
"Index(['Unnamed: 0', '31.12.2021 TEUR', 'Vorjahr TEUR'], dtype='object')\n",
"Index(['Unnamed: 0', '2021 Anzahl MA', 'Vorjahr Anzahl MA'], dtype='object')\n",
"MultiIndex([('Art des Geschäfts', 'Unnamed: 0_level_1'),\n",
" ('Art der Beziehung', 'Gesellschafterin TEUR'),\n",
" ('Art der Beziehung', 'Verbundene Unternehmen TEUR')],\n",
"MultiIndex([('Aktiva', 'Unnamed: 0_level_1'),\n",
" ('Aktiva', '31.12.2021 EUR'),\n",
" ('Aktiva', '31.12.2020 EUR')],\n",
" )\n",
"Int64Index([0, 1], dtype='int64')\n",
"MultiIndex([( 'Unnamed: 0_level_0', ...),\n",
" ('Anschaffungs- oder Herstellungskosten', ...),\n",
" ('Anschaffungs- oder Herstellungskosten', ...),\n",
" ('Anschaffungs- oder Herstellungskosten', ...),\n",
" ('Anschaffungs- oder Herstellungskosten', ...)],\n",
"MultiIndex([('Passiva', 'Unnamed: 0_level_1'),\n",
" ('Passiva', '31.12.2021 EUR'),\n",
" ('Passiva', '31.12.2020 EUR')],\n",
" )\n",
"MultiIndex([('Unnamed: 0_level_0', ...),\n",
" ( 'Abschreibungen', ...),\n",
" ( 'Abschreibungen', ...),\n",
" ( 'Abschreibungen', ...),\n",
" ( 'Abschreibungen', ...)],\n",
" )\n",
"MultiIndex([('Unnamed: 0_level_0', 'Unnamed: 0_level_1'),\n",
" ( 'Buchwerte', 'Stand 31.12.2021 EUR'),\n",
" ( 'Buchwerte', 'Stand 31.12.2020 EUR')],\n",
" )\n",
"Index(['Nichtfinanzieller Leistungsindikator', 'Unnamed: 1', '2021', '2020',\n",
" '2019'],\n",
" dtype='object')\n",
"Index(['Gewinn- und Verlustrechnung', '2021 TEUR', 'Vorjahr TEUR',\n",
" 'Veränderung TEUR'],\n",
" dtype='object')\n",
"Index(['Bilanz', '31.12.2021 TEUR', 'Vorjahr TEUR', 'Veränderung TEUR'], dtype='object')\n"
"Index(['Angaben zur Identifikation der Gesellschaft laut Registergericht', 'Angaben zur Identifikation der Gesellschaft laut Registergericht.1'], dtype='object')\n",
"MultiIndex([('Kreditentwicklung', 'Unnamed: 0_level_1'),\n",
" ( 'Betrag', 'EUR')],\n",
" )\n"
]
}
],
@ -698,7 +612,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.7"
"version": "3.11.3"
},
"orig_nbformat": 4
},

10377
poetry.lock generated

File diff suppressed because it is too large Load Diff

View File

@ -38,6 +38,7 @@ version = "0.1.0"
SQLAlchemy = {version = "^1.4.46", extras = ["mypy"]}
dash = "^2.11.1"
dash-bootstrap-components = "^1.4.2"
deutschland = {git = "https://github.com/TrisNol/deutschland.git", branch = "hotfix/python-3.11-support"}
loguru = "^0.7.0"
matplotlib = "^3.7.1"
plotly = "^5.14.1"
@ -48,6 +49,10 @@ seaborn = "^0.12.2"
selenium = "^4.10.0"
tqdm = "^4.65.0"
# TODO: Add dependent libraries (e.g., deutschland, plotly, etc.)
[tool.poetry.extras]
ingest = ["selenium"]
[tool.poetry.group.develop.dependencies]
black = {extras = ["jupyter"], version = "^23.3.0"}
jupyterlab = "^4.0.0"

View File

@ -0,0 +1 @@
"""Main applications."""

View File

@ -0,0 +1,63 @@
"""Add financial data to companies."""
import typing
from aki_prj23_transparenzregister.config.config_providers import JsonFileConfigProvider
from aki_prj23_transparenzregister.utils.data_extraction.bundesanzeiger import (
Bundesanzeiger,
)
from aki_prj23_transparenzregister.utils.mongo.company_mongo_service import (
CompanyMongoService,
)
from aki_prj23_transparenzregister.utils.mongo.connector import MongoConnector
def work(company: typing.Any, company_service: CompanyMongoService) -> None:
    """Fetch the yearly results of one company and store them via the service.

    Args:
        company (dict): Company to process; its ``name``, ``location.city`` and
            ``_id`` entries are read
        company_service (CompanyMongoService): Interface to Company collection on MongoDB
    """
    scraper = Bundesanzeiger()
    reports = scraper.get_information(company["name"], company["location"]["city"])

    # One entry per report year; a later row with the same year replaces an
    # earlier one.
    results_by_year = {
        report.jahr: {
            "auditors": [auditor.to_dict() for auditor in report.auditors],
            "financials": report.financial_results,
        }
        for _position, report in reports.iterrows()
    }
    company_service.add_yearly_results(company["_id"], results_by_year)
if __name__ == "__main__":
    # Script entry point: process every company returned by
    # `get_where_no_financial_results`, distributing the calls to `work`
    # over a thread pool.
    import concurrent.futures

    from loguru import logger

    config_provider = JsonFileConfigProvider("./secrets.json")
    mongo_connector = MongoConnector(config_provider.get_mongo_connection_string())
    company_service = CompanyMongoService(mongo_connector)

    num_threads = 25
    companies = company_service.get_where_no_financial_results()
    with concurrent.futures.ThreadPoolExecutor(max_workers=num_threads) as executor:
        # Map each future back to its company so the outcome can be attributed.
        future_to_entry = {
            executor.submit(work, entry, company_service): entry for entry in companies
        }

        # Handle the tasks in the order in which they finish.
        for future in concurrent.futures.as_completed(future_to_entry):
            entry = future_to_entry[future]
            logger.info(entry["name"])
            try:
                # `work` returns nothing; result() is only called to re-raise an
                # exception that occurred inside the worker thread.
                future.result()
            except Exception as e:
                # Top-level boundary: one failing company must not abort the
                # batch. Name the company so the failure can be traced.
                logger.error(f"Error processing entry {entry['name']}: {e}")

View File

@ -0,0 +1,18 @@
"""Auditor model."""
from dataclasses import asdict, dataclass
@dataclass
class Auditor:
"""Auditor."""
name: str
company: str | None
def to_dict(self) -> dict:
"""_summary_.
Returns:
dict: _description_
"""
return asdict(self)

View File

@ -45,19 +45,61 @@ class CompanyRelationship(ABC):
location: Location
class FinancialKPIEnum(Enum):
    """Financial KPI keys.

    The value of each member is the string key of the KPI. The trailing
    comments give the German term(s) for each KPI.
    """

    REVENUE = "revenue"  # Revenue (Umsatz / Erlöse)
    NET_INCOME = "net_income"  # Net income (Jahresüberschuss / Nettoeinkommen)
    EBIT = "ebit"  # EBIT
    EBITDA = "ebitda"  # EBITDA
    GROSS_PROFIT = "gross_profit"  # Gross profit (Bruttogewinn)
    OPERATING_PROFIT = "operating_profit"  # Operating profit (Betriebsgewinn)
    ASSETS = "assets"  # Balance sheet total (Bilanzsumme)
    LIABILITIES = "liabilities"  # Total liabilities (Gesamtverbindlichkeiten)
    EQUITY = "equity"  # Equity (Eigenkapital)
    CURRENT_ASSETS = "current_assets"  # Current assets (Umlaufvermögen)
    # Current liabilities (kurzfristige Verbindlichkeiten)
    CURRENT_LIABILITIES = "current_liabilities"
    # Long-term liabilities (langfristige Verbindlichkeiten)
    LONG_TERM_DEBT = "long_term_debt"
    # Short-term liabilities (kurzfristige Verbindlichkeiten)
    SHORT_TERM_DEBT = "short_term_debt"
    CASH_AND_CASH_EQUIVALENTS = "cash_and_cash_equivalents"  # Cash (Barmittel)
    DIVIDENDS = "dividends"  # Dividend (Dividende)
    CASH_FLOW = "cash_flow"  # Cash flow
@dataclass
class YearlyResult:
    """Financial KPIs of a company for a single year."""

    # Year the KPIs belong to
    year: int
    # KPI values keyed by the KPI they describe
    kpis: dict[FinancialKPIEnum, float]
@dataclass
class Company:
"""_summary_.
Returns:
_type_: _description_
"""
"""Company dataclass."""
id: CompanyID
location: Location
name: str
last_update: str
relationships: list[CompanyRelationship]
# yearly_results: list[FinancialResults]
def to_dict(self) -> dict:
"""_summary_.

View File

@ -0,0 +1 @@
"""Everything regarding data extraction from various sources."""

View File

@ -0,0 +1,183 @@
"""Fetch data from Bundesanzeiger."""
import re
import pandas as pd
from bs4 import BeautifulSoup
from deutschland.bundesanzeiger import Bundesanzeiger as Ba
from aki_prj23_transparenzregister.models.auditor import Auditor
from aki_prj23_transparenzregister.models.company import FinancialKPIEnum
pd.options.mode.chained_assignment = None # type: ignore
class Bundesanzeiger:
"""Bundesanzeiger wrapper to export relevant information."""
def get_information(
    self, company_name: str, city: str | None = None
) -> pd.DataFrame:
    """Extract relevant information from all found yearly results for the given company.

    Args:
        company_name (str): Name of the company to search for
        city (Optional[str]): City where the company is registered; left out of
            the search term when None or empty

    Returns:
        pd.DataFrame: Reports of type `Jahresabschluss` whose company equals
            `company_name`, with the added columns ``auditors`` and
            ``financial_results`` and without ``raw_report``; an empty
            DataFrame if the search returned nothing
    """
    # Only append the city when there is one: formatting None into the term
    # would search for the literal word "None".
    search_term = f"{company_name} {city}" if city else company_name

    # Get Bundesanzeiger entries for company
    reports = Ba().get_reports(search_term)

    # Transform to list of data
    report_contents = list(reports.values())
    if not report_contents:
        return pd.DataFrame()

    # Transform to DataFrame and filter out irrelevant entries
    df_data = self.filter_reports(pd.DataFrame(report_contents))

    # Filter out entries of different companies. Copy the selection so the
    # column assignments below write to an independent frame, not to a slice.
    df_data = df_data.loc[df_data.company == company_name].copy()

    # Add Auditor information
    df_data["auditors"] = df_data.raw_report.apply(self.extract_auditors)

    # Add Financial information
    df_data["financial_results"] = df_data.raw_report.apply(
        self.extract_financial_results
    )

    # Remove irrelevant columns
    return df_data.drop(["raw_report"], axis=1)
def filter_reports(self, df_reports: pd.DataFrame) -> pd.DataFrame:
    """Return only reports of type `Jahresabschluss` and extract the year of each report.

    The DataFrame passed in is not modified; a new DataFrame is returned.

    Args:
        df_reports (pd.DataFrame): DataFrame containing list of reports; the
            columns ``name`` and ``report`` are required

    Returns:
        pd.DataFrame: Filtered and pruned DataFrame: the ``name`` and ``report``
            columns are removed and the column ``jahr`` (year as string) is added
    """
    # The report type is the first space-separated word of the name. `assign`
    # builds a new frame, so the caller's DataFrame gains no `type` column.
    typed = df_reports.assign(
        type=df_reports["name"].apply(lambda name: name.split(" ")[0])
    )
    # Copy the selection so that adding `jahr` writes to an independent frame
    # rather than to a slice of `typed`.
    annual = typed.loc[typed["type"] == "Jahresabschluss"].copy()
    # Take the last space-separated token of the name and keep what follows
    # its last dot, e.g. "31.12.2020" -> "2020".
    annual["jahr"] = annual["name"].apply(
        lambda name: name.split(" ")[-1].split(".")[-1]
    )
    return annual.drop(["name", "report", "type"], axis=1)
def extract_auditor_company(self, report: str) -> str | None:
    """Extract the name of an auditor company from the given yearly results report.

    The name is taken from the first ``<b>`` element that contains a ``<br>``
    and whose text spans more than one line: the second line of that text,
    stripped of surrounding whitespace, is returned.

    Args:
        report (str): Yearly results report as raw string

    Returns:
        str | None: Name of the auditor company if found, otherwise None
    """
    soup = BeautifulSoup(report, features="html.parser")
    for bold in soup.find_all("b"):
        # Only a <b> element with a <br> somewhere inside it is a candidate.
        if bold.find("br") is None:
            continue
        text_lines = bold.text.split("\n")
        # Without a line break in the text there is no second line to read;
        # move on to the next element instead of failing with an IndexError.
        if len(text_lines) > 1:
            return text_lines[1].strip()
    return None
def extract_auditors(self, report: str) -> list:
    """Find the list of auditors involved in the given yearly results report.

    An auditor is recognised by the text ", Wirtschaftsprüfer" following the
    name. Every auditor found is paired with the auditor company returned by
    `extract_auditor_company` for the same report.

    Args:
        report (str): Yearly results report as raw string

    Returns:
        list[Auditor]: List of Auditors found in the given report
    """
    auditor_company = self.extract_auditor_company(report)
    title_suffix = ", Wirtschaftsprüfer"
    # A name is the run of letters, spaces and , . ' - directly in front of
    # the title. Accented Latin-1 letters (ä, ö, ü, ß, é, ...) are part of the
    # class so that a name such as "Jürgen Müller" is not cut down to "ller".
    auditor_regex = r"[a-zA-ZÀ-ÖØ-öø-ÿ ,.'-]+" + title_suffix
    hits = re.findall(auditor_regex, report)
    return [
        Auditor(hit.replace(title_suffix, "").lstrip(), auditor_company)
        for hit in hits
    ]
def __extract_kpis__(self, report: str) -> dict:
    """Extract Key Performance Indicators (KPIs) from a financial report.

    Source: https://github.com/bundesAPI/deutschland/pull/87/files#diff-f5b9db5384cf523fcc677056065041e7793bfc4da9cf74c4eebd6fab732739bd.

    For every KPI the report is searched case-insensitively for one of the
    KPI's labels followed by a number. Candidates are tried in order of
    appearance and the first one that parses as a number is kept, so a label
    that is only followed by punctuation (e.g. at the end of a sentence) does
    not hide a later occurrence that carries the actual figure.

    Args:
        report (str): The yearly report as a parsed string

    Returns:
        dict: Mapping of KPI key (a ``FinancialKPIEnum`` value) to the extracted
            number as float; KPIs without a parsable number are left out
    """

    def parse_number(value: str) -> float | None:
        """Parse a captured number; return None if it is not a valid number."""
        # A trailing "m" or "b" scales the number by a million or a billion.
        multiplier = {"m": 1_000_000, "b": 1_000_000_000}.get(value[-1].lower(), 1)
        if multiplier != 1:
            value = value[:-1]
        # German notation: "." groups thousands and "," is the decimal mark.
        value = value.replace(".", "").replace(",", ".").strip()
        try:
            return float(value) * multiplier
        except ValueError:
            # E.g. the capture consisted of punctuation only.
            return None

    # Define KPI patterns to search for
    kpi_patterns = {
        FinancialKPIEnum.REVENUE.value: r"(?:revenue|umsatz|erlöse)[:\s]*([\d,.]+[mmb]?)",
        FinancialKPIEnum.NET_INCOME.value: r"(?:net income|jahresüberschuss|nettoeinkommen|Ergebnis nach Steuern)[:\s]*([\d,.]+[mmb]?)",
        FinancialKPIEnum.EBIT.value: r"(?:ebit|operating income)[:\s]*([\d,.]+[mmb]?)",
        FinancialKPIEnum.EBITDA.value: r"(?:ebitda)[:\s]*([\d,.]+[mmb]?)",
        FinancialKPIEnum.GROSS_PROFIT.value: r"(?:gross profit|bruttogewinn)[:\s]*([\d,.]+[mmb]?)",
        FinancialKPIEnum.OPERATING_PROFIT.value: r"(?:operating profit|betriebsgewinn)[:\s]*([\d,.]+[mmb]?)",
        FinancialKPIEnum.ASSETS.value: r"(?:total assets|bilanzsumme)[:\s]*([\d,.]+[mmb]?)",
        FinancialKPIEnum.LIABILITIES.value: r"(?:total liabilities|gesamtverbindlichkeiten)[:\s]*([\d,.]+[mmb]?)",
        FinancialKPIEnum.EQUITY.value: r"(?:shareholders'? equity|eigenkapital)[:\s]*([\d,.]+[mmb]?)",
        FinancialKPIEnum.CURRENT_ASSETS.value: r"(?:current assets|umlaufvermögen)[:\s]*([\d,.]+[mmb]?)",
        FinancialKPIEnum.CURRENT_LIABILITIES.value: r"(?:current liabilities|kurzfristige verbindlichkeiten)[:\s]*([\d,.]+[mmb]?)",
        FinancialKPIEnum.LONG_TERM_DEBT.value: r"(?:long[-\s]?term debt|langfristige verbindlichkeiten)[:\s]*([\d,.]+[mmb]?)",
        FinancialKPIEnum.SHORT_TERM_DEBT.value: r"(?:short[-\s]?term debt|kurzfristige verbindlichkeiten)[:\s]*([\d,.]+[mmb]?)",
        FinancialKPIEnum.CASH_AND_CASH_EQUIVALENTS.value: r"(?:cash (?:and cash equivalents)?|barmittel)[:\s]*([\d,.]+[mmb]?)",
        FinancialKPIEnum.DIVIDENDS.value: r"(?:dividends?|dividende)[:\s]*([\d,.]+[mmb]?)",
        FinancialKPIEnum.CASH_FLOW.value: r"(?:cash flow|cashflow|cash flow from operating activities)[:\s]*([\d,.]+[mmb]?)",
    }

    kpis = {}
    for kpi, pattern in kpi_patterns.items():
        for match in re.finditer(pattern, report, flags=re.IGNORECASE | re.UNICODE):
            cleaned_value = parse_number(match.group(1))
            if cleaned_value is not None:
                kpis[kpi] = cleaned_value
                break
    return kpis
def extract_financial_results(self, report: str) -> dict:
    """Extract the financial KPIs from the markup of a report.

    Args:
        report (str): Report to be analyzed

    Returns:
        dict: KPIs as returned by `__extract_kpis__`
    """
    soup = BeautifulSoup(report, features="html.parser")
    # Reduce the markup to its text and replace line breaks with spaces
    # before the KPI patterns are applied.
    flat_text = soup.get_text().replace("\n", " ")
    return self.__extract_kpis__(flat_text)
if __name__ == "__main__":
    # Manual run: query a single company, passing None as city.
    ba_wrapper = Bundesanzeiger()
    ba_wrapper.get_information(
        company_name="Atos IT-Dienstleistung und Beratung GmbH",
        city=None,
    )

View File

@ -1,7 +1,10 @@
"""CompanyMongoService."""
from pymongo.results import InsertOneResult
from threading import Lock
from aki_prj23_transparenzregister.models.company import Company, CompanyID
from bson.objectid import ObjectId
from pymongo.results import InsertOneResult, UpdateResult
from aki_prj23_transparenzregister.models.company import Company
from aki_prj23_transparenzregister.utils.mongo.connector import MongoConnector
@ -15,6 +18,7 @@ class CompanyMongoService:
connector (MongoConnector): _description_
"""
self.collection = connector.database["companies"]
self.lock = Lock() # Create a lock for synchronization
def get_all(self) -> list[Company]:
"""_summary_.
@ -22,10 +26,11 @@ class CompanyMongoService:
Returns:
list[Company]: _description_
"""
result = self.collection.find()
return list(result)
with self.lock:
result = self.collection.find()
return list(result)
def get_by_id(self, id: CompanyID) -> Company | None:
def get_by_id(self, id: str) -> Company | None:
"""_summary_.
Args:
@ -34,10 +39,46 @@ class CompanyMongoService:
Returns:
Company | None: _description_
"""
result = list(self.collection.find({"id": id}))
if len(result) == 1:
return result[0]
return None
with self.lock:
result = list(self.collection.find({"id": id}))
if len(result) == 1:
return result[0]
return None
def get_by_object_id(self, _id: str) -> dict | None:
    """Look up a single entry of the collection by its ``_id``.

    Args:
        _id (str): ID; converted to an ``ObjectId`` for the query

    Returns:
        dict | None: The entry if exactly one matches, otherwise None
    """
    query = {"_id": ObjectId(_id)}
    with self.lock:
        matches = list(self.collection.find(query))
    return matches[0] if len(matches) == 1 else None
def get_where_no_financial_results(self) -> list[dict]:
    """Return every entry that has no ``yearly_results`` field.

    Returns:
        list[dict]: List of companies found
    """
    missing_results = {"$or": [{"yearly_results": {"$exists": False}}]}
    with self.lock:
        cursor = self.collection.find(missing_results)
        return list(cursor)
def get_where_yearly_results(self) -> list[dict]:
    """Return every company whose ``yearly_results`` compares greater than ``{}``.

    These are the entries of interest for the data loader.

    Returns:
        list[dict]: List of companies
    """
    has_results = {"yearly_results": {"$gt": {}}}
    with self.lock:
        cursor = self.collection.find(has_results)
        return list(cursor)
def insert(self, company: Company) -> InsertOneResult:
"""_summary_.
@ -48,4 +89,20 @@ class CompanyMongoService:
Returns:
_type_: _description_
"""
return self.collection.insert_one(company.to_dict())
with self.lock:
return self.collection.insert_one(company.to_dict())
def add_yearly_results(self, _id: str, yearly_results: dict) -> UpdateResult:
    """Set the ``yearly_results`` field on the entry identified by ``_id``.

    Args:
        _id (str): String handed to ``ObjectId`` to select the entry
        yearly_results (dict): Value written to the ``yearly_results`` field

    Returns:
        UpdateResult: Whatever ``update_one`` reports back
    """
    with self.lock:
        selector = {"_id": ObjectId(_id)}
        change = {"$set": {"yearly_results": yearly_results}}
        return self.collection.update_one(selector, change)

View File

@ -0,0 +1,36 @@
"""Tests for the enrich_company_financials module."""
from unittest.mock import Mock, patch
import pandas as pd
from aki_prj23_transparenzregister.apps import enrich_company_financials
from aki_prj23_transparenzregister.models.auditor import Auditor
def test_import_enrich_company_financials() -> None:
    """Smoke test: the enrich_company_financials module object is available."""
    assert enrich_company_financials
@patch(
    "aki_prj23_transparenzregister.apps.enrich_company_financials.Bundesanzeiger.get_information"
)
@patch(
    "aki_prj23_transparenzregister.apps.enrich_company_financials.CompanyMongoService"
)
def test_work(mock_company_service: Mock, mock_bundesanzeiger: Mock) -> None:
    """Smoke test: ``work`` runs through with a stubbed report and service.

    ``patch`` decorators are applied bottom-up, so the first argument is the
    patched ``CompanyMongoService`` and the second is the patched
    ``Bundesanzeiger.get_information``.
    """
    mock_bundesanzeiger.return_value = pd.DataFrame(
        [
            {
                "jahr": "2042",
                "auditors": [Auditor(name="", company="")],
                "financial_results": [],
            }
        ]
    )
    enrich_company_financials.work(
        {"_id": "", "name": "ABC AG", "location": {"city": "Haltern am See"}},
        mock_company_service,
    )
    # NOTE(review): this assert only checks that the module object is truthy;
    # the test currently passes as long as ``work`` does not raise.
    # TODO: assert on the expected calls of the mocked service.
    assert enrich_company_financials

View File

@ -0,0 +1 @@
"""Tests for data_extraction."""

View File

@ -0,0 +1,111 @@
from unittest.mock import Mock, patch
import pandas as pd
from aki_prj23_transparenzregister.models.company import FinancialKPIEnum
from aki_prj23_transparenzregister.utils.data_extraction.bundesanzeiger import (
Bundesanzeiger,
)
def test_extract_auditor_company_no_hits() -> None:
    """``extract_auditor_company`` returns ``None`` for the sample markup below."""
    markup = """
<b>
Nothing to see here
</b>
"""
    extractor = Bundesanzeiger()
    assert extractor.extract_auditor_company(markup) is None
def test_extract_auditor_company() -> None:
    """``extract_auditor_company`` returns the company name found in the ``<b>`` tag."""
    company_name = "Korrupte Wirtschaftsprüfer GmbH & Co. KG"
    markup = f"""
<b>
{company_name}
<br>
Max Mustermann
</b>
"""
    extractor = Bundesanzeiger()
    extracted = extractor.extract_auditor_company(markup)
    assert extracted == company_name
def test_extract_kpis() -> None:
    """``__extract_kpis__`` turns the German-formatted amounts into floats per KPI."""
    report_text = """
Die Prj23_Transparenzregister GmbH erwirtschaftete einen Jahresüberschuss 10.000,43 .
Des Weiteren sanken die Gesamtverbindlichkeiten 42,00
"""
    expected_net_income = 10000.43
    expected_liabilities = 42.00
    kpis = Bundesanzeiger().__extract_kpis__(report_text)
    assert kpis[FinancialKPIEnum.NET_INCOME.value] == expected_net_income
    assert kpis[FinancialKPIEnum.LIABILITIES.value] == expected_liabilities
def test_extracct_financial_results() -> None:
    """``extract_financial_results`` finds both KPIs inside the HTML sample."""
    report_html = """
<br>
Die Prj23_Transparenzregister GmbH erwirtschaftete einen Jahresüberschuss 10.000,43 .
</br>
<h2>Dies ist ein Platzhalter, der ignoriert werden soll</h2>
<b>Des Weiteren sanken die Gesamtverbindlichkeiten 42,00 </b>
"""
    expected_net_income = 10000.43
    expected_liabilities = 42.00
    kpis = Bundesanzeiger().extract_financial_results(report_html)
    assert kpis[FinancialKPIEnum.NET_INCOME.value] == expected_net_income
    assert kpis[FinancialKPIEnum.LIABILITIES.value] == expected_liabilities
def test_filter_reports() -> None:
    """``filter_reports`` keeps one of the two sample rows and yields ``jahr == "1998"``."""
    reports = pd.DataFrame(
        [
            {"name": "Bedienungsanleitung", "report": "", "raw_report": ""},
            {"name": "Jahresabschluss 1998", "report": "", "raw_report": ""},
        ]
    )
    filtered = Bundesanzeiger().filter_reports(reports)
    assert len(filtered) == 1
    assert filtered.iloc[0].jahr == "1998"
@patch(
    "aki_prj23_transparenzregister.utils.data_extraction.bundesanzeiger.Ba.get_reports"
)
def test_get_information(mock_bundesanzeiger: Mock) -> None:
    """``get_information`` returns one row when ``Ba.get_reports`` yields the two stubs."""
    manual = {
        "name": "Bedienungsanleitung",
        "report": "",
        "company": "",
        "raw_report": "",
    }
    annual_report = {
        "name": "Jahresabschluss 1998",
        "report": "",
        "company": "PRJ 23 Transparenzregister GmbH",
        "raw_report": "",
    }
    mock_bundesanzeiger.return_value = {"1": manual, "2": annual_report}
    information = Bundesanzeiger().get_information(
        "PRJ 23 Transparenzregister GmbH", "Iserlohn"
    )
    assert len(information) == 1
@patch(
    "aki_prj23_transparenzregister.utils.data_extraction.bundesanzeiger.Ba.get_reports"
)
def test_get_information_no_results(mock_bundesanzeiger: Mock) -> None:
    """``get_information`` returns an empty result when ``Ba.get_reports`` yields nothing."""
    mock_bundesanzeiger.return_value = {}
    information = Bundesanzeiger().get_information(
        "PRJ 23 Transparenzregister GmbH", "Iserlohn"
    )
    assert len(information) == 0

View File

@ -3,7 +3,7 @@ from unittest.mock import Mock
import pytest
from aki_prj23_transparenzregister.models.company import Company
from aki_prj23_transparenzregister.models.company import Company, CompanyID, Location
from aki_prj23_transparenzregister.utils.mongo.company_mongo_service import (
CompanyMongoService,
)
@ -73,7 +73,7 @@ def test_by_id_no_result(mock_mongo_connector: Mock, mock_collection: Mock) -> N
mock_mongo_connector.database = {"companies": mock_collection}
service = CompanyMongoService(mock_mongo_connector)
mock_collection.find.return_value = []
assert service.get_by_id("Does not exist") is None # type: ignore
assert service.get_by_id("Does not exist") is None
def test_by_id_result(mock_mongo_connector: Mock, mock_collection: Mock) -> None:
@ -87,7 +87,7 @@ def test_by_id_result(mock_mongo_connector: Mock, mock_collection: Mock) -> None
service = CompanyMongoService(mock_mongo_connector)
mock_entry = {"id": "Does exist", "vaue": 42}
mock_collection.find.return_value = [mock_entry]
assert service.get_by_id("Does exist") == mock_entry # type: ignore
assert service.get_by_id("Does exist") == mock_entry
def test_insert(mock_mongo_connector: Mock, mock_collection: Mock) -> None:
@ -101,4 +101,55 @@ def test_insert(mock_mongo_connector: Mock, mock_collection: Mock) -> None:
service = CompanyMongoService(mock_mongo_connector)
mock_result = 42
mock_collection.insert_one.return_value = mock_result
assert service.insert(Company(None, None, "", "", [])) == mock_result # type: ignore
assert (
service.insert(
Company(CompanyID("", ""), Location("Hier und Dort"), "", "", [])
)
== mock_result
)
def test_get_by_object_id_no_result(
    mock_mongo_connector: Mock, mock_collection: Mock
) -> None:
    """``get_by_object_id`` returns ``None`` when ``find`` yields no documents."""
    no_documents: list = []
    mock_collection.find.return_value = no_documents
    mock_mongo_connector.database = {"companies": mock_collection}
    service = CompanyMongoService(mock_mongo_connector)
    assert service.get_by_object_id("649f16a1e198338c3b44299e") is None
def test_get_by_object_id(mock_mongo_connector: Mock, mock_collection: Mock) -> None:
    """``get_by_object_id`` returns the only document that ``find`` yields."""
    documents: list = [{"_id": "abc", "brille?": "Fielmann", "Hotel?": "Trivago"}]
    mock_collection.find.return_value = documents
    mock_mongo_connector.database = {"companies": mock_collection}
    service = CompanyMongoService(mock_mongo_connector)
    found = service.get_by_object_id("612316a1e198338c3b44299e")
    assert found == documents[0]
def test_get_where_financial_no_results(
    mock_mongo_connector: Mock, mock_collection: Mock
) -> None:
    """``get_where_no_financial_results`` passes through whatever ``find`` yields."""
    documents: list = [{"_id": "abc", "brille?": "Fielmann", "Hotel?": "Trivago"}]
    mock_collection.find.return_value = documents
    mock_mongo_connector.database = {"companies": mock_collection}
    service = CompanyMongoService(mock_mongo_connector)
    found = service.get_where_no_financial_results()
    assert found == documents
def test_get_where_financial_results(
    mock_mongo_connector: Mock, mock_collection: Mock
) -> None:
    """``get_where_yearly_results`` passes through whatever ``find`` yields."""
    documents: list = [{"_id": "abc", "brille?": "Fielmann", "Hotel?": "Trivago"}]
    mock_collection.find.return_value = documents
    mock_mongo_connector.database = {"companies": mock_collection}
    service = CompanyMongoService(mock_mongo_connector)
    found = service.get_where_yearly_results()
    assert found == documents
def test_add_yearly_reslults(mock_mongo_connector: Mock, mock_collection: Mock) -> None:
    """``add_yearly_results`` hands back the return value of ``update_one``."""
    update_outcome: list = [{"_id": "abc", "brille?": "Fielmann", "Hotel?": "Trivago"}]
    mock_collection.update_one.return_value = update_outcome
    mock_mongo_connector.database = {"companies": mock_collection}
    service = CompanyMongoService(mock_mongo_connector)
    returned = service.add_yearly_results("612316a1e198338c3b44299e", {})
    assert returned == update_outcome