Feat/fetch financials (#79)

This commit is contained in:
Tristan Nolde 2023-09-09 17:28:35 +02:00 committed by GitHub
commit 2cd8def200
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
14 changed files with 6343 additions and 5129 deletions

View File

@ -18,216 +18,125 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 32, "execution_count": 2,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>date</th>\n",
" <th>company</th>\n",
" <th>raw_report</th>\n",
" <th>jahr</th>\n",
" <th>auditors</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2023-07-07</td>\n",
" <td>Atos IT-Dienstleistung und Beratung GmbH</td>\n",
" <td>&lt;div class=\"publication_container\"&gt;\\n &lt;div cla...</td>\n",
" <td>2021</td>\n",
" <td>[]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2023-05-10</td>\n",
" <td>Atos IT-Dienstleistung und Beratung GmbH</td>\n",
" <td>&lt;div class=\"publication_container\"&gt;\\n &lt;div cla...</td>\n",
" <td>2021</td>\n",
" <td>[Auditor(name='Eckhard Lewe', company='Grant T...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2022-03-25</td>\n",
" <td>Atos IT-Dienstleistung und Beratung GmbH</td>\n",
" <td>&lt;div class=\"publication_container\"&gt;\\n &lt;div cla...</td>\n",
" <td>2020</td>\n",
" <td>[Auditor(name='Eckhard Lewe', company='Warth &amp;...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>2021-03-11</td>\n",
" <td>Atos IT-Dienstleistung und Beratung GmbH</td>\n",
" <td>&lt;div class=\"publication_container\"&gt;\\n &lt;div cla...</td>\n",
" <td>2019</td>\n",
" <td>[Auditor(name='Eckhard Lewe', company='Warth &amp;...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>2020-03-24</td>\n",
" <td>Atos IT-Dienstleistung und Beratung GmbH</td>\n",
" <td>&lt;div class=\"publication_container\"&gt;\\n &lt;div cla...</td>\n",
" <td>2018</td>\n",
" <td>[Auditor(name='Ulrich Diersch', company='Warth...</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" date company \\\n",
"0 2023-07-07 Atos IT-Dienstleistung und Beratung GmbH \n",
"2 2023-05-10 Atos IT-Dienstleistung und Beratung GmbH \n",
"4 2022-03-25 Atos IT-Dienstleistung und Beratung GmbH \n",
"5 2021-03-11 Atos IT-Dienstleistung und Beratung GmbH \n",
"6 2020-03-24 Atos IT-Dienstleistung und Beratung GmbH \n",
"\n",
" raw_report jahr \\\n",
"0 <div class=\"publication_container\">\\n <div cla... 2021 \n",
"2 <div class=\"publication_container\">\\n <div cla... 2021 \n",
"4 <div class=\"publication_container\">\\n <div cla... 2020 \n",
"5 <div class=\"publication_container\">\\n <div cla... 2019 \n",
"6 <div class=\"publication_container\">\\n <div cla... 2018 \n",
"\n",
" auditors \n",
"0 [] \n",
"2 [Auditor(name='Eckhard Lewe', company='Grant T... \n",
"4 [Auditor(name='Eckhard Lewe', company='Warth &... \n",
"5 [Auditor(name='Eckhard Lewe', company='Warth &... \n",
"6 [Auditor(name='Ulrich Diersch', company='Warth... "
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [ "source": [
"import pandas as pd\n", "import pandas as pd\n",
"from deutschland.bundesanzeiger import Bundesanzeiger" "\n",
] "from aki_prj23_transparenzregister.utils.data_extraction.bundesanzeiger import (\n",
}, " Bundesanzeiger,\n",
{ ")\n",
"cell_type": "code", "\n",
"execution_count": 33, "ba_wrapper = Bundesanzeiger()\n",
"metadata": {}, "df_reports = ba_wrapper.get_information(\"Atos IT-Dienstleistung und Beratung GmbH\")\n",
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"dict_keys(['c1051233030a8e0232523052fd4a2310', '57d129e6fd7505d567fa13919e5e6bdd'])\n"
]
}
],
"source": [
"ba = Bundesanzeiger()\n",
"reports = ba.get_reports(\n",
" \"Volkswagen Economy Service Erdle Bernhard Erdle GmbH\"\n",
") # \"Atos IT-Dienstleistung und Beratung GmbH\")\n",
"print(reports.keys())"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {},
"outputs": [],
"source": [
"report_contents = []\n",
"for key in reports.keys():\n",
" report_contents.append(reports[key])"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>date</th>\n",
" <th>name</th>\n",
" <th>company</th>\n",
" <th>report</th>\n",
" <th>raw_report</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2023-05-25</td>\n",
" <td>Jahresabschluss zum Geschäftsjahr vom 01.01.20...</td>\n",
" <td>Volkswagen Economy Service Erdle Bernhard Erdl...</td>\n",
" <td>\\n\\n\\n\\n \\n\\n\\n\\n\\n\\n\\n\\nVolkswagen Economy Se...</td>\n",
" <td>&lt;div class=\"publication_container\"&gt;\\n &lt;div cla...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2023-05-24</td>\n",
" <td>Jahresabschluss zum Geschäftsjahr vom 01.01.20...</td>\n",
" <td>Volkswagen Economy Service Erdle Bernhard Erdl...</td>\n",
" <td>\\n\\n\\n\\n \\n\\n\\n\\n\\n\\n\\n\\nVolkswagen Economy Se...</td>\n",
" <td>&lt;div class=\"publication_container\"&gt;\\n &lt;div cla...</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" date name \\\n",
"0 2023-05-25 Jahresabschluss zum Geschäftsjahr vom 01.01.20... \n",
"1 2023-05-24 Jahresabschluss zum Geschäftsjahr vom 01.01.20... \n",
"\n",
" company \\\n",
"0 Volkswagen Economy Service Erdle Bernhard Erdl... \n",
"1 Volkswagen Economy Service Erdle Bernhard Erdl... \n",
"\n",
" report \\\n",
"0 \\n\\n\\n\\n \\n\\n\\n\\n\\n\\n\\n\\nVolkswagen Economy Se... \n",
"1 \\n\\n\\n\\n \\n\\n\\n\\n\\n\\n\\n\\nVolkswagen Economy Se... \n",
"\n",
" raw_report \n",
"0 <div class=\"publication_container\">\\n <div cla... \n",
"1 <div class=\"publication_container\">\\n <div cla... "
]
},
"execution_count": 35,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_reports = pd.DataFrame(report_contents)\n",
"df_reports.head()" "df_reports.head()"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 36, "execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>date</th>\n",
" <th>name</th>\n",
" <th>company</th>\n",
" <th>report</th>\n",
" <th>raw_report</th>\n",
" <th>type</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2023-05-25</td>\n",
" <td>Jahresabschluss zum Geschäftsjahr vom 01.01.20...</td>\n",
" <td>Volkswagen Economy Service Erdle Bernhard Erdl...</td>\n",
" <td>\\n\\n\\n\\n \\n\\n\\n\\n\\n\\n\\n\\nVolkswagen Economy Se...</td>\n",
" <td>&lt;div class=\"publication_container\"&gt;\\n &lt;div cla...</td>\n",
" <td>Jahresabschluss</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2023-05-24</td>\n",
" <td>Jahresabschluss zum Geschäftsjahr vom 01.01.20...</td>\n",
" <td>Volkswagen Economy Service Erdle Bernhard Erdl...</td>\n",
" <td>\\n\\n\\n\\n \\n\\n\\n\\n\\n\\n\\n\\nVolkswagen Economy Se...</td>\n",
" <td>&lt;div class=\"publication_container\"&gt;\\n &lt;div cla...</td>\n",
" <td>Jahresabschluss</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" date name \\\n",
"0 2023-05-25 Jahresabschluss zum Geschäftsjahr vom 01.01.20... \n",
"1 2023-05-24 Jahresabschluss zum Geschäftsjahr vom 01.01.20... \n",
"\n",
" company \\\n",
"0 Volkswagen Economy Service Erdle Bernhard Erdl... \n",
"1 Volkswagen Economy Service Erdle Bernhard Erdl... \n",
"\n",
" report \\\n",
"0 \\n\\n\\n\\n \\n\\n\\n\\n\\n\\n\\n\\nVolkswagen Economy Se... \n",
"1 \\n\\n\\n\\n \\n\\n\\n\\n\\n\\n\\n\\nVolkswagen Economy Se... \n",
"\n",
" raw_report type \n",
"0 <div class=\"publication_container\">\\n <div cla... Jahresabschluss \n",
"1 <div class=\"publication_container\">\\n <div cla... Jahresabschluss "
]
},
"execution_count": 36,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_reports[\"type\"] = df_reports.name.apply(lambda name: name.split(\" \")[0])\n",
"df_reports.head()"
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@ -260,13 +169,20 @@
" <tbody>\n", " <tbody>\n",
" <tr>\n", " <tr>\n",
" <th>0</th>\n", " <th>0</th>\n",
" <td>2023-07-11</td>\n",
" <td>Volkswagen Economy Service Erdle Bernhard Erdl...</td>\n",
" <td>&lt;div class=\"publication_container\"&gt;\\n &lt;div cla...</td>\n",
" <td>2021</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2023-05-25</td>\n", " <td>2023-05-25</td>\n",
" <td>Volkswagen Economy Service Erdle Bernhard Erdl...</td>\n", " <td>Volkswagen Economy Service Erdle Bernhard Erdl...</td>\n",
" <td>&lt;div class=\"publication_container\"&gt;\\n &lt;div cla...</td>\n", " <td>&lt;div class=\"publication_container\"&gt;\\n &lt;div cla...</td>\n",
" <td>2020</td>\n", " <td>2020</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>1</th>\n", " <th>2</th>\n",
" <td>2023-05-24</td>\n", " <td>2023-05-24</td>\n",
" <td>Volkswagen Economy Service Erdle Bernhard Erdl...</td>\n", " <td>Volkswagen Economy Service Erdle Bernhard Erdl...</td>\n",
" <td>&lt;div class=\"publication_container\"&gt;\\n &lt;div cla...</td>\n", " <td>&lt;div class=\"publication_container\"&gt;\\n &lt;div cla...</td>\n",
@ -278,15 +194,17 @@
], ],
"text/plain": [ "text/plain": [
" date company \\\n", " date company \\\n",
"0 2023-05-25 Volkswagen Economy Service Erdle Bernhard Erdl... \n", "0 2023-07-11 Volkswagen Economy Service Erdle Bernhard Erdl... \n",
"1 2023-05-24 Volkswagen Economy Service Erdle Bernhard Erdl... \n", "1 2023-05-25 Volkswagen Economy Service Erdle Bernhard Erdl... \n",
"2 2023-05-24 Volkswagen Economy Service Erdle Bernhard Erdl... \n",
"\n", "\n",
" raw_report jahr \n", " raw_report jahr \n",
"0 <div class=\"publication_container\">\\n <div cla... 2020 \n", "0 <div class=\"publication_container\">\\n <div cla... 2021 \n",
"1 <div class=\"publication_container\">\\n <div cla... 2019 " "1 <div class=\"publication_container\">\\n <div cla... 2020 \n",
"2 <div class=\"publication_container\">\\n <div cla... 2019 "
] ]
}, },
"execution_count": 37, "execution_count": 9,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -310,7 +228,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 38, "execution_count": 10,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -320,7 +238,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 39, "execution_count": 11,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -338,18 +256,12 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 40, "execution_count": 18,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"import re\n", "import re\n",
"from dataclasses import dataclass\n", "from aki_prj23_transparenzregister.models.auditor import Auditor\n",
"\n",
"\n",
"@dataclass\n",
"class Auditor:\n",
" name: str\n",
" company: str\n",
"\n", "\n",
"\n", "\n",
"def extract_auditor_company(report: str) -> str:\n", "def extract_auditor_company(report: str) -> str:\n",
@ -374,7 +286,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 41, "execution_count": 13,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@ -383,7 +295,7 @@
"[]" "[]"
] ]
}, },
"execution_count": 41, "execution_count": 13,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -418,16 +330,16 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 42, "execution_count": 14,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
"data": { "data": {
"text/plain": [ "text/plain": [
"{'net_income': 23484.67, 'equity': 65083.84, 'current_assets': 357613.61}" "{'net_income': 100238.5, 'equity': 165322.34, 'current_assets': 435344.07}"
] ]
}, },
"execution_count": 42, "execution_count": 14,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -502,7 +414,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 43, "execution_count": 15,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -518,7 +430,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 46, "execution_count": 16,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@ -526,24 +438,30 @@
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"MultiIndex([('Aktiva', 'Unnamed: 0_level_1'),\n", "MultiIndex([('Aktiva', 'Unnamed: 0_level_1'),\n",
" ('Aktiva', '31.12.2020 EUR'),\n", " ('Aktiva', '31.12.2021 EUR'),\n",
" ('Aktiva', '31.12.2019 EUR')],\n", " ('Aktiva', '31.12.2020 EUR')],\n",
" )\n", " )\n",
"Aktiva Unnamed: 0_level_1 object\n", "Aktiva Unnamed: 0_level_1 object\n",
" 31.12.2021 EUR object\n",
" 31.12.2020 EUR object\n", " 31.12.2020 EUR object\n",
" 31.12.2019 EUR object\n",
"dtype: object\n", "dtype: object\n",
"MultiIndex([('Passiva', 'Unnamed: 0_level_1'),\n", "MultiIndex([('Passiva', 'Unnamed: 0_level_1'),\n",
" ('Passiva', '31.12.2020 EUR'),\n", " ('Passiva', '31.12.2021 EUR'),\n",
" ('Passiva', '31.12.2019 EUR')],\n", " ('Passiva', '31.12.2020 EUR')],\n",
" )\n", " )\n",
"Passiva Unnamed: 0_level_1 object\n", "Passiva Unnamed: 0_level_1 object\n",
" 31.12.2021 EUR object\n",
" 31.12.2020 EUR object\n", " 31.12.2020 EUR object\n",
" 31.12.2019 EUR object\n",
"dtype: object\n", "dtype: object\n",
"Index(['Angaben zur Identifikation der Gesellschaft laut Registergericht', 'Angaben zur Identifikation der Gesellschaft laut Registergericht.1'], dtype='object')\n", "Index(['Angaben zur Identifikation der Gesellschaft laut Registergericht', 'Angaben zur Identifikation der Gesellschaft laut Registergericht.1'], dtype='object')\n",
"Angaben zur Identifikation der Gesellschaft laut Registergericht object\n", "Angaben zur Identifikation der Gesellschaft laut Registergericht object\n",
"Angaben zur Identifikation der Gesellschaft laut Registergericht.1 object\n", "Angaben zur Identifikation der Gesellschaft laut Registergericht.1 object\n",
"dtype: object\n",
"MultiIndex([('Kreditentwicklung', 'Unnamed: 0_level_1'),\n",
" ( 'Betrag', 'EUR')],\n",
" )\n",
"Kreditentwicklung Unnamed: 0_level_1 object\n",
"Betrag EUR object\n",
"dtype: object\n" "dtype: object\n"
] ]
}, },
@ -553,7 +471,7 @@
"{}" "{}"
] ]
}, },
"execution_count": 46, "execution_count": 16,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -574,19 +492,46 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 45, "execution_count": 22,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
"ename": "KeyError", "data": {
"evalue": "'Passiva'", "text/html": [
"output_type": "error", "<div>\n",
"traceback": [ "<style scoped>\n",
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", " .dataframe tbody tr th:only-of-type {\n",
"\u001b[1;31mKeyError\u001b[0m Traceback (most recent call last)", " vertical-align: middle;\n",
"\u001b[1;32mc:\\Users\\trist\\Documents\\Code\\M.Sc\\aki_prj23_transparenzregister\\Jupyter\\API-tests\\Bundesanzeiger\\notebook.ipynb Cell 21\u001b[0m in \u001b[0;36m1\n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Users/trist/Documents/Code/M.Sc/aki_prj23_transparenzregister/Jupyter/API-tests/Bundesanzeiger/notebook.ipynb#X26sZmlsZQ%3D%3D?line=10'>11</a>\u001b[0m \u001b[39mreturn\u001b[39;00m result\n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Users/trist/Documents/Code/M.Sc/aki_prj23_transparenzregister/Jupyter/API-tests/Bundesanzeiger/notebook.ipynb#X26sZmlsZQ%3D%3D?line=13'>14</a>\u001b[0m bilanz \u001b[39m=\u001b[39m get_bilanz(sample_report)\n\u001b[1;32m---> <a href='vscode-notebook-cell:/c%3A/Users/trist/Documents/Code/M.Sc/aki_prj23_transparenzregister/Jupyter/API-tests/Bundesanzeiger/notebook.ipynb#X26sZmlsZQ%3D%3D?line=14'>15</a>\u001b[0m bilanz[\u001b[39m\"\u001b[39;49m\u001b[39mPassiva\u001b[39;49m\u001b[39m\"\u001b[39;49m]\u001b[39m.\u001b[39mhead()\n", " }\n",
"\u001b[1;31mKeyError\u001b[0m: 'Passiva'" "\n",
] " .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"Empty DataFrame\n",
"Columns: []\n",
"Index: []"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
} }
], ],
"source": [ "source": [
@ -600,6 +545,8 @@
" StringIO(str(tag.findNext(\"table\", {\"class\": \"std_table\"})))\n", " StringIO(str(tag.findNext(\"table\", {\"class\": \"std_table\"})))\n",
" )[0]\n", " )[0]\n",
" result[pos] = pos_results\n", " result[pos] = pos_results\n",
" else:\n",
" result[pos] = pd.DataFrame([])\n",
" return result\n", " return result\n",
"\n", "\n",
"\n", "\n",
@ -609,58 +556,25 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": 23,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
"name": "stdout", "name": "stdout",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"Int64Index([0, 1], dtype='int64')\n", "MultiIndex([('Aktiva', 'Unnamed: 0_level_1'),\n",
"Index(['Unnamed: 0', 'Anhang', '31.12.2021 TEUR', 'Vorjahr TEUR'], dtype='object')\n", " ('Aktiva', '31.12.2021 EUR'),\n",
"Index(['Unnamed: 0', 'Anhang', '2021 TEUR', 'Vorjahr TEUR'], dtype='object')\n", " ('Aktiva', '31.12.2020 EUR')],\n",
"Index(['Aufgliederung nach Tätigkeitsbereichen', '2021 TEUR',\n",
" 'Vorjahr TEUR'],\n",
" dtype='object')\n",
"Index(['Aufgliederung nach Inland und Ausland', '2021 TEUR', 'Vorjahr TEUR'], dtype='object')\n",
"Index(['Unnamed: 0', '31.12.2021 TEUR', 'Vorjahr TEUR'], dtype='object')\n",
"Index(['Unnamed: 0', '31.12.2021 TEUR', 'Vorjahr TEUR'], dtype='object')\n",
"Index(['Unnamed: 0', '31.12.2021'], dtype='object')\n",
"Index(['Unnamed: 0', 'TEUR'], dtype='object')\n",
"Index(['Unnamed: 0', 'TEUR'], dtype='object')\n",
"Index(['Unnamed: 0', 'TEUR'], dtype='object')\n",
"Int64Index([0, 1, 2], dtype='int64')\n",
"Index(['Unnamed: 0', 'TEUR'], dtype='object')\n",
"Index(['Unnamed: 0', '31.12.2021 TEUR', 'Vorjahr TEUR'], dtype='object')\n",
"Index(['Unnamed: 0', '2021 Anzahl MA', 'Vorjahr Anzahl MA'], dtype='object')\n",
"MultiIndex([('Art des Geschäfts', 'Unnamed: 0_level_1'),\n",
" ('Art der Beziehung', 'Gesellschafterin TEUR'),\n",
" ('Art der Beziehung', 'Verbundene Unternehmen TEUR')],\n",
" )\n", " )\n",
"Int64Index([0, 1], dtype='int64')\n", "MultiIndex([('Passiva', 'Unnamed: 0_level_1'),\n",
"MultiIndex([( 'Unnamed: 0_level_0', ...),\n", " ('Passiva', '31.12.2021 EUR'),\n",
" ('Anschaffungs- oder Herstellungskosten', ...),\n", " ('Passiva', '31.12.2020 EUR')],\n",
" ('Anschaffungs- oder Herstellungskosten', ...),\n",
" ('Anschaffungs- oder Herstellungskosten', ...),\n",
" ('Anschaffungs- oder Herstellungskosten', ...)],\n",
" )\n", " )\n",
"MultiIndex([('Unnamed: 0_level_0', ...),\n", "Index(['Angaben zur Identifikation der Gesellschaft laut Registergericht', 'Angaben zur Identifikation der Gesellschaft laut Registergericht.1'], dtype='object')\n",
" ( 'Abschreibungen', ...),\n", "MultiIndex([('Kreditentwicklung', 'Unnamed: 0_level_1'),\n",
" ( 'Abschreibungen', ...),\n", " ( 'Betrag', 'EUR')],\n",
" ( 'Abschreibungen', ...),\n", " )\n"
" ( 'Abschreibungen', ...)],\n",
" )\n",
"MultiIndex([('Unnamed: 0_level_0', 'Unnamed: 0_level_1'),\n",
" ( 'Buchwerte', 'Stand 31.12.2021 EUR'),\n",
" ( 'Buchwerte', 'Stand 31.12.2020 EUR')],\n",
" )\n",
"Index(['Nichtfinanzieller Leistungsindikator', 'Unnamed: 1', '2021', '2020',\n",
" '2019'],\n",
" dtype='object')\n",
"Index(['Gewinn- und Verlustrechnung', '2021 TEUR', 'Vorjahr TEUR',\n",
" 'Veränderung TEUR'],\n",
" dtype='object')\n",
"Index(['Bilanz', '31.12.2021 TEUR', 'Vorjahr TEUR', 'Veränderung TEUR'], dtype='object')\n"
] ]
} }
], ],
@ -698,7 +612,7 @@
"name": "python", "name": "python",
"nbconvert_exporter": "python", "nbconvert_exporter": "python",
"pygments_lexer": "ipython3", "pygments_lexer": "ipython3",
"version": "3.10.7" "version": "3.11.3"
}, },
"orig_nbformat": 4 "orig_nbformat": 4
}, },

1505
poetry.lock generated

File diff suppressed because it is too large Load Diff

View File

@ -38,6 +38,7 @@ version = "0.1.0"
SQLAlchemy = {version = "^1.4.46", extras = ["mypy"]} SQLAlchemy = {version = "^1.4.46", extras = ["mypy"]}
dash = "^2.11.1" dash = "^2.11.1"
dash-bootstrap-components = "^1.4.2" dash-bootstrap-components = "^1.4.2"
deutschland = {git = "https://github.com/TrisNol/deutschland.git", branch = "hotfix/python-3.11-support"}
loguru = "^0.7.0" loguru = "^0.7.0"
matplotlib = "^3.7.1" matplotlib = "^3.7.1"
plotly = "^5.14.1" plotly = "^5.14.1"
@ -48,6 +49,10 @@ seaborn = "^0.12.2"
selenium = "^4.10.0" selenium = "^4.10.0"
tqdm = "^4.65.0" tqdm = "^4.65.0"
# TODO Add dependent libraries (e.g., deutschland, plotly, etc.)
[tool.poetry.extras]
ingest = ["selenium"]
[tool.poetry.group.develop.dependencies] [tool.poetry.group.develop.dependencies]
black = {extras = ["jupyter"], version = "^23.3.0"} black = {extras = ["jupyter"], version = "^23.3.0"}
jupyterlab = "^4.0.0" jupyterlab = "^4.0.0"

View File

@ -0,0 +1 @@
"""Main applications."""

View File

@ -0,0 +1,63 @@
"""Add financial data to companies."""
import typing
from aki_prj23_transparenzregister.config.config_providers import JsonFileConfigProvider
from aki_prj23_transparenzregister.utils.data_extraction.bundesanzeiger import (
Bundesanzeiger,
)
from aki_prj23_transparenzregister.utils.mongo.company_mongo_service import (
CompanyMongoService,
)
from aki_prj23_transparenzregister.utils.mongo.connector import MongoConnector
def work(company: typing.Any, company_service: CompanyMongoService) -> None:
    """Fetch the yearly results of one company and store them.

    Args:
        company (dict): Company document; its ``name``, ``location.city`` and
            ``_id`` entries are read.
        company_service (CompanyMongoService): Service used to persist the
            collected yearly results.
    """
    report_frame = Bundesanzeiger().get_information(
        company["name"], company["location"]["city"]
    )
    # Keyed by report year; when several rows share a year the last row wins.
    results_by_year = {
        row.jahr: {
            "auditors": [auditor.to_dict() for auditor in row.auditors],
            "financials": row.financial_results,
        }
        for _, row in report_frame.iterrows()
    }
    company_service.add_yearly_results(company["_id"], results_by_year)
if __name__ == "__main__":
    import concurrent.futures

    from loguru import logger

    config_provider = JsonFileConfigProvider("./secrets.json")
    mongo_connector = MongoConnector(config_provider.get_mongo_connection_string())
    company_service = CompanyMongoService(mongo_connector)

    num_threads = 25
    companies = company_service.get_where_no_financial_results()
    with concurrent.futures.ThreadPoolExecutor(max_workers=num_threads) as executor:
        # Submit one task per company and remember which company each future
        # belongs to so results can be attributed in the log.
        future_to_entry = {
            executor.submit(work, entry, company_service): entry for entry in companies
        }
        # Handle tasks in completion order rather than submission order.
        for future in concurrent.futures.as_completed(future_to_entry):
            entry = future_to_entry[future]
            logger.info(entry["name"])
            try:
                # work() returns nothing; result() is only called to re-raise
                # any exception that occurred inside the worker thread.
                future.result()
            except Exception as e:
                # Top-level boundary: log the failure and continue so that one
                # failing company does not abort the remaining batch.
                logger.error(f"Error processing entry {entry['name']}: {e}")

View File

@ -0,0 +1,18 @@
"""Auditor model."""
from dataclasses import asdict, dataclass
@dataclass
class Auditor:
    """Auditor together with an optional auditing company."""

    # Name of the auditor.
    name: str
    # Name of the auditing company; may be None.
    company: str | None

    def to_dict(self) -> dict:
        """Convert the auditor into a plain dictionary.

        Returns:
            dict: The dataclass fields and their values, as produced by
                ``dataclasses.asdict``.
        """
        return asdict(self)

View File

@ -45,19 +45,61 @@ class CompanyRelationship(ABC):
location: Location location: Location
class FinancialKPIEnum(Enum):
    """Financial KPI keys."""

    # Revenue (German: Umsatz / Erlöse)
    REVENUE = "revenue"
    # Net income (German: Jahresüberschuss / Nettoeinkommen)
    NET_INCOME = "net_income"
    # EBIT
    EBIT = "ebit"
    # EBITDA
    EBITDA = "ebitda"
    # Gross profit (German: Bruttogewinn)
    GROSS_PROFIT = "gross_profit"
    # Operating profit (German: Betriebsgewinn)
    OPERATING_PROFIT = "operating_profit"
    # Total assets (German: Bilanzsumme)
    ASSETS = "assets"
    # Total liabilities (German: Gesamtverbindlichkeiten)
    LIABILITIES = "liabilities"
    # Equity (German: Eigenkapital)
    EQUITY = "equity"
    # Current assets (German: Umlaufvermögen)
    CURRENT_ASSETS = "current_assets"
    # Current liabilities (German: kurzfristige Verbindlichkeiten)
    CURRENT_LIABILITIES = "current_liabilities"
    # Long-term liabilities (German: langfristige Verbindlichkeiten)
    LONG_TERM_DEBT = "long_term_debt"
    # Short-term liabilities (German: kurzfristige Verbindlichkeiten)
    SHORT_TERM_DEBT = "short_term_debt"
    # Cash (German: Barmittel)
    CASH_AND_CASH_EQUIVALENTS = "cash_and_cash_equivalents"
    # Dividend (German: Dividende)
    DIVIDENDS = "dividends"
    # Cash flow
    CASH_FLOW = "cash_flow"
@dataclass
class YearlyResult:
    """Company yearly result."""

    # Year the result refers to.
    year: int
    # KPI values of that year, keyed by FinancialKPIEnum member.
    kpis: dict[FinancialKPIEnum, float]
@dataclass @dataclass
class Company: class Company:
"""_summary_. """Company dataclass."""
Returns:
_type_: _description_
"""
id: CompanyID id: CompanyID
location: Location location: Location
name: str name: str
last_update: str last_update: str
relationships: list[CompanyRelationship] relationships: list[CompanyRelationship]
# yearly_results: list[FinancialResults]
def to_dict(self) -> dict: def to_dict(self) -> dict:
"""_summary_. """_summary_.

View File

@ -0,0 +1 @@
"""Everything regarding data extraction from various sources."""

View File

@ -0,0 +1,183 @@
"""Fetch data from Bundesanzeiger."""
import re
import pandas as pd
from bs4 import BeautifulSoup
from deutschland.bundesanzeiger import Bundesanzeiger as Ba
from aki_prj23_transparenzregister.models.auditor import Auditor
from aki_prj23_transparenzregister.models.company import FinancialKPIEnum
pd.options.mode.chained_assignment = None # type: ignore
class Bundesanzeiger:
"""Bundesanzeiger wrapper to export relevant information."""
def get_information(self, company_name: str, city: str | None) -> pd.DataFrame:
"""Extract relevant information from all found yearly results for the given company.
Args:
company_name (str): Name of the company to search for
city (Optional[str]): City where the company is registered
Returns:
pd.DataFrame: Result
"""
ba = Ba()
# Get Bundesanzeiger entries for company
reports = ba.get_reports(f"{company_name} {city}")
# Transform to list of data
report_contents = []
for key in reports:
report_contents.append(reports[key])
if len(report_contents) == 0:
return pd.DataFrame()
# Transform to DataFrame and filter out irrelevant entries
df_data = pd.DataFrame(report_contents)
df_data = self.filter_reports(df_data)
# Filter out entries of different companies
df_data = df_data.loc[df_data.company == company_name]
# Add Auditor information
df_data["auditors"] = df_data.raw_report.apply(self.extract_auditors)
# Add Financial information
df_data["financial_results"] = df_data.raw_report.apply(
self.extract_financial_results
)
# Remove irrelevant columns
return df_data.drop(["raw_report"], axis=1)
def filter_reports(self, df_reports: pd.DataFrame) -> pd.DataFrame:
"""Returns only reports of type `Jahresabschluss` and extracts the year of the report.
Args:
df_reports (pd.DataFrame): DataFrame containing list of reports
Returns:
pd.DataFrame: Filtered and pruned DataFrame
"""
df_reports["type"] = df_reports.name.apply(lambda name: name.split(" ")[0])
df_reports = df_reports.loc[df_reports.type == "Jahresabschluss"]
df_reports["jahr"] = df_reports.name.apply(
lambda name: name.split(" ")[-1].split(".")[-1]
)
return df_reports.drop(["name", "report", "type"], axis=1)
def extract_auditor_company(self, report: str) -> str | None:
"""Extract the name of an auditor company from the given yearly results report.
Args:
report (str): Yearly results report as raw string
Returns:
str | None: Name of the auditor company if found, otherwise None
"""
soup = BeautifulSoup(report, features="html.parser")
temp = soup.find_all("b")
for elem in temp:
br = elem.findChildren("br")
if len(br) > 0:
return elem.text.split("\n")[1].strip()
return None
def extract_auditors(self, report: str) -> list:
"""Find the list of auditors involved in the given yearly results report.
Args:
report (str): Yearly results report as raw string
Returns:
list[Auditor]: List of Auditors found in the given report
"""
auditor_company = self.extract_auditor_company(report)
auditor_regex = r"[a-z A-Z,.'-]+, Wirtschaftsprüfer"
hits = re.findall(auditor_regex, report)
return [
Auditor(hit.replace(", Wirtschaftsprüfer", "").lstrip(), auditor_company)
for hit in hits
]
def __extract_kpis__(self, report: str) -> dict:
"""Source: https://github.com/bundesAPI/deutschland/pull/87/files#diff-f5b9db5384cf523fcc677056065041e7793bfc4da9cf74c4eebd6fab732739bd.
Extracts Key Performance Indicators (KPIs) from the financial reports.
Args:
report (str): The yearly report as a parsed string
Returns:
dict: A dictionary containing the extracted KPIs with their report hash as keys and KPIs as values.
"""
kpis = {}
# Define KPI patterns to search for
kpi_patterns = {
FinancialKPIEnum.REVENUE.value: r"(?:revenue|umsatz|erlöse)[:\s]*([\d,.]+[mmb]?)",
FinancialKPIEnum.NET_INCOME.value: r"(?:net income|jahresüberschuss|nettoeinkommen|Ergebnis nach Steuern)[:\s]*([\d,.]+[mmb]?)",
FinancialKPIEnum.EBIT.value: r"(?:ebit|operating income)[:\s]*([\d,.]+[mmb]?)",
FinancialKPIEnum.EBITDA.value: r"(?:ebitda)[:\s]*([\d,.]+[mmb]?)",
FinancialKPIEnum.GROSS_PROFIT.value: r"(?:gross profit|bruttogewinn)[:\s]*([\d,.]+[mmb]?)",
FinancialKPIEnum.OPERATING_PROFIT.value: r"(?:operating profit|betriebsgewinn)[:\s]*([\d,.]+[mmb]?)",
FinancialKPIEnum.ASSETS.value: r"(?:total assets|bilanzsumme)[:\s]*([\d,.]+[mmb]?)",
FinancialKPIEnum.LIABILITIES.value: r"(?:total liabilities|gesamtverbindlichkeiten)[:\s]*([\d,.]+[mmb]?)",
FinancialKPIEnum.EQUITY.value: r"(?:shareholders'? equity|eigenkapital)[:\s]*([\d,.]+[mmb]?)",
FinancialKPIEnum.CURRENT_ASSETS.value: r"(?:current assets|umlaufvermögen)[:\s]*([\d,.]+[mmb]?)",
FinancialKPIEnum.CURRENT_LIABILITIES.value: r"(?:current liabilities|kurzfristige verbindlichkeiten)[:\s]*([\d,.]+[mmb]?)",
FinancialKPIEnum.LONG_TERM_DEBT.value: r"(?:long[-\s]?term debt|langfristige verbindlichkeiten)[:\s]*([\d,.]+[mmb]?)",
FinancialKPIEnum.SHORT_TERM_DEBT.value: r"(?:short[-\s]?term debt|kurzfristige verbindlichkeiten)[:\s]*([\d,.]+[mmb]?)",
FinancialKPIEnum.CASH_AND_CASH_EQUIVALENTS.value: r"(?:cash (?:and cash equivalents)?|barmittel)[:\s]*([\d,.]+[mmb]?)",
FinancialKPIEnum.DIVIDENDS.value: r"(?:dividends?|dividende)[:\s]*([\d,.]+[mmb]?)",
FinancialKPIEnum.CASH_FLOW.value: r"(?:cash flow|cashflow|cash flow from operating activities)[:\s]*([\d,.]+[mmb]?)",
}
for kpi, pattern in kpi_patterns.items():
match = re.search(pattern, report, flags=re.IGNORECASE | re.UNICODE)
if match:
value = match.group(1)
# Clean and validate the extracted number
try:
if not value: # Check if value is empty
cleaned_value = None
else:
multiplier = 1
if value[-1].lower() == "m":
value = value[:-1]
multiplier = 1_000_000
elif value[-1].lower() == "b":
value = value[:-1]
multiplier = 1_000_000_000
# Remove commas after checking for multipliers
value = value.replace(".", "").replace(",", ".").strip()
cleaned_value = float(value) * multiplier
except ValueError:
cleaned_value = None
if cleaned_value is not None:
kpis[kpi] = cleaned_value
return kpis
def extract_financial_results(self, report: str) -> dict:
    """Extract financial KPIs from an HTML report.

    The markup is parsed with BeautifulSoup's ``html.parser``, reduced to its
    text content and flattened to a single line (newlines become spaces)
    before being handed to ``__extract_kpis__``.

    Args:
        report (str): HTML report to be analyzed

    Returns:
        dict: KPIs as returned by ``__extract_kpis__``
    """
    soup = BeautifulSoup(report, features="html.parser")
    # Flatten to one line before matching the KPI patterns.
    flat_text = soup.get_text().replace("\n", " ")
    return self.__extract_kpis__(flat_text)
if __name__ == "__main__":
    # Manual run against one hard-coded company; the result is discarded.
    # NOTE(review): the second argument is passed as None here — it looks like
    # an optional location/city filter; confirm against get_information.
    Bundesanzeiger().get_information("Atos IT-Dienstleistung und Beratung GmbH", None)

View File

@ -1,7 +1,10 @@
"""CompanyMongoService.""" """CompanyMongoService."""
from pymongo.results import InsertOneResult from threading import Lock
from aki_prj23_transparenzregister.models.company import Company, CompanyID from bson.objectid import ObjectId
from pymongo.results import InsertOneResult, UpdateResult
from aki_prj23_transparenzregister.models.company import Company
from aki_prj23_transparenzregister.utils.mongo.connector import MongoConnector from aki_prj23_transparenzregister.utils.mongo.connector import MongoConnector
@ -15,6 +18,7 @@ class CompanyMongoService:
connector (MongoConnector): _description_ connector (MongoConnector): _description_
""" """
self.collection = connector.database["companies"] self.collection = connector.database["companies"]
self.lock = Lock() # Create a lock for synchronization
def get_all(self) -> list[Company]: def get_all(self) -> list[Company]:
"""_summary_. """_summary_.
@ -22,10 +26,11 @@ class CompanyMongoService:
Returns: Returns:
list[Company]: _description_ list[Company]: _description_
""" """
result = self.collection.find() with self.lock:
return list(result) result = self.collection.find()
return list(result)
def get_by_id(self, id: CompanyID) -> Company | None: def get_by_id(self, id: str) -> Company | None:
"""_summary_. """_summary_.
Args: Args:
@ -34,10 +39,46 @@ class CompanyMongoService:
Returns: Returns:
Company | None: _description_ Company | None: _description_
""" """
result = list(self.collection.find({"id": id})) with self.lock:
if len(result) == 1: result = list(self.collection.find({"id": id}))
return result[0] if len(result) == 1:
return None return result[0]
return None
def get_by_object_id(self, _id: str) -> dict | None:
    """Look up a single entry by its ``_id``.

    Args:
        _id (str): String form of the object ID; wrapped in ``ObjectId``
            before querying

    Returns:
        dict | None: The entry if exactly one document matches, otherwise None
    """
    query = {"_id": ObjectId(_id)}
    with self.lock:
        matches = list(self.collection.find(query))
    return matches[0] if len(matches) == 1 else None
def get_where_no_financial_results(self) -> list[dict]:
    """Return every entry that has no ``yearly_results`` field.

    Returns:
        list[dict]: Companies for which ``yearly_results`` does not exist
    """
    missing_results = {"$or": [{"yearly_results": {"$exists": False}}]}
    with self.lock:
        # Materialize the cursor while the lock is still held.
        cursor = self.collection.find(missing_results)
        return list(cursor)
def get_where_yearly_results(self) -> list[dict]:
    """Return all companies whose ``yearly_results`` compares greater than ``{}``.

    Presumably this selects the entries with a non-empty ``yearly_results``
    document, i.e. the ones of interest to the data loader.

    Returns:
        list[dict]: Matching companies
    """
    has_results = {"yearly_results": {"$gt": {}}}
    with self.lock:
        # Materialize the cursor while the lock is still held.
        cursor = self.collection.find(has_results)
        return list(cursor)
def insert(self, company: Company) -> InsertOneResult: def insert(self, company: Company) -> InsertOneResult:
"""_summary_. """_summary_.
@ -48,4 +89,20 @@ class CompanyMongoService:
Returns: Returns:
_type_: _description_ _type_: _description_
""" """
return self.collection.insert_one(company.to_dict()) with self.lock:
return self.collection.insert_one(company.to_dict())
def add_yearly_results(self, _id: str, yearly_results: dict) -> UpdateResult:
    """Set the ``yearly_results`` field on one company entry.

    Args:
        _id (str): String form of the entry's object ID; wrapped in ``ObjectId``
        yearly_results (dict): Value to store under ``yearly_results``

    Returns:
        UpdateResult: Whatever ``update_one`` on the collection returns
    """
    selector = {"_id": ObjectId(_id)}
    change = {"$set": {"yearly_results": yearly_results}}
    with self.lock:
        return self.collection.update_one(selector, change)

View File

@ -0,0 +1,36 @@
"""Tests for the enrich_company_financials module."""
from unittest.mock import Mock, patch
import pandas as pd
from aki_prj23_transparenzregister.apps import enrich_company_financials
from aki_prj23_transparenzregister.models.auditor import Auditor
def test_import_enrich_company_financials() -> None:
    """Check that the enrich_company_financials module was imported."""
    imported_module = enrich_company_financials
    assert imported_module
@patch(
    "aki_prj23_transparenzregister.apps.enrich_company_financials.Bundesanzeiger.get_information"
)
@patch(
    "aki_prj23_transparenzregister.apps.enrich_company_financials.CompanyMongoService"
)
def test_work(mock_compnay_service: Mock, mock_bundesanzeiger: Mock) -> None:
    """Run ``work`` with a patched Bundesanzeiger lookup and company service.

    Stacked ``patch`` decorators inject their mocks bottom-up, so the first
    parameter is the patched ``CompanyMongoService`` and the second the patched
    ``Bundesanzeiger.get_information``.
    """
    report_row = {
        "jahr": "2042",
        "auditors": [Auditor(name="", company="")],
        "financial_results": [],
    }
    mock_bundesanzeiger.return_value = pd.DataFrame([report_row])

    company = {"_id": "", "name": "ABC AG", "location": {"city": "Haltern am See"}}
    enrich_company_financials.work(company, mock_compnay_service)

    # Smoke test only: passing means work() did not raise.
    assert enrich_company_financials

View File

@ -0,0 +1 @@
"""Tests for data_extraction."""

View File

@ -0,0 +1,111 @@
from unittest.mock import Mock, patch
import pandas as pd
from aki_prj23_transparenzregister.models.company import FinancialKPIEnum
from aki_prj23_transparenzregister.utils.data_extraction.bundesanzeiger import (
Bundesanzeiger,
)
def test_extract_auditor_company_no_hits() -> None:
    """A snippet without auditor information yields None."""
    html_without_auditor = """
<b>
Nothing to see here
</b>
"""
    extractor = Bundesanzeiger()
    assert extractor.extract_auditor_company(html_without_auditor) is None
def test_extract_auditor_company() -> None:
    """The auditing company's name is extracted from a bold block."""
    expected_company = "Korrupte Wirtschaftsprüfer GmbH & Co. KG"
    html_snippet = f"""
<b>
{expected_company}
<br>
Max Mustermann
</b>
"""
    extractor = Bundesanzeiger()
    extracted = extractor.extract_auditor_company(html_snippet)
    assert extracted == expected_company
def test_extract_kpis() -> None:
    """German-formatted amounts in plain text are parsed into floats per KPI."""
    report_text = """
Die Prj23_Transparenzregister GmbH erwirtschaftete einen Jahresüberschuss 10.000,43 .
Des Weiteren sanken die Gesamtverbindlichkeiten 42,00
"""
    expected = {
        FinancialKPIEnum.NET_INCOME.value: 10000.43,
        FinancialKPIEnum.LIABILITIES.value: 42.00,
    }
    extracted = Bundesanzeiger().__extract_kpis__(report_text)
    for kpi, amount in expected.items():
        assert extracted[kpi] == amount
def test_extracct_financial_results() -> None:
    """KPIs are found in an HTML report despite surrounding markup."""
    html_report = """
<br>
Die Prj23_Transparenzregister GmbH erwirtschaftete einen Jahresüberschuss 10.000,43 .
</br>
<h2>Dies ist ein Platzhalter, der ignoriert werden soll</h2>
<b>Des Weiteren sanken die Gesamtverbindlichkeiten 42,00 </b>
"""
    expected = {
        FinancialKPIEnum.NET_INCOME.value: 10000.43,
        FinancialKPIEnum.LIABILITIES.value: 42.00,
    }
    extracted = Bundesanzeiger().extract_financial_results(html_report)
    for kpi, amount in expected.items():
        assert extracted[kpi] == amount
def test_filter_reports() -> None:
    """Of two reports, one remains after filtering and carries ``jahr`` "1998"."""
    reports = pd.DataFrame(
        [
            {"name": "Bedienungsanleitung", "report": "", "raw_report": ""},
            {"name": "Jahresabschluss 1998", "report": "", "raw_report": ""},
        ]
    )
    filtered = Bundesanzeiger().filter_reports(reports)
    assert len(filtered) == 1
    assert filtered.iloc[0].jahr == "1998"
@patch(
    "aki_prj23_transparenzregister.utils.data_extraction.bundesanzeiger.Ba.get_reports"
)
def test_get_information(mock_bundesanzeiger: Mock) -> None:
    """With ``Ba.get_reports`` patched to return two reports, one entry results."""
    manual = {
        "name": "Bedienungsanleitung",
        "report": "",
        "company": "",
        "raw_report": "",
    }
    annual_statement = {
        "name": "Jahresabschluss 1998",
        "report": "",
        "company": "PRJ 23 Transparenzregister GmbH",
        "raw_report": "",
    }
    mock_bundesanzeiger.return_value = {"1": manual, "2": annual_statement}

    wrapper = Bundesanzeiger()
    information = wrapper.get_information("PRJ 23 Transparenzregister GmbH", "Iserlohn")
    assert len(information) == 1
@patch(
    "aki_prj23_transparenzregister.utils.data_extraction.bundesanzeiger.Ba.get_reports"
)
def test_get_information_no_results(mock_bundesanzeiger: Mock) -> None:
    """With ``Ba.get_reports`` patched to return nothing, the result is empty."""
    mock_bundesanzeiger.return_value = {}

    wrapper = Bundesanzeiger()
    information = wrapper.get_information("PRJ 23 Transparenzregister GmbH", "Iserlohn")
    assert len(information) == 0

View File

@ -3,7 +3,7 @@ from unittest.mock import Mock
import pytest import pytest
from aki_prj23_transparenzregister.models.company import Company from aki_prj23_transparenzregister.models.company import Company, CompanyID, Location
from aki_prj23_transparenzregister.utils.mongo.company_mongo_service import ( from aki_prj23_transparenzregister.utils.mongo.company_mongo_service import (
CompanyMongoService, CompanyMongoService,
) )
@ -73,7 +73,7 @@ def test_by_id_no_result(mock_mongo_connector: Mock, mock_collection: Mock) -> N
mock_mongo_connector.database = {"companies": mock_collection} mock_mongo_connector.database = {"companies": mock_collection}
service = CompanyMongoService(mock_mongo_connector) service = CompanyMongoService(mock_mongo_connector)
mock_collection.find.return_value = [] mock_collection.find.return_value = []
assert service.get_by_id("Does not exist") is None # type: ignore assert service.get_by_id("Does not exist") is None
def test_by_id_result(mock_mongo_connector: Mock, mock_collection: Mock) -> None: def test_by_id_result(mock_mongo_connector: Mock, mock_collection: Mock) -> None:
@ -87,7 +87,7 @@ def test_by_id_result(mock_mongo_connector: Mock, mock_collection: Mock) -> None
service = CompanyMongoService(mock_mongo_connector) service = CompanyMongoService(mock_mongo_connector)
mock_entry = {"id": "Does exist", "vaue": 42} mock_entry = {"id": "Does exist", "vaue": 42}
mock_collection.find.return_value = [mock_entry] mock_collection.find.return_value = [mock_entry]
assert service.get_by_id("Does exist") == mock_entry # type: ignore assert service.get_by_id("Does exist") == mock_entry
def test_insert(mock_mongo_connector: Mock, mock_collection: Mock) -> None: def test_insert(mock_mongo_connector: Mock, mock_collection: Mock) -> None:
@ -101,4 +101,55 @@ def test_insert(mock_mongo_connector: Mock, mock_collection: Mock) -> None:
service = CompanyMongoService(mock_mongo_connector) service = CompanyMongoService(mock_mongo_connector)
mock_result = 42 mock_result = 42
mock_collection.insert_one.return_value = mock_result mock_collection.insert_one.return_value = mock_result
assert service.insert(Company(None, None, "", "", [])) == mock_result # type: ignore assert (
service.insert(
Company(CompanyID("", ""), Location("Hier und Dort"), "", "", [])
)
== mock_result
)
def test_get_by_object_id_no_result(
    mock_mongo_connector: Mock, mock_collection: Mock
) -> None:
    """An empty ``find`` result makes ``get_by_object_id`` return None."""
    mock_mongo_connector.database = {"companies": mock_collection}
    no_documents: list = []
    mock_collection.find.return_value = no_documents

    service = CompanyMongoService(mock_mongo_connector)
    assert service.get_by_object_id("649f16a1e198338c3b44299e") is None
def test_get_by_object_id(mock_mongo_connector: Mock, mock_collection: Mock) -> None:
    """A single document from ``find`` is returned unchanged."""
    document = {"_id": "abc", "brille?": "Fielmann", "Hotel?": "Trivago"}
    mock_mongo_connector.database = {"companies": mock_collection}
    mock_collection.find.return_value = [document]

    service = CompanyMongoService(mock_mongo_connector)
    assert service.get_by_object_id("612316a1e198338c3b44299e") == document
def test_get_where_financial_no_results(
    mock_mongo_connector: Mock, mock_collection: Mock
) -> None:
    """``get_where_no_financial_results`` passes through what ``find`` yields."""
    documents: list = [{"_id": "abc", "brille?": "Fielmann", "Hotel?": "Trivago"}]
    mock_mongo_connector.database = {"companies": mock_collection}
    mock_collection.find.return_value = documents

    service = CompanyMongoService(mock_mongo_connector)
    assert service.get_where_no_financial_results() == documents
def test_get_where_financial_results(
    mock_mongo_connector: Mock, mock_collection: Mock
) -> None:
    """``get_where_yearly_results`` passes through what ``find`` yields."""
    documents: list = [{"_id": "abc", "brille?": "Fielmann", "Hotel?": "Trivago"}]
    mock_mongo_connector.database = {"companies": mock_collection}
    mock_collection.find.return_value = documents

    service = CompanyMongoService(mock_mongo_connector)
    assert service.get_where_yearly_results() == documents
def test_add_yearly_reslults(mock_mongo_connector: Mock, mock_collection: Mock) -> None:
    """``add_yearly_results`` returns whatever ``update_one`` returns."""
    update_outcome: list = [{"_id": "abc", "brille?": "Fielmann", "Hotel?": "Trivago"}]
    mock_mongo_connector.database = {"companies": mock_collection}
    mock_collection.update_one.return_value = update_outcome

    service = CompanyMongoService(mock_mongo_connector)
    outcome = service.add_yearly_results("612316a1e198338c3b44299e", {})
    assert outcome == update_outcome