mirror of
https://github.com/fhswf/aki_prj23_transparenzregister.git
synced 2025-04-25 07:52:35 +02:00
Feat/fetch financials (#79)
This commit is contained in:
commit
2cd8def200
@ -18,216 +18,125 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 32,
|
"execution_count": 2,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/html": [
|
||||||
|
"<div>\n",
|
||||||
|
"<style scoped>\n",
|
||||||
|
" .dataframe tbody tr th:only-of-type {\n",
|
||||||
|
" vertical-align: middle;\n",
|
||||||
|
" }\n",
|
||||||
|
"\n",
|
||||||
|
" .dataframe tbody tr th {\n",
|
||||||
|
" vertical-align: top;\n",
|
||||||
|
" }\n",
|
||||||
|
"\n",
|
||||||
|
" .dataframe thead th {\n",
|
||||||
|
" text-align: right;\n",
|
||||||
|
" }\n",
|
||||||
|
"</style>\n",
|
||||||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||||||
|
" <thead>\n",
|
||||||
|
" <tr style=\"text-align: right;\">\n",
|
||||||
|
" <th></th>\n",
|
||||||
|
" <th>date</th>\n",
|
||||||
|
" <th>company</th>\n",
|
||||||
|
" <th>raw_report</th>\n",
|
||||||
|
" <th>jahr</th>\n",
|
||||||
|
" <th>auditors</th>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" </thead>\n",
|
||||||
|
" <tbody>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>0</th>\n",
|
||||||
|
" <td>2023-07-07</td>\n",
|
||||||
|
" <td>Atos IT-Dienstleistung und Beratung GmbH</td>\n",
|
||||||
|
" <td><div class=\"publication_container\">\\n <div cla...</td>\n",
|
||||||
|
" <td>2021</td>\n",
|
||||||
|
" <td>[]</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>2</th>\n",
|
||||||
|
" <td>2023-05-10</td>\n",
|
||||||
|
" <td>Atos IT-Dienstleistung und Beratung GmbH</td>\n",
|
||||||
|
" <td><div class=\"publication_container\">\\n <div cla...</td>\n",
|
||||||
|
" <td>2021</td>\n",
|
||||||
|
" <td>[Auditor(name='Eckhard Lewe', company='Grant T...</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>4</th>\n",
|
||||||
|
" <td>2022-03-25</td>\n",
|
||||||
|
" <td>Atos IT-Dienstleistung und Beratung GmbH</td>\n",
|
||||||
|
" <td><div class=\"publication_container\">\\n <div cla...</td>\n",
|
||||||
|
" <td>2020</td>\n",
|
||||||
|
" <td>[Auditor(name='Eckhard Lewe', company='Warth &...</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>5</th>\n",
|
||||||
|
" <td>2021-03-11</td>\n",
|
||||||
|
" <td>Atos IT-Dienstleistung und Beratung GmbH</td>\n",
|
||||||
|
" <td><div class=\"publication_container\">\\n <div cla...</td>\n",
|
||||||
|
" <td>2019</td>\n",
|
||||||
|
" <td>[Auditor(name='Eckhard Lewe', company='Warth &...</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>6</th>\n",
|
||||||
|
" <td>2020-03-24</td>\n",
|
||||||
|
" <td>Atos IT-Dienstleistung und Beratung GmbH</td>\n",
|
||||||
|
" <td><div class=\"publication_container\">\\n <div cla...</td>\n",
|
||||||
|
" <td>2018</td>\n",
|
||||||
|
" <td>[Auditor(name='Ulrich Diersch', company='Warth...</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" </tbody>\n",
|
||||||
|
"</table>\n",
|
||||||
|
"</div>"
|
||||||
|
],
|
||||||
|
"text/plain": [
|
||||||
|
" date company \\\n",
|
||||||
|
"0 2023-07-07 Atos IT-Dienstleistung und Beratung GmbH \n",
|
||||||
|
"2 2023-05-10 Atos IT-Dienstleistung und Beratung GmbH \n",
|
||||||
|
"4 2022-03-25 Atos IT-Dienstleistung und Beratung GmbH \n",
|
||||||
|
"5 2021-03-11 Atos IT-Dienstleistung und Beratung GmbH \n",
|
||||||
|
"6 2020-03-24 Atos IT-Dienstleistung und Beratung GmbH \n",
|
||||||
|
"\n",
|
||||||
|
" raw_report jahr \\\n",
|
||||||
|
"0 <div class=\"publication_container\">\\n <div cla... 2021 \n",
|
||||||
|
"2 <div class=\"publication_container\">\\n <div cla... 2021 \n",
|
||||||
|
"4 <div class=\"publication_container\">\\n <div cla... 2020 \n",
|
||||||
|
"5 <div class=\"publication_container\">\\n <div cla... 2019 \n",
|
||||||
|
"6 <div class=\"publication_container\">\\n <div cla... 2018 \n",
|
||||||
|
"\n",
|
||||||
|
" auditors \n",
|
||||||
|
"0 [] \n",
|
||||||
|
"2 [Auditor(name='Eckhard Lewe', company='Grant T... \n",
|
||||||
|
"4 [Auditor(name='Eckhard Lewe', company='Warth &... \n",
|
||||||
|
"5 [Auditor(name='Eckhard Lewe', company='Warth &... \n",
|
||||||
|
"6 [Auditor(name='Ulrich Diersch', company='Warth... "
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 2,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"import pandas as pd\n",
|
"import pandas as pd\n",
|
||||||
"from deutschland.bundesanzeiger import Bundesanzeiger"
|
"\n",
|
||||||
]
|
"from aki_prj23_transparenzregister.utils.data_extraction.bundesanzeiger import (\n",
|
||||||
},
|
" Bundesanzeiger,\n",
|
||||||
{
|
")\n",
|
||||||
"cell_type": "code",
|
"\n",
|
||||||
"execution_count": 33,
|
"ba_wrapper = Bundesanzeiger()\n",
|
||||||
"metadata": {},
|
"df_reports = ba_wrapper.get_information(\"Atos IT-Dienstleistung und Beratung GmbH\")\n",
|
||||||
"outputs": [
|
|
||||||
{
|
|
||||||
"name": "stdout",
|
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
"dict_keys(['c1051233030a8e0232523052fd4a2310', '57d129e6fd7505d567fa13919e5e6bdd'])\n"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
|
||||||
"ba = Bundesanzeiger()\n",
|
|
||||||
"reports = ba.get_reports(\n",
|
|
||||||
" \"Volkswagen Economy Service Erdle Bernhard Erdle GmbH\"\n",
|
|
||||||
") # \"Atos IT-Dienstleistung und Beratung GmbH\")\n",
|
|
||||||
"print(reports.keys())"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 34,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"report_contents = []\n",
|
|
||||||
"for key in reports.keys():\n",
|
|
||||||
" report_contents.append(reports[key])"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 35,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [
|
|
||||||
{
|
|
||||||
"data": {
|
|
||||||
"text/html": [
|
|
||||||
"<div>\n",
|
|
||||||
"<style scoped>\n",
|
|
||||||
" .dataframe tbody tr th:only-of-type {\n",
|
|
||||||
" vertical-align: middle;\n",
|
|
||||||
" }\n",
|
|
||||||
"\n",
|
|
||||||
" .dataframe tbody tr th {\n",
|
|
||||||
" vertical-align: top;\n",
|
|
||||||
" }\n",
|
|
||||||
"\n",
|
|
||||||
" .dataframe thead th {\n",
|
|
||||||
" text-align: right;\n",
|
|
||||||
" }\n",
|
|
||||||
"</style>\n",
|
|
||||||
"<table border=\"1\" class=\"dataframe\">\n",
|
|
||||||
" <thead>\n",
|
|
||||||
" <tr style=\"text-align: right;\">\n",
|
|
||||||
" <th></th>\n",
|
|
||||||
" <th>date</th>\n",
|
|
||||||
" <th>name</th>\n",
|
|
||||||
" <th>company</th>\n",
|
|
||||||
" <th>report</th>\n",
|
|
||||||
" <th>raw_report</th>\n",
|
|
||||||
" </tr>\n",
|
|
||||||
" </thead>\n",
|
|
||||||
" <tbody>\n",
|
|
||||||
" <tr>\n",
|
|
||||||
" <th>0</th>\n",
|
|
||||||
" <td>2023-05-25</td>\n",
|
|
||||||
" <td>Jahresabschluss zum Geschäftsjahr vom 01.01.20...</td>\n",
|
|
||||||
" <td>Volkswagen Economy Service Erdle Bernhard Erdl...</td>\n",
|
|
||||||
" <td>\\n\\n\\n\\n \\n\\n\\n\\n\\n\\n\\n\\nVolkswagen Economy Se...</td>\n",
|
|
||||||
" <td><div class=\"publication_container\">\\n <div cla...</td>\n",
|
|
||||||
" </tr>\n",
|
|
||||||
" <tr>\n",
|
|
||||||
" <th>1</th>\n",
|
|
||||||
" <td>2023-05-24</td>\n",
|
|
||||||
" <td>Jahresabschluss zum Geschäftsjahr vom 01.01.20...</td>\n",
|
|
||||||
" <td>Volkswagen Economy Service Erdle Bernhard Erdl...</td>\n",
|
|
||||||
" <td>\\n\\n\\n\\n \\n\\n\\n\\n\\n\\n\\n\\nVolkswagen Economy Se...</td>\n",
|
|
||||||
" <td><div class=\"publication_container\">\\n <div cla...</td>\n",
|
|
||||||
" </tr>\n",
|
|
||||||
" </tbody>\n",
|
|
||||||
"</table>\n",
|
|
||||||
"</div>"
|
|
||||||
],
|
|
||||||
"text/plain": [
|
|
||||||
" date name \\\n",
|
|
||||||
"0 2023-05-25 Jahresabschluss zum Geschäftsjahr vom 01.01.20... \n",
|
|
||||||
"1 2023-05-24 Jahresabschluss zum Geschäftsjahr vom 01.01.20... \n",
|
|
||||||
"\n",
|
|
||||||
" company \\\n",
|
|
||||||
"0 Volkswagen Economy Service Erdle Bernhard Erdl... \n",
|
|
||||||
"1 Volkswagen Economy Service Erdle Bernhard Erdl... \n",
|
|
||||||
"\n",
|
|
||||||
" report \\\n",
|
|
||||||
"0 \\n\\n\\n\\n \\n\\n\\n\\n\\n\\n\\n\\nVolkswagen Economy Se... \n",
|
|
||||||
"1 \\n\\n\\n\\n \\n\\n\\n\\n\\n\\n\\n\\nVolkswagen Economy Se... \n",
|
|
||||||
"\n",
|
|
||||||
" raw_report \n",
|
|
||||||
"0 <div class=\"publication_container\">\\n <div cla... \n",
|
|
||||||
"1 <div class=\"publication_container\">\\n <div cla... "
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"execution_count": 35,
|
|
||||||
"metadata": {},
|
|
||||||
"output_type": "execute_result"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
|
||||||
"df_reports = pd.DataFrame(report_contents)\n",
|
|
||||||
"df_reports.head()"
|
"df_reports.head()"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 36,
|
"execution_count": 9,
|
||||||
"metadata": {},
|
|
||||||
"outputs": [
|
|
||||||
{
|
|
||||||
"data": {
|
|
||||||
"text/html": [
|
|
||||||
"<div>\n",
|
|
||||||
"<style scoped>\n",
|
|
||||||
" .dataframe tbody tr th:only-of-type {\n",
|
|
||||||
" vertical-align: middle;\n",
|
|
||||||
" }\n",
|
|
||||||
"\n",
|
|
||||||
" .dataframe tbody tr th {\n",
|
|
||||||
" vertical-align: top;\n",
|
|
||||||
" }\n",
|
|
||||||
"\n",
|
|
||||||
" .dataframe thead th {\n",
|
|
||||||
" text-align: right;\n",
|
|
||||||
" }\n",
|
|
||||||
"</style>\n",
|
|
||||||
"<table border=\"1\" class=\"dataframe\">\n",
|
|
||||||
" <thead>\n",
|
|
||||||
" <tr style=\"text-align: right;\">\n",
|
|
||||||
" <th></th>\n",
|
|
||||||
" <th>date</th>\n",
|
|
||||||
" <th>name</th>\n",
|
|
||||||
" <th>company</th>\n",
|
|
||||||
" <th>report</th>\n",
|
|
||||||
" <th>raw_report</th>\n",
|
|
||||||
" <th>type</th>\n",
|
|
||||||
" </tr>\n",
|
|
||||||
" </thead>\n",
|
|
||||||
" <tbody>\n",
|
|
||||||
" <tr>\n",
|
|
||||||
" <th>0</th>\n",
|
|
||||||
" <td>2023-05-25</td>\n",
|
|
||||||
" <td>Jahresabschluss zum Geschäftsjahr vom 01.01.20...</td>\n",
|
|
||||||
" <td>Volkswagen Economy Service Erdle Bernhard Erdl...</td>\n",
|
|
||||||
" <td>\\n\\n\\n\\n \\n\\n\\n\\n\\n\\n\\n\\nVolkswagen Economy Se...</td>\n",
|
|
||||||
" <td><div class=\"publication_container\">\\n <div cla...</td>\n",
|
|
||||||
" <td>Jahresabschluss</td>\n",
|
|
||||||
" </tr>\n",
|
|
||||||
" <tr>\n",
|
|
||||||
" <th>1</th>\n",
|
|
||||||
" <td>2023-05-24</td>\n",
|
|
||||||
" <td>Jahresabschluss zum Geschäftsjahr vom 01.01.20...</td>\n",
|
|
||||||
" <td>Volkswagen Economy Service Erdle Bernhard Erdl...</td>\n",
|
|
||||||
" <td>\\n\\n\\n\\n \\n\\n\\n\\n\\n\\n\\n\\nVolkswagen Economy Se...</td>\n",
|
|
||||||
" <td><div class=\"publication_container\">\\n <div cla...</td>\n",
|
|
||||||
" <td>Jahresabschluss</td>\n",
|
|
||||||
" </tr>\n",
|
|
||||||
" </tbody>\n",
|
|
||||||
"</table>\n",
|
|
||||||
"</div>"
|
|
||||||
],
|
|
||||||
"text/plain": [
|
|
||||||
" date name \\\n",
|
|
||||||
"0 2023-05-25 Jahresabschluss zum Geschäftsjahr vom 01.01.20... \n",
|
|
||||||
"1 2023-05-24 Jahresabschluss zum Geschäftsjahr vom 01.01.20... \n",
|
|
||||||
"\n",
|
|
||||||
" company \\\n",
|
|
||||||
"0 Volkswagen Economy Service Erdle Bernhard Erdl... \n",
|
|
||||||
"1 Volkswagen Economy Service Erdle Bernhard Erdl... \n",
|
|
||||||
"\n",
|
|
||||||
" report \\\n",
|
|
||||||
"0 \\n\\n\\n\\n \\n\\n\\n\\n\\n\\n\\n\\nVolkswagen Economy Se... \n",
|
|
||||||
"1 \\n\\n\\n\\n \\n\\n\\n\\n\\n\\n\\n\\nVolkswagen Economy Se... \n",
|
|
||||||
"\n",
|
|
||||||
" raw_report type \n",
|
|
||||||
"0 <div class=\"publication_container\">\\n <div cla... Jahresabschluss \n",
|
|
||||||
"1 <div class=\"publication_container\">\\n <div cla... Jahresabschluss "
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"execution_count": 36,
|
|
||||||
"metadata": {},
|
|
||||||
"output_type": "execute_result"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
|
||||||
"df_reports[\"type\"] = df_reports.name.apply(lambda name: name.split(\" \")[0])\n",
|
|
||||||
"df_reports.head()"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 37,
|
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
@ -260,13 +169,20 @@
|
|||||||
" <tbody>\n",
|
" <tbody>\n",
|
||||||
" <tr>\n",
|
" <tr>\n",
|
||||||
" <th>0</th>\n",
|
" <th>0</th>\n",
|
||||||
|
" <td>2023-07-11</td>\n",
|
||||||
|
" <td>Volkswagen Economy Service Erdle Bernhard Erdl...</td>\n",
|
||||||
|
" <td><div class=\"publication_container\">\\n <div cla...</td>\n",
|
||||||
|
" <td>2021</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>1</th>\n",
|
||||||
" <td>2023-05-25</td>\n",
|
" <td>2023-05-25</td>\n",
|
||||||
" <td>Volkswagen Economy Service Erdle Bernhard Erdl...</td>\n",
|
" <td>Volkswagen Economy Service Erdle Bernhard Erdl...</td>\n",
|
||||||
" <td><div class=\"publication_container\">\\n <div cla...</td>\n",
|
" <td><div class=\"publication_container\">\\n <div cla...</td>\n",
|
||||||
" <td>2020</td>\n",
|
" <td>2020</td>\n",
|
||||||
" </tr>\n",
|
" </tr>\n",
|
||||||
" <tr>\n",
|
" <tr>\n",
|
||||||
" <th>1</th>\n",
|
" <th>2</th>\n",
|
||||||
" <td>2023-05-24</td>\n",
|
" <td>2023-05-24</td>\n",
|
||||||
" <td>Volkswagen Economy Service Erdle Bernhard Erdl...</td>\n",
|
" <td>Volkswagen Economy Service Erdle Bernhard Erdl...</td>\n",
|
||||||
" <td><div class=\"publication_container\">\\n <div cla...</td>\n",
|
" <td><div class=\"publication_container\">\\n <div cla...</td>\n",
|
||||||
@ -278,15 +194,17 @@
|
|||||||
],
|
],
|
||||||
"text/plain": [
|
"text/plain": [
|
||||||
" date company \\\n",
|
" date company \\\n",
|
||||||
"0 2023-05-25 Volkswagen Economy Service Erdle Bernhard Erdl... \n",
|
"0 2023-07-11 Volkswagen Economy Service Erdle Bernhard Erdl... \n",
|
||||||
"1 2023-05-24 Volkswagen Economy Service Erdle Bernhard Erdl... \n",
|
"1 2023-05-25 Volkswagen Economy Service Erdle Bernhard Erdl... \n",
|
||||||
|
"2 2023-05-24 Volkswagen Economy Service Erdle Bernhard Erdl... \n",
|
||||||
"\n",
|
"\n",
|
||||||
" raw_report jahr \n",
|
" raw_report jahr \n",
|
||||||
"0 <div class=\"publication_container\">\\n <div cla... 2020 \n",
|
"0 <div class=\"publication_container\">\\n <div cla... 2021 \n",
|
||||||
"1 <div class=\"publication_container\">\\n <div cla... 2019 "
|
"1 <div class=\"publication_container\">\\n <div cla... 2020 \n",
|
||||||
|
"2 <div class=\"publication_container\">\\n <div cla... 2019 "
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"execution_count": 37,
|
"execution_count": 9,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"output_type": "execute_result"
|
"output_type": "execute_result"
|
||||||
}
|
}
|
||||||
@ -310,7 +228,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 38,
|
"execution_count": 10,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
@ -320,7 +238,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 39,
|
"execution_count": 11,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
@ -338,18 +256,12 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 40,
|
"execution_count": 18,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"import re\n",
|
"import re\n",
|
||||||
"from dataclasses import dataclass\n",
|
"from aki_prj23_transparenzregister.models.auditor import Auditor\n",
|
||||||
"\n",
|
|
||||||
"\n",
|
|
||||||
"@dataclass\n",
|
|
||||||
"class Auditor:\n",
|
|
||||||
" name: str\n",
|
|
||||||
" company: str\n",
|
|
||||||
"\n",
|
"\n",
|
||||||
"\n",
|
"\n",
|
||||||
"def extract_auditor_company(report: str) -> str:\n",
|
"def extract_auditor_company(report: str) -> str:\n",
|
||||||
@ -374,7 +286,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 41,
|
"execution_count": 13,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
@ -383,7 +295,7 @@
|
|||||||
"[]"
|
"[]"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"execution_count": 41,
|
"execution_count": 13,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"output_type": "execute_result"
|
"output_type": "execute_result"
|
||||||
}
|
}
|
||||||
@ -418,16 +330,16 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 42,
|
"execution_count": 14,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
"data": {
|
"data": {
|
||||||
"text/plain": [
|
"text/plain": [
|
||||||
"{'net_income': 23484.67, 'equity': 65083.84, 'current_assets': 357613.61}"
|
"{'net_income': 100238.5, 'equity': 165322.34, 'current_assets': 435344.07}"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"execution_count": 42,
|
"execution_count": 14,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"output_type": "execute_result"
|
"output_type": "execute_result"
|
||||||
}
|
}
|
||||||
@ -502,7 +414,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 43,
|
"execution_count": 15,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
@ -518,7 +430,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 46,
|
"execution_count": 16,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
@ -526,24 +438,30 @@
|
|||||||
"output_type": "stream",
|
"output_type": "stream",
|
||||||
"text": [
|
"text": [
|
||||||
"MultiIndex([('Aktiva', 'Unnamed: 0_level_1'),\n",
|
"MultiIndex([('Aktiva', 'Unnamed: 0_level_1'),\n",
|
||||||
" ('Aktiva', '31.12.2020 EUR'),\n",
|
" ('Aktiva', '31.12.2021 EUR'),\n",
|
||||||
" ('Aktiva', '31.12.2019 EUR')],\n",
|
" ('Aktiva', '31.12.2020 EUR')],\n",
|
||||||
" )\n",
|
" )\n",
|
||||||
"Aktiva Unnamed: 0_level_1 object\n",
|
"Aktiva Unnamed: 0_level_1 object\n",
|
||||||
|
" 31.12.2021 EUR object\n",
|
||||||
" 31.12.2020 EUR object\n",
|
" 31.12.2020 EUR object\n",
|
||||||
" 31.12.2019 EUR object\n",
|
|
||||||
"dtype: object\n",
|
"dtype: object\n",
|
||||||
"MultiIndex([('Passiva', 'Unnamed: 0_level_1'),\n",
|
"MultiIndex([('Passiva', 'Unnamed: 0_level_1'),\n",
|
||||||
" ('Passiva', '31.12.2020 EUR'),\n",
|
" ('Passiva', '31.12.2021 EUR'),\n",
|
||||||
" ('Passiva', '31.12.2019 EUR')],\n",
|
" ('Passiva', '31.12.2020 EUR')],\n",
|
||||||
" )\n",
|
" )\n",
|
||||||
"Passiva Unnamed: 0_level_1 object\n",
|
"Passiva Unnamed: 0_level_1 object\n",
|
||||||
|
" 31.12.2021 EUR object\n",
|
||||||
" 31.12.2020 EUR object\n",
|
" 31.12.2020 EUR object\n",
|
||||||
" 31.12.2019 EUR object\n",
|
|
||||||
"dtype: object\n",
|
"dtype: object\n",
|
||||||
"Index(['Angaben zur Identifikation der Gesellschaft laut Registergericht', 'Angaben zur Identifikation der Gesellschaft laut Registergericht.1'], dtype='object')\n",
|
"Index(['Angaben zur Identifikation der Gesellschaft laut Registergericht', 'Angaben zur Identifikation der Gesellschaft laut Registergericht.1'], dtype='object')\n",
|
||||||
"Angaben zur Identifikation der Gesellschaft laut Registergericht object\n",
|
"Angaben zur Identifikation der Gesellschaft laut Registergericht object\n",
|
||||||
"Angaben zur Identifikation der Gesellschaft laut Registergericht.1 object\n",
|
"Angaben zur Identifikation der Gesellschaft laut Registergericht.1 object\n",
|
||||||
|
"dtype: object\n",
|
||||||
|
"MultiIndex([('Kreditentwicklung', 'Unnamed: 0_level_1'),\n",
|
||||||
|
" ( 'Betrag', 'EUR')],\n",
|
||||||
|
" )\n",
|
||||||
|
"Kreditentwicklung Unnamed: 0_level_1 object\n",
|
||||||
|
"Betrag EUR object\n",
|
||||||
"dtype: object\n"
|
"dtype: object\n"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@ -553,7 +471,7 @@
|
|||||||
"{}"
|
"{}"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"execution_count": 46,
|
"execution_count": 16,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"output_type": "execute_result"
|
"output_type": "execute_result"
|
||||||
}
|
}
|
||||||
@ -574,19 +492,46 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 45,
|
"execution_count": 22,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
"ename": "KeyError",
|
"data": {
|
||||||
"evalue": "'Passiva'",
|
"text/html": [
|
||||||
"output_type": "error",
|
"<div>\n",
|
||||||
"traceback": [
|
"<style scoped>\n",
|
||||||
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
|
" .dataframe tbody tr th:only-of-type {\n",
|
||||||
"\u001b[1;31mKeyError\u001b[0m Traceback (most recent call last)",
|
" vertical-align: middle;\n",
|
||||||
"\u001b[1;32mc:\\Users\\trist\\Documents\\Code\\M.Sc\\aki_prj23_transparenzregister\\Jupyter\\API-tests\\Bundesanzeiger\\notebook.ipynb Cell 21\u001b[0m in \u001b[0;36m1\n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Users/trist/Documents/Code/M.Sc/aki_prj23_transparenzregister/Jupyter/API-tests/Bundesanzeiger/notebook.ipynb#X26sZmlsZQ%3D%3D?line=10'>11</a>\u001b[0m \u001b[39mreturn\u001b[39;00m result\n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Users/trist/Documents/Code/M.Sc/aki_prj23_transparenzregister/Jupyter/API-tests/Bundesanzeiger/notebook.ipynb#X26sZmlsZQ%3D%3D?line=13'>14</a>\u001b[0m bilanz \u001b[39m=\u001b[39m get_bilanz(sample_report)\n\u001b[1;32m---> <a href='vscode-notebook-cell:/c%3A/Users/trist/Documents/Code/M.Sc/aki_prj23_transparenzregister/Jupyter/API-tests/Bundesanzeiger/notebook.ipynb#X26sZmlsZQ%3D%3D?line=14'>15</a>\u001b[0m bilanz[\u001b[39m\"\u001b[39;49m\u001b[39mPassiva\u001b[39;49m\u001b[39m\"\u001b[39;49m]\u001b[39m.\u001b[39mhead()\n",
|
" }\n",
|
||||||
"\u001b[1;31mKeyError\u001b[0m: 'Passiva'"
|
"\n",
|
||||||
]
|
" .dataframe tbody tr th {\n",
|
||||||
|
" vertical-align: top;\n",
|
||||||
|
" }\n",
|
||||||
|
"\n",
|
||||||
|
" .dataframe thead th {\n",
|
||||||
|
" text-align: right;\n",
|
||||||
|
" }\n",
|
||||||
|
"</style>\n",
|
||||||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||||||
|
" <thead>\n",
|
||||||
|
" <tr style=\"text-align: right;\">\n",
|
||||||
|
" <th></th>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" </thead>\n",
|
||||||
|
" <tbody>\n",
|
||||||
|
" </tbody>\n",
|
||||||
|
"</table>\n",
|
||||||
|
"</div>"
|
||||||
|
],
|
||||||
|
"text/plain": [
|
||||||
|
"Empty DataFrame\n",
|
||||||
|
"Columns: []\n",
|
||||||
|
"Index: []"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 22,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
@ -600,6 +545,8 @@
|
|||||||
" StringIO(str(tag.findNext(\"table\", {\"class\": \"std_table\"})))\n",
|
" StringIO(str(tag.findNext(\"table\", {\"class\": \"std_table\"})))\n",
|
||||||
" )[0]\n",
|
" )[0]\n",
|
||||||
" result[pos] = pos_results\n",
|
" result[pos] = pos_results\n",
|
||||||
|
" else:\n",
|
||||||
|
" result[pos] = pd.DataFrame([])\n",
|
||||||
" return result\n",
|
" return result\n",
|
||||||
"\n",
|
"\n",
|
||||||
"\n",
|
"\n",
|
||||||
@ -609,58 +556,25 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": 23,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
"name": "stdout",
|
"name": "stdout",
|
||||||
"output_type": "stream",
|
"output_type": "stream",
|
||||||
"text": [
|
"text": [
|
||||||
"Int64Index([0, 1], dtype='int64')\n",
|
"MultiIndex([('Aktiva', 'Unnamed: 0_level_1'),\n",
|
||||||
"Index(['Unnamed: 0', 'Anhang', '31.12.2021 TEUR', 'Vorjahr TEUR'], dtype='object')\n",
|
" ('Aktiva', '31.12.2021 EUR'),\n",
|
||||||
"Index(['Unnamed: 0', 'Anhang', '2021 TEUR', 'Vorjahr TEUR'], dtype='object')\n",
|
" ('Aktiva', '31.12.2020 EUR')],\n",
|
||||||
"Index(['Aufgliederung nach Tätigkeitsbereichen', '2021 TEUR',\n",
|
|
||||||
" 'Vorjahr TEUR'],\n",
|
|
||||||
" dtype='object')\n",
|
|
||||||
"Index(['Aufgliederung nach Inland und Ausland', '2021 TEUR', 'Vorjahr TEUR'], dtype='object')\n",
|
|
||||||
"Index(['Unnamed: 0', '31.12.2021 TEUR', 'Vorjahr TEUR'], dtype='object')\n",
|
|
||||||
"Index(['Unnamed: 0', '31.12.2021 TEUR', 'Vorjahr TEUR'], dtype='object')\n",
|
|
||||||
"Index(['Unnamed: 0', '31.12.2021'], dtype='object')\n",
|
|
||||||
"Index(['Unnamed: 0', 'TEUR'], dtype='object')\n",
|
|
||||||
"Index(['Unnamed: 0', 'TEUR'], dtype='object')\n",
|
|
||||||
"Index(['Unnamed: 0', 'TEUR'], dtype='object')\n",
|
|
||||||
"Int64Index([0, 1, 2], dtype='int64')\n",
|
|
||||||
"Index(['Unnamed: 0', 'TEUR'], dtype='object')\n",
|
|
||||||
"Index(['Unnamed: 0', '31.12.2021 TEUR', 'Vorjahr TEUR'], dtype='object')\n",
|
|
||||||
"Index(['Unnamed: 0', '2021 Anzahl MA', 'Vorjahr Anzahl MA'], dtype='object')\n",
|
|
||||||
"MultiIndex([('Art des Geschäfts', 'Unnamed: 0_level_1'),\n",
|
|
||||||
" ('Art der Beziehung', 'Gesellschafterin TEUR'),\n",
|
|
||||||
" ('Art der Beziehung', 'Verbundene Unternehmen TEUR')],\n",
|
|
||||||
" )\n",
|
" )\n",
|
||||||
"Int64Index([0, 1], dtype='int64')\n",
|
"MultiIndex([('Passiva', 'Unnamed: 0_level_1'),\n",
|
||||||
"MultiIndex([( 'Unnamed: 0_level_0', ...),\n",
|
" ('Passiva', '31.12.2021 EUR'),\n",
|
||||||
" ('Anschaffungs- oder Herstellungskosten', ...),\n",
|
" ('Passiva', '31.12.2020 EUR')],\n",
|
||||||
" ('Anschaffungs- oder Herstellungskosten', ...),\n",
|
|
||||||
" ('Anschaffungs- oder Herstellungskosten', ...),\n",
|
|
||||||
" ('Anschaffungs- oder Herstellungskosten', ...)],\n",
|
|
||||||
" )\n",
|
" )\n",
|
||||||
"MultiIndex([('Unnamed: 0_level_0', ...),\n",
|
"Index(['Angaben zur Identifikation der Gesellschaft laut Registergericht', 'Angaben zur Identifikation der Gesellschaft laut Registergericht.1'], dtype='object')\n",
|
||||||
" ( 'Abschreibungen', ...),\n",
|
"MultiIndex([('Kreditentwicklung', 'Unnamed: 0_level_1'),\n",
|
||||||
" ( 'Abschreibungen', ...),\n",
|
" ( 'Betrag', 'EUR')],\n",
|
||||||
" ( 'Abschreibungen', ...),\n",
|
" )\n"
|
||||||
" ( 'Abschreibungen', ...)],\n",
|
|
||||||
" )\n",
|
|
||||||
"MultiIndex([('Unnamed: 0_level_0', 'Unnamed: 0_level_1'),\n",
|
|
||||||
" ( 'Buchwerte', 'Stand 31.12.2021 EUR'),\n",
|
|
||||||
" ( 'Buchwerte', 'Stand 31.12.2020 EUR')],\n",
|
|
||||||
" )\n",
|
|
||||||
"Index(['Nichtfinanzieller Leistungsindikator', 'Unnamed: 1', '2021', '2020',\n",
|
|
||||||
" '2019'],\n",
|
|
||||||
" dtype='object')\n",
|
|
||||||
"Index(['Gewinn- und Verlustrechnung', '2021 TEUR', 'Vorjahr TEUR',\n",
|
|
||||||
" 'Veränderung TEUR'],\n",
|
|
||||||
" dtype='object')\n",
|
|
||||||
"Index(['Bilanz', '31.12.2021 TEUR', 'Vorjahr TEUR', 'Veränderung TEUR'], dtype='object')\n"
|
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
@ -698,7 +612,7 @@
|
|||||||
"name": "python",
|
"name": "python",
|
||||||
"nbconvert_exporter": "python",
|
"nbconvert_exporter": "python",
|
||||||
"pygments_lexer": "ipython3",
|
"pygments_lexer": "ipython3",
|
||||||
"version": "3.10.7"
|
"version": "3.11.3"
|
||||||
},
|
},
|
||||||
"orig_nbformat": 4
|
"orig_nbformat": 4
|
||||||
},
|
},
|
||||||
|
1505
poetry.lock
generated
1505
poetry.lock
generated
File diff suppressed because it is too large
Load Diff
@ -38,6 +38,7 @@ version = "0.1.0"
|
|||||||
SQLAlchemy = {version = "^1.4.46", extras = ["mypy"]}
|
SQLAlchemy = {version = "^1.4.46", extras = ["mypy"]}
|
||||||
dash = "^2.11.1"
|
dash = "^2.11.1"
|
||||||
dash-bootstrap-components = "^1.4.2"
|
dash-bootstrap-components = "^1.4.2"
|
||||||
|
deutschland = {git = "https://github.com/TrisNol/deutschland.git", branch = "hotfix/python-3.11-support"}
|
||||||
loguru = "^0.7.0"
|
loguru = "^0.7.0"
|
||||||
matplotlib = "^3.7.1"
|
matplotlib = "^3.7.1"
|
||||||
plotly = "^5.14.1"
|
plotly = "^5.14.1"
|
||||||
@ -48,6 +49,10 @@ seaborn = "^0.12.2"
|
|||||||
selenium = "^4.10.0"
|
selenium = "^4.10.0"
|
||||||
tqdm = "^4.65.0"
|
tqdm = "^4.65.0"
|
||||||
|
|
||||||
|
# TODO: Add dependent libraries (e.g., deutschland, plotly, etc.)
|
||||||
|
[tool.poetry.extras]
|
||||||
|
ingest = ["selenium"]
|
||||||
|
|
||||||
[tool.poetry.group.develop.dependencies]
|
[tool.poetry.group.develop.dependencies]
|
||||||
black = {extras = ["jupyter"], version = "^23.3.0"}
|
black = {extras = ["jupyter"], version = "^23.3.0"}
|
||||||
jupyterlab = "^4.0.0"
|
jupyterlab = "^4.0.0"
|
||||||
|
1
src/aki_prj23_transparenzregister/apps/__init__.py
Normal file
1
src/aki_prj23_transparenzregister/apps/__init__.py
Normal file
@ -0,0 +1 @@
|
|||||||
|
"""Main applications."""
|
@ -0,0 +1,63 @@
|
|||||||
|
"""Add financial data to companies."""
|
||||||
|
import typing
|
||||||
|
|
||||||
|
from aki_prj23_transparenzregister.config.config_providers import JsonFileConfigProvider
|
||||||
|
from aki_prj23_transparenzregister.utils.data_extraction.bundesanzeiger import (
|
||||||
|
Bundesanzeiger,
|
||||||
|
)
|
||||||
|
from aki_prj23_transparenzregister.utils.mongo.company_mongo_service import (
|
||||||
|
CompanyMongoService,
|
||||||
|
)
|
||||||
|
from aki_prj23_transparenzregister.utils.mongo.connector import MongoConnector
|
||||||
|
|
||||||
|
|
||||||
|
def work(company: typing.Any, company_service: CompanyMongoService) -> None:
    """Fetch the yearly results of a single company and hand them to the service.

    Args:
        company (dict): Company to process; its ``name``, ``location.city`` and
            ``_id`` entries are read.
        company_service (CompanyMongoService): Interface to the Company
            collection on MongoDB; receives the collected results through
            ``add_yearly_results``.
    """
    reports = Bundesanzeiger().get_information(
        company["name"], company["location"]["city"]
    )
    # One entry per report row, keyed by the row's ``jahr`` value. When several
    # rows share the same ``jahr``, the last row wins.
    results_by_year = {
        report.jahr: {
            "auditors": [entry.to_dict() for entry in report.auditors],
            "financials": report.financial_results,
        }
        for _, report in reports.iterrows()
    }

    company_service.add_yearly_results(company["_id"], results_by_year)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    import concurrent.futures

    from loguru import logger

    config_provider = JsonFileConfigProvider("./secrets.json")

    mongo_connector = MongoConnector(config_provider.get_mongo_connection_string())
    company_service = CompanyMongoService(mongo_connector)

    num_threads = 25
    companies = company_service.get_where_no_financial_results()
    with concurrent.futures.ThreadPoolExecutor(max_workers=num_threads) as executor:
        # Submit one task per company and remember which company each future
        # belongs to, so failures can be attributed below.
        future_to_entry = {
            executor.submit(work, entry, company_service): entry for entry in companies
        }

        # Handle the tasks in the order in which they finish.
        for future in concurrent.futures.as_completed(future_to_entry):
            entry = future_to_entry[future]
            logger.info(entry["name"])
            try:
                # work() returns None; result() is called only so that an
                # exception raised inside the worker thread surfaces here.
                future.result()
            except Exception:
                # Top-level boundary: log the failing company together with the
                # traceback and continue with the remaining companies.
                logger.exception("Error processing entry {}", entry["name"])
|
18
src/aki_prj23_transparenzregister/models/auditor.py
Normal file
18
src/aki_prj23_transparenzregister/models/auditor.py
Normal file
@ -0,0 +1,18 @@
|
|||||||
|
"""Auditor model."""
|
||||||
|
from dataclasses import asdict, dataclass
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class Auditor:
|
||||||
|
"""Auditor."""
|
||||||
|
|
||||||
|
name: str
|
||||||
|
company: str | None
|
||||||
|
|
||||||
|
def to_dict(self) -> dict:
|
||||||
|
"""_summary_.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
dict: _description_
|
||||||
|
"""
|
||||||
|
return asdict(self)
|
@ -45,19 +45,61 @@ class CompanyRelationship(ABC):
|
|||||||
location: Location
|
location: Location
|
||||||
|
|
||||||
|
|
||||||
|
class FinancialKPIEnum(Enum):
    """Keys under which financial KPIs are stored.

    The comments name the German report terms each key corresponds to.
    """

    REVENUE = "revenue"  # "Umsatz" / "Erlöse"
    NET_INCOME = "net_income"  # "Jahresüberschuss" / "Nettoeinkommen"
    EBIT = "ebit"  # "Ebit"
    EBITDA = "ebitda"  # "Ebitda"
    GROSS_PROFIT = "gross_profit"  # "Bruttogewinn"
    OPERATING_PROFIT = "operating_profit"  # "Betriebsgewinn"
    ASSETS = "assets"  # "Bilanzsumme" (balance sheet total)
    LIABILITIES = "liabilities"  # "Gesamtverbindlichkeiten"
    EQUITY = "equity"  # "Eigenkapital"
    CURRENT_ASSETS = "current_assets"  # "Umlaufvermögen"
    CURRENT_LIABILITIES = "current_liabilities"  # "Kurzfristige Verbindlichkeiten"
    LONG_TERM_DEBT = "long_term_debt"  # "Langfristige Verbindlichkeiten"
    SHORT_TERM_DEBT = "short_term_debt"  # "Kurzfristige Verbindlichkeiten"
    CASH_AND_CASH_EQUIVALENTS = "cash_and_cash_equivalents"  # "Barmittel"
    DIVIDENDS = "dividends"  # "Dividende"
    CASH_FLOW = "cash_flow"  # "Cash Flow"
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class YearlyResult:
    """Financial KPIs of a company for a single year."""

    # Year the KPIs belong to.
    year: int
    # KPI values keyed by the KPI they represent.
    kpis: dict[FinancialKPIEnum, float]
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class Company:
|
class Company:
|
||||||
"""_summary_.
|
"""Company dataclass."""
|
||||||
|
|
||||||
Returns:
|
|
||||||
_type_: _description_
|
|
||||||
"""
|
|
||||||
|
|
||||||
id: CompanyID
|
id: CompanyID
|
||||||
location: Location
|
location: Location
|
||||||
name: str
|
name: str
|
||||||
last_update: str
|
last_update: str
|
||||||
relationships: list[CompanyRelationship]
|
relationships: list[CompanyRelationship]
|
||||||
|
# yearly_results: list[FinancialResults]
|
||||||
|
|
||||||
def to_dict(self) -> dict:
|
def to_dict(self) -> dict:
|
||||||
"""_summary_.
|
"""_summary_.
|
||||||
|
@ -0,0 +1 @@
|
|||||||
|
"""Everything regarding data extraction from various sources."""
|
@ -0,0 +1,183 @@
|
|||||||
|
"""Fetch data from Bundesanzeiger."""
|
||||||
|
import re
|
||||||
|
|
||||||
|
import pandas as pd
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
from deutschland.bundesanzeiger import Bundesanzeiger as Ba
|
||||||
|
|
||||||
|
from aki_prj23_transparenzregister.models.auditor import Auditor
|
||||||
|
from aki_prj23_transparenzregister.models.company import FinancialKPIEnum
|
||||||
|
|
||||||
|
pd.options.mode.chained_assignment = None # type: ignore
|
||||||
|
|
||||||
|
|
||||||
|
class Bundesanzeiger:
|
||||||
|
"""Bundesanzeiger wrapper to export relevant information."""
|
||||||
|
|
||||||
|
def get_information(self, company_name: str, city: str | None) -> pd.DataFrame:
|
||||||
|
"""Extract relevant information from all found yearly results for the given company.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
company_name (str): Name of the company to search for
|
||||||
|
city (Optional[str]): City where the company is registered
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
pd.DataFrame: Result
|
||||||
|
"""
|
||||||
|
ba = Ba()
|
||||||
|
# Get Bundesanzeiger entries for company
|
||||||
|
reports = ba.get_reports(f"{company_name} {city}")
|
||||||
|
# Transform to list of data
|
||||||
|
report_contents = []
|
||||||
|
for key in reports:
|
||||||
|
report_contents.append(reports[key])
|
||||||
|
|
||||||
|
if len(report_contents) == 0:
|
||||||
|
return pd.DataFrame()
|
||||||
|
# Transform to DataFrame and filter out irrelevant entries
|
||||||
|
df_data = pd.DataFrame(report_contents)
|
||||||
|
df_data = self.filter_reports(df_data)
|
||||||
|
|
||||||
|
# Filter out entries of different companies
|
||||||
|
df_data = df_data.loc[df_data.company == company_name]
|
||||||
|
|
||||||
|
# Add Auditor information
|
||||||
|
df_data["auditors"] = df_data.raw_report.apply(self.extract_auditors)
|
||||||
|
|
||||||
|
# Add Financial information
|
||||||
|
df_data["financial_results"] = df_data.raw_report.apply(
|
||||||
|
self.extract_financial_results
|
||||||
|
)
|
||||||
|
|
||||||
|
# Remove irrelevant columns
|
||||||
|
return df_data.drop(["raw_report"], axis=1)
|
||||||
|
|
||||||
|
def filter_reports(self, df_reports: pd.DataFrame) -> pd.DataFrame:
|
||||||
|
"""Returns only reports of type `Jahresabschluss` and extracts the year of the report.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
df_reports (pd.DataFrame): DataFrame containing list of reports
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
pd.DataFrame: Filtered and pruned DataFrame
|
||||||
|
"""
|
||||||
|
df_reports["type"] = df_reports.name.apply(lambda name: name.split(" ")[0])
|
||||||
|
df_reports = df_reports.loc[df_reports.type == "Jahresabschluss"]
|
||||||
|
df_reports["jahr"] = df_reports.name.apply(
|
||||||
|
lambda name: name.split(" ")[-1].split(".")[-1]
|
||||||
|
)
|
||||||
|
return df_reports.drop(["name", "report", "type"], axis=1)
|
||||||
|
|
||||||
|
def extract_auditor_company(self, report: str) -> str | None:
|
||||||
|
"""Extract the name of an auditor company from the given yearly results report.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
report (str): Yearly results report as raw string
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
str | None: Name of the auditor company if found, otherwise None
|
||||||
|
"""
|
||||||
|
soup = BeautifulSoup(report, features="html.parser")
|
||||||
|
temp = soup.find_all("b")
|
||||||
|
for elem in temp:
|
||||||
|
br = elem.findChildren("br")
|
||||||
|
if len(br) > 0:
|
||||||
|
return elem.text.split("\n")[1].strip()
|
||||||
|
return None
|
||||||
|
|
||||||
|
def extract_auditors(self, report: str) -> list:
|
||||||
|
"""Find the list of auditors involved in the given yearly results report.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
report (str): Yearly results report as raw string
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
list[Auditor]: List of Auditors found in the given report
|
||||||
|
"""
|
||||||
|
auditor_company = self.extract_auditor_company(report)
|
||||||
|
auditor_regex = r"[a-z A-Z,.'-]+, Wirtschaftsprüfer"
|
||||||
|
hits = re.findall(auditor_regex, report)
|
||||||
|
return [
|
||||||
|
Auditor(hit.replace(", Wirtschaftsprüfer", "").lstrip(), auditor_company)
|
||||||
|
for hit in hits
|
||||||
|
]
|
||||||
|
|
||||||
|
def __extract_kpis__(self, report: str) -> dict:
|
||||||
|
"""Source: https://github.com/bundesAPI/deutschland/pull/87/files#diff-f5b9db5384cf523fcc677056065041e7793bfc4da9cf74c4eebd6fab732739bd.
|
||||||
|
|
||||||
|
Extracts Key Performance Indicators (KPIs) from the financial reports.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
report (str): The yearly report as a parsed string
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
dict: A dictionary containing the extracted KPIs with their report hash as keys and KPIs as values.
|
||||||
|
"""
|
||||||
|
kpis = {}
|
||||||
|
|
||||||
|
# Define KPI patterns to search for
|
||||||
|
kpi_patterns = {
|
||||||
|
FinancialKPIEnum.REVENUE.value: r"(?:revenue|umsatz|erlöse)[:\s]*([\d,.]+[mmb]?)",
|
||||||
|
FinancialKPIEnum.NET_INCOME.value: r"(?:net income|jahresüberschuss|nettoeinkommen|Ergebnis nach Steuern)[:\s]*([\d,.]+[mmb]?)",
|
||||||
|
FinancialKPIEnum.EBIT.value: r"(?:ebit|operating income)[:\s]*([\d,.]+[mmb]?)",
|
||||||
|
FinancialKPIEnum.EBITDA.value: r"(?:ebitda)[:\s]*([\d,.]+[mmb]?)",
|
||||||
|
FinancialKPIEnum.GROSS_PROFIT.value: r"(?:gross profit|bruttogewinn)[:\s]*([\d,.]+[mmb]?)",
|
||||||
|
FinancialKPIEnum.OPERATING_PROFIT.value: r"(?:operating profit|betriebsgewinn)[:\s]*([\d,.]+[mmb]?)",
|
||||||
|
FinancialKPIEnum.ASSETS.value: r"(?:total assets|bilanzsumme)[:\s]*([\d,.]+[mmb]?)",
|
||||||
|
FinancialKPIEnum.LIABILITIES.value: r"(?:total liabilities|gesamtverbindlichkeiten)[:\s]*([\d,.]+[mmb]?)",
|
||||||
|
FinancialKPIEnum.EQUITY.value: r"(?:shareholders'? equity|eigenkapital)[:\s]*([\d,.]+[mmb]?)",
|
||||||
|
FinancialKPIEnum.CURRENT_ASSETS.value: r"(?:current assets|umlaufvermögen)[:\s]*([\d,.]+[mmb]?)",
|
||||||
|
FinancialKPIEnum.CURRENT_LIABILITIES.value: r"(?:current liabilities|kurzfristige verbindlichkeiten)[:\s]*([\d,.]+[mmb]?)",
|
||||||
|
FinancialKPIEnum.LONG_TERM_DEBT.value: r"(?:long[-\s]?term debt|langfristige verbindlichkeiten)[:\s]*([\d,.]+[mmb]?)",
|
||||||
|
FinancialKPIEnum.SHORT_TERM_DEBT.value: r"(?:short[-\s]?term debt|kurzfristige verbindlichkeiten)[:\s]*([\d,.]+[mmb]?)",
|
||||||
|
FinancialKPIEnum.CASH_AND_CASH_EQUIVALENTS.value: r"(?:cash (?:and cash equivalents)?|barmittel)[:\s]*([\d,.]+[mmb]?)",
|
||||||
|
FinancialKPIEnum.DIVIDENDS.value: r"(?:dividends?|dividende)[:\s]*([\d,.]+[mmb]?)",
|
||||||
|
FinancialKPIEnum.CASH_FLOW.value: r"(?:cash flow|cashflow|cash flow from operating activities)[:\s]*([\d,.]+[mmb]?)",
|
||||||
|
}
|
||||||
|
|
||||||
|
for kpi, pattern in kpi_patterns.items():
|
||||||
|
match = re.search(pattern, report, flags=re.IGNORECASE | re.UNICODE)
|
||||||
|
if match:
|
||||||
|
value = match.group(1)
|
||||||
|
|
||||||
|
# Clean and validate the extracted number
|
||||||
|
try:
|
||||||
|
if not value: # Check if value is empty
|
||||||
|
cleaned_value = None
|
||||||
|
else:
|
||||||
|
multiplier = 1
|
||||||
|
if value[-1].lower() == "m":
|
||||||
|
value = value[:-1]
|
||||||
|
multiplier = 1_000_000
|
||||||
|
elif value[-1].lower() == "b":
|
||||||
|
value = value[:-1]
|
||||||
|
multiplier = 1_000_000_000
|
||||||
|
|
||||||
|
# Remove commas after checking for multipliers
|
||||||
|
value = value.replace(".", "").replace(",", ".").strip()
|
||||||
|
cleaned_value = float(value) * multiplier
|
||||||
|
except ValueError:
|
||||||
|
cleaned_value = None
|
||||||
|
|
||||||
|
if cleaned_value is not None:
|
||||||
|
kpis[kpi] = cleaned_value
|
||||||
|
return kpis
|
||||||
|
|
||||||
|
def extract_financial_results(self, report: str) -> dict:
|
||||||
|
"""Extract financial data from given report.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
report (str): Report to be analyzed
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
dict: Results
|
||||||
|
"""
|
||||||
|
report_parsed = (
|
||||||
|
BeautifulSoup(report, features="html.parser").get_text().replace("\n", " ")
|
||||||
|
)
|
||||||
|
return self.__extract_kpis__(report_parsed)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Manual run: look up one sample company without a city; the result is discarded.
    Bundesanzeiger().get_information("Atos IT-Dienstleistung und Beratung GmbH", None)
|
@ -1,7 +1,10 @@
|
|||||||
"""CompanyMongoService."""
|
"""CompanyMongoService."""
|
||||||
from pymongo.results import InsertOneResult
|
from threading import Lock
|
||||||
|
|
||||||
from aki_prj23_transparenzregister.models.company import Company, CompanyID
|
from bson.objectid import ObjectId
|
||||||
|
from pymongo.results import InsertOneResult, UpdateResult
|
||||||
|
|
||||||
|
from aki_prj23_transparenzregister.models.company import Company
|
||||||
from aki_prj23_transparenzregister.utils.mongo.connector import MongoConnector
|
from aki_prj23_transparenzregister.utils.mongo.connector import MongoConnector
|
||||||
|
|
||||||
|
|
||||||
@ -15,6 +18,7 @@ class CompanyMongoService:
|
|||||||
connector (MongoConnector): _description_
|
connector (MongoConnector): _description_
|
||||||
"""
|
"""
|
||||||
self.collection = connector.database["companies"]
|
self.collection = connector.database["companies"]
|
||||||
|
self.lock = Lock() # Create a lock for synchronization
|
||||||
|
|
||||||
def get_all(self) -> list[Company]:
|
def get_all(self) -> list[Company]:
|
||||||
"""_summary_.
|
"""_summary_.
|
||||||
@ -22,10 +26,11 @@ class CompanyMongoService:
|
|||||||
Returns:
|
Returns:
|
||||||
list[Company]: _description_
|
list[Company]: _description_
|
||||||
"""
|
"""
|
||||||
result = self.collection.find()
|
with self.lock:
|
||||||
return list(result)
|
result = self.collection.find()
|
||||||
|
return list(result)
|
||||||
|
|
||||||
def get_by_id(self, id: CompanyID) -> Company | None:
|
def get_by_id(self, id: str) -> Company | None:
|
||||||
"""_summary_.
|
"""_summary_.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
@ -34,10 +39,46 @@ class CompanyMongoService:
|
|||||||
Returns:
|
Returns:
|
||||||
Company | None: _description_
|
Company | None: _description_
|
||||||
"""
|
"""
|
||||||
result = list(self.collection.find({"id": id}))
|
with self.lock:
|
||||||
if len(result) == 1:
|
result = list(self.collection.find({"id": id}))
|
||||||
return result[0]
|
if len(result) == 1:
|
||||||
return None
|
return result[0]
|
||||||
|
return None
|
||||||
|
|
||||||
|
def get_by_object_id(self, _id: str) -> dict | None:
    """Look up a single entry by its `_id`.

    Args:
        _id (str): ID, converted to an ObjectId for the query

    Returns:
        dict | None: The entry if exactly one was found, otherwise None
    """
    query = {"_id": ObjectId(_id)}
    with self.lock:
        matches = list(self.collection.find(query))
    return matches[0] if len(matches) == 1 else None
|
||||||
|
|
||||||
|
def get_where_no_financial_results(self) -> list[dict]:
    """Fetch every entry that lacks the `yearly_results` field.

    Returns:
        list[dict]: List of companies found
    """
    query = {"$or": [{"yearly_results": {"$exists": False}}]}
    with self.lock:
        cursor = self.collection.find(query)
        return list(cursor)
|
||||||
|
|
||||||
|
def get_where_yearly_results(self) -> list[dict]:
    """Fetch every company whose `yearly_results` compares greater than an empty document.

    These are the entries of interest for the data loader.

    Returns:
        list[dict]: List of companies
    """
    query = {"yearly_results": {"$gt": {}}}
    with self.lock:
        cursor = self.collection.find(query)
        return list(cursor)
|
||||||
|
|
||||||
def insert(self, company: Company) -> InsertOneResult:
|
def insert(self, company: Company) -> InsertOneResult:
|
||||||
"""_summary_.
|
"""_summary_.
|
||||||
@ -48,4 +89,20 @@ class CompanyMongoService:
|
|||||||
Returns:
|
Returns:
|
||||||
_type_: _description_
|
_type_: _description_
|
||||||
"""
|
"""
|
||||||
return self.collection.insert_one(company.to_dict())
|
with self.lock:
|
||||||
|
return self.collection.insert_one(company.to_dict())
|
||||||
|
|
||||||
|
def add_yearly_results(self, _id: str, yearly_results: dict) -> UpdateResult:
    """Set the `yearly_results` field of a Company entry.

    Args:
        _id (str): ID of the object, converted to an ObjectId for the query
        yearly_results (dict): Yearly results dictionary

    Returns:
        UpdateResult: Result of the update operation
    """
    selector = {"_id": ObjectId(_id)}
    change = {"$set": {"yearly_results": yearly_results}}
    with self.lock:
        return self.collection.update_one(selector, change)
|
||||||
|
36
tests/apps/enrich_company_financials_test.py
Normal file
36
tests/apps/enrich_company_financials_test.py
Normal file
@ -0,0 +1,36 @@
|
|||||||
|
"""Tests for the enrich_company_financials module."""
|
||||||
|
from unittest.mock import Mock, patch
|
||||||
|
|
||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
from aki_prj23_transparenzregister.apps import enrich_company_financials
|
||||||
|
from aki_prj23_transparenzregister.models.auditor import Auditor
|
||||||
|
|
||||||
|
|
||||||
|
def test_import_enrich_company_financials() -> None:
    """Smoke test: the enrich_company_financials module can be imported."""
    assert enrich_company_financials
|
||||||
|
|
||||||
|
|
||||||
|
@patch(
    "aki_prj23_transparenzregister.apps.enrich_company_financials.Bundesanzeiger.get_information"
)
@patch(
    "aki_prj23_transparenzregister.apps.enrich_company_financials.CompanyMongoService"
)
def test_work(mock_compnay_service: Mock, mock_bundesanzeiger: Mock) -> None:
    """Check that `work` stores the fetched yearly results, keyed by year, for the company."""
    mock_bundesanzeiger.return_value = pd.DataFrame(
        [
            {
                "jahr": "2042",
                "auditors": [Auditor(name="", company="")],
                "financial_results": [],
            }
        ]
    )
    enrich_company_financials.work(
        {"_id": "", "name": "ABC AG", "location": {"city": "Haltern am See"}},
        mock_compnay_service,
    )
    # The auditors must arrive as plain dictionaries, not as Auditor objects.
    mock_compnay_service.add_yearly_results.assert_called_once_with(
        "",
        {"2042": {"auditors": [{"name": "", "company": ""}], "financials": []}},
    )
|
1
tests/utils/data_extraction/__init__.py
Normal file
1
tests/utils/data_extraction/__init__.py
Normal file
@ -0,0 +1 @@
|
|||||||
|
"""Tests for data_extraction."""
|
111
tests/utils/data_extraction/bundesanzeiger_test.py
Normal file
111
tests/utils/data_extraction/bundesanzeiger_test.py
Normal file
@ -0,0 +1,111 @@
|
|||||||
|
from unittest.mock import Mock, patch
|
||||||
|
|
||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
from aki_prj23_transparenzregister.models.company import FinancialKPIEnum
|
||||||
|
from aki_prj23_transparenzregister.utils.data_extraction.bundesanzeiger import (
|
||||||
|
Bundesanzeiger,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def test_extract_auditor_company_no_hits() -> None:
    """A bold element without a line break yields no auditor company."""
    report = """
    <b>
        Nothing to see here
    </b>
    """
    assert Bundesanzeiger().extract_auditor_company(report) is None
|
||||||
|
|
||||||
|
|
||||||
|
def test_extract_auditor_company() -> None:
    """The company name is read from a bold element that contains a line break."""
    company_name = "Korrupte Wirtschaftsprüfer GmbH & Co. KG"
    report = f"""
    <b>
        {company_name}
        <br>
        Max Mustermann
    </b>
    """
    extracted = Bundesanzeiger().extract_auditor_company(report)
    assert extracted == company_name
|
||||||
|
|
||||||
|
|
||||||
|
def test_extract_kpis() -> None:
    """Net income and liabilities are parsed from plain text in German number format."""
    report_text = """
    Die Prj23_Transparenzregister GmbH erwirtschaftete einen Jahresüberschuss 10.000,43 €.
    Des Weiteren sanken die Gesamtverbindlichkeiten 42,00 €
    """
    expected_net_income = 10000.43
    expected_liabilities = 42.00

    kpis = Bundesanzeiger().__extract_kpis__(report_text)

    assert kpis[FinancialKPIEnum.NET_INCOME.value] == expected_net_income
    assert kpis[FinancialKPIEnum.LIABILITIES.value] == expected_liabilities
|
||||||
|
|
||||||
|
|
||||||
|
def test_extracct_financial_results() -> None:
    """KPIs are found in an HTML report regardless of the surrounding markup."""
    html_report = """
    <br>
        Die Prj23_Transparenzregister GmbH erwirtschaftete einen Jahresüberschuss 10.000,43 €.
    </br>
    <h2>Dies ist ein Platzhalter, der ignoriert werden soll</h2>
    <b>Des Weiteren sanken die Gesamtverbindlichkeiten 42,00 €</b>
    """
    expected_net_income = 10000.43
    expected_liabilities = 42.00

    financials = Bundesanzeiger().extract_financial_results(html_report)

    assert financials[FinancialKPIEnum.NET_INCOME.value] == expected_net_income
    assert financials[FinancialKPIEnum.LIABILITIES.value] == expected_liabilities
|
||||||
|
|
||||||
|
|
||||||
|
def test_filter_reports() -> None:
    """Only the `Jahresabschluss` entry survives and its year is extracted."""
    reports = pd.DataFrame(
        [
            {"name": "Bedienungsanleitung", "report": "", "raw_report": ""},
            {"name": "Jahresabschluss 1998", "report": "", "raw_report": ""},
        ]
    )
    filtered = Bundesanzeiger().filter_reports(reports)
    assert len(filtered) == 1
    assert filtered.iloc[0].jahr == "1998"
|
||||||
|
|
||||||
|
|
||||||
|
@patch(
    "aki_prj23_transparenzregister.utils.data_extraction.bundesanzeiger.Ba.get_reports"
)
def test_get_information(mock_bundesanzeiger: Mock) -> None:
    """Of two reports returned by the search, only the matching yearly result remains."""
    manual = {
        "name": "Bedienungsanleitung",
        "report": "",
        "company": "",
        "raw_report": "",
    }
    yearly_result = {
        "name": "Jahresabschluss 1998",
        "report": "",
        "company": "PRJ 23 Transparenzregister GmbH",
        "raw_report": "",
    }
    mock_bundesanzeiger.return_value = {"1": manual, "2": yearly_result}

    information = Bundesanzeiger().get_information(
        "PRJ 23 Transparenzregister GmbH", "Iserlohn"
    )
    assert len(information) == 1
|
||||||
|
|
||||||
|
|
||||||
|
@patch(
    "aki_prj23_transparenzregister.utils.data_extraction.bundesanzeiger.Ba.get_reports"
)
def test_get_information_no_results(mock_bundesanzeiger: Mock) -> None:
    """A search without any reports produces an empty result."""
    mock_bundesanzeiger.return_value = {}
    information = Bundesanzeiger().get_information(
        "PRJ 23 Transparenzregister GmbH", "Iserlohn"
    )
    assert len(information) == 0
|
@ -3,7 +3,7 @@ from unittest.mock import Mock
|
|||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
from aki_prj23_transparenzregister.models.company import Company
|
from aki_prj23_transparenzregister.models.company import Company, CompanyID, Location
|
||||||
from aki_prj23_transparenzregister.utils.mongo.company_mongo_service import (
|
from aki_prj23_transparenzregister.utils.mongo.company_mongo_service import (
|
||||||
CompanyMongoService,
|
CompanyMongoService,
|
||||||
)
|
)
|
||||||
@ -73,7 +73,7 @@ def test_by_id_no_result(mock_mongo_connector: Mock, mock_collection: Mock) -> N
|
|||||||
mock_mongo_connector.database = {"companies": mock_collection}
|
mock_mongo_connector.database = {"companies": mock_collection}
|
||||||
service = CompanyMongoService(mock_mongo_connector)
|
service = CompanyMongoService(mock_mongo_connector)
|
||||||
mock_collection.find.return_value = []
|
mock_collection.find.return_value = []
|
||||||
assert service.get_by_id("Does not exist") is None # type: ignore
|
assert service.get_by_id("Does not exist") is None
|
||||||
|
|
||||||
|
|
||||||
def test_by_id_result(mock_mongo_connector: Mock, mock_collection: Mock) -> None:
|
def test_by_id_result(mock_mongo_connector: Mock, mock_collection: Mock) -> None:
|
||||||
@ -87,7 +87,7 @@ def test_by_id_result(mock_mongo_connector: Mock, mock_collection: Mock) -> None
|
|||||||
service = CompanyMongoService(mock_mongo_connector)
|
service = CompanyMongoService(mock_mongo_connector)
|
||||||
mock_entry = {"id": "Does exist", "vaue": 42}
|
mock_entry = {"id": "Does exist", "vaue": 42}
|
||||||
mock_collection.find.return_value = [mock_entry]
|
mock_collection.find.return_value = [mock_entry]
|
||||||
assert service.get_by_id("Does exist") == mock_entry # type: ignore
|
assert service.get_by_id("Does exist") == mock_entry
|
||||||
|
|
||||||
|
|
||||||
def test_insert(mock_mongo_connector: Mock, mock_collection: Mock) -> None:
|
def test_insert(mock_mongo_connector: Mock, mock_collection: Mock) -> None:
|
||||||
@ -101,4 +101,55 @@ def test_insert(mock_mongo_connector: Mock, mock_collection: Mock) -> None:
|
|||||||
service = CompanyMongoService(mock_mongo_connector)
|
service = CompanyMongoService(mock_mongo_connector)
|
||||||
mock_result = 42
|
mock_result = 42
|
||||||
mock_collection.insert_one.return_value = mock_result
|
mock_collection.insert_one.return_value = mock_result
|
||||||
assert service.insert(Company(None, None, "", "", [])) == mock_result # type: ignore
|
assert (
|
||||||
|
service.insert(
|
||||||
|
Company(CompanyID("", ""), Location("Hier und Dort"), "", "", [])
|
||||||
|
)
|
||||||
|
== mock_result
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def test_get_by_object_id_no_result(
    mock_mongo_connector: Mock, mock_collection: Mock
) -> None:
    """None is returned when the collection finds nothing for the id."""
    mock_mongo_connector.database = {"companies": mock_collection}
    stored: list = []
    mock_collection.find.return_value = stored
    company_service = CompanyMongoService(mock_mongo_connector)
    assert company_service.get_by_object_id("649f16a1e198338c3b44299e") is None
|
||||||
|
|
||||||
|
|
||||||
|
def test_get_by_object_id(mock_mongo_connector: Mock, mock_collection: Mock) -> None:
    """The single entry found by the collection is handed back."""
    mock_mongo_connector.database = {"companies": mock_collection}
    stored: list = [{"_id": "abc", "brille?": "Fielmann", "Hotel?": "Trivago"}]
    mock_collection.find.return_value = stored
    company_service = CompanyMongoService(mock_mongo_connector)
    assert company_service.get_by_object_id("612316a1e198338c3b44299e") == stored[0]
|
||||||
|
|
||||||
|
|
||||||
|
def test_get_where_financial_no_results(
    mock_mongo_connector: Mock, mock_collection: Mock
) -> None:
    """Entries returned by the collection are passed through as a list."""
    mock_mongo_connector.database = {"companies": mock_collection}
    stored: list = [{"_id": "abc", "brille?": "Fielmann", "Hotel?": "Trivago"}]
    mock_collection.find.return_value = stored
    company_service = CompanyMongoService(mock_mongo_connector)
    assert company_service.get_where_no_financial_results() == stored
|
||||||
|
|
||||||
|
|
||||||
|
def test_get_where_financial_results(
    mock_mongo_connector: Mock, mock_collection: Mock
) -> None:
    """Entries returned by the collection are passed through as a list."""
    mock_mongo_connector.database = {"companies": mock_collection}
    stored: list = [{"_id": "abc", "brille?": "Fielmann", "Hotel?": "Trivago"}]
    mock_collection.find.return_value = stored
    company_service = CompanyMongoService(mock_mongo_connector)
    assert company_service.get_where_yearly_results() == stored
|
||||||
|
|
||||||
|
|
||||||
|
def test_add_yearly_reslults(mock_mongo_connector: Mock, mock_collection: Mock) -> None:
    """The return value of the collection's `update_one` is handed back unchanged."""
    mock_mongo_connector.database = {"companies": mock_collection}
    update_outcome: list = [{"_id": "abc", "brille?": "Fielmann", "Hotel?": "Trivago"}]
    mock_collection.update_one.return_value = update_outcome
    company_service = CompanyMongoService(mock_mongo_connector)
    assert (
        company_service.add_yearly_results("612316a1e198338c3b44299e", {})
        == update_outcome
    )
|
||||||
|
Loading…
x
Reference in New Issue
Block a user