mirror of
https://github.com/fhswf/aki_prj23_transparenzregister.git
synced 2025-06-21 15:13:55 +02:00
refactor: Pull Auditor extraction into Bundesanzeiger utils
This commit is contained in:
@ -18,235 +18,119 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>date</th>\n",
|
||||
" <th>company</th>\n",
|
||||
" <th>raw_report</th>\n",
|
||||
" <th>jahr</th>\n",
|
||||
" <th>auditors</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>0</th>\n",
|
||||
" <td>2023-07-07</td>\n",
|
||||
" <td>Atos IT-Dienstleistung und Beratung GmbH</td>\n",
|
||||
" <td><div class=\"publication_container\">\\n <div cla...</td>\n",
|
||||
" <td>2021</td>\n",
|
||||
" <td>[]</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2</th>\n",
|
||||
" <td>2023-05-10</td>\n",
|
||||
" <td>Atos IT-Dienstleistung und Beratung GmbH</td>\n",
|
||||
" <td><div class=\"publication_container\">\\n <div cla...</td>\n",
|
||||
" <td>2021</td>\n",
|
||||
" <td>[Auditor(name='Eckhard Lewe', company='Grant T...</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>4</th>\n",
|
||||
" <td>2022-03-25</td>\n",
|
||||
" <td>Atos IT-Dienstleistung und Beratung GmbH</td>\n",
|
||||
" <td><div class=\"publication_container\">\\n <div cla...</td>\n",
|
||||
" <td>2020</td>\n",
|
||||
" <td>[Auditor(name='Eckhard Lewe', company='Warth &...</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>5</th>\n",
|
||||
" <td>2021-03-11</td>\n",
|
||||
" <td>Atos IT-Dienstleistung und Beratung GmbH</td>\n",
|
||||
" <td><div class=\"publication_container\">\\n <div cla...</td>\n",
|
||||
" <td>2019</td>\n",
|
||||
" <td>[Auditor(name='Eckhard Lewe', company='Warth &...</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>6</th>\n",
|
||||
" <td>2020-03-24</td>\n",
|
||||
" <td>Atos IT-Dienstleistung und Beratung GmbH</td>\n",
|
||||
" <td><div class=\"publication_container\">\\n <div cla...</td>\n",
|
||||
" <td>2018</td>\n",
|
||||
" <td>[Auditor(name='Ulrich Diersch', company='Warth...</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
" date company \\\n",
|
||||
"0 2023-07-07 Atos IT-Dienstleistung und Beratung GmbH \n",
|
||||
"2 2023-05-10 Atos IT-Dienstleistung und Beratung GmbH \n",
|
||||
"4 2022-03-25 Atos IT-Dienstleistung und Beratung GmbH \n",
|
||||
"5 2021-03-11 Atos IT-Dienstleistung und Beratung GmbH \n",
|
||||
"6 2020-03-24 Atos IT-Dienstleistung und Beratung GmbH \n",
|
||||
"\n",
|
||||
" raw_report jahr \\\n",
|
||||
"0 <div class=\"publication_container\">\\n <div cla... 2021 \n",
|
||||
"2 <div class=\"publication_container\">\\n <div cla... 2021 \n",
|
||||
"4 <div class=\"publication_container\">\\n <div cla... 2020 \n",
|
||||
"5 <div class=\"publication_container\">\\n <div cla... 2019 \n",
|
||||
"6 <div class=\"publication_container\">\\n <div cla... 2018 \n",
|
||||
"\n",
|
||||
" auditors \n",
|
||||
"0 [] \n",
|
||||
"2 [Auditor(name='Eckhard Lewe', company='Grant T... \n",
|
||||
"4 [Auditor(name='Eckhard Lewe', company='Warth &... \n",
|
||||
"5 [Auditor(name='Eckhard Lewe', company='Warth &... \n",
|
||||
"6 [Auditor(name='Ulrich Diersch', company='Warth... "
|
||||
]
|
||||
},
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import pandas as pd\n",
|
||||
"from deutschland.bundesanzeiger import Bundesanzeiger"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"dict_keys(['7e53c9211957c6a4c17264ab86946c3b', 'c1051233030a8e0232523052fd4a2310', '57d129e6fd7505d567fa13919e5e6bdd'])\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"ba = Bundesanzeiger()\n",
|
||||
"reports = ba.get_reports(\n",
|
||||
" \"Volkswagen Economy Service Erdle Bernhard Erdle GmbH\"\n",
|
||||
") # \"Atos IT-Dienstleistung und Beratung GmbH\")\n",
|
||||
"print(reports.keys())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"report_contents = []\n",
|
||||
"for key in reports.keys():\n",
|
||||
" report_contents.append(reports[key])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>date</th>\n",
|
||||
" <th>name</th>\n",
|
||||
" <th>company</th>\n",
|
||||
" <th>report</th>\n",
|
||||
" <th>raw_report</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>0</th>\n",
|
||||
" <td>2023-07-11</td>\n",
|
||||
" <td>Jahresabschluss zum Geschäftsjahr vom 01.01.20...</td>\n",
|
||||
" <td>Volkswagen Economy Service Erdle Bernhard Erdl...</td>\n",
|
||||
" <td>\\n\\n\\n\\n \\n\\n\\n\\n\\n\\n\\n\\nVolkswagen Economy Se...</td>\n",
|
||||
" <td><div class=\"publication_container\">\\n <div cla...</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1</th>\n",
|
||||
" <td>2023-05-25</td>\n",
|
||||
" <td>Jahresabschluss zum Geschäftsjahr vom 01.01.20...</td>\n",
|
||||
" <td>Volkswagen Economy Service Erdle Bernhard Erdl...</td>\n",
|
||||
" <td>\\n\\n\\n\\n \\n\\n\\n\\n\\n\\n\\n\\nVolkswagen Economy Se...</td>\n",
|
||||
" <td><div class=\"publication_container\">\\n <div cla...</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2</th>\n",
|
||||
" <td>2023-05-24</td>\n",
|
||||
" <td>Jahresabschluss zum Geschäftsjahr vom 01.01.20...</td>\n",
|
||||
" <td>Volkswagen Economy Service Erdle Bernhard Erdl...</td>\n",
|
||||
" <td>\\n\\n\\n\\n \\n\\n\\n\\n\\n\\n\\n\\nVolkswagen Economy Se...</td>\n",
|
||||
" <td><div class=\"publication_container\">\\n <div cla...</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
" date name \\\n",
|
||||
"0 2023-07-11 Jahresabschluss zum Geschäftsjahr vom 01.01.20... \n",
|
||||
"1 2023-05-25 Jahresabschluss zum Geschäftsjahr vom 01.01.20... \n",
|
||||
"2 2023-05-24 Jahresabschluss zum Geschäftsjahr vom 01.01.20... \n",
|
||||
"\n",
|
||||
" company \\\n",
|
||||
"0 Volkswagen Economy Service Erdle Bernhard Erdl... \n",
|
||||
"1 Volkswagen Economy Service Erdle Bernhard Erdl... \n",
|
||||
"2 Volkswagen Economy Service Erdle Bernhard Erdl... \n",
|
||||
"\n",
|
||||
" report \\\n",
|
||||
"0 \\n\\n\\n\\n \\n\\n\\n\\n\\n\\n\\n\\nVolkswagen Economy Se... \n",
|
||||
"1 \\n\\n\\n\\n \\n\\n\\n\\n\\n\\n\\n\\nVolkswagen Economy Se... \n",
|
||||
"2 \\n\\n\\n\\n \\n\\n\\n\\n\\n\\n\\n\\nVolkswagen Economy Se... \n",
|
||||
"\n",
|
||||
" raw_report \n",
|
||||
"0 <div class=\"publication_container\">\\n <div cla... \n",
|
||||
"1 <div class=\"publication_container\">\\n <div cla... \n",
|
||||
"2 <div class=\"publication_container\">\\n <div cla... "
|
||||
]
|
||||
},
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"df_reports = pd.DataFrame(report_contents)\n",
|
||||
"df_reports.head()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>date</th>\n",
|
||||
" <th>name</th>\n",
|
||||
" <th>company</th>\n",
|
||||
" <th>report</th>\n",
|
||||
" <th>raw_report</th>\n",
|
||||
" <th>type</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>0</th>\n",
|
||||
" <td>2023-07-11</td>\n",
|
||||
" <td>Jahresabschluss zum Geschäftsjahr vom 01.01.20...</td>\n",
|
||||
" <td>Volkswagen Economy Service Erdle Bernhard Erdl...</td>\n",
|
||||
" <td>\\n\\n\\n\\n \\n\\n\\n\\n\\n\\n\\n\\nVolkswagen Economy Se...</td>\n",
|
||||
" <td><div class=\"publication_container\">\\n <div cla...</td>\n",
|
||||
" <td>Jahresabschluss</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1</th>\n",
|
||||
" <td>2023-05-25</td>\n",
|
||||
" <td>Jahresabschluss zum Geschäftsjahr vom 01.01.20...</td>\n",
|
||||
" <td>Volkswagen Economy Service Erdle Bernhard Erdl...</td>\n",
|
||||
" <td>\\n\\n\\n\\n \\n\\n\\n\\n\\n\\n\\n\\nVolkswagen Economy Se...</td>\n",
|
||||
" <td><div class=\"publication_container\">\\n <div cla...</td>\n",
|
||||
" <td>Jahresabschluss</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2</th>\n",
|
||||
" <td>2023-05-24</td>\n",
|
||||
" <td>Jahresabschluss zum Geschäftsjahr vom 01.01.20...</td>\n",
|
||||
" <td>Volkswagen Economy Service Erdle Bernhard Erdl...</td>\n",
|
||||
" <td>\\n\\n\\n\\n \\n\\n\\n\\n\\n\\n\\n\\nVolkswagen Economy Se...</td>\n",
|
||||
" <td><div class=\"publication_container\">\\n <div cla...</td>\n",
|
||||
" <td>Jahresabschluss</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
" date name \\\n",
|
||||
"0 2023-07-11 Jahresabschluss zum Geschäftsjahr vom 01.01.20... \n",
|
||||
"1 2023-05-25 Jahresabschluss zum Geschäftsjahr vom 01.01.20... \n",
|
||||
"2 2023-05-24 Jahresabschluss zum Geschäftsjahr vom 01.01.20... \n",
|
||||
"\n",
|
||||
" company \\\n",
|
||||
"0 Volkswagen Economy Service Erdle Bernhard Erdl... \n",
|
||||
"1 Volkswagen Economy Service Erdle Bernhard Erdl... \n",
|
||||
"2 Volkswagen Economy Service Erdle Bernhard Erdl... \n",
|
||||
"\n",
|
||||
" report \\\n",
|
||||
"0 \\n\\n\\n\\n \\n\\n\\n\\n\\n\\n\\n\\nVolkswagen Economy Se... \n",
|
||||
"1 \\n\\n\\n\\n \\n\\n\\n\\n\\n\\n\\n\\nVolkswagen Economy Se... \n",
|
||||
"2 \\n\\n\\n\\n \\n\\n\\n\\n\\n\\n\\n\\nVolkswagen Economy Se... \n",
|
||||
"\n",
|
||||
" raw_report type \n",
|
||||
"0 <div class=\"publication_container\">\\n <div cla... Jahresabschluss \n",
|
||||
"1 <div class=\"publication_container\">\\n <div cla... Jahresabschluss \n",
|
||||
"2 <div class=\"publication_container\">\\n <div cla... Jahresabschluss "
|
||||
]
|
||||
},
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"df_reports[\"type\"] = df_reports.name.apply(lambda name: name.split(\" \")[0])\n",
|
||||
"\n",
|
||||
"from aki_prj23_transparenzregister.utils.data_extraction.bundesanzeiger import (\n",
|
||||
" Bundesanzeiger,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"ba_wrapper = Bundesanzeiger()\n",
|
||||
"df_reports = ba_wrapper.get_information(\"Atos IT-Dienstleistung und Beratung GmbH\")\n",
|
||||
"df_reports.head()"
|
||||
]
|
||||
},
|
||||
|
Reference in New Issue
Block a user