mirror of
https://github.com/fhswf/aki_prj23_transparenzregister.git
synced 2025-12-17 20:00:43 +01:00
1790 lines
69 KiB
Plaintext
1790 lines
69 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"attachments": {},
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"# Daten Extraktion aus dem Bundesanzeiger"
|
|
]
|
|
},
|
|
{
|
|
"attachments": {},
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Vorbereitung"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 77,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead th {\n",
|
|
" text-align: right;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>date</th>\n",
|
|
" <th>company</th>\n",
|
|
" <th>raw_report</th>\n",
|
|
" <th>jahr</th>\n",
|
|
" <th>auditors</th>\n",
|
|
" <th>financial_results</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>1</th>\n",
|
|
" <td>2022-10-21</td>\n",
|
|
" <td>Stadtwerke Haltern am See Gesellschaft mit bes...</td>\n",
|
|
" <td><div class=\"publication_container\">\\n <div cla...</td>\n",
|
|
" <td>2021</td>\n",
|
|
" <td>[Auditor(name='Volker Voelcker', company='Pric...</td>\n",
|
|
" <td>{'revenue': 46275.0, 'net_income': 1757.0, 'eb...</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>3</th>\n",
|
|
" <td>2021-10-12</td>\n",
|
|
" <td>Stadtwerke Haltern am See Gesellschaft mit bes...</td>\n",
|
|
" <td><div class=\"publication_container\">\\n <div cla...</td>\n",
|
|
" <td>2020</td>\n",
|
|
" <td>[Auditor(name='Hubert Ahlers', company='Pricew...</td>\n",
|
|
" <td>{'revenue': 47459.0, 'net_income': 1661.0, 'eb...</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>5</th>\n",
|
|
" <td>2020-12-03</td>\n",
|
|
" <td>Stadtwerke Haltern am See Gesellschaft mit bes...</td>\n",
|
|
" <td><div class=\"publication_container\">\\n <div cla...</td>\n",
|
|
" <td>2019</td>\n",
|
|
" <td>[Auditor(name='Hubert Ahlers', company='Pricew...</td>\n",
|
|
" <td>{'revenue': 45575.0, 'net_income': 1599.0, 'eb...</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>6</th>\n",
|
|
" <td>2020-01-09</td>\n",
|
|
" <td>Stadtwerke Haltern am See Gesellschaft mit bes...</td>\n",
|
|
" <td><div class=\"publication_container\">\\n <div cla...</td>\n",
|
|
" <td>2018</td>\n",
|
|
" <td>[Auditor(name='Hubert Ahlers', company='Pricew...</td>\n",
|
|
" <td>{'revenue': 43898.0, 'net_income': 2043.0, 'eb...</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>7</th>\n",
|
|
" <td>2019-10-10</td>\n",
|
|
" <td>Stadtwerke Haltern am See Gesellschaft mit bes...</td>\n",
|
|
" <td><div class=\"publication_container\">\\n <div cla...</td>\n",
|
|
" <td>2017</td>\n",
|
|
" <td>[]</td>\n",
|
|
" <td>{}</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" date company \\\n",
|
|
"1 2022-10-21 Stadtwerke Haltern am See Gesellschaft mit bes... \n",
|
|
"3 2021-10-12 Stadtwerke Haltern am See Gesellschaft mit bes... \n",
|
|
"5 2020-12-03 Stadtwerke Haltern am See Gesellschaft mit bes... \n",
|
|
"6 2020-01-09 Stadtwerke Haltern am See Gesellschaft mit bes... \n",
|
|
"7 2019-10-10 Stadtwerke Haltern am See Gesellschaft mit bes... \n",
|
|
"\n",
|
|
" raw_report jahr \\\n",
|
|
"1 <div class=\"publication_container\">\\n <div cla... 2021 \n",
|
|
"3 <div class=\"publication_container\">\\n <div cla... 2020 \n",
|
|
"5 <div class=\"publication_container\">\\n <div cla... 2019 \n",
|
|
"6 <div class=\"publication_container\">\\n <div cla... 2018 \n",
|
|
"7 <div class=\"publication_container\">\\n <div cla... 2017 \n",
|
|
"\n",
|
|
" auditors \\\n",
|
|
"1 [Auditor(name='Volker Voelcker', company='Pric... \n",
|
|
"3 [Auditor(name='Hubert Ahlers', company='Pricew... \n",
|
|
"5 [Auditor(name='Hubert Ahlers', company='Pricew... \n",
|
|
"6 [Auditor(name='Hubert Ahlers', company='Pricew... \n",
|
|
"7 [] \n",
|
|
"\n",
|
|
" financial_results \n",
|
|
"1 {'revenue': 46275.0, 'net_income': 1757.0, 'eb... \n",
|
|
"3 {'revenue': 47459.0, 'net_income': 1661.0, 'eb... \n",
|
|
"5 {'revenue': 45575.0, 'net_income': 1599.0, 'eb... \n",
|
|
"6 {'revenue': 43898.0, 'net_income': 2043.0, 'eb... \n",
|
|
"7 {} "
|
|
]
|
|
},
|
|
"execution_count": 77,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"import pandas as pd\n",
|
|
"\n",
|
|
"from aki_prj23_transparenzregister.utils.data_extraction.bundesanzeiger import (\n",
|
|
" Bundesanzeiger,\n",
|
|
")\n",
|
|
"\n",
|
|
"ba_wrapper = Bundesanzeiger()\n",
|
|
"# df_reports = ba_wrapper.get_information(\"Törmer Energy Solar 1 GmbH & Co. KG\", \"\")\n",
|
|
"# df_reports = ba_wrapper.get_information(\"Atos IT-Dienstleistung und Beratung GmbH\", \"\")\n",
|
|
"df_reports = ba_wrapper.get_information(\n",
|
|
" \"Stadtwerke Haltern am See Gesellschaft mit beschränkter Haftung\", \"\"\n",
|
|
")\n",
|
|
"df_reports.head()"
|
|
]
|
|
},
|
|
{
|
|
"attachments": {},
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Daten Extraktion"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 78,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"from bs4 import BeautifulSoup\n",
|
|
"from io import StringIO"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 79,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"sample_report = df_reports.iloc[1].raw_report"
|
|
]
|
|
},
|
|
{
|
|
"attachments": {},
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"### Aufsichtsrat"
|
|
]
|
|
},
|
|
{
|
|
"attachments": {},
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"**TODO**"
|
|
]
|
|
},
|
|
{
|
|
"attachments": {},
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"### Bilanz bzw. GuV"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 80,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"MultiIndex([('Unnamed: 0_level_0', 'Unnamed: 0_level_1'),\n",
|
|
" ( '2020', 'T€'),\n",
|
|
" ( '2019', 'T€'),\n",
|
|
" ( 'Veränderung', 'T€'),\n",
|
|
" ( 'Veränderung', '%')],\n",
|
|
" )\n",
|
|
"Unnamed: 0_level_0 Unnamed: 0_level_1 object\n",
|
|
"2020 T€ float64\n",
|
|
"2019 T€ float64\n",
|
|
"Veränderung T€ int64\n",
|
|
" % int64\n",
|
|
"dtype: object\n",
|
|
"MultiIndex([('Unnamed: 0_level_0', 'Unnamed: 0_level_1'),\n",
|
|
" ( '2020', 'T€'),\n",
|
|
" ( '2020', '%'),\n",
|
|
" ( '2019', 'T€'),\n",
|
|
" ( '2019', '%'),\n",
|
|
" ( 'Veränderungen', 'T€'),\n",
|
|
" ( 'Veränderungen', '%')],\n",
|
|
" )\n",
|
|
"Unnamed: 0_level_0 Unnamed: 0_level_1 object\n",
|
|
"2020 T€ float64\n",
|
|
" % int64\n",
|
|
"2019 T€ float64\n",
|
|
" % int64\n",
|
|
"Veränderungen T€ float64\n",
|
|
" % int64\n",
|
|
"dtype: object\n",
|
|
"MultiIndex([('Unnamed: 0_level_0', 'gerundet'),\n",
|
|
" ( '2020', 'T€'),\n",
|
|
" ( '2019', 'T€'),\n",
|
|
" ( 'Veränderung', 'T€'),\n",
|
|
" ( 'Veränderung', '%')],\n",
|
|
" )\n",
|
|
"Unnamed: 0_level_0 gerundet object\n",
|
|
"2020 T€ float64\n",
|
|
"2019 T€ float64\n",
|
|
"Veränderung T€ float64\n",
|
|
" % int64\n",
|
|
"dtype: object\n",
|
|
"MultiIndex([('Unnamed: 0_level_0', 'unkonsolidiert gerundet'),\n",
|
|
" ( '2020', 'T€'),\n",
|
|
" ( '2019', 'T€'),\n",
|
|
" ( 'Veränderung', 'T€'),\n",
|
|
" ( 'Veränderung', '%')],\n",
|
|
" )\n",
|
|
"Unnamed: 0_level_0 unkonsolidiert gerundet object\n",
|
|
"2020 T€ float64\n",
|
|
"2019 T€ float64\n",
|
|
"Veränderung T€ float64\n",
|
|
" % int64\n",
|
|
"dtype: object\n",
|
|
"MultiIndex([('Jahresüberschuss/Jahresfehlbetrag nach Betriebszweigen', ...),\n",
|
|
" ( '2020', ...),\n",
|
|
" ( '2019', ...),\n",
|
|
" ( 'Veränderung', ...),\n",
|
|
" ( 'Veränderung', ...)],\n",
|
|
" )\n",
|
|
"Jahresüberschuss/Jahresfehlbetrag nach Betriebszweigen Unnamed: 0_level_1 object\n",
|
|
"2020 T€ float64\n",
|
|
"2019 T€ float64\n",
|
|
"Veränderung T€ int64\n",
|
|
" % int64\n",
|
|
"dtype: object\n",
|
|
"Index(['Unnamed: 0', '2020 T€', '2019 T€'], dtype='object')\n",
|
|
"Unnamed: 0 object\n",
|
|
"2020 T€ float64\n",
|
|
"2019 T€ float64\n",
|
|
"dtype: object\n",
|
|
"MultiIndex([('Unnamed: 0_level_0', 'Unnamed: 0_level_1'),\n",
|
|
" ( '31. Dezember 2020', 'T€'),\n",
|
|
" ( '31. Dezember 2020', '%'),\n",
|
|
" ( '31. Dezember 2019', 'T€'),\n",
|
|
" ( '31. Dezember 2019', '%'),\n",
|
|
" ( 'Veränderung', 'T€')],\n",
|
|
" )\n",
|
|
"Unnamed: 0_level_0 Unnamed: 0_level_1 object\n",
|
|
"31. Dezember 2020 T€ float64\n",
|
|
" % float64\n",
|
|
"31. Dezember 2019 T€ float64\n",
|
|
" % float64\n",
|
|
"Veränderung T€ float64\n",
|
|
"dtype: object\n",
|
|
"Index(['Investitionen (netto)', '2020 T€', '2019 T€', 'Veränderung T€'], dtype='object')\n",
|
|
"Investitionen (netto) object\n",
|
|
"2020 T€ float64\n",
|
|
"2019 T€ float64\n",
|
|
"Veränderung T€ float64\n",
|
|
"dtype: object\n",
|
|
"Index(['Unnamed: 0', '€', '31.12.2019 in T €'], dtype='object')\n",
|
|
"Unnamed: 0 object\n",
|
|
"€ object\n",
|
|
"31.12.2019 in T € object\n",
|
|
"dtype: object\n",
|
|
"Index(['Unnamed: 0', '€', '€.1', '31.12.2019 in T €'], dtype='object')\n",
|
|
"Unnamed: 0 object\n",
|
|
"€ object\n",
|
|
"€.1 object\n",
|
|
"31.12.2019 in T € float64\n",
|
|
"dtype: object\n",
|
|
"Index([0, 1], dtype='int64')\n",
|
|
"0 object\n",
|
|
"1 object\n",
|
|
"dtype: object\n",
|
|
"Index(['Beteiligung', 'Anteil', 'Eigenkapital der Beteiligungsgesellschaft',\n",
|
|
" 'Eigenkapital der Beteiligungsgesellschaft.1',\n",
|
|
" 'Jahresergebnis der Beteiligungsgesellschaft',\n",
|
|
" 'Jahresergebnis der Beteiligungsgesellschaft.1'],\n",
|
|
" dtype='object')\n",
|
|
"Beteiligung object\n",
|
|
"Anteil object\n",
|
|
"Eigenkapital der Beteiligungsgesellschaft object\n",
|
|
"Eigenkapital der Beteiligungsgesellschaft.1 object\n",
|
|
"Jahresergebnis der Beteiligungsgesellschaft object\n",
|
|
"Jahresergebnis der Beteiligungsgesellschaft.1 object\n",
|
|
"dtype: object\n",
|
|
"Index(['Beteiligung', 'Anteil', 'Eigenkapital der Beteiligungsgesellschaft',\n",
|
|
" 'Eigenkapital der Beteiligungsgesellschaft.1',\n",
|
|
" 'Jahresergebnis der Beteiligungsgesellschaft',\n",
|
|
" 'Jahresergebnis der Beteiligungsgesellschaft.1'],\n",
|
|
" dtype='object')\n",
|
|
"Beteiligung object\n",
|
|
"Anteil object\n",
|
|
"Eigenkapital der Beteiligungsgesellschaft object\n",
|
|
"Eigenkapital der Beteiligungsgesellschaft.1 object\n",
|
|
"Jahresergebnis der Beteiligungsgesellschaft object\n",
|
|
"Jahresergebnis der Beteiligungsgesellschaft.1 object\n",
|
|
"dtype: object\n",
|
|
"Index(['Unnamed: 0', '2020', '2019'], dtype='object')\n",
|
|
"Unnamed: 0 object\n",
|
|
"2020 object\n",
|
|
"2019 object\n",
|
|
"dtype: object\n",
|
|
"MultiIndex([( 'Unnamed: 0_level_0', 'Unnamed: 0_level_1'),\n",
|
|
" ( 'Unnamed: 1_level_0', 'Gesamt in T€'),\n",
|
|
" ('davon mit einer Restlaufzeit', 'bis zu 1 Jahr in T€'),\n",
|
|
" ('davon mit einer Restlaufzeit', 'mehr als 1 Jahr in T€'),\n",
|
|
" ('davon mit einer Restlaufzeit', 'davon über 5 Jahre in T€')],\n",
|
|
" )\n",
|
|
"Unnamed: 0_level_0 Unnamed: 0_level_1 object\n",
|
|
"Unnamed: 1_level_0 Gesamt in T€ float64\n",
|
|
"davon mit einer Restlaufzeit bis zu 1 Jahr in T€ float64\n",
|
|
" mehr als 1 Jahr in T€ float64\n",
|
|
" davon über 5 Jahre in T€ float64\n",
|
|
"dtype: object\n",
|
|
"MultiIndex([('Unnamed: 0_level_0', 'Unnamed: 0_level_1'),\n",
|
|
" ( '2020', '€'),\n",
|
|
" ( '2019', '€'),\n",
|
|
" ( 'Veränderung', '€'),\n",
|
|
" ( 'Veränderung', '%')],\n",
|
|
" )\n",
|
|
"Unnamed: 0_level_0 Unnamed: 0_level_1 object\n",
|
|
"2020 € object\n",
|
|
"2019 € object\n",
|
|
"Veränderung € object\n",
|
|
" % int64\n",
|
|
"dtype: object\n",
|
|
"MultiIndex([('Unnamed: 0_level_0', 'Unnamed: 0_level_1'),\n",
|
|
" ( '2020', '€'),\n",
|
|
" ( '2019', '€'),\n",
|
|
" ( 'Veränderung', '€'),\n",
|
|
" ( 'Veränderung', '%')],\n",
|
|
" )\n",
|
|
"Unnamed: 0_level_0 Unnamed: 0_level_1 object\n",
|
|
"2020 € object\n",
|
|
"2019 € object\n",
|
|
"Veränderung € object\n",
|
|
" % int64\n",
|
|
"dtype: object\n",
|
|
"Index(['Unnamed: 0', '2020 T€', '2019 T€'], dtype='object')\n",
|
|
"Unnamed: 0 object\n",
|
|
"2020 T€ int64\n",
|
|
"2019 T€ int64\n",
|
|
"dtype: object\n",
|
|
"MultiIndex([( 'Unnamed: 0_level_0', 'Unnamed: 0_level_1'),\n",
|
|
" ('Anschaffungs- und Herstellungskosten', 'Stand am 01.01.2020 €'),\n",
|
|
" ('Anschaffungs- und Herstellungskosten', 'Zugang €'),\n",
|
|
" ('Anschaffungs- und Herstellungskosten', 'Abgang €'),\n",
|
|
" ('Anschaffungs- und Herstellungskosten', 'Umbuchung €'),\n",
|
|
" ('Anschaffungs- und Herstellungskosten', 'Stand am 31.12.2020 €')],\n",
|
|
" )\n",
|
|
"Unnamed: 0_level_0 Unnamed: 0_level_1 object\n",
|
|
"Anschaffungs- und Herstellungskosten Stand am 01.01.2020 € object\n",
|
|
" Zugang € object\n",
|
|
" Abgang € object\n",
|
|
" Umbuchung € object\n",
|
|
" Stand am 31.12.2020 € object\n",
|
|
"dtype: object\n",
|
|
"MultiIndex([('Unnamed: 0_level_0', 'Unnamed: 0_level_1'),\n",
|
|
" ( 'Abschreibungen', 'Stand am 01.01.2020 €'),\n",
|
|
" ( 'Abschreibungen', 'Zugang €'),\n",
|
|
" ( 'Abschreibungen', 'außerplanm. AfA'),\n",
|
|
" ( 'Abschreibungen', 'Abgang €'),\n",
|
|
" ( 'Abschreibungen', 'Umbuchung €'),\n",
|
|
" ( 'Abschreibungen', 'Stand am 31.12.2020 €')],\n",
|
|
" )\n",
|
|
"Unnamed: 0_level_0 Unnamed: 0_level_1 object\n",
|
|
"Abschreibungen Stand am 01.01.2020 € object\n",
|
|
" Zugang € object\n",
|
|
" außerplanm. AfA float64\n",
|
|
" Abgang € object\n",
|
|
" Umbuchung € float64\n",
|
|
" Stand am 31.12.2020 € object\n",
|
|
"dtype: object\n",
|
|
"MultiIndex([('Unnamed: 0_level_0', 'Unnamed: 0_level_1'),\n",
|
|
" ( 'Restbuchwerte', 'Stand am 31.12.2020 €'),\n",
|
|
" ( 'Restbuchwerte', 'Stand am 31.12.2019 €')],\n",
|
|
" )\n",
|
|
"Unnamed: 0_level_0 Unnamed: 0_level_1 object\n",
|
|
"Restbuchwerte Stand am 31.12.2020 € object\n",
|
|
" Stand am 31.12.2019 € object\n",
|
|
"dtype: object\n",
|
|
"Index(['Unnamed: 0', 'Elektrizitätsverteilung', '31.12.2019 in T €',\n",
|
|
" 'Gasverteilung', '31.12.2019 in T €.1'],\n",
|
|
" dtype='object')\n",
|
|
"Unnamed: 0 object\n",
|
|
"Elektrizitätsverteilung object\n",
|
|
"31.12.2019 in T € object\n",
|
|
"Gasverteilung object\n",
|
|
"31.12.2019 in T €.1 object\n",
|
|
"dtype: object\n",
|
|
"Index(['Unnamed: 0', '€', '31.12.2019 in T €'], dtype='object')\n",
|
|
"Unnamed: 0 object\n",
|
|
"€ object\n",
|
|
"31.12.2019 in T € object\n",
|
|
"dtype: object\n",
|
|
"Index(['Unnamed: 0', 'Elektrizitätsverteilung €', '31.12.2019 in T €',\n",
|
|
" 'Gasverteilung €', '31.12.2019 in T €.1'],\n",
|
|
" dtype='object')\n",
|
|
"Unnamed: 0 object\n",
|
|
"Elektrizitätsverteilung € object\n",
|
|
"31.12.2019 in T € float64\n",
|
|
"Gasverteilung € object\n",
|
|
"31.12.2019 in T €.1 float64\n",
|
|
"dtype: object\n",
|
|
"Index(['Unnamed: 0', '€', 'Vorjahr in T €'], dtype='object')\n",
|
|
"Unnamed: 0 object\n",
|
|
"€ object\n",
|
|
"Vorjahr in T € float64\n",
|
|
"dtype: object\n",
|
|
"MultiIndex([('Verbindlichkeitenspiegel 2020 Elektrizitätsverteilung', ...),\n",
|
|
" ( 'davon mit einer Restlaufzeit', ...),\n",
|
|
" ( 'davon mit einer Restlaufzeit', ...),\n",
|
|
" ( 'davon mit einer Restlaufzeit', ...),\n",
|
|
" ( 'davon mit einer Restlaufzeit', ...)],\n",
|
|
" )\n",
|
|
"Verbindlichkeitenspiegel 2020 Elektrizitätsverteilung Unnamed: 0_level_1 object\n",
|
|
"davon mit einer Restlaufzeit Gesamt in T€ float64\n",
|
|
" bis zu 1 Jahr in T€ float64\n",
|
|
" über 1 Jahr in T€ float64\n",
|
|
" mehr als 5 Jahre in T€ float64\n",
|
|
"dtype: object\n",
|
|
"MultiIndex([('Verbindlichkeitenspiegel 2020 Gasverteilung', ...),\n",
|
|
" ( 'davon mit einer Restlaufzeit', ...),\n",
|
|
" ( 'davon mit einer Restlaufzeit', ...),\n",
|
|
" ( 'davon mit einer Restlaufzeit', ...),\n",
|
|
" ( 'davon mit einer Restlaufzeit', ...)],\n",
|
|
" )\n",
|
|
"Verbindlichkeitenspiegel 2020 Gasverteilung Unnamed: 0_level_1 object\n",
|
|
"davon mit einer Restlaufzeit Gesamt in T€ float64\n",
|
|
" bis zu 1 Jahr in T€ float64\n",
|
|
" über 1 Jahr in T€ float64\n",
|
|
" mehr als 5 Jahre in T€ float64\n",
|
|
"dtype: object\n",
|
|
"MultiIndex([('Verbindlichkeitenspiegel 2020 Intelligenter', ...),\n",
|
|
" ( 'davon mit einer Restlaufzeit', ...),\n",
|
|
" ( 'davon mit einer Restlaufzeit', ...),\n",
|
|
" ( 'davon mit einer Restlaufzeit', ...),\n",
|
|
" ( 'davon mit einer Restlaufzeit', ...)],\n",
|
|
" )\n",
|
|
"Verbindlichkeitenspiegel 2020 Intelligenter Messstellenbetrieb object\n",
|
|
"davon mit einer Restlaufzeit Gesamt in T€ float64\n",
|
|
" bis zu 1 Jahr in T€ float64\n",
|
|
" über 1 Jahr in T€ float64\n",
|
|
" mehr als 5 Jahre in T€ float64\n",
|
|
"dtype: object\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"def parse_tables(report: str) -> list:\n",
|
|
" result = []\n",
|
|
" soup = BeautifulSoup(report, features=\"html.parser\")\n",
|
|
" for table in soup.find_all(\"table\", {\"class\": \"std_table\"}):\n",
|
|
" df = pd.read_html(StringIO(str(table)), flavor=\"bs4\")[0]\n",
|
|
" print(df.columns)\n",
|
|
" print(df.dtypes)\n",
|
|
" result.append(df)\n",
|
|
" return result\n",
|
|
"\n",
|
|
"\n",
|
|
"tables = parse_tables(sample_report)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 81,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead tr th {\n",
|
|
" text-align: left;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr>\n",
|
|
" <th></th>\n",
|
|
" <th>Unnamed: 0_level_0</th>\n",
|
|
" <th colspan=\"2\" halign=\"left\">2020</th>\n",
|
|
" <th colspan=\"2\" halign=\"left\">2019</th>\n",
|
|
" <th colspan=\"2\" halign=\"left\">Veränderungen</th>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th></th>\n",
|
|
" <th>Unnamed: 0_level_1</th>\n",
|
|
" <th>T€</th>\n",
|
|
" <th>%</th>\n",
|
|
" <th>T€</th>\n",
|
|
" <th>%</th>\n",
|
|
" <th>T€</th>\n",
|
|
" <th>%</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>0</th>\n",
|
|
" <td>Umsatzerlöse</td>\n",
|
|
" <td>47.459</td>\n",
|
|
" <td>978</td>\n",
|
|
" <td>45.575</td>\n",
|
|
" <td>970</td>\n",
|
|
" <td>1.884</td>\n",
|
|
" <td>41</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>1</th>\n",
|
|
" <td>Aktivierte Eigenleistungen</td>\n",
|
|
" <td>380.000</td>\n",
|
|
" <td>8</td>\n",
|
|
" <td>400.000</td>\n",
|
|
" <td>9</td>\n",
|
|
" <td>-20.000</td>\n",
|
|
" <td>-50</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>2</th>\n",
|
|
" <td>Sonstige betriebliche Erträge</td>\n",
|
|
" <td>687.000</td>\n",
|
|
" <td>14</td>\n",
|
|
" <td>991.000</td>\n",
|
|
" <td>21</td>\n",
|
|
" <td>-304.000</td>\n",
|
|
" <td>-307</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>3</th>\n",
|
|
" <td>Betriebliche Erträge</td>\n",
|
|
" <td>48.526</td>\n",
|
|
" <td>1000</td>\n",
|
|
" <td>46.966</td>\n",
|
|
" <td>1000</td>\n",
|
|
" <td>1.560</td>\n",
|
|
" <td>33</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>4</th>\n",
|
|
" <td>Materialaufwand</td>\n",
|
|
" <td>34.007</td>\n",
|
|
" <td>701</td>\n",
|
|
" <td>32.647</td>\n",
|
|
" <td>695</td>\n",
|
|
" <td>1.360</td>\n",
|
|
" <td>42</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" Unnamed: 0_level_0 2020 2019 Veränderungen \\\n",
|
|
" Unnamed: 0_level_1 T€ % T€ % T€ \n",
|
|
"0 Umsatzerlöse 47.459 978 45.575 970 1.884 \n",
|
|
"1 Aktivierte Eigenleistungen 380.000 8 400.000 9 -20.000 \n",
|
|
"2 Sonstige betriebliche Erträge 687.000 14 991.000 21 -304.000 \n",
|
|
"3 Betriebliche Erträge 48.526 1000 46.966 1000 1.560 \n",
|
|
"4 Materialaufwand 34.007 701 32.647 695 1.360 \n",
|
|
"\n",
|
|
" \n",
|
|
" % \n",
|
|
"0 41 \n",
|
|
"1 -50 \n",
|
|
"2 -307 \n",
|
|
"3 33 \n",
|
|
"4 42 "
|
|
]
|
|
},
|
|
"execution_count": 81,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"current_table = tables[1]\n",
|
|
"current_table.head()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 82,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import re\n",
|
|
"\n",
|
|
"\n",
|
|
"def cleanse_string(value: str) -> str:\n",
|
|
" if value is not None and isinstance(value, str):\n",
|
|
" return re.sub(r\"(.+\\.).\", \"\", value)\n",
|
|
" return None"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 83,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stderr",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"C:\\Users\\trist\\AppData\\Local\\Temp\\ipykernel_15672\\152097142.py:2: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`\n",
|
|
" current_table.iloc[index][0] = cleanse_string(row[0])\n",
|
|
"C:\\Users\\trist\\AppData\\Local\\Temp\\ipykernel_15672\\152097142.py:2: FutureWarning: Series.__setitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To set a value by position, use `ser.iloc[pos] = value`\n",
|
|
" current_table.iloc[index][0] = cleanse_string(row[0])\n"
|
|
]
|
|
},
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead tr th {\n",
|
|
" text-align: left;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr>\n",
|
|
" <th></th>\n",
|
|
" <th>Unnamed: 0_level_0</th>\n",
|
|
" <th colspan=\"2\" halign=\"left\">2020</th>\n",
|
|
" <th colspan=\"2\" halign=\"left\">2019</th>\n",
|
|
" <th colspan=\"2\" halign=\"left\">Veränderungen</th>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th></th>\n",
|
|
" <th>Unnamed: 0_level_1</th>\n",
|
|
" <th>T€</th>\n",
|
|
" <th>%</th>\n",
|
|
" <th>T€</th>\n",
|
|
" <th>%</th>\n",
|
|
" <th>T€</th>\n",
|
|
" <th>%</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>0</th>\n",
|
|
" <td>Umsatzerlöse</td>\n",
|
|
" <td>47.459</td>\n",
|
|
" <td>978</td>\n",
|
|
" <td>45.575</td>\n",
|
|
" <td>970</td>\n",
|
|
" <td>1.884</td>\n",
|
|
" <td>41</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>1</th>\n",
|
|
" <td>Aktivierte Eigenleistungen</td>\n",
|
|
" <td>380.000</td>\n",
|
|
" <td>8</td>\n",
|
|
" <td>400.000</td>\n",
|
|
" <td>9</td>\n",
|
|
" <td>-20.000</td>\n",
|
|
" <td>-50</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>2</th>\n",
|
|
" <td>Sonstige betriebliche Erträge</td>\n",
|
|
" <td>687.000</td>\n",
|
|
" <td>14</td>\n",
|
|
" <td>991.000</td>\n",
|
|
" <td>21</td>\n",
|
|
" <td>-304.000</td>\n",
|
|
" <td>-307</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>3</th>\n",
|
|
" <td>Betriebliche Erträge</td>\n",
|
|
" <td>48.526</td>\n",
|
|
" <td>1000</td>\n",
|
|
" <td>46.966</td>\n",
|
|
" <td>1000</td>\n",
|
|
" <td>1.560</td>\n",
|
|
" <td>33</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>4</th>\n",
|
|
" <td>Materialaufwand</td>\n",
|
|
" <td>34.007</td>\n",
|
|
" <td>701</td>\n",
|
|
" <td>32.647</td>\n",
|
|
" <td>695</td>\n",
|
|
" <td>1.360</td>\n",
|
|
" <td>42</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" Unnamed: 0_level_0 2020 2019 Veränderungen \\\n",
|
|
" Unnamed: 0_level_1 T€ % T€ % T€ \n",
|
|
"0 Umsatzerlöse 47.459 978 45.575 970 1.884 \n",
|
|
"1 Aktivierte Eigenleistungen 380.000 8 400.000 9 -20.000 \n",
|
|
"2 Sonstige betriebliche Erträge 687.000 14 991.000 21 -304.000 \n",
|
|
"3 Betriebliche Erträge 48.526 1000 46.966 1000 1.560 \n",
|
|
"4 Materialaufwand 34.007 701 32.647 695 1.360 \n",
|
|
"\n",
|
|
" \n",
|
|
" % \n",
|
|
"0 41 \n",
|
|
"1 -50 \n",
|
|
"2 -307 \n",
|
|
"3 33 \n",
|
|
"4 42 "
|
|
]
|
|
},
|
|
"execution_count": 83,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"for index, row in current_table.iterrows():\n",
|
|
" current_table.iloc[index][0] = cleanse_string(row[0])\n",
|
|
"current_table.head()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 84,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"def parse_string_to_float(value) -> float:\n",
|
|
" try:\n",
|
|
" if value is None:\n",
|
|
" return None\n",
|
|
" if isinstance(value, float):\n",
|
|
" return value\n",
|
|
" return float(value.replace(\".\", \"\").replace(\",\", \".\"))\n",
|
|
" except Exception as e:\n",
|
|
" return None\n",
|
|
"\n",
|
|
"\n",
|
|
"def apply_factor(value, factor: float):\n",
|
|
" transformed_value = parse_string_to_float(value)\n",
|
|
" if transformed_value is None or isinstance(transformed_value, str):\n",
|
|
" return None\n",
|
|
" result = transformed_value * factor\n",
|
|
" # print(result)\n",
|
|
" return result"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 85,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead tr th {\n",
|
|
" text-align: left;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr>\n",
|
|
" <th></th>\n",
|
|
" <th>Unnamed: 0_level_0</th>\n",
|
|
" <th colspan=\"2\" halign=\"left\">2020</th>\n",
|
|
" <th colspan=\"2\" halign=\"left\">2019</th>\n",
|
|
" <th colspan=\"2\" halign=\"left\">Veränderungen</th>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th></th>\n",
|
|
" <th>Unnamed: 0_level_1</th>\n",
|
|
" <th>T€</th>\n",
|
|
" <th>%</th>\n",
|
|
" <th>T€</th>\n",
|
|
" <th>%</th>\n",
|
|
" <th>T€</th>\n",
|
|
" <th>%</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>0</th>\n",
|
|
" <td>Umsatzerlöse</td>\n",
|
|
" <td>47459.0</td>\n",
|
|
" <td>978</td>\n",
|
|
" <td>45575.0</td>\n",
|
|
" <td>970</td>\n",
|
|
" <td>1884.0</td>\n",
|
|
" <td>41</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>1</th>\n",
|
|
" <td>Aktivierte Eigenleistungen</td>\n",
|
|
" <td>380000.0</td>\n",
|
|
" <td>8</td>\n",
|
|
" <td>400000.0</td>\n",
|
|
" <td>9</td>\n",
|
|
" <td>-20000.0</td>\n",
|
|
" <td>-50</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>2</th>\n",
|
|
" <td>Sonstige betriebliche Erträge</td>\n",
|
|
" <td>687000.0</td>\n",
|
|
" <td>14</td>\n",
|
|
" <td>991000.0</td>\n",
|
|
" <td>21</td>\n",
|
|
" <td>-304000.0</td>\n",
|
|
" <td>-307</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>3</th>\n",
|
|
" <td>Betriebliche Erträge</td>\n",
|
|
" <td>48526.0</td>\n",
|
|
" <td>1000</td>\n",
|
|
" <td>46966.0</td>\n",
|
|
" <td>1000</td>\n",
|
|
" <td>1560.0</td>\n",
|
|
" <td>33</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>4</th>\n",
|
|
" <td>Materialaufwand</td>\n",
|
|
" <td>34007.0</td>\n",
|
|
" <td>701</td>\n",
|
|
" <td>32647.0</td>\n",
|
|
" <td>695</td>\n",
|
|
" <td>1360.0</td>\n",
|
|
" <td>42</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" Unnamed: 0_level_0 2020 2019 \\\n",
|
|
" Unnamed: 0_level_1 T€ % T€ % \n",
|
|
"0 Umsatzerlöse 47459.0 978 45575.0 970 \n",
|
|
"1 Aktivierte Eigenleistungen 380000.0 8 400000.0 9 \n",
|
|
"2 Sonstige betriebliche Erträge 687000.0 14 991000.0 21 \n",
|
|
"3 Betriebliche Erträge 48526.0 1000 46966.0 1000 \n",
|
|
"4 Materialaufwand 34007.0 701 32647.0 695 \n",
|
|
"\n",
|
|
" Veränderungen \n",
|
|
" T€ % \n",
|
|
"0 1884.0 41 \n",
|
|
"1 -20000.0 -50 \n",
|
|
"2 -304000.0 -307 \n",
|
|
"3 1560.0 33 \n",
|
|
"4 1360.0 42 "
|
|
]
|
|
},
|
|
"execution_count": 85,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"converter = {\n",
|
|
" \"Mio€\": 1 * 10**6,\n",
|
|
" \"Mio\": 1 * 10**6,\n",
|
|
" \"T€\": 1 * 10**3,\n",
|
|
" \"TEUR\": 1 * 10**3,\n",
|
|
" \"EUR\": 1,\n",
|
|
" \"€\": 1,\n",
|
|
"}\n",
|
|
"\n",
|
|
"for column in current_table.columns:\n",
|
|
" if isinstance(column, tuple):\n",
|
|
" for c in column:\n",
|
|
" for x, factor in converter.items():\n",
|
|
" if x in c:\n",
|
|
" current_table[column] = current_table[column].apply(\n",
|
|
" lambda x: apply_factor(x, factor)\n",
|
|
" )\n",
|
|
" next\n",
|
|
" else:\n",
|
|
" for x, factor in converter.items():\n",
|
|
" parts = column.split(\" \")\n",
|
|
" for y in parts:\n",
|
|
" if re.match(x, y):\n",
|
|
" current_table[column] = current_table[column].apply(\n",
|
|
" lambda x: apply_factor(x, factor)\n",
|
|
" )\n",
|
|
" current_table.rename({column: parts[0]}, inplace=True, axis=1)\n",
|
|
" next\n",
|
|
" # print(current_table[column])\n",
|
|
"current_table.dropna(axis=0, how=\"all\", inplace=True)\n",
|
|
"current_table.dropna(axis=1, how=\"all\", inplace=True)\n",
|
|
"current_table.head()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 86,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"Unnamed: 0_level_0 Unnamed: 0_level_1 object\n",
|
|
"2020 T€ float64\n",
|
|
" % int64\n",
|
|
"2019 T€ float64\n",
|
|
" % int64\n",
|
|
"Veränderungen T€ float64\n",
|
|
" % int64\n",
|
|
"dtype: object"
|
|
]
|
|
},
|
|
"execution_count": 86,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"current_table.dtypes"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 87,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Remove columns hosting non-numerics; excl. first column hosting keys\n",
|
|
"columns_to_prune = []\n",
|
|
"for column_index, column_type in enumerate(current_table.dtypes[1:]):\n",
|
|
" if column_type in [\"object\", \"str\"]:\n",
|
|
" columns_to_prune.append(column_index + 1)\n",
|
|
"\n",
|
|
"current_table = current_table.drop(\n",
|
|
" current_table.columns[columns_to_prune], axis=\"columns\"\n",
|
|
")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 88,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead tr th {\n",
|
|
" text-align: left;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr>\n",
|
|
" <th></th>\n",
|
|
" <th>Unnamed: 0_level_0</th>\n",
|
|
" <th colspan=\"2\" halign=\"left\">2020</th>\n",
|
|
" <th colspan=\"2\" halign=\"left\">2019</th>\n",
|
|
" <th colspan=\"2\" halign=\"left\">Veränderungen</th>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th></th>\n",
|
|
" <th>Unnamed: 0_level_1</th>\n",
|
|
" <th>T€</th>\n",
|
|
" <th>%</th>\n",
|
|
" <th>T€</th>\n",
|
|
" <th>%</th>\n",
|
|
" <th>T€</th>\n",
|
|
" <th>%</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>0</th>\n",
|
|
" <td>Umsatzerlöse</td>\n",
|
|
" <td>47459.0</td>\n",
|
|
" <td>978</td>\n",
|
|
" <td>45575.0</td>\n",
|
|
" <td>970</td>\n",
|
|
" <td>1884.0</td>\n",
|
|
" <td>41</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>1</th>\n",
|
|
" <td>Aktivierte Eigenleistungen</td>\n",
|
|
" <td>380000.0</td>\n",
|
|
" <td>8</td>\n",
|
|
" <td>400000.0</td>\n",
|
|
" <td>9</td>\n",
|
|
" <td>-20000.0</td>\n",
|
|
" <td>-50</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>2</th>\n",
|
|
" <td>Sonstige betriebliche Erträge</td>\n",
|
|
" <td>687000.0</td>\n",
|
|
" <td>14</td>\n",
|
|
" <td>991000.0</td>\n",
|
|
" <td>21</td>\n",
|
|
" <td>-304000.0</td>\n",
|
|
" <td>-307</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>3</th>\n",
|
|
" <td>Betriebliche Erträge</td>\n",
|
|
" <td>48526.0</td>\n",
|
|
" <td>1000</td>\n",
|
|
" <td>46966.0</td>\n",
|
|
" <td>1000</td>\n",
|
|
" <td>1560.0</td>\n",
|
|
" <td>33</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>4</th>\n",
|
|
" <td>Materialaufwand</td>\n",
|
|
" <td>34007.0</td>\n",
|
|
" <td>701</td>\n",
|
|
" <td>32647.0</td>\n",
|
|
" <td>695</td>\n",
|
|
" <td>1360.0</td>\n",
|
|
" <td>42</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>5</th>\n",
|
|
" <td>Personalaufwand</td>\n",
|
|
" <td>6258.0</td>\n",
|
|
" <td>129</td>\n",
|
|
" <td>6222.0</td>\n",
|
|
" <td>132</td>\n",
|
|
" <td>36000.0</td>\n",
|
|
" <td>6</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>6</th>\n",
|
|
" <td>Abschreibungen</td>\n",
|
|
" <td>2239.0</td>\n",
|
|
" <td>46</td>\n",
|
|
" <td>2273.0</td>\n",
|
|
" <td>48</td>\n",
|
|
" <td>-34000.0</td>\n",
|
|
" <td>-15</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>7</th>\n",
|
|
" <td>Konzessionsabgabe</td>\n",
|
|
" <td>1331.0</td>\n",
|
|
" <td>27</td>\n",
|
|
" <td>1302.0</td>\n",
|
|
" <td>28</td>\n",
|
|
" <td>29000.0</td>\n",
|
|
" <td>22</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>8</th>\n",
|
|
" <td>Übrige sonstige betriebliche Aufwendungen</td>\n",
|
|
" <td>2100.0</td>\n",
|
|
" <td>43</td>\n",
|
|
" <td>2066.0</td>\n",
|
|
" <td>44</td>\n",
|
|
" <td>34000.0</td>\n",
|
|
" <td>16</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>9</th>\n",
|
|
" <td>Betriebliche Aufwendungen</td>\n",
|
|
" <td>45935.0</td>\n",
|
|
" <td>947</td>\n",
|
|
" <td>44510.0</td>\n",
|
|
" <td>948</td>\n",
|
|
" <td>1425.0</td>\n",
|
|
" <td>32</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>10</th>\n",
|
|
" <td>Ergebnis der betrieblichen Tätigkeit</td>\n",
|
|
" <td>2591.0</td>\n",
|
|
" <td>53</td>\n",
|
|
" <td>2456.0</td>\n",
|
|
" <td>52</td>\n",
|
|
" <td>135000.0</td>\n",
|
|
" <td>55</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>11</th>\n",
|
|
" <td>Finanzergebnis (Ertrags-/Aufwandsaldo)</td>\n",
|
|
" <td>-13000.0</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>-99000.0</td>\n",
|
|
" <td>-2</td>\n",
|
|
" <td>86000.0</td>\n",
|
|
" <td>-869</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>12</th>\n",
|
|
" <td>sonstige Steuern</td>\n",
|
|
" <td>147000.0</td>\n",
|
|
" <td>3</td>\n",
|
|
" <td>164000.0</td>\n",
|
|
" <td>3</td>\n",
|
|
" <td>-17000.0</td>\n",
|
|
" <td>-104</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>13</th>\n",
|
|
" <td>Neutraler Bereich</td>\n",
|
|
" <td>134000.0</td>\n",
|
|
" <td>3</td>\n",
|
|
" <td>65000.0</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>86000.0</td>\n",
|
|
" <td>1062</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>14</th>\n",
|
|
" <td>Jahresüberschuss vor Ertragsteuern</td>\n",
|
|
" <td>2457.0</td>\n",
|
|
" <td>51</td>\n",
|
|
" <td>2391.0</td>\n",
|
|
" <td>51</td>\n",
|
|
" <td>66000.0</td>\n",
|
|
" <td>28</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>15</th>\n",
|
|
" <td>Ertragsteuern</td>\n",
|
|
" <td>796000.0</td>\n",
|
|
" <td>16</td>\n",
|
|
" <td>792000.0</td>\n",
|
|
" <td>17</td>\n",
|
|
" <td>4000.0</td>\n",
|
|
" <td>5</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>16</th>\n",
|
|
" <td>Jahresüberschuss</td>\n",
|
|
" <td>1661.0</td>\n",
|
|
" <td>34</td>\n",
|
|
" <td>1599.0</td>\n",
|
|
" <td>34</td>\n",
|
|
" <td>62000.0</td>\n",
|
|
" <td>39</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" Unnamed: 0_level_0 2020 2019 \\\n",
|
|
" Unnamed: 0_level_1 T€ % T€ % \n",
|
|
"0 Umsatzerlöse 47459.0 978 45575.0 970 \n",
|
|
"1 Aktivierte Eigenleistungen 380000.0 8 400000.0 9 \n",
|
|
"2 Sonstige betriebliche Erträge 687000.0 14 991000.0 21 \n",
|
|
"3 Betriebliche Erträge 48526.0 1000 46966.0 1000 \n",
|
|
"4 Materialaufwand 34007.0 701 32647.0 695 \n",
|
|
"5 Personalaufwand 6258.0 129 6222.0 132 \n",
|
|
"6 Abschreibungen 2239.0 46 2273.0 48 \n",
|
|
"7 Konzessionsabgabe 1331.0 27 1302.0 28 \n",
|
|
"8 Übrige sonstige betriebliche Aufwendungen 2100.0 43 2066.0 44 \n",
|
|
"9 Betriebliche Aufwendungen 45935.0 947 44510.0 948 \n",
|
|
"10 Ergebnis der betrieblichen Tätigkeit 2591.0 53 2456.0 52 \n",
|
|
"11 Finanzergebnis (Ertrags-/Aufwandsaldo) -13000.0 0 -99000.0 -2 \n",
|
|
"12 sonstige Steuern 147000.0 3 164000.0 3 \n",
|
|
"13 Neutraler Bereich 134000.0 3 65000.0 1 \n",
|
|
"14 Jahresüberschuss vor Ertragsteuern 2457.0 51 2391.0 51 \n",
|
|
"15 Ertragsteuern 796000.0 16 792000.0 17 \n",
|
|
"16 Jahresüberschuss 1661.0 34 1599.0 34 \n",
|
|
"\n",
|
|
" Veränderungen \n",
|
|
" T€ % \n",
|
|
"0 1884.0 41 \n",
|
|
"1 -20000.0 -50 \n",
|
|
"2 -304000.0 -307 \n",
|
|
"3 1560.0 33 \n",
|
|
"4 1360.0 42 \n",
|
|
"5 36000.0 6 \n",
|
|
"6 -34000.0 -15 \n",
|
|
"7 29000.0 22 \n",
|
|
"8 34000.0 16 \n",
|
|
"9 1425.0 32 \n",
|
|
"10 135000.0 55 \n",
|
|
"11 86000.0 -869 \n",
|
|
"12 -17000.0 -104 \n",
|
|
"13 86000.0 1062 \n",
|
|
"14 66000.0 28 \n",
|
|
"15 4000.0 5 \n",
|
|
"16 62000.0 39 "
|
|
]
|
|
},
|
|
"execution_count": 88,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"# Prune rows where first columns is None\n",
|
|
"import numpy as np\n",
|
|
"\n",
|
|
"current_table = current_table.replace(to_replace=\"None\", value=np.nan).dropna()\n",
|
|
"current_table"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 89,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stderr",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"C:\\Users\\trist\\AppData\\Local\\Temp\\ipykernel_15672\\340569398.py:3: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`\n",
|
|
" kpis[row[0]] = row[1]\n"
|
|
]
|
|
},
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"{'Umsatzerlöse': 47459.0,\n",
|
|
" 'Aktivierte Eigenleistungen': 380000.0,\n",
|
|
" 'Sonstige betriebliche Erträge': 687000.0,\n",
|
|
" 'Betriebliche Erträge': 48526.0,\n",
|
|
" 'Materialaufwand': 34007.0,\n",
|
|
" 'Personalaufwand': 6258.0,\n",
|
|
" 'Abschreibungen': 2239.0,\n",
|
|
" 'Konzessionsabgabe': 1331.0,\n",
|
|
" 'Übrige sonstige betriebliche Aufwendungen': 2100.0,\n",
|
|
" 'Betriebliche Aufwendungen': 45935.0,\n",
|
|
" 'Ergebnis der betrieblichen Tätigkeit': 2591.0,\n",
|
|
" 'Finanzergebnis (Ertrags-/Aufwandsaldo)': -13000.0,\n",
|
|
" 'sonstige Steuern': 147000.0,\n",
|
|
" 'Neutraler Bereich': 134000.0,\n",
|
|
" 'Jahresüberschuss vor Ertragsteuern': 2457.0,\n",
|
|
" 'Ertragsteuern': 796000.0,\n",
|
|
" 'Jahresüberschuss': 1661.0}"
|
|
]
|
|
},
|
|
"execution_count": 89,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"kpis = {}\n",
|
|
"for _index, row in current_table.iterrows():\n",
|
|
" kpis[row[0]] = row[1]\n",
|
|
"kpis"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 90,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import re\n",
|
|
"\n",
|
|
"\n",
|
|
"def get_bilanz(report: str) -> any:\n",
|
|
" result = {}\n",
|
|
" soup = BeautifulSoup(report, features=\"html.parser\")\n",
|
|
" for pos in [\"Aktiva\", \"Passiva\"]:\n",
|
|
" tag = soup.find(\"b\", string=re.compile(pos))\n",
|
|
" if tag:\n",
|
|
" pos_results = pd.read_html(\n",
|
|
" StringIO(str(tag.findNext(\"table\", {\"class\": \"std_table\"})))\n",
|
|
" )[0]\n",
|
|
" result[pos] = pos_results\n",
|
|
" else:\n",
|
|
" result[pos] = pd.DataFrame([])\n",
|
|
" return result"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 91,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead th {\n",
|
|
" text-align: right;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>Investitionen (netto)</th>\n",
|
|
" <th>2020 T€</th>\n",
|
|
" <th>2019 T€</th>\n",
|
|
" <th>Veränderung T€</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>0</th>\n",
|
|
" <td>Stromversorgung</td>\n",
|
|
" <td>1.372</td>\n",
|
|
" <td>1.553</td>\n",
|
|
" <td>-181.000</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>1</th>\n",
|
|
" <td>Gasversorgung</td>\n",
|
|
" <td>713.000</td>\n",
|
|
" <td>707.000</td>\n",
|
|
" <td>6.000</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>2</th>\n",
|
|
" <td>sonstige Aktivitäten</td>\n",
|
|
" <td>661.000</td>\n",
|
|
" <td>2.605</td>\n",
|
|
" <td>-1.944</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>3</th>\n",
|
|
" <td>Insgesamt</td>\n",
|
|
" <td>2.746</td>\n",
|
|
" <td>4.865</td>\n",
|
|
" <td>-2.119</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" Investitionen (netto) 2020 T€ 2019 T€ Veränderung T€\n",
|
|
"0 Stromversorgung 1.372 1.553 -181.000\n",
|
|
"1 Gasversorgung 713.000 707.000 6.000\n",
|
|
"2 sonstige Aktivitäten 661.000 2.605 -1.944\n",
|
|
"3 Insgesamt 2.746 4.865 -2.119"
|
|
]
|
|
},
|
|
"execution_count": 91,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"bilanz = get_bilanz(sample_report)\n",
|
|
"bilanz[\"Passiva\"].head()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 92,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead tr th {\n",
|
|
" text-align: left;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr>\n",
|
|
" <th></th>\n",
|
|
" <th>Unnamed: 0_level_0</th>\n",
|
|
" <th colspan=\"2\" halign=\"left\">31. Dezember 2020</th>\n",
|
|
" <th colspan=\"2\" halign=\"left\">31. Dezember 2019</th>\n",
|
|
" <th>Veränderung</th>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th></th>\n",
|
|
" <th>Unnamed: 0_level_1</th>\n",
|
|
" <th>T€</th>\n",
|
|
" <th>%</th>\n",
|
|
" <th>T€</th>\n",
|
|
" <th>%</th>\n",
|
|
" <th>T€</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>0</th>\n",
|
|
" <td>Anlagevermögen</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>1</th>\n",
|
|
" <td>Sachanlagen</td>\n",
|
|
" <td>28.919</td>\n",
|
|
" <td>689.0</td>\n",
|
|
" <td>28.812</td>\n",
|
|
" <td>689.0</td>\n",
|
|
" <td>107.000</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>2</th>\n",
|
|
" <td>Finanzanlagen</td>\n",
|
|
" <td>2.667</td>\n",
|
|
" <td>64.0</td>\n",
|
|
" <td>4.189</td>\n",
|
|
" <td>100.0</td>\n",
|
|
" <td>-1.522</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>3</th>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>31.586</td>\n",
|
|
" <td>753.0</td>\n",
|
|
" <td>33.001</td>\n",
|
|
" <td>789.0</td>\n",
|
|
" <td>-1.415</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>4</th>\n",
|
|
" <td>Umlaufvermögen</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" Unnamed: 0_level_0 31. Dezember 2020 31. Dezember 2019 \\\n",
|
|
" Unnamed: 0_level_1 T€ % T€ % \n",
|
|
"0 Anlagevermögen NaN NaN NaN NaN \n",
|
|
"1 Sachanlagen 28.919 689.0 28.812 689.0 \n",
|
|
"2 Finanzanlagen 2.667 64.0 4.189 100.0 \n",
|
|
"3 NaN 31.586 753.0 33.001 789.0 \n",
|
|
"4 Umlaufvermögen NaN NaN NaN NaN \n",
|
|
"\n",
|
|
" Veränderung \n",
|
|
" T€ \n",
|
|
"0 NaN \n",
|
|
"1 107.000 \n",
|
|
"2 -1.522 \n",
|
|
"3 -1.415 \n",
|
|
"4 NaN "
|
|
]
|
|
},
|
|
"execution_count": 92,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"bilanz[\"Aktiva\"].head()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 93,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>Investitionen (netto)</th>\n",
|
|
" <th>2020 T€</th>\n",
|
|
" <th>2019 T€</th>\n",
|
|
" <th>Veränderung T€</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>0</th>\n",
|
|
" <td>Stromversorgung</td>\n",
|
|
" <td>1.372</td>\n",
|
|
" <td>1.553</td>\n",
|
|
" <td>-181.000</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>1</th>\n",
|
|
" <td>Gasversorgung</td>\n",
|
|
" <td>713.000</td>\n",
|
|
" <td>707.000</td>\n",
|
|
" <td>6.000</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>2</th>\n",
|
|
" <td>sonstige Aktivitäten</td>\n",
|
|
" <td>661.000</td>\n",
|
|
" <td>2.605</td>\n",
|
|
" <td>-1.944</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>3</th>\n",
|
|
" <td>Insgesamt</td>\n",
|
|
" <td>2.746</td>\n",
|
|
" <td>4.865</td>\n",
|
|
" <td>-2.119</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>"
|
|
],
|
|
"text/plain": [
|
|
"<IPython.core.display.HTML object>"
|
|
]
|
|
},
|
|
"metadata": {},
|
|
"output_type": "display_data"
|
|
}
|
|
],
|
|
"source": [
|
|
"from IPython.display import display, HTML\n",
|
|
"\n",
|
|
"# Assuming that dataframes df1 and df2 are already defined:\n",
|
|
"display(HTML(bilanz[\"Passiva\"].to_html()))"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 94,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"MultiIndex([('Unnamed: 0_level_0', 'Unnamed: 0_level_1'),\n",
|
|
" ( '2020', 'T€'),\n",
|
|
" ( '2019', 'T€'),\n",
|
|
" ( 'Veränderung', 'T€'),\n",
|
|
" ( 'Veränderung', '%')],\n",
|
|
" )\n",
|
|
"MultiIndex([('Unnamed: 0_level_0', 'Unnamed: 0_level_1'),\n",
|
|
" ( '2020', 'T€'),\n",
|
|
" ( '2020', '%'),\n",
|
|
" ( '2019', 'T€'),\n",
|
|
" ( '2019', '%'),\n",
|
|
" ( 'Veränderungen', 'T€'),\n",
|
|
" ( 'Veränderungen', '%')],\n",
|
|
" )\n",
|
|
"MultiIndex([('Unnamed: 0_level_0', 'gerundet'),\n",
|
|
" ( '2020', 'T€'),\n",
|
|
" ( '2019', 'T€'),\n",
|
|
" ( 'Veränderung', 'T€'),\n",
|
|
" ( 'Veränderung', '%')],\n",
|
|
" )\n",
|
|
"MultiIndex([('Unnamed: 0_level_0', 'unkonsolidiert gerundet'),\n",
|
|
" ( '2020', 'T€'),\n",
|
|
" ( '2019', 'T€'),\n",
|
|
" ( 'Veränderung', 'T€'),\n",
|
|
" ( 'Veränderung', '%')],\n",
|
|
" )\n",
|
|
"MultiIndex([('Jahresüberschuss/Jahresfehlbetrag nach Betriebszweigen', ...),\n",
|
|
" ( '2020', ...),\n",
|
|
" ( '2019', ...),\n",
|
|
" ( 'Veränderung', ...),\n",
|
|
" ( 'Veränderung', ...)],\n",
|
|
" )\n",
|
|
"Index(['Unnamed: 0', '2020 T€', '2019 T€'], dtype='object')\n",
|
|
"MultiIndex([('Unnamed: 0_level_0', 'Unnamed: 0_level_1'),\n",
|
|
" ( '31. Dezember 2020', 'T€'),\n",
|
|
" ( '31. Dezember 2020', '%'),\n",
|
|
" ( '31. Dezember 2019', 'T€'),\n",
|
|
" ( '31. Dezember 2019', '%'),\n",
|
|
" ( 'Veränderung', 'T€')],\n",
|
|
" )\n",
|
|
"Index(['Investitionen (netto)', '2020 T€', '2019 T€', 'Veränderung T€'], dtype='object')\n",
|
|
"Index(['Unnamed: 0', '€', '31.12.2019 in T €'], dtype='object')\n",
|
|
"Index(['Unnamed: 0', '€', '€.1', '31.12.2019 in T €'], dtype='object')\n",
|
|
"Index([0, 1], dtype='int64')\n",
|
|
"Index(['Beteiligung', 'Anteil', 'Eigenkapital der Beteiligungsgesellschaft',\n",
|
|
" 'Eigenkapital der Beteiligungsgesellschaft.1',\n",
|
|
" 'Jahresergebnis der Beteiligungsgesellschaft',\n",
|
|
" 'Jahresergebnis der Beteiligungsgesellschaft.1'],\n",
|
|
" dtype='object')\n",
|
|
"Index(['Beteiligung', 'Anteil', 'Eigenkapital der Beteiligungsgesellschaft',\n",
|
|
" 'Eigenkapital der Beteiligungsgesellschaft.1',\n",
|
|
" 'Jahresergebnis der Beteiligungsgesellschaft',\n",
|
|
" 'Jahresergebnis der Beteiligungsgesellschaft.1'],\n",
|
|
" dtype='object')\n",
|
|
"Index(['Unnamed: 0', '2020', '2019'], dtype='object')\n",
|
|
"MultiIndex([( 'Unnamed: 0_level_0', 'Unnamed: 0_level_1'),\n",
|
|
" ( 'Unnamed: 1_level_0', 'Gesamt in T€'),\n",
|
|
" ('davon mit einer Restlaufzeit', 'bis zu 1 Jahr in T€'),\n",
|
|
" ('davon mit einer Restlaufzeit', 'mehr als 1 Jahr in T€'),\n",
|
|
" ('davon mit einer Restlaufzeit', 'davon über 5 Jahre in T€')],\n",
|
|
" )\n",
|
|
"MultiIndex([('Unnamed: 0_level_0', 'Unnamed: 0_level_1'),\n",
|
|
" ( '2020', '€'),\n",
|
|
" ( '2019', '€'),\n",
|
|
" ( 'Veränderung', '€'),\n",
|
|
" ( 'Veränderung', '%')],\n",
|
|
" )\n",
|
|
"MultiIndex([('Unnamed: 0_level_0', 'Unnamed: 0_level_1'),\n",
|
|
" ( '2020', '€'),\n",
|
|
" ( '2019', '€'),\n",
|
|
" ( 'Veränderung', '€'),\n",
|
|
" ( 'Veränderung', '%')],\n",
|
|
" )\n",
|
|
"Index(['Unnamed: 0', '2020 T€', '2019 T€'], dtype='object')\n",
|
|
"MultiIndex([( 'Unnamed: 0_level_0', 'Unnamed: 0_level_1'),\n",
|
|
" ('Anschaffungs- und Herstellungskosten', 'Stand am 01.01.2020 €'),\n",
|
|
" ('Anschaffungs- und Herstellungskosten', 'Zugang €'),\n",
|
|
" ('Anschaffungs- und Herstellungskosten', 'Abgang €'),\n",
|
|
" ('Anschaffungs- und Herstellungskosten', 'Umbuchung €'),\n",
|
|
" ('Anschaffungs- und Herstellungskosten', 'Stand am 31.12.2020 €')],\n",
|
|
" )\n",
|
|
"MultiIndex([('Unnamed: 0_level_0', 'Unnamed: 0_level_1'),\n",
|
|
" ( 'Abschreibungen', 'Stand am 01.01.2020 €'),\n",
|
|
" ( 'Abschreibungen', 'Zugang €'),\n",
|
|
" ( 'Abschreibungen', 'außerplanm. AfA'),\n",
|
|
" ( 'Abschreibungen', 'Abgang €'),\n",
|
|
" ( 'Abschreibungen', 'Umbuchung €'),\n",
|
|
" ( 'Abschreibungen', 'Stand am 31.12.2020 €')],\n",
|
|
" )\n",
|
|
"MultiIndex([('Unnamed: 0_level_0', 'Unnamed: 0_level_1'),\n",
|
|
" ( 'Restbuchwerte', 'Stand am 31.12.2020 €'),\n",
|
|
" ( 'Restbuchwerte', 'Stand am 31.12.2019 €')],\n",
|
|
" )\n",
|
|
"Index(['Unnamed: 0', 'Elektrizitätsverteilung', '31.12.2019 in T €',\n",
|
|
" 'Gasverteilung', '31.12.2019 in T €.1'],\n",
|
|
" dtype='object')\n",
|
|
"Index(['Unnamed: 0', '€', '31.12.2019 in T €'], dtype='object')\n",
|
|
"Index(['Unnamed: 0', 'Elektrizitätsverteilung €', '31.12.2019 in T €',\n",
|
|
" 'Gasverteilung €', '31.12.2019 in T €.1'],\n",
|
|
" dtype='object')\n",
|
|
"Index(['Unnamed: 0', '€', 'Vorjahr in T €'], dtype='object')\n",
|
|
"MultiIndex([('Verbindlichkeitenspiegel 2020 Elektrizitätsverteilung', ...),\n",
|
|
" ( 'davon mit einer Restlaufzeit', ...),\n",
|
|
" ( 'davon mit einer Restlaufzeit', ...),\n",
|
|
" ( 'davon mit einer Restlaufzeit', ...),\n",
|
|
" ( 'davon mit einer Restlaufzeit', ...)],\n",
|
|
" )\n",
|
|
"MultiIndex([('Verbindlichkeitenspiegel 2020 Gasverteilung', ...),\n",
|
|
" ( 'davon mit einer Restlaufzeit', ...),\n",
|
|
" ( 'davon mit einer Restlaufzeit', ...),\n",
|
|
" ( 'davon mit einer Restlaufzeit', ...),\n",
|
|
" ( 'davon mit einer Restlaufzeit', ...)],\n",
|
|
" )\n",
|
|
"MultiIndex([('Verbindlichkeitenspiegel 2020 Intelligenter', ...),\n",
|
|
" ( 'davon mit einer Restlaufzeit', ...),\n",
|
|
" ( 'davon mit einer Restlaufzeit', ...),\n",
|
|
" ( 'davon mit einer Restlaufzeit', ...),\n",
|
|
" ( 'davon mit einer Restlaufzeit', ...)],\n",
|
|
" )\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"def get_tables(raw_report: str) -> list:\n",
|
|
" soup = BeautifulSoup(raw_report, features=\"html.parser\")\n",
|
|
" tables = soup.find_all(\"table\", {\"class\": \"std_table\"})\n",
|
|
" dfs = []\n",
|
|
" for table in tables:\n",
|
|
" for df in pd.read_html(StringIO(str(table))):\n",
|
|
" dfs.append(df)\n",
|
|
" return dfs\n",
|
|
"\n",
|
|
"\n",
|
|
"for df in get_tables(sample_report):\n",
|
|
" print(df.columns)\n",
|
|
"\n",
|
|
"tables = get_tables(sample_report)"
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.11.3"
|
|
},
|
|
"orig_nbformat": 4
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 2
|
|
}
|