mirror of
https://github.com/fhswf/aki_prj23_transparenzregister.git
synced 2025-04-22 04:42:54 +02:00
1185 lines
46 KiB
Plaintext
1185 lines
46 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"attachments": {},
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"# Datenextraktion aus dem Bundesanzeiger"
|
|
]
|
|
},
|
|
{
|
|
"attachments": {},
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Vorbereitung"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 255,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead th {\n",
|
|
" text-align: right;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>date</th>\n",
|
|
" <th>company</th>\n",
|
|
" <th>raw_report</th>\n",
|
|
" <th>jahr</th>\n",
|
|
" <th>auditors</th>\n",
|
|
" <th>financial_results</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>1</th>\n",
|
|
" <td>2022-10-21</td>\n",
|
|
" <td>Stadtwerke Haltern am See Gesellschaft mit bes...</td>\n",
|
|
" <td><div class=\"publication_container\">\\n <div cla...</td>\n",
|
|
" <td>2021</td>\n",
|
|
" <td>[Auditor(name='Volker Voelcker', company='Pric...</td>\n",
|
|
" <td>{'revenue': 46275.0, 'net_income': 1757.0, 'eb...</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>3</th>\n",
|
|
" <td>2021-10-12</td>\n",
|
|
" <td>Stadtwerke Haltern am See Gesellschaft mit bes...</td>\n",
|
|
" <td><div class=\"publication_container\">\\n <div cla...</td>\n",
|
|
" <td>2020</td>\n",
|
|
" <td>[Auditor(name='Hubert Ahlers', company='Pricew...</td>\n",
|
|
" <td>{'revenue': 47459.0, 'net_income': 1661.0, 'eb...</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>5</th>\n",
|
|
" <td>2020-12-03</td>\n",
|
|
" <td>Stadtwerke Haltern am See Gesellschaft mit bes...</td>\n",
|
|
" <td><div class=\"publication_container\">\\n <div cla...</td>\n",
|
|
" <td>2019</td>\n",
|
|
" <td>[Auditor(name='Hubert Ahlers', company='Pricew...</td>\n",
|
|
" <td>{'revenue': 45575.0, 'net_income': 1599.0, 'eb...</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>6</th>\n",
|
|
" <td>2020-01-09</td>\n",
|
|
" <td>Stadtwerke Haltern am See Gesellschaft mit bes...</td>\n",
|
|
" <td><div class=\"publication_container\">\\n <div cla...</td>\n",
|
|
" <td>2018</td>\n",
|
|
" <td>[Auditor(name='Hubert Ahlers', company='Pricew...</td>\n",
|
|
" <td>{'revenue': 43898.0, 'net_income': 2043.0, 'eb...</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>7</th>\n",
|
|
" <td>2019-10-10</td>\n",
|
|
" <td>Stadtwerke Haltern am See Gesellschaft mit bes...</td>\n",
|
|
" <td><div class=\"publication_container\">\\n <div cla...</td>\n",
|
|
" <td>2017</td>\n",
|
|
" <td>[]</td>\n",
|
|
" <td>{}</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" date company \\\n",
|
|
"1 2022-10-21 Stadtwerke Haltern am See Gesellschaft mit bes... \n",
|
|
"3 2021-10-12 Stadtwerke Haltern am See Gesellschaft mit bes... \n",
|
|
"5 2020-12-03 Stadtwerke Haltern am See Gesellschaft mit bes... \n",
|
|
"6 2020-01-09 Stadtwerke Haltern am See Gesellschaft mit bes... \n",
|
|
"7 2019-10-10 Stadtwerke Haltern am See Gesellschaft mit bes... \n",
|
|
"\n",
|
|
" raw_report jahr \\\n",
|
|
"1 <div class=\"publication_container\">\\n <div cla... 2021 \n",
|
|
"3 <div class=\"publication_container\">\\n <div cla... 2020 \n",
|
|
"5 <div class=\"publication_container\">\\n <div cla... 2019 \n",
|
|
"6 <div class=\"publication_container\">\\n <div cla... 2018 \n",
|
|
"7 <div class=\"publication_container\">\\n <div cla... 2017 \n",
|
|
"\n",
|
|
" auditors \\\n",
|
|
"1 [Auditor(name='Volker Voelcker', company='Pric... \n",
|
|
"3 [Auditor(name='Hubert Ahlers', company='Pricew... \n",
|
|
"5 [Auditor(name='Hubert Ahlers', company='Pricew... \n",
|
|
"6 [Auditor(name='Hubert Ahlers', company='Pricew... \n",
|
|
"7 [] \n",
|
|
"\n",
|
|
" financial_results \n",
|
|
"1 {'revenue': 46275.0, 'net_income': 1757.0, 'eb... \n",
|
|
"3 {'revenue': 47459.0, 'net_income': 1661.0, 'eb... \n",
|
|
"5 {'revenue': 45575.0, 'net_income': 1599.0, 'eb... \n",
|
|
"6 {'revenue': 43898.0, 'net_income': 2043.0, 'eb... \n",
|
|
"7 {} "
|
|
]
|
|
},
|
|
"execution_count": 255,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"import pandas as pd\n",
|
|
"\n",
|
|
"from aki_prj23_transparenzregister.utils.data_extraction.bundesanzeiger import (\n",
|
|
" Bundesanzeiger,\n",
|
|
")\n",
|
|
"\n",
|
|
"ba_wrapper = Bundesanzeiger()\n",
|
|
"# df_reports = ba_wrapper.get_information(\"Törmer Energy Solar 1 GmbH & Co. KG\", \"\")\n",
|
|
"# df_reports = ba_wrapper.get_information(\"Atos IT-Dienstleistung und Beratung GmbH\", \"\")\n",
|
|
"df_reports = ba_wrapper.get_information(\n",
|
|
" \"Stadtwerke Haltern am See Gesellschaft mit beschränkter Haftung\", \"\"\n",
|
|
")\n",
|
|
"df_reports.head()"
|
|
]
|
|
},
|
|
{
|
|
"attachments": {},
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Datenextraktion"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 256,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"from bs4 import BeautifulSoup\n",
|
|
"from io import StringIO"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 257,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"sample_report = df_reports.iloc[1].raw_report"
|
|
]
|
|
},
|
|
{
|
|
"attachments": {},
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"### Aufsichtsrat"
|
|
]
|
|
},
|
|
{
|
|
"attachments": {},
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"**TODO:** Die Extraktion der Aufsichtsratsmitglieder aus dem Bericht ist noch offen."
|
|
]
|
|
},
|
|
{
|
|
"attachments": {},
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"### Bilanz bzw. GuV"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 258,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"MultiIndex([('Unnamed: 0_level_0', 'Unnamed: 0_level_1'),\n",
|
|
" ( '2020', 'T€'),\n",
|
|
" ( '2019', 'T€'),\n",
|
|
" ( 'Veränderung', 'T€'),\n",
|
|
" ( 'Veränderung', '%')],\n",
|
|
" )\n",
|
|
"Unnamed: 0_level_0 Unnamed: 0_level_1 object\n",
|
|
"2020 T€ float64\n",
|
|
"2019 T€ float64\n",
|
|
"Veränderung T€ int64\n",
|
|
" % int64\n",
|
|
"dtype: object\n",
|
|
"MultiIndex([('Unnamed: 0_level_0', 'Unnamed: 0_level_1'),\n",
|
|
" ( '2020', 'T€'),\n",
|
|
" ( '2020', '%'),\n",
|
|
" ( '2019', 'T€'),\n",
|
|
" ( '2019', '%'),\n",
|
|
" ( 'Veränderungen', 'T€'),\n",
|
|
" ( 'Veränderungen', '%')],\n",
|
|
" )\n",
|
|
"Unnamed: 0_level_0 Unnamed: 0_level_1 object\n",
|
|
"2020 T€ float64\n",
|
|
" % int64\n",
|
|
"2019 T€ float64\n",
|
|
" % int64\n",
|
|
"Veränderungen T€ float64\n",
|
|
" % int64\n",
|
|
"dtype: object\n",
|
|
"MultiIndex([('Unnamed: 0_level_0', 'gerundet'),\n",
|
|
" ( '2020', 'T€'),\n",
|
|
" ( '2019', 'T€'),\n",
|
|
" ( 'Veränderung', 'T€'),\n",
|
|
" ( 'Veränderung', '%')],\n",
|
|
" )\n",
|
|
"Unnamed: 0_level_0 gerundet object\n",
|
|
"2020 T€ float64\n",
|
|
"2019 T€ float64\n",
|
|
"Veränderung T€ float64\n",
|
|
" % int64\n",
|
|
"dtype: object\n",
|
|
"MultiIndex([('Unnamed: 0_level_0', 'unkonsolidiert gerundet'),\n",
|
|
" ( '2020', 'T€'),\n",
|
|
" ( '2019', 'T€'),\n",
|
|
" ( 'Veränderung', 'T€'),\n",
|
|
" ( 'Veränderung', '%')],\n",
|
|
" )\n",
|
|
"Unnamed: 0_level_0 unkonsolidiert gerundet object\n",
|
|
"2020 T€ float64\n",
|
|
"2019 T€ float64\n",
|
|
"Veränderung T€ float64\n",
|
|
" % int64\n",
|
|
"dtype: object\n",
|
|
"MultiIndex([('Jahresüberschuss/Jahresfehlbetrag nach Betriebszweigen', ...),\n",
|
|
" ( '2020', ...),\n",
|
|
" ( '2019', ...),\n",
|
|
" ( 'Veränderung', ...),\n",
|
|
" ( 'Veränderung', ...)],\n",
|
|
" )\n",
|
|
"Jahresüberschuss/Jahresfehlbetrag nach Betriebszweigen Unnamed: 0_level_1 object\n",
|
|
"2020 T€ float64\n",
|
|
"2019 T€ float64\n",
|
|
"Veränderung T€ int64\n",
|
|
" % int64\n",
|
|
"dtype: object\n",
|
|
"Index(['Unnamed: 0', '2020 T€', '2019 T€'], dtype='object')\n",
|
|
"Unnamed: 0 object\n",
|
|
"2020 T€ float64\n",
|
|
"2019 T€ float64\n",
|
|
"dtype: object\n",
|
|
"MultiIndex([('Unnamed: 0_level_0', 'Unnamed: 0_level_1'),\n",
|
|
" ( '31. Dezember 2020', 'T€'),\n",
|
|
" ( '31. Dezember 2020', '%'),\n",
|
|
" ( '31. Dezember 2019', 'T€'),\n",
|
|
" ( '31. Dezember 2019', '%'),\n",
|
|
" ( 'Veränderung', 'T€')],\n",
|
|
" )\n",
|
|
"Unnamed: 0_level_0 Unnamed: 0_level_1 object\n",
|
|
"31. Dezember 2020 T€ float64\n",
|
|
" % float64\n",
|
|
"31. Dezember 2019 T€ float64\n",
|
|
" % float64\n",
|
|
"Veränderung T€ float64\n",
|
|
"dtype: object\n",
|
|
"Index(['Investitionen (netto)', '2020 T€', '2019 T€', 'Veränderung T€'], dtype='object')\n",
|
|
"Investitionen (netto) object\n",
|
|
"2020 T€ float64\n",
|
|
"2019 T€ float64\n",
|
|
"Veränderung T€ float64\n",
|
|
"dtype: object\n",
|
|
"Index(['Unnamed: 0', '€', '31.12.2019 in T €'], dtype='object')\n",
|
|
"Unnamed: 0 object\n",
|
|
"€ object\n",
|
|
"31.12.2019 in T € object\n",
|
|
"dtype: object\n",
|
|
"Index(['Unnamed: 0', '€', '€.1', '31.12.2019 in T €'], dtype='object')\n",
|
|
"Unnamed: 0 object\n",
|
|
"€ object\n",
|
|
"€.1 object\n",
|
|
"31.12.2019 in T € float64\n",
|
|
"dtype: object\n",
|
|
"Index([0, 1], dtype='int64')\n",
|
|
"0 object\n",
|
|
"1 object\n",
|
|
"dtype: object\n",
|
|
"Index(['Beteiligung', 'Anteil', 'Eigenkapital der Beteiligungsgesellschaft',\n",
|
|
" 'Eigenkapital der Beteiligungsgesellschaft.1',\n",
|
|
" 'Jahresergebnis der Beteiligungsgesellschaft',\n",
|
|
" 'Jahresergebnis der Beteiligungsgesellschaft.1'],\n",
|
|
" dtype='object')\n",
|
|
"Beteiligung object\n",
|
|
"Anteil object\n",
|
|
"Eigenkapital der Beteiligungsgesellschaft object\n",
|
|
"Eigenkapital der Beteiligungsgesellschaft.1 object\n",
|
|
"Jahresergebnis der Beteiligungsgesellschaft object\n",
|
|
"Jahresergebnis der Beteiligungsgesellschaft.1 object\n",
|
|
"dtype: object\n",
|
|
"Index(['Beteiligung', 'Anteil', 'Eigenkapital der Beteiligungsgesellschaft',\n",
|
|
" 'Eigenkapital der Beteiligungsgesellschaft.1',\n",
|
|
" 'Jahresergebnis der Beteiligungsgesellschaft',\n",
|
|
" 'Jahresergebnis der Beteiligungsgesellschaft.1'],\n",
|
|
" dtype='object')\n",
|
|
"Beteiligung object\n",
|
|
"Anteil object\n",
|
|
"Eigenkapital der Beteiligungsgesellschaft object\n",
|
|
"Eigenkapital der Beteiligungsgesellschaft.1 object\n",
|
|
"Jahresergebnis der Beteiligungsgesellschaft object\n",
|
|
"Jahresergebnis der Beteiligungsgesellschaft.1 object\n",
|
|
"dtype: object\n",
|
|
"Index(['Unnamed: 0', '2020', '2019'], dtype='object')\n",
|
|
"Unnamed: 0 object\n",
|
|
"2020 object\n",
|
|
"2019 object\n",
|
|
"dtype: object\n",
|
|
"MultiIndex([( 'Unnamed: 0_level_0', 'Unnamed: 0_level_1'),\n",
|
|
" ( 'Unnamed: 1_level_0', 'Gesamt in T€'),\n",
|
|
" ('davon mit einer Restlaufzeit', 'bis zu 1 Jahr in T€'),\n",
|
|
" ('davon mit einer Restlaufzeit', 'mehr als 1 Jahr in T€'),\n",
|
|
" ('davon mit einer Restlaufzeit', 'davon über 5 Jahre in T€')],\n",
|
|
" )\n",
|
|
"Unnamed: 0_level_0 Unnamed: 0_level_1 object\n",
|
|
"Unnamed: 1_level_0 Gesamt in T€ float64\n",
|
|
"davon mit einer Restlaufzeit bis zu 1 Jahr in T€ float64\n",
|
|
" mehr als 1 Jahr in T€ float64\n",
|
|
" davon über 5 Jahre in T€ float64\n",
|
|
"dtype: object\n",
|
|
"MultiIndex([('Unnamed: 0_level_0', 'Unnamed: 0_level_1'),\n",
|
|
" ( '2020', '€'),\n",
|
|
" ( '2019', '€'),\n",
|
|
" ( 'Veränderung', '€'),\n",
|
|
" ( 'Veränderung', '%')],\n",
|
|
" )\n",
|
|
"Unnamed: 0_level_0 Unnamed: 0_level_1 object\n",
|
|
"2020 € object\n",
|
|
"2019 € object\n",
|
|
"Veränderung € object\n",
|
|
" % int64\n",
|
|
"dtype: object\n",
|
|
"MultiIndex([('Unnamed: 0_level_0', 'Unnamed: 0_level_1'),\n",
|
|
" ( '2020', '€'),\n",
|
|
" ( '2019', '€'),\n",
|
|
" ( 'Veränderung', '€'),\n",
|
|
" ( 'Veränderung', '%')],\n",
|
|
" )\n",
|
|
"Unnamed: 0_level_0 Unnamed: 0_level_1 object\n",
|
|
"2020 € object\n",
|
|
"2019 € object\n",
|
|
"Veränderung € object\n",
|
|
" % int64\n",
|
|
"dtype: object\n",
|
|
"Index(['Unnamed: 0', '2020 T€', '2019 T€'], dtype='object')\n",
|
|
"Unnamed: 0 object\n",
|
|
"2020 T€ int64\n",
|
|
"2019 T€ int64\n",
|
|
"dtype: object\n",
|
|
"MultiIndex([( 'Unnamed: 0_level_0', 'Unnamed: 0_level_1'),\n",
|
|
" ('Anschaffungs- und Herstellungskosten', 'Stand am 01.01.2020 €'),\n",
|
|
" ('Anschaffungs- und Herstellungskosten', 'Zugang €'),\n",
|
|
" ('Anschaffungs- und Herstellungskosten', 'Abgang €'),\n",
|
|
" ('Anschaffungs- und Herstellungskosten', 'Umbuchung €'),\n",
|
|
" ('Anschaffungs- und Herstellungskosten', 'Stand am 31.12.2020 €')],\n",
|
|
" )\n",
|
|
"Unnamed: 0_level_0 Unnamed: 0_level_1 object\n",
|
|
"Anschaffungs- und Herstellungskosten Stand am 01.01.2020 € object\n",
|
|
" Zugang € object\n",
|
|
" Abgang € object\n",
|
|
" Umbuchung € object\n",
|
|
" Stand am 31.12.2020 € object\n",
|
|
"dtype: object\n",
|
|
"MultiIndex([('Unnamed: 0_level_0', 'Unnamed: 0_level_1'),\n",
|
|
" ( 'Abschreibungen', 'Stand am 01.01.2020 €'),\n",
|
|
" ( 'Abschreibungen', 'Zugang €'),\n",
|
|
" ( 'Abschreibungen', 'außerplanm. AfA'),\n",
|
|
" ( 'Abschreibungen', 'Abgang €'),\n",
|
|
" ( 'Abschreibungen', 'Umbuchung €'),\n",
|
|
" ( 'Abschreibungen', 'Stand am 31.12.2020 €')],\n",
|
|
" )\n",
|
|
"Unnamed: 0_level_0 Unnamed: 0_level_1 object\n",
|
|
"Abschreibungen Stand am 01.01.2020 € object\n",
|
|
" Zugang € object\n",
|
|
" außerplanm. AfA float64\n",
|
|
" Abgang € object\n",
|
|
" Umbuchung € float64\n",
|
|
" Stand am 31.12.2020 € object\n",
|
|
"dtype: object\n",
|
|
"MultiIndex([('Unnamed: 0_level_0', 'Unnamed: 0_level_1'),\n",
|
|
" ( 'Restbuchwerte', 'Stand am 31.12.2020 €'),\n",
|
|
" ( 'Restbuchwerte', 'Stand am 31.12.2019 €')],\n",
|
|
" )\n",
|
|
"Unnamed: 0_level_0 Unnamed: 0_level_1 object\n",
|
|
"Restbuchwerte Stand am 31.12.2020 € object\n",
|
|
" Stand am 31.12.2019 € object\n",
|
|
"dtype: object\n",
|
|
"Index(['Unnamed: 0', 'Elektrizitätsverteilung', '31.12.2019 in T €',\n",
|
|
" 'Gasverteilung', '31.12.2019 in T €.1'],\n",
|
|
" dtype='object')\n",
|
|
"Unnamed: 0 object\n",
|
|
"Elektrizitätsverteilung object\n",
|
|
"31.12.2019 in T € object\n",
|
|
"Gasverteilung object\n",
|
|
"31.12.2019 in T €.1 object\n",
|
|
"dtype: object\n",
|
|
"Index(['Unnamed: 0', '€', '31.12.2019 in T €'], dtype='object')\n",
|
|
"Unnamed: 0 object\n",
|
|
"€ object\n",
|
|
"31.12.2019 in T € object\n",
|
|
"dtype: object\n",
|
|
"Index(['Unnamed: 0', 'Elektrizitätsverteilung €', '31.12.2019 in T €',\n",
|
|
" 'Gasverteilung €', '31.12.2019 in T €.1'],\n",
|
|
" dtype='object')\n",
|
|
"Unnamed: 0 object\n",
|
|
"Elektrizitätsverteilung € object\n",
|
|
"31.12.2019 in T € float64\n",
|
|
"Gasverteilung € object\n",
|
|
"31.12.2019 in T €.1 float64\n",
|
|
"dtype: object\n",
|
|
"Index(['Unnamed: 0', '€', 'Vorjahr in T €'], dtype='object')\n",
|
|
"Unnamed: 0 object\n",
|
|
"€ object\n",
|
|
"Vorjahr in T € float64\n",
|
|
"dtype: object\n",
|
|
"MultiIndex([('Verbindlichkeitenspiegel 2020 Elektrizitätsverteilung', ...),\n",
|
|
" ( 'davon mit einer Restlaufzeit', ...),\n",
|
|
" ( 'davon mit einer Restlaufzeit', ...),\n",
|
|
" ( 'davon mit einer Restlaufzeit', ...),\n",
|
|
" ( 'davon mit einer Restlaufzeit', ...)],\n",
|
|
" )\n",
|
|
"Verbindlichkeitenspiegel 2020 Elektrizitätsverteilung Unnamed: 0_level_1 object\n",
|
|
"davon mit einer Restlaufzeit Gesamt in T€ float64\n",
|
|
" bis zu 1 Jahr in T€ float64\n",
|
|
" über 1 Jahr in T€ float64\n",
|
|
" mehr als 5 Jahre in T€ float64\n",
|
|
"dtype: object\n",
|
|
"MultiIndex([('Verbindlichkeitenspiegel 2020 Gasverteilung', ...),\n",
|
|
" ( 'davon mit einer Restlaufzeit', ...),\n",
|
|
" ( 'davon mit einer Restlaufzeit', ...),\n",
|
|
" ( 'davon mit einer Restlaufzeit', ...),\n",
|
|
" ( 'davon mit einer Restlaufzeit', ...)],\n",
|
|
" )\n",
|
|
"Verbindlichkeitenspiegel 2020 Gasverteilung Unnamed: 0_level_1 object\n",
|
|
"davon mit einer Restlaufzeit Gesamt in T€ float64\n",
|
|
" bis zu 1 Jahr in T€ float64\n",
|
|
" über 1 Jahr in T€ float64\n",
|
|
" mehr als 5 Jahre in T€ float64\n",
|
|
"dtype: object\n",
|
|
"MultiIndex([('Verbindlichkeitenspiegel 2020 Intelligenter', ...),\n",
|
|
" ( 'davon mit einer Restlaufzeit', ...),\n",
|
|
" ( 'davon mit einer Restlaufzeit', ...),\n",
|
|
" ( 'davon mit einer Restlaufzeit', ...),\n",
|
|
" ( 'davon mit einer Restlaufzeit', ...)],\n",
|
|
" )\n",
|
|
"Verbindlichkeitenspiegel 2020 Intelligenter Messstellenbetrieb object\n",
|
|
"davon mit einer Restlaufzeit Gesamt in T€ float64\n",
|
|
" bis zu 1 Jahr in T€ float64\n",
|
|
" über 1 Jahr in T€ float64\n",
|
|
" mehr als 5 Jahre in T€ float64\n",
|
|
"dtype: object\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"def parse_tables(report: str) -> list:\n",
|
|
" result = []\n",
|
|
" soup = BeautifulSoup(report, features=\"html.parser\")\n",
|
|
" for table in soup.find_all(\"table\", {\"class\": \"std_table\"}):\n",
|
|
" df = pd.read_html(StringIO(str(table)), flavor=\"bs4\")[0]\n",
|
|
" print(df.columns)\n",
|
|
" print(df.dtypes)\n",
|
|
" result.append(df)\n",
|
|
" return result\n",
|
|
"\n",
|
|
"\n",
|
|
"tables = parse_tables(sample_report)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 259,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead tr th {\n",
|
|
" text-align: left;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr>\n",
|
|
" <th></th>\n",
|
|
" <th>Unnamed: 0_level_0</th>\n",
|
|
" <th>2020</th>\n",
|
|
" <th>2019</th>\n",
|
|
" <th colspan=\"2\" halign=\"left\">Veränderung</th>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th></th>\n",
|
|
" <th>Unnamed: 0_level_1</th>\n",
|
|
" <th>T€</th>\n",
|
|
" <th>T€</th>\n",
|
|
" <th>T€</th>\n",
|
|
" <th>%</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>0</th>\n",
|
|
" <td>Jahresüberschuss/Jahresfehlbetrag</td>\n",
|
|
" <td>1.661</td>\n",
|
|
" <td>1.599</td>\n",
|
|
" <td>62</td>\n",
|
|
" <td>39</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>1</th>\n",
|
|
" <td>+ Steuern vom Einkommen und vom Ertrag</td>\n",
|
|
" <td>796.000</td>\n",
|
|
" <td>792.000</td>\n",
|
|
" <td>4</td>\n",
|
|
" <td>5</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>2</th>\n",
|
|
" <td>= EBT</td>\n",
|
|
" <td>2.457</td>\n",
|
|
" <td>2.391</td>\n",
|
|
" <td>66</td>\n",
|
|
" <td>28</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>3</th>\n",
|
|
" <td>+ Finanzergebnis</td>\n",
|
|
" <td>-13.000</td>\n",
|
|
" <td>-99.000</td>\n",
|
|
" <td>86</td>\n",
|
|
" <td>-869</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>4</th>\n",
|
|
" <td>= EBIT</td>\n",
|
|
" <td>2.444</td>\n",
|
|
" <td>2.292</td>\n",
|
|
" <td>152</td>\n",
|
|
" <td>66</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" Unnamed: 0_level_0 2020 2019 Veränderung \n",
|
|
" Unnamed: 0_level_1 T€ T€ T€ %\n",
|
|
"0 Jahresüberschuss/Jahresfehlbetrag 1.661 1.599 62 39\n",
|
|
"1 + Steuern vom Einkommen und vom Ertrag 796.000 792.000 4 5\n",
|
|
"2 = EBT 2.457 2.391 66 28\n",
|
|
"3 + Finanzergebnis -13.000 -99.000 86 -869\n",
|
|
"4 = EBIT 2.444 2.292 152 66"
|
|
]
|
|
},
|
|
"execution_count": 259,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"current_table = tables[0]\n",
|
|
"current_table.head()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 260,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import re\n",
|
|
"\n",
|
|
"\n",
|
|
"def cleanse_string(value: str) -> str:\n",
|
|
" if value is not None and isinstance(value, str):\n",
|
|
" return re.sub(r\"(.+\\.).\", \"\", value)\n",
|
|
" return None"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 261,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stderr",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"C:\\Users\\trist\\AppData\\Local\\Temp\\ipykernel_24300\\152097142.py:2: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`\n",
|
|
" current_table.iloc[index][0] = cleanse_string(row[0])\n",
|
|
"C:\\Users\\trist\\AppData\\Local\\Temp\\ipykernel_24300\\152097142.py:2: FutureWarning: Series.__setitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To set a value by position, use `ser.iloc[pos] = value`\n",
|
|
" current_table.iloc[index][0] = cleanse_string(row[0])\n"
|
|
]
|
|
},
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead tr th {\n",
|
|
" text-align: left;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr>\n",
|
|
" <th></th>\n",
|
|
" <th>Unnamed: 0_level_0</th>\n",
|
|
" <th>2020</th>\n",
|
|
" <th>2019</th>\n",
|
|
" <th colspan=\"2\" halign=\"left\">Veränderung</th>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th></th>\n",
|
|
" <th>Unnamed: 0_level_1</th>\n",
|
|
" <th>T€</th>\n",
|
|
" <th>T€</th>\n",
|
|
" <th>T€</th>\n",
|
|
" <th>%</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>0</th>\n",
|
|
" <td>Jahresüberschuss/Jahresfehlbetrag</td>\n",
|
|
" <td>1.661</td>\n",
|
|
" <td>1.599</td>\n",
|
|
" <td>62</td>\n",
|
|
" <td>39</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>1</th>\n",
|
|
" <td>+ Steuern vom Einkommen und vom Ertrag</td>\n",
|
|
" <td>796.000</td>\n",
|
|
" <td>792.000</td>\n",
|
|
" <td>4</td>\n",
|
|
" <td>5</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>2</th>\n",
|
|
" <td>= EBT</td>\n",
|
|
" <td>2.457</td>\n",
|
|
" <td>2.391</td>\n",
|
|
" <td>66</td>\n",
|
|
" <td>28</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>3</th>\n",
|
|
" <td>+ Finanzergebnis</td>\n",
|
|
" <td>-13.000</td>\n",
|
|
" <td>-99.000</td>\n",
|
|
" <td>86</td>\n",
|
|
" <td>-869</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>4</th>\n",
|
|
" <td>= EBIT</td>\n",
|
|
" <td>2.444</td>\n",
|
|
" <td>2.292</td>\n",
|
|
" <td>152</td>\n",
|
|
" <td>66</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" Unnamed: 0_level_0 2020 2019 Veränderung \n",
|
|
" Unnamed: 0_level_1 T€ T€ T€ %\n",
|
|
"0 Jahresüberschuss/Jahresfehlbetrag 1.661 1.599 62 39\n",
|
|
"1 + Steuern vom Einkommen und vom Ertrag 796.000 792.000 4 5\n",
|
|
"2 = EBT 2.457 2.391 66 28\n",
|
|
"3 + Finanzergebnis -13.000 -99.000 86 -869\n",
|
|
"4 = EBIT 2.444 2.292 152 66"
|
|
]
|
|
},
|
|
"execution_count": 261,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"for index, row in current_table.iterrows():\n",
|
|
" current_table.iloc[index][0] = cleanse_string(row[0])\n",
|
|
"current_table.head()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 262,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"def parse_string_to_float(value) -> float:\n",
|
|
" try:\n",
|
|
" if value is None:\n",
|
|
" return None\n",
|
|
" # if isinstance(value, float):\n",
|
|
" # return value\n",
|
|
" return float(str(value).replace(\".\", \"\").replace(\",\", \".\"))\n",
|
|
" except Exception as e:\n",
|
|
" return None\n",
|
|
"\n",
|
|
"\n",
|
|
"def apply_factor(value, factor: float):\n",
|
|
" transformed_value = parse_string_to_float(value)\n",
|
|
" if transformed_value is None or isinstance(transformed_value, str):\n",
|
|
" return None\n",
|
|
" result = transformed_value * factor\n",
|
|
" # print(result)\n",
|
|
" return result"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 263,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead tr th {\n",
|
|
" text-align: left;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr>\n",
|
|
" <th></th>\n",
|
|
" <th>Unnamed: 0_level_0</th>\n",
|
|
" <th>2020</th>\n",
|
|
" <th>2019</th>\n",
|
|
" <th colspan=\"2\" halign=\"left\">Veränderung</th>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th></th>\n",
|
|
" <th>Unnamed: 0_level_1</th>\n",
|
|
" <th>T€</th>\n",
|
|
" <th>T€</th>\n",
|
|
" <th>T€</th>\n",
|
|
" <th>%</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>0</th>\n",
|
|
" <td>Jahresüberschuss/Jahresfehlbetrag</td>\n",
|
|
" <td>1661000.0</td>\n",
|
|
" <td>1599000.0</td>\n",
|
|
" <td>62000.0</td>\n",
|
|
" <td>39</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>1</th>\n",
|
|
" <td>+ Steuern vom Einkommen und vom Ertrag</td>\n",
|
|
" <td>7960000.0</td>\n",
|
|
" <td>7920000.0</td>\n",
|
|
" <td>4000.0</td>\n",
|
|
" <td>5</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>2</th>\n",
|
|
" <td>= EBT</td>\n",
|
|
" <td>2457000.0</td>\n",
|
|
" <td>2391000.0</td>\n",
|
|
" <td>66000.0</td>\n",
|
|
" <td>28</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>3</th>\n",
|
|
" <td>+ Finanzergebnis</td>\n",
|
|
" <td>-130000.0</td>\n",
|
|
" <td>-990000.0</td>\n",
|
|
" <td>86000.0</td>\n",
|
|
" <td>-869</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>4</th>\n",
|
|
" <td>= EBIT</td>\n",
|
|
" <td>2444000.0</td>\n",
|
|
" <td>2292000.0</td>\n",
|
|
" <td>152000.0</td>\n",
|
|
" <td>66</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" Unnamed: 0_level_0 2020 2019 Veränderung \\\n",
|
|
" Unnamed: 0_level_1 T€ T€ T€ \n",
|
|
"0 Jahresüberschuss/Jahresfehlbetrag 1661000.0 1599000.0 62000.0 \n",
|
|
"1 + Steuern vom Einkommen und vom Ertrag 7960000.0 7920000.0 4000.0 \n",
|
|
"2 = EBT 2457000.0 2391000.0 66000.0 \n",
|
|
"3 + Finanzergebnis -130000.0 -990000.0 86000.0 \n",
|
|
"4 = EBIT 2444000.0 2292000.0 152000.0 \n",
|
|
"\n",
|
|
" \n",
|
|
" % \n",
|
|
"0 39 \n",
|
|
"1 5 \n",
|
|
"2 28 \n",
|
|
"3 -869 \n",
|
|
"4 66 "
|
|
]
|
|
},
|
|
"execution_count": 263,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"converter = {\n",
|
|
" \"Mio€\": 1 * 10**6,\n",
|
|
" \"Mio\": 1 * 10**6,\n",
|
|
" \"T€\": 1 * 10**3,\n",
|
|
" \"TEUR\": 1 * 10**3,\n",
|
|
" \"EUR\": 1,\n",
|
|
" \"€\": 1,\n",
|
|
"}\n",
|
|
"\n",
|
|
"for column in current_table.columns[1:]:\n",
|
|
" if isinstance(column, tuple):\n",
|
|
" for c in column:\n",
|
|
" for x, factor in converter.items():\n",
|
|
" if x in c:\n",
|
|
" current_table[column] = current_table[column].apply(\n",
|
|
" lambda x: apply_factor(x, factor)\n",
|
|
" )\n",
|
|
" break\n",
|
|
" else:\n",
|
|
" for x, factor in converter.items():\n",
|
|
" parts = str(column).split(\" \")\n",
|
|
" for y in parts:\n",
|
|
" if re.match(x, y):\n",
|
|
" current_table[column] = current_table[column].apply(\n",
|
|
" lambda x: apply_factor(x, factor)\n",
|
|
" )\n",
|
|
" current_table.rename({column: parts[0]}, inplace=True, axis=1)\n",
|
|
" break\n",
|
|
"\n",
|
|
"current_table.dropna(axis=0, how=\"all\", inplace=True)\n",
|
|
"current_table.dropna(axis=1, how=\"all\", inplace=True)\n",
|
|
"current_table.head()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 264,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"Unnamed: 0_level_0 Unnamed: 0_level_1 object\n",
|
|
"2020 T€ float64\n",
|
|
"2019 T€ float64\n",
|
|
"Veränderung T€ float64\n",
|
|
" % int64\n",
|
|
"dtype: object"
|
|
]
|
|
},
|
|
"execution_count": 264,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"current_table.dtypes"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 265,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Remove columns hosting non-numerics; excl. first column hosting keys\n",
|
|
"columns_to_prune = []\n",
|
|
"for column_index, column_type in enumerate(current_table.dtypes[1:]):\n",
|
|
" if column_type in [\"object\", \"str\"]:\n",
|
|
" columns_to_prune.append(column_index + 1)\n",
|
|
"\n",
|
|
"current_table = current_table.drop(\n",
|
|
" current_table.columns[columns_to_prune], axis=\"columns\"\n",
|
|
")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 266,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead tr th {\n",
|
|
" text-align: left;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr>\n",
|
|
" <th></th>\n",
|
|
" <th>Unnamed: 0_level_0</th>\n",
|
|
" <th>2020</th>\n",
|
|
" <th>2019</th>\n",
|
|
" <th colspan=\"2\" halign=\"left\">Veränderung</th>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th></th>\n",
|
|
" <th>Unnamed: 0_level_1</th>\n",
|
|
" <th>T€</th>\n",
|
|
" <th>T€</th>\n",
|
|
" <th>T€</th>\n",
|
|
" <th>%</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>0</th>\n",
|
|
" <td>Jahresüberschuss/Jahresfehlbetrag</td>\n",
|
|
" <td>1661000.0</td>\n",
|
|
" <td>1599000.0</td>\n",
|
|
" <td>62000.0</td>\n",
|
|
" <td>39</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>1</th>\n",
|
|
" <td>+ Steuern vom Einkommen und vom Ertrag</td>\n",
|
|
" <td>7960000.0</td>\n",
|
|
" <td>7920000.0</td>\n",
|
|
" <td>4000.0</td>\n",
|
|
" <td>5</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>2</th>\n",
|
|
" <td>= EBT</td>\n",
|
|
" <td>2457000.0</td>\n",
|
|
" <td>2391000.0</td>\n",
|
|
" <td>66000.0</td>\n",
|
|
" <td>28</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>3</th>\n",
|
|
" <td>+ Finanzergebnis</td>\n",
|
|
" <td>-130000.0</td>\n",
|
|
" <td>-990000.0</td>\n",
|
|
" <td>86000.0</td>\n",
|
|
" <td>-869</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>4</th>\n",
|
|
" <td>= EBIT</td>\n",
|
|
" <td>2444000.0</td>\n",
|
|
" <td>2292000.0</td>\n",
|
|
" <td>152000.0</td>\n",
|
|
" <td>66</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>5</th>\n",
|
|
" <td>+ Abschreibungsaufwand</td>\n",
|
|
" <td>2239000.0</td>\n",
|
|
" <td>2273000.0</td>\n",
|
|
" <td>-34000.0</td>\n",
|
|
" <td>-15</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>6</th>\n",
|
|
" <td>= EBITDA</td>\n",
|
|
" <td>4683000.0</td>\n",
|
|
" <td>4565000.0</td>\n",
|
|
" <td>118000.0</td>\n",
|
|
" <td>26</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" Unnamed: 0_level_0 2020 2019 Veränderung \\\n",
|
|
" Unnamed: 0_level_1 T€ T€ T€ \n",
|
|
"0 Jahresüberschuss/Jahresfehlbetrag 1661000.0 1599000.0 62000.0 \n",
|
|
"1 + Steuern vom Einkommen und vom Ertrag 7960000.0 7920000.0 4000.0 \n",
|
|
"2 = EBT 2457000.0 2391000.0 66000.0 \n",
|
|
"3 + Finanzergebnis -130000.0 -990000.0 86000.0 \n",
|
|
"4 = EBIT 2444000.0 2292000.0 152000.0 \n",
|
|
"5 + Abschreibungsaufwand 2239000.0 2273000.0 -34000.0 \n",
|
|
"6 = EBITDA 4683000.0 4565000.0 118000.0 \n",
|
|
"\n",
|
|
" \n",
|
|
" % \n",
|
|
"0 39 \n",
|
|
"1 5 \n",
|
|
"2 28 \n",
|
|
"3 -869 \n",
|
|
"4 66 \n",
|
|
"5 -15 \n",
|
|
"6 26 "
|
|
]
|
|
},
|
|
"execution_count": 266,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"# Turn every literal `None` string into a real missing value (NaN), then drop\n",
|
|
"# each row that still has a missing value in any column (dropna() is called\n",
|
|
"# without a subset, so all columns are checked, not only the first one).\n",
|
|
"import numpy as np\n",
|
|
"\n",
|
|
"current_table = (\n",
|
|
"    current_table\n",
|
|
"    .replace(to_replace=\"None\", value=np.nan)\n",
|
|
"    .dropna()\n",
|
|
")\n",
|
|
"current_table"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 267,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stderr",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"C:\\Users\\trist\\AppData\\Local\\Temp\\ipykernel_24300\\1758297134.py:8: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`\n",
|
|
" row[0] = re.sub(exp, '', row[0]).strip()\n",
|
|
"C:\\Users\\trist\\AppData\\Local\\Temp\\ipykernel_24300\\1758297134.py:8: FutureWarning: Series.__setitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To set a value by position, use `ser.iloc[pos] = value`\n",
|
|
" row[0] = re.sub(exp, '', row[0]).strip()\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"# Strip enumeration markers and arithmetic symbols from the labels in the first\n",
|
|
"# column and write the cleaned labels back into current_table.\n",
|
|
"#\n",
|
|
"# 1st pattern: a leading alphanumeric marker followed by `.` or `)` and a space.\n",
|
|
"# 2nd pattern: every `+`, `=`, `-` or `_` character; it is not anchored, so these\n",
|
|
"# characters are removed inside a label as well, not only in front of it.\n",
|
|
"exps = [r\"^[0-9a-zA-Z]+[\\.\\)] \", r\"[\\+\\=\\-\\_]\"]\n",
|
|
"\n",
|
|
"# The rows yielded by iterrows() may be copies, so assigning to them is not\n",
|
|
"# guaranteed to change the table. Clean the whole column at once and store it via\n",
|
|
"# .iloc, which also avoids the deprecated integer-key lookup on a Series.\n",
|
|
"labels = current_table.iloc[:, 0]\n",
|
|
"for exp in exps:\n",
|
|
"    # Same order as before: apply one pattern, trim whitespace, then the next.\n",
|
|
"    labels = labels.str.replace(exp, \"\", regex=True).str.strip()\n",
|
|
"current_table.iloc[:, 0] = labels"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 268,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stderr",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"C:\\Users\\trist\\AppData\\Local\\Temp\\ipykernel_24300\\2923576447.py:8: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`\n",
|
|
" name_cleansed = row[0]\n",
|
|
"C:\\Users\\trist\\AppData\\Local\\Temp\\ipykernel_24300\\2923576447.py:12: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`\n",
|
|
" kpis[name_cleansed] = row[1]\n"
|
|
]
|
|
},
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"{'Jahresüberschuss/Jahresfehlbetrag': 1661000.0,\n",
|
|
" 'Steuern vom Einkommen und vom Ertrag': 7960000.0,\n",
|
|
" 'EBT': 2457000.0,\n",
|
|
" 'Finanzergebnis': -130000.0,\n",
|
|
" 'EBIT': 2444000.0,\n",
|
|
" 'Abschreibungsaufwand': 2239000.0,\n",
|
|
" 'EBITDA': 4683000.0}"
|
|
]
|
|
},
|
|
"execution_count": 268,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"# Build a {cleaned label: value} mapping from the first two columns of the table.\n",
|
|
"kpis = {}\n",
|
|
"\n",
|
|
"# 1st pattern: a leading alphanumeric marker followed by `.` or `)` and a space.\n",
|
|
"# 2nd pattern: every `+`, `=`, `-` or `_` character; it is not anchored, so these\n",
|
|
"# characters are removed inside a label as well, not only in front of it.\n",
|
|
"exps = [r\"^[0-9a-zA-Z]+[\\.\\)] \", r\"[\\+\\=\\-\\_]\"]\n",
|
|
"\n",
|
|
"# Select both columns by position with .iloc. The former row[0] / row[1] lookups\n",
|
|
"# on the Series from iterrows() relied on the deprecated integer-key fallback\n",
|
|
"# (see the FutureWarning) and will be treated as label lookups in future pandas.\n",
|
|
"labels = current_table.iloc[:, 0]\n",
|
|
"values = current_table.iloc[:, 1]\n",
|
|
"for label, value in zip(labels, values):\n",
|
|
"    name_cleansed = label\n",
|
|
"    for exp in exps:\n",
|
|
"        name_cleansed = re.sub(exp, \"\", name_cleansed).strip()\n",
|
|
"    # A later row overwrites an earlier one if both clean to the same name.\n",
|
|
"    kpis[name_cleansed] = value\n",
|
|
"kpis"
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.11.3"
|
|
},
|
|
"orig_nbformat": 4
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 2
|
|
}
|