mirror of
https://github.com/fhswf/aki_prj23_transparenzregister.git
synced 2025-05-14 05:28:45 +02:00
refactor: Pull Auditor extraction into Bundesanzeiger utils
This commit is contained in:
parent
f64e0dd96e
commit
1e15656028
@ -18,235 +18,119 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 4,
|
"execution_count": 2,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/html": [
|
||||||
|
"<div>\n",
|
||||||
|
"<style scoped>\n",
|
||||||
|
" .dataframe tbody tr th:only-of-type {\n",
|
||||||
|
" vertical-align: middle;\n",
|
||||||
|
" }\n",
|
||||||
|
"\n",
|
||||||
|
" .dataframe tbody tr th {\n",
|
||||||
|
" vertical-align: top;\n",
|
||||||
|
" }\n",
|
||||||
|
"\n",
|
||||||
|
" .dataframe thead th {\n",
|
||||||
|
" text-align: right;\n",
|
||||||
|
" }\n",
|
||||||
|
"</style>\n",
|
||||||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||||||
|
" <thead>\n",
|
||||||
|
" <tr style=\"text-align: right;\">\n",
|
||||||
|
" <th></th>\n",
|
||||||
|
" <th>date</th>\n",
|
||||||
|
" <th>company</th>\n",
|
||||||
|
" <th>raw_report</th>\n",
|
||||||
|
" <th>jahr</th>\n",
|
||||||
|
" <th>auditors</th>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" </thead>\n",
|
||||||
|
" <tbody>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>0</th>\n",
|
||||||
|
" <td>2023-07-07</td>\n",
|
||||||
|
" <td>Atos IT-Dienstleistung und Beratung GmbH</td>\n",
|
||||||
|
" <td><div class=\"publication_container\">\\n <div cla...</td>\n",
|
||||||
|
" <td>2021</td>\n",
|
||||||
|
" <td>[]</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>2</th>\n",
|
||||||
|
" <td>2023-05-10</td>\n",
|
||||||
|
" <td>Atos IT-Dienstleistung und Beratung GmbH</td>\n",
|
||||||
|
" <td><div class=\"publication_container\">\\n <div cla...</td>\n",
|
||||||
|
" <td>2021</td>\n",
|
||||||
|
" <td>[Auditor(name='Eckhard Lewe', company='Grant T...</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>4</th>\n",
|
||||||
|
" <td>2022-03-25</td>\n",
|
||||||
|
" <td>Atos IT-Dienstleistung und Beratung GmbH</td>\n",
|
||||||
|
" <td><div class=\"publication_container\">\\n <div cla...</td>\n",
|
||||||
|
" <td>2020</td>\n",
|
||||||
|
" <td>[Auditor(name='Eckhard Lewe', company='Warth &...</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>5</th>\n",
|
||||||
|
" <td>2021-03-11</td>\n",
|
||||||
|
" <td>Atos IT-Dienstleistung und Beratung GmbH</td>\n",
|
||||||
|
" <td><div class=\"publication_container\">\\n <div cla...</td>\n",
|
||||||
|
" <td>2019</td>\n",
|
||||||
|
" <td>[Auditor(name='Eckhard Lewe', company='Warth &...</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>6</th>\n",
|
||||||
|
" <td>2020-03-24</td>\n",
|
||||||
|
" <td>Atos IT-Dienstleistung und Beratung GmbH</td>\n",
|
||||||
|
" <td><div class=\"publication_container\">\\n <div cla...</td>\n",
|
||||||
|
" <td>2018</td>\n",
|
||||||
|
" <td>[Auditor(name='Ulrich Diersch', company='Warth...</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" </tbody>\n",
|
||||||
|
"</table>\n",
|
||||||
|
"</div>"
|
||||||
|
],
|
||||||
|
"text/plain": [
|
||||||
|
" date company \\\n",
|
||||||
|
"0 2023-07-07 Atos IT-Dienstleistung und Beratung GmbH \n",
|
||||||
|
"2 2023-05-10 Atos IT-Dienstleistung und Beratung GmbH \n",
|
||||||
|
"4 2022-03-25 Atos IT-Dienstleistung und Beratung GmbH \n",
|
||||||
|
"5 2021-03-11 Atos IT-Dienstleistung und Beratung GmbH \n",
|
||||||
|
"6 2020-03-24 Atos IT-Dienstleistung und Beratung GmbH \n",
|
||||||
|
"\n",
|
||||||
|
" raw_report jahr \\\n",
|
||||||
|
"0 <div class=\"publication_container\">\\n <div cla... 2021 \n",
|
||||||
|
"2 <div class=\"publication_container\">\\n <div cla... 2021 \n",
|
||||||
|
"4 <div class=\"publication_container\">\\n <div cla... 2020 \n",
|
||||||
|
"5 <div class=\"publication_container\">\\n <div cla... 2019 \n",
|
||||||
|
"6 <div class=\"publication_container\">\\n <div cla... 2018 \n",
|
||||||
|
"\n",
|
||||||
|
" auditors \n",
|
||||||
|
"0 [] \n",
|
||||||
|
"2 [Auditor(name='Eckhard Lewe', company='Grant T... \n",
|
||||||
|
"4 [Auditor(name='Eckhard Lewe', company='Warth &... \n",
|
||||||
|
"5 [Auditor(name='Eckhard Lewe', company='Warth &... \n",
|
||||||
|
"6 [Auditor(name='Ulrich Diersch', company='Warth... "
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 2,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"import pandas as pd\n",
|
"import pandas as pd\n",
|
||||||
"from deutschland.bundesanzeiger import Bundesanzeiger"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 5,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [
|
|
||||||
{
|
|
||||||
"name": "stdout",
|
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
"dict_keys(['7e53c9211957c6a4c17264ab86946c3b', 'c1051233030a8e0232523052fd4a2310', '57d129e6fd7505d567fa13919e5e6bdd'])\n"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
|
||||||
"ba = Bundesanzeiger()\n",
|
|
||||||
"reports = ba.get_reports(\n",
|
|
||||||
" \"Volkswagen Economy Service Erdle Bernhard Erdle GmbH\"\n",
|
|
||||||
") # \"Atos IT-Dienstleistung und Beratung GmbH\")\n",
|
|
||||||
"print(reports.keys())"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 6,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"report_contents = []\n",
|
|
||||||
"for key in reports.keys():\n",
|
|
||||||
" report_contents.append(reports[key])"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 7,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [
|
|
||||||
{
|
|
||||||
"data": {
|
|
||||||
"text/html": [
|
|
||||||
"<div>\n",
|
|
||||||
"<style scoped>\n",
|
|
||||||
" .dataframe tbody tr th:only-of-type {\n",
|
|
||||||
" vertical-align: middle;\n",
|
|
||||||
" }\n",
|
|
||||||
"\n",
|
"\n",
|
||||||
" .dataframe tbody tr th {\n",
|
"from aki_prj23_transparenzregister.utils.data_extraction.bundesanzeiger import (\n",
|
||||||
" vertical-align: top;\n",
|
" Bundesanzeiger,\n",
|
||||||
" }\n",
|
")\n",
|
||||||
"\n",
|
"\n",
|
||||||
" .dataframe thead th {\n",
|
"ba_wrapper = Bundesanzeiger()\n",
|
||||||
" text-align: right;\n",
|
"df_reports = ba_wrapper.get_information(\"Atos IT-Dienstleistung und Beratung GmbH\")\n",
|
||||||
" }\n",
|
|
||||||
"</style>\n",
|
|
||||||
"<table border=\"1\" class=\"dataframe\">\n",
|
|
||||||
" <thead>\n",
|
|
||||||
" <tr style=\"text-align: right;\">\n",
|
|
||||||
" <th></th>\n",
|
|
||||||
" <th>date</th>\n",
|
|
||||||
" <th>name</th>\n",
|
|
||||||
" <th>company</th>\n",
|
|
||||||
" <th>report</th>\n",
|
|
||||||
" <th>raw_report</th>\n",
|
|
||||||
" </tr>\n",
|
|
||||||
" </thead>\n",
|
|
||||||
" <tbody>\n",
|
|
||||||
" <tr>\n",
|
|
||||||
" <th>0</th>\n",
|
|
||||||
" <td>2023-07-11</td>\n",
|
|
||||||
" <td>Jahresabschluss zum Geschäftsjahr vom 01.01.20...</td>\n",
|
|
||||||
" <td>Volkswagen Economy Service Erdle Bernhard Erdl...</td>\n",
|
|
||||||
" <td>\\n\\n\\n\\n \\n\\n\\n\\n\\n\\n\\n\\nVolkswagen Economy Se...</td>\n",
|
|
||||||
" <td><div class=\"publication_container\">\\n <div cla...</td>\n",
|
|
||||||
" </tr>\n",
|
|
||||||
" <tr>\n",
|
|
||||||
" <th>1</th>\n",
|
|
||||||
" <td>2023-05-25</td>\n",
|
|
||||||
" <td>Jahresabschluss zum Geschäftsjahr vom 01.01.20...</td>\n",
|
|
||||||
" <td>Volkswagen Economy Service Erdle Bernhard Erdl...</td>\n",
|
|
||||||
" <td>\\n\\n\\n\\n \\n\\n\\n\\n\\n\\n\\n\\nVolkswagen Economy Se...</td>\n",
|
|
||||||
" <td><div class=\"publication_container\">\\n <div cla...</td>\n",
|
|
||||||
" </tr>\n",
|
|
||||||
" <tr>\n",
|
|
||||||
" <th>2</th>\n",
|
|
||||||
" <td>2023-05-24</td>\n",
|
|
||||||
" <td>Jahresabschluss zum Geschäftsjahr vom 01.01.20...</td>\n",
|
|
||||||
" <td>Volkswagen Economy Service Erdle Bernhard Erdl...</td>\n",
|
|
||||||
" <td>\\n\\n\\n\\n \\n\\n\\n\\n\\n\\n\\n\\nVolkswagen Economy Se...</td>\n",
|
|
||||||
" <td><div class=\"publication_container\">\\n <div cla...</td>\n",
|
|
||||||
" </tr>\n",
|
|
||||||
" </tbody>\n",
|
|
||||||
"</table>\n",
|
|
||||||
"</div>"
|
|
||||||
],
|
|
||||||
"text/plain": [
|
|
||||||
" date name \\\n",
|
|
||||||
"0 2023-07-11 Jahresabschluss zum Geschäftsjahr vom 01.01.20... \n",
|
|
||||||
"1 2023-05-25 Jahresabschluss zum Geschäftsjahr vom 01.01.20... \n",
|
|
||||||
"2 2023-05-24 Jahresabschluss zum Geschäftsjahr vom 01.01.20... \n",
|
|
||||||
"\n",
|
|
||||||
" company \\\n",
|
|
||||||
"0 Volkswagen Economy Service Erdle Bernhard Erdl... \n",
|
|
||||||
"1 Volkswagen Economy Service Erdle Bernhard Erdl... \n",
|
|
||||||
"2 Volkswagen Economy Service Erdle Bernhard Erdl... \n",
|
|
||||||
"\n",
|
|
||||||
" report \\\n",
|
|
||||||
"0 \\n\\n\\n\\n \\n\\n\\n\\n\\n\\n\\n\\nVolkswagen Economy Se... \n",
|
|
||||||
"1 \\n\\n\\n\\n \\n\\n\\n\\n\\n\\n\\n\\nVolkswagen Economy Se... \n",
|
|
||||||
"2 \\n\\n\\n\\n \\n\\n\\n\\n\\n\\n\\n\\nVolkswagen Economy Se... \n",
|
|
||||||
"\n",
|
|
||||||
" raw_report \n",
|
|
||||||
"0 <div class=\"publication_container\">\\n <div cla... \n",
|
|
||||||
"1 <div class=\"publication_container\">\\n <div cla... \n",
|
|
||||||
"2 <div class=\"publication_container\">\\n <div cla... "
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"execution_count": 7,
|
|
||||||
"metadata": {},
|
|
||||||
"output_type": "execute_result"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
|
||||||
"df_reports = pd.DataFrame(report_contents)\n",
|
|
||||||
"df_reports.head()"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 8,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [
|
|
||||||
{
|
|
||||||
"data": {
|
|
||||||
"text/html": [
|
|
||||||
"<div>\n",
|
|
||||||
"<style scoped>\n",
|
|
||||||
" .dataframe tbody tr th:only-of-type {\n",
|
|
||||||
" vertical-align: middle;\n",
|
|
||||||
" }\n",
|
|
||||||
"\n",
|
|
||||||
" .dataframe tbody tr th {\n",
|
|
||||||
" vertical-align: top;\n",
|
|
||||||
" }\n",
|
|
||||||
"\n",
|
|
||||||
" .dataframe thead th {\n",
|
|
||||||
" text-align: right;\n",
|
|
||||||
" }\n",
|
|
||||||
"</style>\n",
|
|
||||||
"<table border=\"1\" class=\"dataframe\">\n",
|
|
||||||
" <thead>\n",
|
|
||||||
" <tr style=\"text-align: right;\">\n",
|
|
||||||
" <th></th>\n",
|
|
||||||
" <th>date</th>\n",
|
|
||||||
" <th>name</th>\n",
|
|
||||||
" <th>company</th>\n",
|
|
||||||
" <th>report</th>\n",
|
|
||||||
" <th>raw_report</th>\n",
|
|
||||||
" <th>type</th>\n",
|
|
||||||
" </tr>\n",
|
|
||||||
" </thead>\n",
|
|
||||||
" <tbody>\n",
|
|
||||||
" <tr>\n",
|
|
||||||
" <th>0</th>\n",
|
|
||||||
" <td>2023-07-11</td>\n",
|
|
||||||
" <td>Jahresabschluss zum Geschäftsjahr vom 01.01.20...</td>\n",
|
|
||||||
" <td>Volkswagen Economy Service Erdle Bernhard Erdl...</td>\n",
|
|
||||||
" <td>\\n\\n\\n\\n \\n\\n\\n\\n\\n\\n\\n\\nVolkswagen Economy Se...</td>\n",
|
|
||||||
" <td><div class=\"publication_container\">\\n <div cla...</td>\n",
|
|
||||||
" <td>Jahresabschluss</td>\n",
|
|
||||||
" </tr>\n",
|
|
||||||
" <tr>\n",
|
|
||||||
" <th>1</th>\n",
|
|
||||||
" <td>2023-05-25</td>\n",
|
|
||||||
" <td>Jahresabschluss zum Geschäftsjahr vom 01.01.20...</td>\n",
|
|
||||||
" <td>Volkswagen Economy Service Erdle Bernhard Erdl...</td>\n",
|
|
||||||
" <td>\\n\\n\\n\\n \\n\\n\\n\\n\\n\\n\\n\\nVolkswagen Economy Se...</td>\n",
|
|
||||||
" <td><div class=\"publication_container\">\\n <div cla...</td>\n",
|
|
||||||
" <td>Jahresabschluss</td>\n",
|
|
||||||
" </tr>\n",
|
|
||||||
" <tr>\n",
|
|
||||||
" <th>2</th>\n",
|
|
||||||
" <td>2023-05-24</td>\n",
|
|
||||||
" <td>Jahresabschluss zum Geschäftsjahr vom 01.01.20...</td>\n",
|
|
||||||
" <td>Volkswagen Economy Service Erdle Bernhard Erdl...</td>\n",
|
|
||||||
" <td>\\n\\n\\n\\n \\n\\n\\n\\n\\n\\n\\n\\nVolkswagen Economy Se...</td>\n",
|
|
||||||
" <td><div class=\"publication_container\">\\n <div cla...</td>\n",
|
|
||||||
" <td>Jahresabschluss</td>\n",
|
|
||||||
" </tr>\n",
|
|
||||||
" </tbody>\n",
|
|
||||||
"</table>\n",
|
|
||||||
"</div>"
|
|
||||||
],
|
|
||||||
"text/plain": [
|
|
||||||
" date name \\\n",
|
|
||||||
"0 2023-07-11 Jahresabschluss zum Geschäftsjahr vom 01.01.20... \n",
|
|
||||||
"1 2023-05-25 Jahresabschluss zum Geschäftsjahr vom 01.01.20... \n",
|
|
||||||
"2 2023-05-24 Jahresabschluss zum Geschäftsjahr vom 01.01.20... \n",
|
|
||||||
"\n",
|
|
||||||
" company \\\n",
|
|
||||||
"0 Volkswagen Economy Service Erdle Bernhard Erdl... \n",
|
|
||||||
"1 Volkswagen Economy Service Erdle Bernhard Erdl... \n",
|
|
||||||
"2 Volkswagen Economy Service Erdle Bernhard Erdl... \n",
|
|
||||||
"\n",
|
|
||||||
" report \\\n",
|
|
||||||
"0 \\n\\n\\n\\n \\n\\n\\n\\n\\n\\n\\n\\nVolkswagen Economy Se... \n",
|
|
||||||
"1 \\n\\n\\n\\n \\n\\n\\n\\n\\n\\n\\n\\nVolkswagen Economy Se... \n",
|
|
||||||
"2 \\n\\n\\n\\n \\n\\n\\n\\n\\n\\n\\n\\nVolkswagen Economy Se... \n",
|
|
||||||
"\n",
|
|
||||||
" raw_report type \n",
|
|
||||||
"0 <div class=\"publication_container\">\\n <div cla... Jahresabschluss \n",
|
|
||||||
"1 <div class=\"publication_container\">\\n <div cla... Jahresabschluss \n",
|
|
||||||
"2 <div class=\"publication_container\">\\n <div cla... Jahresabschluss "
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"execution_count": 8,
|
|
||||||
"metadata": {},
|
|
||||||
"output_type": "execute_result"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
|
||||||
"df_reports[\"type\"] = df_reports.name.apply(lambda name: name.split(\" \")[0])\n",
|
|
||||||
"df_reports.head()"
|
"df_reports.head()"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
@ -7,4 +7,4 @@ class Auditor:
|
|||||||
"""Auditor."""
|
"""Auditor."""
|
||||||
|
|
||||||
name: str
|
name: str
|
||||||
company: str
|
company: str | None
|
||||||
|
@ -0,0 +1 @@
|
|||||||
|
"""Everything regarding data extraction from various sources."""
|
@ -0,0 +1,75 @@
|
|||||||
|
"""Fetch data from Bundesanzeiger."""
|
||||||
|
import re
|
||||||
|
|
||||||
|
import pandas as pd
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
from deutschland.bundesanzeiger import Bundesanzeiger as Ba
|
||||||
|
|
||||||
|
from aki_prj23_transparenzregister.models.auditor import Auditor
|
||||||
|
|
||||||
|
|
||||||
|
class Bundesanzeiger:
|
||||||
|
"""Bundesanzeiger wrapper to export relevant information."""
|
||||||
|
|
||||||
|
def __init__(self) -> None:
|
||||||
|
"""Init."""
|
||||||
|
self.__ba = Ba()
|
||||||
|
|
||||||
|
def get_information(self, company_name: str) -> pd.DataFrame:
|
||||||
|
"""Extract relevant information from all found yearly results for the given company.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
company_name (str): Name of the company to search for
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
pd.DataFrame: Result
|
||||||
|
"""
|
||||||
|
reports = self.__ba.get_reports(company_name)
|
||||||
|
report_contents = []
|
||||||
|
for key in reports:
|
||||||
|
report_contents.append(reports[key])
|
||||||
|
|
||||||
|
df_data = pd.DataFrame(report_contents)
|
||||||
|
df_data["type"] = df_data.name.apply(lambda name: name.split(" ")[0])
|
||||||
|
df_data = df_data.loc[df_data.type == "Jahresabschluss"]
|
||||||
|
df_data["jahr"] = df_data.name.apply(
|
||||||
|
lambda name: name.split(" ")[-1].split(".")[-1]
|
||||||
|
)
|
||||||
|
df_data = df_data.drop(["name", "report", "type"], axis=1)
|
||||||
|
|
||||||
|
df_data["auditors"] = df_data.raw_report.apply(self.extract_auditors)
|
||||||
|
return df_data
|
||||||
|
|
||||||
|
def extract_auditor_company(self, report: str) -> str | None:
|
||||||
|
"""Extract the name of an auditor company from the given yearly results report.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
report (str): Yearly results report as raw string
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
str | None: Name of the auditor company if found, otherwise None
|
||||||
|
"""
|
||||||
|
soup = BeautifulSoup(report, features="html.parser")
|
||||||
|
temp = soup.find_all("b")
|
||||||
|
for elem in temp:
|
||||||
|
br = elem.findChildren("br")
|
||||||
|
if len(br) > 0:
|
||||||
|
return elem.text.split("\n")[1].strip()
|
||||||
|
return None
|
||||||
|
|
||||||
|
def extract_auditors(self, report: str) -> list:
|
||||||
|
"""Find the list of auditors involved in the given yearly results report.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
report (str): Yearly results report as raw string
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
list[Auditor]: List of Auditors found in the given report
|
||||||
|
"""
|
||||||
|
auditor_company = self.extract_auditor_company(report)
|
||||||
|
auditor_regex = r"[a-z A-Z,.'-]+, Wirtschaftsprüfer"
|
||||||
|
hits = re.findall(auditor_regex, report)
|
||||||
|
return [
|
||||||
|
Auditor(hit.replace(", Wirtschaftsprüfer", "").lstrip(), auditor_company)
|
||||||
|
for hit in hits
|
||||||
|
]
|
1
tests/utils/data_extraction/__init__.py
Normal file
1
tests/utils/data_extraction/__init__.py
Normal file
@ -0,0 +1 @@
|
|||||||
|
"""Tests for data_extraction."""
|
26
tests/utils/data_extraction/bundesanzeiger_test.py
Normal file
26
tests/utils/data_extraction/bundesanzeiger_test.py
Normal file
@ -0,0 +1,26 @@
|
|||||||
|
from aki_prj23_transparenzregister.utils.data_extraction.bundesanzeiger import (
|
||||||
|
Bundesanzeiger,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def test_extract_auditor_company_no_hits() -> None:
    """Return None for a report that contains no HTML markup at all."""
    # The backslash is written as "\\" because a bare "\O" is an invalid
    # escape sequence in a normal string literal; the runtime text is the same.
    input_data = """
        Nothing to see here \\O_O/
    """
    ba = Bundesanzeiger()
    result = ba.extract_auditor_company(input_data)
    assert result is None
|
||||||
|
|
||||||
|
|
||||||
|
def test_extract_auditor_company() -> None:
    """Return the company line of a bold block that contains a line break tag."""
    company_name = "Korrupte Wirtschaftsprüfer GmbH & Co. KG"
    wrapper = Bundesanzeiger()
    # The company name is the first text line inside the <b> element.
    html = f"""
        <b>
            {company_name}
            <br>
            Max Mustermann
        </b>
    """
    assert wrapper.extract_auditor_company(html) == company_name
|
Loading…
x
Reference in New Issue
Block a user