246 lines
6.7 KiB
Python

"""Tests if the bundesanzeiger can be accessed and read."""
from unittest.mock import Mock, patch
import pandas as pd
from aki_prj23_transparenzregister.utils.data_extraction.bundesanzeiger import (
Bundesanzeiger,
)
from aki_prj23_transparenzregister.utils.enum_types import FinancialKPIEnum
def test_extract_auditor_company_no_hits() -> None:
    """Expect ``None`` when the only bold block just mentions the queried company."""
    markup = """
<b>
Mega GmbH
Nothing to see here
</b>
"""
    auditor = Bundesanzeiger().extract_auditor_company(markup, "Mega GmbH")
    assert auditor is None
def test_extract_auditor_company_self_referencing() -> None:
    """Expect the second bold block's name, not the queried company's own name.

    The markup holds two bold blocks: the first names the company that is
    passed as the second argument, the second names a different company.
    """
    own_name = "Mega GmbH"
    expected_auditor = "Super AG"
    markup = f"""
<b>
{own_name}
<br>
Nothing to see here
</b>
<b>
{expected_auditor}
<br>
Nothing to see here
</b>
"""
    extractor = Bundesanzeiger()
    assert extractor.extract_auditor_company(markup, own_name) == expected_auditor
def test_extract_auditor_company() -> None:
    """Expect the bold block's first entry to be returned as the auditor.

    The queried company ("Super AG") does not occur in the markup at all.
    """
    auditor_name = "Korrupte Wirtschaftsprüfer GmbH & Co. KG"
    markup = f"""
<b>
{auditor_name}
<br>
Max Mustermann
</b>
"""
    extracted = Bundesanzeiger().extract_auditor_company(markup, "Super AG")
    assert extracted == auditor_name
def test_extract_kpis() -> None:
    """Expect ``__extract_kpis__`` to return both amounts from the text as floats.

    The text writes the amounts as "10.000,43" and "42,00"; the result must
    hold 10000.43 under the NET_INCOME key and 42.0 under the LIABILITIES key.
    """
    text = """
Die Prj23_Transparenzregister GmbH erwirtschaftete einen Jahresüberschuss 10.000,43 €.
Des Weiteren sanken die Gesamtverbindlichkeiten 42,00 €
"""
    kpis = Bundesanzeiger().__extract_kpis__(text)
    # Checked in the same order as listed: net income first, then liabilities.
    expectations = (
        (FinancialKPIEnum.NET_INCOME.value, 10000.43),
        (FinancialKPIEnum.LIABILITIES.value, 42.00),
    )
    for kpi_key, expected_amount in expectations:
        assert kpis[kpi_key] == expected_amount
def test_extract_financial_results() -> None:
    """Expect ``extract_financial_results`` to find both amounts inside HTML markup.

    The amounts sit in a ``<br>`` and a ``<b>`` element, with an ``<h2>``
    placeholder between them; the result must hold 10000.43 under the
    NET_INCOME key and 42.0 under the LIABILITIES key.
    """
    markup = """
<br>
Die Prj23_Transparenzregister GmbH erwirtschaftete einen Jahresüberschuss 10.000,43 €.
</br>
<h2>Dies ist ein Platzhalter, der ignoriert werden soll</h2>
<b>Des Weiteren sanken die Gesamtverbindlichkeiten 42,00 €</b>
"""
    expected_net_income = 10000.43
    expected_liabilities = 42.00
    extractor = Bundesanzeiger()
    financials = extractor.extract_financial_results(markup)
    assert financials[FinancialKPIEnum.NET_INCOME.value] == expected_net_income
    assert financials[FinancialKPIEnum.LIABILITIES.value] == expected_liabilities
def test_filter_reports() -> None:
    """Expect ``filter_reports`` to keep one of two rows and expose ``jahr`` as "1998".

    The input frame has a "Bedienungsanleitung" row and a "Jahresabschluss"
    row covering 01.01.1998 to 31.12.1998 (presumably the latter is the one
    that is kept).
    """
    reports = pd.DataFrame(
        [
            {"name": "Bedienungsanleitung", "report": "", "raw_report": ""},
            {
                "name": "Jahresabschluss vom 01.01.1998 bis zum 31.12.1998",
                "report": "",
                "raw_report": "",
            },
        ]
    )
    filtered = Bundesanzeiger().filter_reports(reports)
    assert len(filtered) == 1
    first_row = filtered.iloc[0]
    assert first_row.jahr == "1998"
@patch(
    "aki_prj23_transparenzregister.utils.data_extraction.bundesanzeiger.Ba.get_reports"
)
def test_get_information(mock_bundesanzeiger: Mock) -> None:
    """Expect ``get_information`` to return one entry for the two mocked reports.

    ``Ba.get_reports`` is patched to return a "Bedienungsanleitung" report and
    a "Jahresabschluss 01.01.1998" report.
    """
    manual_report = {
        "name": "Bedienungsanleitung",
        "report": "",
        "company": "",
        "raw_report": "",
    }
    annual_report = {
        "name": "Jahresabschluss 01.01.1998",
        "report": "",
        "company": "PRJ 23 Transparenzregister GmbH",
        "raw_report": "",
    }
    mock_bundesanzeiger.return_value = {"1": manual_report, "2": annual_report}
    information = Bundesanzeiger().get_information(
        "PRJ 23 Transparenzregister GmbH", "Iserlohn"
    )
    assert len(information) == 1
@patch(
    "aki_prj23_transparenzregister.utils.data_extraction.bundesanzeiger.Ba.get_reports"
)
def test_get_information_no_results(mock_bundesanzeiger: Mock) -> None:
    """Expect an empty result when the patched ``Ba.get_reports`` returns no reports."""
    mock_bundesanzeiger.return_value = {}
    information = Bundesanzeiger().get_information(
        "PRJ 23 Transparenzregister GmbH", "Iserlohn"
    )
    assert len(information) == 0
def test_extract_tables_from_reports() -> None:
    """Expect exactly one table to be extracted from the markup.

    The markup has an empty ``<table>`` without a class and a populated
    ``<table class="std_table">`` nested in a ``<div>``.
    """
    markup = """
<table>
</table>
<div>
Möge die Macht mir dir sein
<table class="std_table">
<tr>
<th>Column A</th>
<th>Column B</th>
</tr>
<tr>
<td>42</td>
<td>4711</td>
</tr>
</table>
</div>
"""
    tables = Bundesanzeiger().__extract_tables_from_report__(markup)
    assert len(tables) == 1
def test_parse_tables_to_kpis() -> None:
    """Expect ``parse_tables_to_kpis`` to return exactly four KPIs for the markup.

    The markup holds four ``std_table`` tables: a single-column one, one whose
    second header is "Test", one with "2023 in T€" / "1997 in €" columns, and
    one whose header is split over two rows ("Betrag in" / "€"). The expected
    dictionary lists the values the parser must produce; every other row and
    table must contribute nothing.
    """
    markup = """
<table class="std_table">
<tr>
<th>Position</th>
</tr>
<tr>
<td>a) Umlaufvermögen</td>
</tr>
</table>
<table class="std_table">
<tr>
<th>Position</th>
<th>Test</th>
</tr>
<tr>
<td>4711</td>
<td>4711</td>
</tr>
</table>
<div>
Möge die Macht mir dir sein
<table class="std_table">
<tr>
<th>Position</th>
<th>2023 in T€</th>
<th>1997 in €</th>
</tr>
<tr>
<td>a) Umlaufvermögen</td>
<td>12,13</td>
<td>4711</td>
</tr>
<tr>
<td>+EBIT</td>
<td>1123</td>
<td>4711</td>
</tr>
<tr>
<td>To be ignored</td>
<td>I've tried so hard and got so far, but in the end it doesn't even matter</td>
<td>4711</td>
</tr>
<tr>
<td>Gewinn</td>
<td></td>
<td>4711</td>
</tr>
<tr>
<td>Jahresüberschuss</td>
<td>4.130,12</td>
<td>4711</td>
</tr>
</table>
<table class="std_table">
<thead>
<tr>
<th>Position</th>
<th>Betrag in</th>
</tr>
<tr>
<th>Hallo</th>
<th>€</th>
</tr>
</thead>
<tbody>
<tr>
<td>I. Schulden</td>
<td>0,12</td>
</tr>
</tbody>
</table>
</div>
"""
    expected_kpis = {
        "Umlaufvermögen": 12130.0,
        "EBIT": 1123000.0,
        "Jahresüberschuss": 4130120.0,
        "Schulden": 0.12,
    }
    parsed_kpis = Bundesanzeiger().parse_tables_to_kpis(markup)
    assert parsed_kpis == expected_kpis