Introduce extended_financial_data code (#357)

Introduces the previously developed method for fetching financial data
via table parsing (a.k.a. the "data-lake-like solution"). The change is
non-destructive: the current RegEx-based behaviour remains the default.
This commit is contained in:
Tristan Nolde
2023-11-11 14:10:20 +01:00
committed by GitHub
5 changed files with 2154 additions and 364 deletions

View File

@ -136,3 +136,110 @@ def test_get_information_no_results(mock_bundesanzeiger: Mock) -> None:
ba = Bundesanzeiger()
result = ba.get_information("PRJ 23 Transparenzregister GmbH", "Iserlohn")
assert len(result) == 0
def test_extract_tables_from_reports() -> None:
    """Check how many tables are extracted from a small HTML report.

    The fixture holds an empty ``<table>`` without a class and, nested in a
    ``<div>``, one ``<table class="std_table">`` with a header row and a
    data row. Exactly one table is expected back.
    """
    html_report = """
<table>
</table>
<div>
Möge die Macht mir dir sein
<table class="std_table">
<tr>
<th>Column A</th>
<th>Column B</th>
</tr>
<tr>
<td>42</td>
<td>4711</td>
</tr>
</table>
</div>
"""
    extracted_tables = Bundesanzeiger().__extract_tables_from_report__(html_report)
    assert len(extracted_tables) == 1
def test_parse_tables_to_kpis() -> None:
    """Check the KPI dictionary produced from a report with several tables.

    The fixture contains four ``std_table`` tables:

    * a single-column table with only a position label,
    * a two-column table whose position cell is the number ``4711``,
    * a table with the columns ``2023 in T€`` and ``1997 in €``,
    * a table with a two-row header (``Betrag in`` / ``€``).

    The expected result has four entries. Their keys are the position
    labels without the ``a) ``, ``+`` and ``I. `` prefixes seen in the
    cells. The three values taken from the ``2023 in T€`` column are
    expected multiplied by 1000, while the ``€`` value is expected as-is.
    The rows ``To be ignored`` (free-text cell) and ``Gewinn`` (empty cell)
    must not appear in the result.
    """
    html_report = """
<table class="std_table">
<tr>
<th>Position</th>
</tr>
<tr>
<td>a) Umlaufvermögen</td>
</tr>
</table>
<table class="std_table">
<tr>
<th>Position</th>
<th>Test</th>
</tr>
<tr>
<td>4711</td>
<td>4711</td>
</tr>
</table>
<div>
Möge die Macht mir dir sein
<table class="std_table">
<tr>
<th>Position</th>
<th>2023 in T€</th>
<th>1997 in €</th>
</tr>
<tr>
<td>a) Umlaufvermögen</td>
<td>12,13</td>
<td>4711</td>
</tr>
<tr>
<td>+EBIT</td>
<td>1123</td>
<td>4711</td>
</tr>
<tr>
<td>To be ignored</td>
<td>I've tried so hard and got so far, but in the end it doesn't even matter</td>
<td>4711</td>
</tr>
<tr>
<td>Gewinn</td>
<td></td>
<td>4711</td>
</tr>
<tr>
<td>Jahresüberschuss</td>
<td>4.130,12</td>
<td>4711</td>
</tr>
</table>
<table class="std_table">
<thead>
<tr>
<th>Position</th>
<th>Betrag in</th>
</tr>
<tr>
<th>Hallo</th>
<th>€</th>
</tr>
</thead>
<tbody>
<tr>
<td>I. Schulden</td>
<td>0,12</td>
</tr>
</tbody>
</table>
</div>
"""
    expected_kpis = {
        "Umlaufvermögen": 12130.0,
        "EBIT": 1123000.0,
        "Jahresüberschuss": 4130120.0,
        "Schulden": 0.12,
    }
    parsed_kpis = Bundesanzeiger().parse_tables_to_kpis(html_report)
    assert parsed_kpis == expected_kpis