diff --git a/src/aki_prj23_transparenzregister/utils/data_extraction/bundesanzeiger.py b/src/aki_prj23_transparenzregister/utils/data_extraction/bundesanzeiger.py
index 21bce5b..ec39c3d 100644
--- a/src/aki_prj23_transparenzregister/utils/data_extraction/bundesanzeiger.py
+++ b/src/aki_prj23_transparenzregister/utils/data_extraction/bundesanzeiger.py
@@ -200,7 +200,9 @@ class Bundesanzeiger:
soup = BeautifulSoup(report, features="html.parser")
for table in soup.find_all("table", {"class": "std_table"}):
try:
- results = pd.read_html(StringIO(str(table)), flavor="bs4")
+ results = pd.read_html(
+ StringIO(str(table)), flavor="bs4", thousands=".", decimal=","
+ )
if len(results) > 0:
data_frame = results[0]
result.append(data_frame)
@@ -229,10 +231,12 @@ class Bundesanzeiger:
return None
- def parse_string_to_float(value: str | float) -> float | None:
+ def parse_string_to_float(value: str | float | None) -> float | None:
+ """Convert ``value`` to ``float``; return ``None`` for ``None`` or unconvertible input."""
+ if value is None:
+ return None
try:
- if value is None:
- return None
- return float(str(value).replace(".", "").replace(",", "."))
- except Exception:
+ return float(value)
+ # Only the errors float() raises for bad text, unsupported types or out-of-range ints.
+ except (TypeError, ValueError, OverflowError):
return None
diff --git a/tests/utils/data_extraction/bundesanzeiger_test.py b/tests/utils/data_extraction/bundesanzeiger_test.py
index 73bbbd9..5e2ef33 100644
--- a/tests/utils/data_extraction/bundesanzeiger_test.py
+++ b/tests/utils/data_extraction/bundesanzeiger_test.py
@@ -136,3 +136,75 @@ def test_get_information_no_results(mock_bundesanzeiger: Mock) -> None:
ba = Bundesanzeiger()
result = ba.get_information("PRJ 23 Transparenzregister GmbH", "Iserlohn")
assert len(result) == 0
+
+
+def test_extract_tables_from_reports() -> None:
+ html_report = """
+
+
+ Möge die Macht mir dir sein
+
+
+ Column A |
+ Column B |
+
+
+ 42 |
+ 4711 |
+
+
+
+ """
+ extractor = Bundesanzeiger()
+ tables = extractor.__extract_tables_from_report__(html_report)
+ assert len(tables) == 1  # the sample report is expected to yield exactly one table
+
+
+def test_parse_tables_to_kpis() -> None:
+ html_report = """
+
+
+ Möge die Macht mir dir sein
+
+
+ Position |
+ 2023 in T€ |
+ 1997 in € |
+
+
+ a) Umlaufvermögen |
+ 12,13 |
+ 4711 |
+
+
+ +EBIT |
+ 1123 |
+ 4711 |
+
+
+ To be ignored |
+ I've tried so hard and got so far, but in the end it doesn't even matter |
+ 4711 |
+
+
+ Gewinn |
+ |
+ 4711 |
+
+
+ Jahresüberschuss |
+ 4.130,12 |
+ 4711 |
+
+
+
+ """
+ extractor = Bundesanzeiger()
+ kpis = extractor.parse_tables_to_kpis(html_report)
+ assert kpis == {  # "To be ignored" (text cell) and "Gewinn" (empty cell) must not appear
+ "Umlaufvermögen": 12130.0,  # row "a) Umlaufvermögen": 12,13 in the T€ column, times 1000
+ "EBIT": 1123000.0,  # row "+EBIT": 1123 in the T€ column, times 1000
+ "Jahresüberschuss": 4130120.0,  # 4.130,12 in the T€ column, times 1000
+ }