mirror of
https://github.com/fhswf/aki_prj23_transparenzregister.git
synced 2025-04-22 16:32:53 +02:00
fix(data-extraction): Handle differing Bundesanzeiger report name formats when filtering reports
This commit is contained in:
parent
5bbdf046d2
commit
2050b49fde
@ -62,9 +62,11 @@ class Bundesanzeiger:
|
||||
pd.DataFrame: Filtered and pruned DataFrame
|
||||
"""
|
||||
df_reports["type"] = df_reports.name.apply(lambda name: name.split(" ")[0])
|
||||
df_reports = df_reports.loc[df_reports.type == "Jahresabschluss"]
|
||||
df_reports = df_reports.loc[
|
||||
(df_reports.type == "Jahresabschluss") | (df_reports.type == "Jahres-")
|
||||
]
|
||||
df_reports["jahr"] = df_reports.name.apply(
|
||||
lambda name: name.split(" ")[-1].split(".")[-1]
|
||||
lambda name: re.findall(r"\d{2}\.\d{2}.\d{4}", name)[0].split(".")[-1]
|
||||
)
|
||||
return df_reports.drop(["name", "report", "type"], axis=1)
|
||||
|
||||
|
@ -70,7 +70,11 @@ def test_extracct_financial_results() -> None:
|
||||
def test_filter_reports() -> None:
|
||||
test_data = [
|
||||
{"name": "Bedienungsanleitung", "report": "", "raw_report": ""},
|
||||
{"name": "Jahresabschluss 1998", "report": "", "raw_report": ""},
|
||||
{
|
||||
"name": "Jahresabschluss vom 01.01.1998 bis zum 31.12.1998",
|
||||
"report": "",
|
||||
"raw_report": "",
|
||||
},
|
||||
]
|
||||
test_df = pd.DataFrame(test_data)
|
||||
ba = Bundesanzeiger()
|
||||
@ -91,7 +95,7 @@ def test_get_information(mock_bundesanzeiger: Mock) -> None:
|
||||
"raw_report": "",
|
||||
},
|
||||
"2": {
|
||||
"name": "Jahresabschluss 1998",
|
||||
"name": "Jahresabschluss 01.01.1998",
|
||||
"report": "",
|
||||
"company": "PRJ 23 Transparenzregister GmbH",
|
||||
"raw_report": "",
|
||||
|
Loading…
x
Reference in New Issue
Block a user