From 7b5cf16e4908b9b82e33e1c12ff54610771d9134 Mon Sep 17 00:00:00 2001 From: TrisNol Date: Mon, 25 Sep 2023 19:33:23 +0200 Subject: [PATCH] feat: Add simple wrapper to update particular financial entries --- .../utils/data_extraction/bundesanzeiger.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/aki_prj23_transparenzregister/utils/data_extraction/bundesanzeiger.py b/src/aki_prj23_transparenzregister/utils/data_extraction/bundesanzeiger.py index f69e78f..104e48b 100644 --- a/src/aki_prj23_transparenzregister/utils/data_extraction/bundesanzeiger.py +++ b/src/aki_prj23_transparenzregister/utils/data_extraction/bundesanzeiger.py @@ -61,12 +61,15 @@ class Bundesanzeiger: Returns: pd.DataFrame: Filtered and pruned DataFrame """ + date_regex = r"\d{2}\.\d{2}.\d{4}" + df_reports["type"] = df_reports.name.apply(lambda name: name.split(" ")[0]) df_reports = df_reports.loc[ - (df_reports.type == "Jahresabschluss") | (df_reports.type == "Jahres-") + ((df_reports.type == "Jahresabschluss") | (df_reports.type == "Jahres-")) + & df_reports.name.str.contains(date_regex, regex=True) ] df_reports["jahr"] = df_reports.name.apply( - lambda name: re.findall(r"\d{2}\.\d{2}.\d{4}", name)[0].split(".")[-1] + lambda name: re.findall(date_regex, name)[0].split(".")[-1] ) return df_reports.drop(["name", "report", "type"], axis=1)