mirror of
https://github.com/fhswf/aki_prj23_transparenzregister.git
synced 2025-04-22 22:12:53 +02:00
fix(data-extraction): Resolve issue in different Bundesanzeiger formats
This commit is contained in:
parent
5bbdf046d2
commit
2050b49fde
@ -62,9 +62,11 @@ class Bundesanzeiger:
|
|||||||
pd.DataFrame: Filtered and pruned DataFrame
|
pd.DataFrame: Filtered and pruned DataFrame
|
||||||
"""
|
"""
|
||||||
df_reports["type"] = df_reports.name.apply(lambda name: name.split(" ")[0])
|
df_reports["type"] = df_reports.name.apply(lambda name: name.split(" ")[0])
|
||||||
df_reports = df_reports.loc[df_reports.type == "Jahresabschluss"]
|
df_reports = df_reports.loc[
|
||||||
|
(df_reports.type == "Jahresabschluss") | (df_reports.type == "Jahres-")
|
||||||
|
]
|
||||||
df_reports["jahr"] = df_reports.name.apply(
|
df_reports["jahr"] = df_reports.name.apply(
|
||||||
lambda name: name.split(" ")[-1].split(".")[-1]
|
lambda name: re.findall(r"\d{2}\.\d{2}.\d{4}", name)[0].split(".")[-1]
|
||||||
)
|
)
|
||||||
return df_reports.drop(["name", "report", "type"], axis=1)
|
return df_reports.drop(["name", "report", "type"], axis=1)
|
||||||
|
|
||||||
|
@ -70,7 +70,11 @@ def test_extracct_financial_results() -> None:
|
|||||||
def test_filter_reports() -> None:
|
def test_filter_reports() -> None:
|
||||||
test_data = [
|
test_data = [
|
||||||
{"name": "Bedienungsanleitung", "report": "", "raw_report": ""},
|
{"name": "Bedienungsanleitung", "report": "", "raw_report": ""},
|
||||||
{"name": "Jahresabschluss 1998", "report": "", "raw_report": ""},
|
{
|
||||||
|
"name": "Jahresabschluss vom 01.01.1998 bis zum 31.12.1998",
|
||||||
|
"report": "",
|
||||||
|
"raw_report": "",
|
||||||
|
},
|
||||||
]
|
]
|
||||||
test_df = pd.DataFrame(test_data)
|
test_df = pd.DataFrame(test_data)
|
||||||
ba = Bundesanzeiger()
|
ba = Bundesanzeiger()
|
||||||
@ -91,7 +95,7 @@ def test_get_information(mock_bundesanzeiger: Mock) -> None:
|
|||||||
"raw_report": "",
|
"raw_report": "",
|
||||||
},
|
},
|
||||||
"2": {
|
"2": {
|
||||||
"name": "Jahresabschluss 1998",
|
"name": "Jahresabschluss 01.01.1998",
|
||||||
"report": "",
|
"report": "",
|
||||||
"company": "PRJ 23 Transparenzregister GmbH",
|
"company": "PRJ 23 Transparenzregister GmbH",
|
||||||
"raw_report": "",
|
"raw_report": "",
|
||||||
|
Loading…
x
Reference in New Issue
Block a user