mirror of
https://github.com/fhswf/aki_prj23_transparenzregister.git
synced 2025-04-25 20:42:34 +02:00
fix(data-extraction): Parse date from Gesellschaftsvertrag entry
This commit is contained in:
parent
c0d42a22d7
commit
b972acee7a
@ -367,6 +367,29 @@ def map_business_purpose(data: dict) -> str | None:
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def extract_date_from_string(value: str) -> str | None:
|
||||||
|
"""Extract a date in ISO format from the given string if possible.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
value (str): Input text
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
str | None: Date in ISO format, None if not found
|
||||||
|
"""
|
||||||
|
date_regex = [ # type: ignore
|
||||||
|
{"regex": r"\d{1,2}\.\d{1,2}\.\d{2,4}", "mapper": transform_date_to_iso},
|
||||||
|
{"regex": r"\d{4}-\d{1,2}-\d{1,2}", "mapper": None},
|
||||||
|
]
|
||||||
|
for regex in date_regex:
|
||||||
|
result = re.findall(regex["regex"], value) # type: ignore
|
||||||
|
if len(result) == 1:
|
||||||
|
relevant_data = result[0]
|
||||||
|
if regex["mapper"] is not None: # type: ignore
|
||||||
|
return regex["mapper"](relevant_data) # type: ignore
|
||||||
|
return relevant_data
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
def map_founding_date(data: dict) -> str | None:
|
def map_founding_date(data: dict) -> str | None:
|
||||||
"""Extracts the founding date from a given Unternehmensregister export.
|
"""Extracts the founding date from a given Unternehmensregister export.
|
||||||
|
|
||||||
@ -392,9 +415,11 @@ def map_founding_date(data: dict) -> str | None:
|
|||||||
"Gruendungsmetadaten"
|
"Gruendungsmetadaten"
|
||||||
in data["XJustiz_Daten"]["Fachdaten_Register"]["Basisdaten_Register"]
|
in data["XJustiz_Daten"]["Fachdaten_Register"]["Basisdaten_Register"]
|
||||||
):
|
):
|
||||||
return data["XJustiz_Daten"]["Fachdaten_Register"]["Basisdaten_Register"][
|
return extract_date_from_string(
|
||||||
|
data["XJustiz_Daten"]["Fachdaten_Register"]["Basisdaten_Register"][
|
||||||
"Gruendungsmetadaten"
|
"Gruendungsmetadaten"
|
||||||
]["Gruendungsdatum"]
|
]["Gruendungsdatum"]
|
||||||
|
)
|
||||||
# No reliable answer
|
# No reliable answer
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
@ -601,6 +601,21 @@ def test_map_business_purpose_no_result() -> None:
|
|||||||
assert result is None
|
assert result is None
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize(
    ("value", "expected_result"),
    [
        ("", None),
        ("Tag der ersten Eintragung: 01.05.2004", "2004-05-01"),
        ("Gesellschaftsvertrag vom 06.04.2016 Hallo Welt", "2016-04-06"),
        ("Str. des Tests vom 1999-04-05", "1999-04-05"),
        ("Once upon a midnight dreary while I pondered weak and weary...", None),
    ],
)
def test_extract_date_from_string(value: str, expected_result: str | None) -> None:
    """Check that extract_date_from_string returns the expected value.

    Args:
        value (str): Text passed to the function under test
        expected_result (str | None): Expected return value; None for the
            parameter sets that contain no date
    """
    result = transform.extract_date_from_string(value)
    assert result == expected_result
|
||||||
|
|
||||||
|
|
||||||
def test_map_founding_date_from_tag_der_ersten_eintragung() -> None:
|
def test_map_founding_date_from_tag_der_ersten_eintragung() -> None:
|
||||||
data = {
|
data = {
|
||||||
"some entry": "Tag der ersten Eintragung: 01.05.2004",
|
"some entry": "Tag der ersten Eintragung: 01.05.2004",
|
||||||
@ -626,7 +641,9 @@ def test_map_founding_date_from_gruendungsdatum() -> None:
|
|||||||
"XJustiz_Daten": {
|
"XJustiz_Daten": {
|
||||||
"Fachdaten_Register": {
|
"Fachdaten_Register": {
|
||||||
"Basisdaten_Register": {
|
"Basisdaten_Register": {
|
||||||
"Gruendungsmetadaten": {"Gruendungsdatum": "1998-01-01"}
|
"Gruendungsmetadaten": {
|
||||||
|
"Gruendungsdatum": "Gesellschaftsvertrag vom 1998-01-01"
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user