fix(data-extraction): Parse date from Gesellschaftsvertrag entry

This commit is contained in:
TrisNol 2023-10-14 18:22:41 +02:00
parent c0d42a22d7
commit b972acee7a
2 changed files with 46 additions and 4 deletions

View File

@ -367,6 +367,29 @@ def map_business_purpose(data: dict) -> str | None:
return None
def extract_date_from_string(value: str) -> str | None:
"""Extract a date in ISO format from the given string if possible.
Args:
value (str): Input text
Returns:
str | None: Date in ISO format, None if not found
"""
date_regex = [ # type: ignore
{"regex": r"\d{1,2}\.\d{1,2}\.\d{2,4}", "mapper": transform_date_to_iso},
{"regex": r"\d{4}-\d{1,2}-\d{1,2}", "mapper": None},
]
for regex in date_regex:
result = re.findall(regex["regex"], value) # type: ignore
if len(result) == 1:
relevant_data = result[0]
if regex["mapper"] is not None: # type: ignore
return regex["mapper"](relevant_data) # type: ignore
return relevant_data
return None
def map_founding_date(data: dict) -> str | None:
"""Extracts the founding date from a given Unternehmensregister export.
@ -392,9 +415,11 @@ def map_founding_date(data: dict) -> str | None:
"Gruendungsmetadaten"
in data["XJustiz_Daten"]["Fachdaten_Register"]["Basisdaten_Register"]
):
return data["XJustiz_Daten"]["Fachdaten_Register"]["Basisdaten_Register"][
return extract_date_from_string(
data["XJustiz_Daten"]["Fachdaten_Register"]["Basisdaten_Register"][
"Gruendungsmetadaten"
]["Gruendungsdatum"]
)
# No reliable answer
return None

View File

@ -601,6 +601,21 @@ def test_map_business_purpose_no_result() -> None:
assert result is None
@pytest.mark.parametrize(
    ("value", "expected_result"),
    [
        ("", None),
        ("Tag der ersten Eintragung: 01.05.2004", "2004-05-01"),
        ("Gesellschaftsvertrag vom 06.04.2016 Hallo Welt", "2016-04-06"),
        ("Str. des Tests vom 1999-04-05", "1999-04-05"),
        ("Once upon a midnight dreary while I pondered weak and weary...", None),
        # More than one candidate date is ambiguous, so nothing is returned.
        ("Vertrag vom 01.05.2004, geaendert am 02.06.2005", None),
    ],
)
def test_extract_date_from_string(value: str, expected_result: str | None) -> None:
    """Check date extraction for German, ISO, ambiguous and date-free inputs."""
    result = transform.extract_date_from_string(value)
    assert result == expected_result
def test_map_founding_date_from_tag_der_ersten_eintragung() -> None:
data = {
"some entry": "Tag der ersten Eintragung: 01.05.2004",
@ -626,7 +641,9 @@ def test_map_founding_date_from_gruendungsdatum() -> None:
"XJustiz_Daten": {
"Fachdaten_Register": {
"Basisdaten_Register": {
"Gruendungsmetadaten": {"Gruendungsdatum": "1998-01-01"}
"Gruendungsmetadaten": {
"Gruendungsdatum": "Gesellschaftsvertrag vom 1998-01-01"
}
}
}
}