fix(data-extraction): Handle malformed date_of_birth fields

This commit is contained in:
TrisNol 2023-10-07 17:01:34 +02:00
parent 7500895982
commit 84d0139531
3 changed files with 542 additions and 509 deletions

View File

@ -124,7 +124,7 @@ class PersonToCompanyRelationship(CompanyRelationship):
"""Extension of CompanyRelationship with extras for Person."""
name: PersonName
date_of_birth: str
date_of_birth: str | None
@dataclass

View File

@ -52,6 +52,22 @@ def transform_xml_to_json(source_dir: str, target_dir: str) -> None:
json_file.write(json.dumps(data))
def parse_date_of_birth(data: dict) -> str | None:
"""Retreives the date of birth from a stakeholder entry if possible.
Args:
data (dict): Stakeholder data
Returns:
str | None: date of birth or None if not found
"""
if "Geburt" in (base := data["Beteiligter"]["Natuerliche_Person"]):
base = base["Geburt"]["Geburtsdatum"]
if isinstance(base, str):
return base
return None
def parse_stakeholder(data: dict) -> CompanyRelationship | None:
"""Extract the company stakeholder/relation from a single "Beteiligung".
@ -92,7 +108,7 @@ def parse_stakeholder(data: dict) -> CompanyRelationship | None:
}
)
return PersonToCompanyRelationship(
**{
**{ # type: ignore
"name": PersonName(
**{
"firstname": data["Beteiligter"]["Natuerliche_Person"][
@ -103,11 +119,7 @@ def parse_stakeholder(data: dict) -> CompanyRelationship | None:
]["Nachname"],
}
),
"date_of_birth": data["Beteiligter"]["Natuerliche_Person"]["Geburt"][
"Geburtsdatum"
]
if "Geburt" in data["Beteiligter"]["Natuerliche_Person"]
else None,
"date_of_birth": parse_date_of_birth(data),
"location": Location(
**{
"city": data["Beteiligter"]["Natuerliche_Person"]["Anschrift"][

View File

@ -82,6 +82,27 @@ def test_parse_stakeholder_person() -> None:
assert transform.parse_stakeholder(data) == expected_result
def test_parse_stakeholder_person_missing_date_of_birth() -> None:
    """A nil-marked "Geburtsdatum" is expected to yield date_of_birth=None."""
    natural_person = {
        "Voller_Name": {"Vorname": "Stephen", "Nachname": "King"},
        "Anschrift": {"Ort": "Maine"},
        # The date node is present but holds a nil marker instead of a string.
        "Geburt": {"Geburtsdatum": {"@xsi:nil": "true"}},
    }
    stakeholder = {
        "Beteiligter": {"Natuerliche_Person": natural_person},
        "Rolle": {"Rollenbezeichnung": {"content": "Geschäftsleiter(in)"}},
    }

    expected = PersonToCompanyRelationship(
        role=RelationshipRoleEnum.GESCHAEFTSLEITER,  # type: ignore
        date_of_birth=None,
        name=PersonName(firstname="Stephen", lastname="King"),
        type=CompanyRelationshipEnum.PERSON,
        location=Location(city="Maine"),
    )

    actual = transform.parse_stakeholder(stakeholder)
    assert actual == expected
def test_parse_stakeholder_org() -> None:
data = {
"Beteiligter": {