fix(data-extraction): Handle malformed date_of_birth fields

This commit is contained in:
TrisNol 2023-10-07 17:01:34 +02:00
parent 7500895982
commit 84d0139531
3 changed files with 542 additions and 509 deletions

View File

@ -124,7 +124,7 @@ class PersonToCompanyRelationship(CompanyRelationship):
"""Extension of CompanyRelationship with extras for Person.""" """Extension of CompanyRelationship with extras for Person."""
name: PersonName name: PersonName
date_of_birth: str date_of_birth: str | None
@dataclass @dataclass

View File

@ -52,6 +52,22 @@ def transform_xml_to_json(source_dir: str, target_dir: str) -> None:
json_file.write(json.dumps(data)) json_file.write(json.dumps(data))
def parse_date_of_birth(data: dict) -> str | None:
"""Retreives the date of birth from a stakeholder entry if possible.
Args:
data (dict): Stakeholder data
Returns:
str | None: date of birth or None if not found
"""
if "Geburt" in (base := data["Beteiligter"]["Natuerliche_Person"]):
base = base["Geburt"]["Geburtsdatum"]
if isinstance(base, str):
return base
return None
def parse_stakeholder(data: dict) -> CompanyRelationship | None: def parse_stakeholder(data: dict) -> CompanyRelationship | None:
"""Extract the company stakeholder/relation from a single "Beteiligung". """Extract the company stakeholder/relation from a single "Beteiligung".
@ -92,7 +108,7 @@ def parse_stakeholder(data: dict) -> CompanyRelationship | None:
} }
) )
return PersonToCompanyRelationship( return PersonToCompanyRelationship(
**{ **{ # type: ignore
"name": PersonName( "name": PersonName(
**{ **{
"firstname": data["Beteiligter"]["Natuerliche_Person"][ "firstname": data["Beteiligter"]["Natuerliche_Person"][
@ -103,11 +119,7 @@ def parse_stakeholder(data: dict) -> CompanyRelationship | None:
]["Nachname"], ]["Nachname"],
} }
), ),
"date_of_birth": data["Beteiligter"]["Natuerliche_Person"]["Geburt"][ "date_of_birth": parse_date_of_birth(data),
"Geburtsdatum"
]
if "Geburt" in data["Beteiligter"]["Natuerliche_Person"]
else None,
"location": Location( "location": Location(
**{ **{
"city": data["Beteiligter"]["Natuerliche_Person"]["Anschrift"][ "city": data["Beteiligter"]["Natuerliche_Person"]["Anschrift"][

View File

@ -82,6 +82,27 @@ def test_parse_stakeholder_person() -> None:
assert transform.parse_stakeholder(data) == expected_result assert transform.parse_stakeholder(data) == expected_result
def test_parse_stakeholder_person_missing_date_of_birth() -> None:
    """A nil ``Geburtsdatum`` must yield a person relationship without a birth date."""
    person = {
        "Voller_Name": {"Vorname": "Stephen", "Nachname": "King"},
        "Anschrift": {"Ort": "Maine"},
        # The birth date is given as an xsi:nil mapping instead of a string.
        "Geburt": {"Geburtsdatum": {"@xsi:nil": "true"}},
    }
    stakeholder = {
        "Beteiligter": {"Natuerliche_Person": person},
        "Rolle": {"Rollenbezeichnung": {"content": "Geschäftsleiter(in)"}},
    }

    expected = PersonToCompanyRelationship(
        role=RelationshipRoleEnum.GESCHAEFTSLEITER,  # type: ignore
        date_of_birth=None,
        name=PersonName(firstname="Stephen", lastname="King"),
        type=CompanyRelationshipEnum.PERSON,
        location=Location(city="Maine"),
    )

    assert transform.parse_stakeholder(stakeholder) == expected
def test_parse_stakeholder_org() -> None: def test_parse_stakeholder_org() -> None:
data = { data = {
"Beteiligter": { "Beteiligter": {