fix(data-extraction): Handle malformed date_of_birth fields (#204)

fix(data-extraction): Handle malformed date_of_birth fields
This commit is contained in:
Tristan Nolde 2023-10-09 19:15:37 +02:00 committed by GitHub
commit f65a377d73
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 545 additions and 518 deletions

View File

@ -124,7 +124,7 @@ class PersonToCompanyRelationship(CompanyRelationship):
"""Extension of CompanyRelationship with extras for Person.""" """Extension of CompanyRelationship with extras for Person."""
name: PersonName name: PersonName
date_of_birth: str date_of_birth: str | None
@dataclass @dataclass

View File

@ -52,6 +52,22 @@ def transform_xml_to_json(source_dir: str, target_dir: str) -> None:
json_file.write(json.dumps(data)) json_file.write(json.dumps(data))
def parse_date_of_birth(data: dict) -> str | None:
"""Retreives the date of birth from a stakeholder entry if possible.
Args:
data (dict): Stakeholder data
Returns:
str | None: date of birth or None if not found
"""
if "Geburt" in (base := data["Beteiligter"]["Natuerliche_Person"]):
base = base["Geburt"]["Geburtsdatum"]
if isinstance(base, str):
return base
return None
def parse_stakeholder(data: dict) -> CompanyRelationship | None: def parse_stakeholder(data: dict) -> CompanyRelationship | None:
"""Extract the company stakeholder/relation from a single "Beteiligung". """Extract the company stakeholder/relation from a single "Beteiligung".
@ -92,7 +108,7 @@ def parse_stakeholder(data: dict) -> CompanyRelationship | None:
} }
) )
return PersonToCompanyRelationship( return PersonToCompanyRelationship(
**{ **{ # type: ignore
"name": PersonName( "name": PersonName(
**{ **{
"firstname": data["Beteiligter"]["Natuerliche_Person"][ "firstname": data["Beteiligter"]["Natuerliche_Person"][
@ -103,11 +119,7 @@ def parse_stakeholder(data: dict) -> CompanyRelationship | None:
]["Nachname"], ]["Nachname"],
} }
), ),
"date_of_birth": data["Beteiligter"]["Natuerliche_Person"]["Geburt"][ "date_of_birth": parse_date_of_birth(data),
"Geburtsdatum"
]
if "Geburt" in data["Beteiligter"]["Natuerliche_Person"]
else None,
"location": Location( "location": Location(
**{ **{
"city": data["Beteiligter"]["Natuerliche_Person"]["Anschrift"][ "city": data["Beteiligter"]["Natuerliche_Person"]["Anschrift"][

View File

@ -395,19 +395,13 @@ def add_person_relation(person: dict[str, Any], company_id: int, db: Session) ->
db=db, db=db,
) )
except DataInvalidError: except DataInvalidError:
# logger.exception("Test except: ")
if date_of_brith: if date_of_brith:
# print("Break")
raise raise
# TODO enable the following line logger.debug(
# logger.warning(f"No date of birth birth for {name['firstname']} {name['lastname']}") f"No date of birth birth for {person['name']['lastname']}, {person['name']['firstname']}"
)
db.rollback() db.rollback()
return return
except TypeError as error:
# TODO remove unhashable type catcher
if "unhashable type: 'dict'" in str(error):
return
raise
except Exception: except Exception:
logger.exception("Test except") logger.exception("Test except")
raise raise

View File

@ -82,6 +82,27 @@ def test_parse_stakeholder_person() -> None:
assert transform.parse_stakeholder(data) == expected_result assert transform.parse_stakeholder(data) == expected_result
def test_parse_stakeholder_person_missing_date_of_birth() -> None:
    """A nil "Geburtsdatum" element must result in ``date_of_birth=None``."""
    # Natural person whose birth date is present only as an xsi:nil marker.
    natural_person = {
        "Voller_Name": {"Vorname": "Stephen", "Nachname": "King"},
        "Anschrift": {"Ort": "Maine"},
        "Geburt": {"Geburtsdatum": {"@xsi:nil": "true"}},
    }
    stakeholder = {
        "Beteiligter": {"Natuerliche_Person": natural_person},
        "Rolle": {"Rollenbezeichnung": {"content": "Geschäftsleiter(in)"}},
    }

    expected = PersonToCompanyRelationship(
        role=RelationshipRoleEnum.GESCHAEFTSLEITER,  # type: ignore
        date_of_birth=None,
        name=PersonName(firstname="Stephen", lastname="King"),
        type=CompanyRelationshipEnum.PERSON,
        location=Location(city="Maine"),
    )

    actual = transform.parse_stakeholder(stakeholder)
    assert actual == expected
def test_parse_stakeholder_org() -> None: def test_parse_stakeholder_org() -> None:
data = { data = {
"Beteiligter": { "Beteiligter": {