fix(data-extraction): Handle malformed date_of_birth fields (#204)

fix(data-extraction): Handle malformed date_of_birth fields
This commit is contained in:
Tristan Nolde 2023-10-09 19:15:37 +02:00 committed by GitHub
commit f65a377d73
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 545 additions and 518 deletions

View File

@ -124,7 +124,7 @@ class PersonToCompanyRelationship(CompanyRelationship):
"""Extension of CompanyRelationship with extras for Person."""
name: PersonName
date_of_birth: str
date_of_birth: str | None
@dataclass

View File

@ -52,6 +52,22 @@ def transform_xml_to_json(source_dir: str, target_dir: str) -> None:
json_file.write(json.dumps(data))
def parse_date_of_birth(data: dict) -> str | None:
"""Retreives the date of birth from a stakeholder entry if possible.
Args:
data (dict): Stakeholder data
Returns:
str | None: date of birth or None if not found
"""
if "Geburt" in (base := data["Beteiligter"]["Natuerliche_Person"]):
base = base["Geburt"]["Geburtsdatum"]
if isinstance(base, str):
return base
return None
def parse_stakeholder(data: dict) -> CompanyRelationship | None:
"""Extract the company stakeholder/relation from a single "Beteiligung".
@ -92,7 +108,7 @@ def parse_stakeholder(data: dict) -> CompanyRelationship | None:
}
)
return PersonToCompanyRelationship(
**{
**{ # type: ignore
"name": PersonName(
**{
"firstname": data["Beteiligter"]["Natuerliche_Person"][
@ -103,11 +119,7 @@ def parse_stakeholder(data: dict) -> CompanyRelationship | None:
]["Nachname"],
}
),
"date_of_birth": data["Beteiligter"]["Natuerliche_Person"]["Geburt"][
"Geburtsdatum"
]
if "Geburt" in data["Beteiligter"]["Natuerliche_Person"]
else None,
"date_of_birth": parse_date_of_birth(data),
"location": Location(
**{
"city": data["Beteiligter"]["Natuerliche_Person"]["Anschrift"][

View File

@ -395,19 +395,13 @@ def add_person_relation(person: dict[str, Any], company_id: int, db: Session) ->
db=db,
)
except DataInvalidError:
# logger.exception("Test except: ")
if date_of_brith:
# print("Break")
raise
# TODO enable the following line
# logger.warning(f"No date of birth birth for {name['firstname']} {name['lastname']}")
logger.debug(
f"No date of birth birth for {person['name']['lastname']}, {person['name']['firstname']}"
)
db.rollback()
return
except TypeError as error:
# TODO remove unhashable type catcher
if "unhashable type: 'dict'" in str(error):
return
raise
except Exception:
logger.exception("Test except")
raise

View File

@ -82,6 +82,27 @@ def test_parse_stakeholder_person() -> None:
assert transform.parse_stakeholder(data) == expected_result
def test_parse_stakeholder_person_missing_date_of_birth() -> None:
    """A nil ``Geburtsdatum`` entry must be parsed as ``date_of_birth=None``."""
    natural_person = {
        "Voller_Name": {"Vorname": "Stephen", "Nachname": "King"},
        "Anschrift": {"Ort": "Maine"},
        # Nil marker in place of an actual date string.
        "Geburt": {"Geburtsdatum": {"@xsi:nil": "true"}},
    }
    data = {
        "Beteiligter": {"Natuerliche_Person": natural_person},
        "Rolle": {"Rollenbezeichnung": {"content": "Geschäftsleiter(in)"}},
    }

    expected_result = PersonToCompanyRelationship(
        name=PersonName(firstname="Stephen", lastname="King"),
        date_of_birth=None,
        location=Location(city="Maine"),
        role=RelationshipRoleEnum.GESCHAEFTSLEITER,  # type: ignore
        type=CompanyRelationshipEnum.PERSON,
    )

    actual_result = transform.parse_stakeholder(data)
    assert actual_result == expected_result
def test_parse_stakeholder_org() -> None:
data = {
"Beteiligter": {