diff --git a/src/aki_prj23_transparenzregister/utils/data_extraction/unternehmensregister/transform.py b/src/aki_prj23_transparenzregister/utils/data_extraction/unternehmensregister/transform.py index 610433d..30233dd 100644 --- a/src/aki_prj23_transparenzregister/utils/data_extraction/unternehmensregister/transform.py +++ b/src/aki_prj23_transparenzregister/utils/data_extraction/unternehmensregister/transform.py @@ -25,7 +25,10 @@ from aki_prj23_transparenzregister.models.company import ( PersonToCompanyRelationship, RelationshipRoleEnum, ) -from aki_prj23_transparenzregister.utils.string_tools import transform_date_to_iso +from aki_prj23_transparenzregister.utils.string_tools import ( + remove_traling_and_leading_quotes, + transform_date_to_iso, +) def transform_xml_to_json(source_dir: str, target_dir: str) -> None: @@ -62,10 +65,12 @@ def parse_stakeholder(data: dict) -> CompanyRelationship | None: # It's a Compnay serving as a "Kommanditist" or similar if data["Beteiligter"]["Natuerliche_Person"]["Voller_Name"]["Vorname"] is None: return CompanyToCompanyRelationship( - **{ - "name": data["Beteiligter"]["Natuerliche_Person"]["Voller_Name"][ - "Nachname" - ], + **{ # type: ignore + "name": remove_traling_and_leading_quotes( + data["Beteiligter"]["Natuerliche_Person"]["Voller_Name"][ + "Nachname" + ] + ), "location": Location( **{ "city": data["Beteiligter"]["Natuerliche_Person"][ @@ -124,13 +129,15 @@ def parse_stakeholder(data: dict) -> CompanyRelationship | None: ) if "Organisation" in data["Beteiligter"]: return CompanyToCompanyRelationship( - **{ + **{ # type: ignore "role": RelationshipRoleEnum( data["Rolle"]["Rollenbezeichnung"]["content"] ), - "name": data["Beteiligter"]["Organisation"]["Bezeichnung"][ - "Bezeichnung_Aktuell" - ], + "name": remove_traling_and_leading_quotes( + data["Beteiligter"]["Organisation"]["Bezeichnung"][ + "Bezeichnung_Aktuell" + ] + ), "location": Location( **{ "city": data["Beteiligter"]["Organisation"]["Anschrift"]["Ort"], @@ 
-223,9 +230,10 @@ def name_from_beteiligung(data: dict) -> str: Returns: str: Company name """ - return data["XJustiz_Daten"]["Grunddaten"]["Verfahrensdaten"]["Beteiligung"][0][ + name = data["XJustiz_Daten"]["Grunddaten"]["Verfahrensdaten"]["Beteiligung"][0][ "Beteiligter" ]["Organisation"]["Bezeichnung"]["Bezeichnung_Aktuell"] + return remove_traling_and_leading_quotes(name) def map_rechtsform(company_name: str, data: dict) -> CompanyTypeEnum | None: diff --git a/src/aki_prj23_transparenzregister/utils/string_tools.py b/src/aki_prj23_transparenzregister/utils/string_tools.py index f56fbc7..28f5340 100644 --- a/src/aki_prj23_transparenzregister/utils/string_tools.py +++ b/src/aki_prj23_transparenzregister/utils/string_tools.py @@ -34,3 +34,24 @@ def transform_date_to_iso(date: str) -> str: input_format = "%d.%m.%y" if re.match(regex_yy, date) else "%d.%m.%Y" date_temp = datetime.strptime(date, input_format) return date_temp.strftime("%Y-%m-%d") + + +def remove_traling_and_leading_quotes(value: str) -> str: + """Removes trailing and leading double-quotes from given string if present. 
+ + Args: + value (str): String to clean; None is returned unchanged. + + Returns: + str: The string without an unpaired leading/trailing double-quote or an enclosing pair. + """ + if value is not None: + count_quotes = value.count('"') + if count_quotes > 0: + if value.startswith('"') and count_quotes % 2 != 0: + value = value[1:] + if value.endswith('"') and count_quotes % 2 != 0: + value = value[:-1] + if value.startswith('"') and value.endswith('"'): + value = value[1:-1] + return value diff --git a/tests/utils/data_extraction/unternehmensregister/transform_test.py b/tests/utils/data_extraction/unternehmensregister/transform_test.py index b690765..fc1f8b0 100644 --- a/tests/utils/data_extraction/unternehmensregister/transform_test.py +++ b/tests/utils/data_extraction/unternehmensregister/transform_test.py @@ -46,7 +46,7 @@ def test_parse_stakeholder_org_hidden_in_person() -> None: data = { "Beteiligter": { "Natuerliche_Person": { - "Voller_Name": {"Vorname": None, "Nachname": "Some Company KG"}, + "Voller_Name": {"Vorname": None, "Nachname": '"Some Company KG'}, "Anschrift": {"Ort": "Area 51"}, } }, @@ -304,6 +304,32 @@ def test_name_from_beteiligung() -> None: assert transform.name_from_beteiligung(data) == expected_result +def test_name_from_beteiligung_remove_quotes() -> None: + data = { + "XJustiz_Daten": { + "Grunddaten": { + "Verfahrensdaten": { + "Beteiligung": [ + { + "Beteiligter": { + "Beteiligtennummer": "1", + "Organisation": { + "Bezeichnung": { + "Bezeichnung_Aktuell": '"Siemes Verwaltungs-GmbH"' + }, + }, + } + }, + ] + } + } + } + } + + expected_result = "Siemes Verwaltungs-GmbH" + assert transform.name_from_beteiligung(data) == expected_result + + def test_map_rechtsform() -> None: data = { "XJustiz_Daten": { diff --git a/tests/utils/string_tools_test.py b/tests/utils/string_tools_test.py index e19b488..e944750 100644 --- a/tests/utils/string_tools_test.py +++ b/tests/utils/string_tools_test.py @@ -45,3 +45,17 @@ def test_simplify_string_type_error(value: Any) -> None: def test_transform_date_to_iso(value: str, expected: str) -> None: result 
= string_tools.transform_date_to_iso(value) assert result == expected + + +@pytest.mark.parametrize( + ("value", "expected_result"), + [ + (None, None), + ('"Siemes Verwaltungs-GmbH"', "Siemes Verwaltungs-GmbH"), + ('"Hans"-Wurst GmbH', '"Hans"-Wurst GmbH'), + ('Hans-Wurst GmbH"', "Hans-Wurst GmbH"), + ], +) +def test_remove_trailing_and_leading_quotes(value: str, expected_result: str) -> None: + result = string_tools.remove_traling_and_leading_quotes(value) + assert result == expected_result