From 49498ad7c0d6912a71a2743421baa8cb4931142f Mon Sep 17 00:00:00 2001 From: TrisNol Date: Sun, 1 Oct 2023 21:30:19 +0200 Subject: [PATCH 1/6] checkpoint: Remove quotes from company name --- .../unternehmensregister/transform.py | 3 ++- .../unternehmensregister/transform_test.py | 26 +++++++++++++++++++ 2 files changed, 28 insertions(+), 1 deletion(-) diff --git a/src/aki_prj23_transparenzregister/utils/data_extraction/unternehmensregister/transform.py b/src/aki_prj23_transparenzregister/utils/data_extraction/unternehmensregister/transform.py index 610433d..ab60100 100644 --- a/src/aki_prj23_transparenzregister/utils/data_extraction/unternehmensregister/transform.py +++ b/src/aki_prj23_transparenzregister/utils/data_extraction/unternehmensregister/transform.py @@ -223,9 +223,10 @@ def name_from_beteiligung(data: dict) -> str: Returns: str: Company name """ - return data["XJustiz_Daten"]["Grunddaten"]["Verfahrensdaten"]["Beteiligung"][0][ + name = data["XJustiz_Daten"]["Grunddaten"]["Verfahrensdaten"]["Beteiligung"][0][ "Beteiligter" ]["Organisation"]["Bezeichnung"]["Bezeichnung_Aktuell"] + return name.replace('"', "") def map_rechtsform(company_name: str, data: dict) -> CompanyTypeEnum | None: diff --git a/tests/utils/data_extraction/unternehmensregister/transform_test.py b/tests/utils/data_extraction/unternehmensregister/transform_test.py index b690765..20c0a4d 100644 --- a/tests/utils/data_extraction/unternehmensregister/transform_test.py +++ b/tests/utils/data_extraction/unternehmensregister/transform_test.py @@ -304,6 +304,32 @@ def test_name_from_beteiligung() -> None: assert transform.name_from_beteiligung(data) == expected_result +def test_name_from_beteiligung_remove_quotes() -> None: + data = { + "XJustiz_Daten": { + "Grunddaten": { + "Verfahrensdaten": { + "Beteiligung": [ + { + "Beteiligter": { + "Beteiligtennummer": "1", + "Organisation": { + "Bezeichnung": { + "Bezeichnung_Aktuell": '"Siemes Verwaltungs-GmbH"' + }, + }, + } + }, + ] + } + } + } + } + + 
expected_result = "Siemes Verwaltungs-GmbH" + assert transform.name_from_beteiligung(data) == expected_result + + def test_map_rechtsform() -> None: data = { "XJustiz_Daten": { From 2a446a99376b595ff1b14d9dbccd1c2b2c14bff3 Mon Sep 17 00:00:00 2001 From: TrisNol Date: Sun, 1 Oct 2023 21:34:54 +0200 Subject: [PATCH 2/6] checkpoint: Remove quotes from company names in relations --- .../utils/data_extraction/unternehmensregister/transform.py | 4 ++-- .../data_extraction/unternehmensregister/transform_test.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/aki_prj23_transparenzregister/utils/data_extraction/unternehmensregister/transform.py b/src/aki_prj23_transparenzregister/utils/data_extraction/unternehmensregister/transform.py index ab60100..36970bc 100644 --- a/src/aki_prj23_transparenzregister/utils/data_extraction/unternehmensregister/transform.py +++ b/src/aki_prj23_transparenzregister/utils/data_extraction/unternehmensregister/transform.py @@ -65,7 +65,7 @@ def parse_stakeholder(data: dict) -> CompanyRelationship | None: **{ "name": data["Beteiligter"]["Natuerliche_Person"]["Voller_Name"][ "Nachname" - ], + ].replace('"', ""), "location": Location( **{ "city": data["Beteiligter"]["Natuerliche_Person"][ @@ -130,7 +130,7 @@ def parse_stakeholder(data: dict) -> CompanyRelationship | None: ), "name": data["Beteiligter"]["Organisation"]["Bezeichnung"][ "Bezeichnung_Aktuell" - ], + ].replace('"', ""), "location": Location( **{ "city": data["Beteiligter"]["Organisation"]["Anschrift"]["Ort"], diff --git a/tests/utils/data_extraction/unternehmensregister/transform_test.py b/tests/utils/data_extraction/unternehmensregister/transform_test.py index 20c0a4d..fc1f8b0 100644 --- a/tests/utils/data_extraction/unternehmensregister/transform_test.py +++ b/tests/utils/data_extraction/unternehmensregister/transform_test.py @@ -46,7 +46,7 @@ def test_parse_stakeholder_org_hidden_in_person() -> None: data = { "Beteiligter": { "Natuerliche_Person": { - 
"Voller_Name": {"Vorname": None, "Nachname": "Some Company KG"}, + "Voller_Name": {"Vorname": None, "Nachname": '"Some Company KG'}, "Anschrift": {"Ort": "Area 51"}, } }, From 259259953e347d7faea80981834c936c243db498 Mon Sep 17 00:00:00 2001 From: TrisNol Date: Tue, 3 Oct 2023 16:37:54 +0200 Subject: [PATCH 3/6] refactor: Move quote removal funtion to string utils, adapt to requirements --- .../unternehmensregister/transform.py | 21 ++++++++++++------- .../utils/string_tools.py | 21 +++++++++++++++++++ tests/utils/string_tools_test.py | 14 +++++++++++++ 3 files changed, 49 insertions(+), 7 deletions(-) diff --git a/src/aki_prj23_transparenzregister/utils/data_extraction/unternehmensregister/transform.py b/src/aki_prj23_transparenzregister/utils/data_extraction/unternehmensregister/transform.py index 36970bc..5c17a5b 100644 --- a/src/aki_prj23_transparenzregister/utils/data_extraction/unternehmensregister/transform.py +++ b/src/aki_prj23_transparenzregister/utils/data_extraction/unternehmensregister/transform.py @@ -25,7 +25,10 @@ from aki_prj23_transparenzregister.models.company import ( PersonToCompanyRelationship, RelationshipRoleEnum, ) -from aki_prj23_transparenzregister.utils.string_tools import transform_date_to_iso +from aki_prj23_transparenzregister.utils.string_tools import ( + remove_traling_and_leading_quotes, + transform_date_to_iso, +) def transform_xml_to_json(source_dir: str, target_dir: str) -> None: @@ -63,9 +66,11 @@ def parse_stakeholder(data: dict) -> CompanyRelationship | None: if data["Beteiligter"]["Natuerliche_Person"]["Voller_Name"]["Vorname"] is None: return CompanyToCompanyRelationship( **{ - "name": data["Beteiligter"]["Natuerliche_Person"]["Voller_Name"][ - "Nachname" - ].replace('"', ""), + "name": remove_traling_and_leading_quotes( + data["Beteiligter"]["Natuerliche_Person"]["Voller_Name"][ + "Nachname" + ] + ), "location": Location( **{ "city": data["Beteiligter"]["Natuerliche_Person"][ @@ -128,9 +133,11 @@ def 
parse_stakeholder(data: dict) -> CompanyRelationship | None: "role": RelationshipRoleEnum( data["Rolle"]["Rollenbezeichnung"]["content"] ), - "name": data["Beteiligter"]["Organisation"]["Bezeichnung"][ - "Bezeichnung_Aktuell" - ].replace('"', ""), + "name": remove_traling_and_leading_quotes( + data["Beteiligter"]["Organisation"]["Bezeichnung"][ + "Bezeichnung_Aktuell" + ] + ), "location": Location( **{ "city": data["Beteiligter"]["Organisation"]["Anschrift"]["Ort"], diff --git a/src/aki_prj23_transparenzregister/utils/string_tools.py b/src/aki_prj23_transparenzregister/utils/string_tools.py index f56fbc7..28f5340 100644 --- a/src/aki_prj23_transparenzregister/utils/string_tools.py +++ b/src/aki_prj23_transparenzregister/utils/string_tools.py @@ -34,3 +34,24 @@ def transform_date_to_iso(date: str) -> str: input_format = "%d.%m.%y" if re.match(regex_yy, date) else "%d.%m.%Y" date_temp = datetime.strptime(date, input_format) return date_temp.strftime("%Y-%m-%d") + + +def remove_traling_and_leading_quotes(value: str) -> str: + """Removes trailing and leading double-quotes from given string if present. 
+ + Args: + value (str): Value to clean up, None is returned unchanged + + Returns: + str: Value stripped of unpaired or enclosing double-quotes + """ + if value is not None: + count_quotes = value.count('"') + if count_quotes > 0: + if value.startswith('"') and count_quotes % 2 != 0: + value = value[1:] + if value.endswith('"') and count_quotes % 2 != 0: + value = value[:-1] + if value.startswith('"') and value.endswith('"'): + value = value[1:-1] + return value diff --git a/tests/utils/string_tools_test.py b/tests/utils/string_tools_test.py index e19b488..e944750 100644 --- a/tests/utils/string_tools_test.py +++ b/tests/utils/string_tools_test.py @@ -45,3 +45,17 @@ def test_simplify_string_type_error(value: Any) -> None: def test_transform_date_to_iso(value: str, expected: str) -> None: result = string_tools.transform_date_to_iso(value) assert result == expected + + +@pytest.mark.parametrize( + ("value", "expected_result"), + [ + (None, None), + ('"Siemes Verwaltungs-GmbH"', "Siemes Verwaltungs-GmbH"), + ('"Hans"-Wurst GmbH', '"Hans"-Wurst GmbH'), + ('Hans-Wurst GmbH"', "Hans-Wurst GmbH"), + ], +) +def test_remove_trailing_and_leading_quotes(value: str, expected_result: str) -> None: + result = string_tools.remove_traling_and_leading_quotes(value) + assert result == expected_result From 0dfe95652dc6b51852ac1c0ea5eed60c2724fd33 Mon Sep 17 00:00:00 2001 From: TrisNol Date: Tue, 3 Oct 2023 16:42:09 +0200 Subject: [PATCH 4/6] refactor: Implement MyPy feedback --- .../utils/data_extraction/unternehmensregister/transform.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/aki_prj23_transparenzregister/utils/data_extraction/unternehmensregister/transform.py b/src/aki_prj23_transparenzregister/utils/data_extraction/unternehmensregister/transform.py index 5c17a5b..56243df 100644 --- a/src/aki_prj23_transparenzregister/utils/data_extraction/unternehmensregister/transform.py +++ b/src/aki_prj23_transparenzregister/utils/data_extraction/unternehmensregister/transform.py @@ -444,7 +444,6 @@ def map_last_update(data: dict) -> str: """ return 
data["XJustiz_Daten"]["Fachdaten_Register"]["Auszug"]["letzte_Eintragung"] - def map_unternehmensregister_json(data: dict) -> Company: """Processes the Unternehmensregister structured export to a Company by using several helper methods. From c2f167003236c71210c6c3e3be4e5bf103893094 Mon Sep 17 00:00:00 2001 From: TrisNol Date: Tue, 3 Oct 2023 16:43:23 +0200 Subject: [PATCH 5/6] refactor: Implement MyPy feedback --- .../utils/data_extraction/unternehmensregister/transform.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/aki_prj23_transparenzregister/utils/data_extraction/unternehmensregister/transform.py b/src/aki_prj23_transparenzregister/utils/data_extraction/unternehmensregister/transform.py index 56243df..37d5629 100644 --- a/src/aki_prj23_transparenzregister/utils/data_extraction/unternehmensregister/transform.py +++ b/src/aki_prj23_transparenzregister/utils/data_extraction/unternehmensregister/transform.py @@ -65,7 +65,7 @@ def parse_stakeholder(data: dict) -> CompanyRelationship | None: # It's a Compnay serving as a "Kommanditist" or similar if data["Beteiligter"]["Natuerliche_Person"]["Voller_Name"]["Vorname"] is None: return CompanyToCompanyRelationship( - **{ + **{ # type: ignore "name": remove_traling_and_leading_quotes( data["Beteiligter"]["Natuerliche_Person"]["Voller_Name"][ "Nachname" @@ -129,7 +129,7 @@ def parse_stakeholder(data: dict) -> CompanyRelationship | None: ) if "Organisation" in data["Beteiligter"]: return CompanyToCompanyRelationship( - **{ + **{ # type: ignore "role": RelationshipRoleEnum( data["Rolle"]["Rollenbezeichnung"]["content"] ), @@ -444,6 +444,7 @@ def map_last_update(data: dict) -> str: """ return data["XJustiz_Daten"]["Fachdaten_Register"]["Auszug"]["letzte_Eintragung"] + def map_unternehmensregister_json(data: dict) -> Company: """Processes the Unternehmensregister structured export to a Company by using several helper methods. 
From 38d19a835d1e965598530a383b253d28e8dc3a55 Mon Sep 17 00:00:00 2001 From: TrisNol Date: Wed, 4 Oct 2023 19:39:50 +0200 Subject: [PATCH 6/6] refactor: Replace last replace with call of dedicated function --- .../utils/data_extraction/unternehmensregister/transform.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/aki_prj23_transparenzregister/utils/data_extraction/unternehmensregister/transform.py b/src/aki_prj23_transparenzregister/utils/data_extraction/unternehmensregister/transform.py index 37d5629..30233dd 100644 --- a/src/aki_prj23_transparenzregister/utils/data_extraction/unternehmensregister/transform.py +++ b/src/aki_prj23_transparenzregister/utils/data_extraction/unternehmensregister/transform.py @@ -233,7 +233,7 @@ def name_from_beteiligung(data: dict) -> str: name = data["XJustiz_Daten"]["Grunddaten"]["Verfahrensdaten"]["Beteiligung"][0][ "Beteiligter" ]["Organisation"]["Bezeichnung"]["Bezeichnung_Aktuell"] - return name.replace('"', "") + return remove_traling_and_leading_quotes(name) def map_rechtsform(company_name: str, data: dict) -> CompanyTypeEnum | None: