diff --git a/src/aki_prj23_transparenzregister/models/company.py b/src/aki_prj23_transparenzregister/models/company.py
index 64b5ce3..49abdcd 100644
--- a/src/aki_prj23_transparenzregister/models/company.py
+++ b/src/aki_prj23_transparenzregister/models/company.py
@@ -31,6 +31,7 @@ class RelationshipRoleEnum(str, MultiValueEnum):
     GESCHAEFTSLEITER = "Geschäftsleiter(in)", "Geschäftsleiter"
     ZWEIGNIEDERLASSUNG = "Zweigniederlassung"
     HAUPTNIEDERLASSUNG = "Hauptniederlassung"
+    CARE_OF = "c/o"
 
 
 class CompanyTypeEnum(str, MultiValueEnum):
diff --git a/src/aki_prj23_transparenzregister/utils/data_extraction/unternehmensregister/transform.py b/src/aki_prj23_transparenzregister/utils/data_extraction/unternehmensregister/transform.py
index 468fdee..82a8028 100644
--- a/src/aki_prj23_transparenzregister/utils/data_extraction/unternehmensregister/transform.py
+++ b/src/aki_prj23_transparenzregister/utils/data_extraction/unternehmensregister/transform.py
@@ -486,6 +486,48 @@ def map_last_update(data: dict) -> str:
     return data["XJustiz_Daten"]["Fachdaten_Register"]["Auszug"]["letzte_Eintragung"]
 
 
+def map_co_relation(data: dict) -> dict:
+    """Search for and map the c/o relation from location.street if possible.
+
+    A comma-separated "c/o <name>" part is cut from the street and added as relation.
+
+    Args:
+        data (dict): Company dict, modified in place
+
+    Returns:
+        dict: Modified Company dict
+    """
+    street = data["location"].street
+    if street is None:
+        return data
+    parts = street.split(",")
+    co_company = None
+    co_company_index = None
+    for index, part in enumerate(parts):
+        co_match = re.match(r"^c/o(.*)$", part.strip())
+        if co_match is not None:
+            co_company = co_match.group(1).strip()
+            co_company_index = index
+    if co_company_index is not None:
+        del parts[co_company_index]
+        street = ",".join(parts).strip()  # keep separator between remaining parts
+        data["location"].street = street
+    if co_company:
+        relation = CompanyToCompanyRelationship(
+            RelationshipRoleEnum.CARE_OF,  # type: ignore
+            Location(
+                data["location"].city,
+                street,
+                data["location"].house_number,
+                data["location"].zip_code,
+            ),
+            CompanyRelationshipEnum.COMPANY,  # type: ignore
+            co_company,
+        )
+        data["relationships"].append(relation)
+    return data
+
+
 def map_unternehmensregister_json(data: dict) -> Company:
     """Processes the Unternehmensregister structured export to a Company by using several helper methods.
 
@@ -516,15 +558,13 @@ def map_unternehmensregister_json(data: dict) -> Company:
             data["XJustiz_Daten"]["Grunddaten"]["Verfahrensdaten"]["Beteiligung"][i]
         )
         result["relationships"].append(people)
+    result = map_co_relation(result)
     return Company(**result)
 
 
 if __name__ == "__main__":
     from loguru import logger
 
-    # transform_xml_to_json(
-    #     "./data/Unternehmensregister/scraping/", "./data/Unternehmensregister/export/"
-    # )
     base_path = "./Jupyter/API-tests/Unternehmensregister/data/Unternehmensregister"
     for file in tqdm(glob.glob1(f"{base_path}/export", "*.json")):
         path = os.path.join(f"{base_path}/export", file)
@@ -544,6 +584,7 @@ if __name__ == "__main__":
                     json.dump(
                         dataclasses.asdict(company), export_file, ensure_ascii=False
                     )
-            except Exception:
+            except Exception as e:
+                logger.error(e)
                 logger.error(f"Error in processing {path}")
                 sys.exit(1)
diff --git a/tests/utils/data_extraction/unternehmensregister/transform_test.py b/tests/utils/data_extraction/unternehmensregister/transform_test.py
index a312572..08f24a7 100644
--- a/tests/utils/data_extraction/unternehmensregister/transform_test.py
+++ b/tests/utils/data_extraction/unternehmensregister/transform_test.py
@@ -709,6 +709,86 @@ def test_map_last_update() -> None:
     assert result == date
 
 
+@pytest.mark.parametrize(
+    ("value", "expected_result"),
+    [
+        (
+            {
+                "location": Location(
+                    "", "c/o Youco24 Business Center, Abc ffda", None, None
+                ),
+                "relationships": [],
+            },
+            {
+                "location": Location("", "Abc ffda", None, None),
+                "relationships": [
+                    CompanyToCompanyRelationship(
+                        RelationshipRoleEnum.CARE_OF,  # type: ignore
+                        Location("", "Abc ffda", None, None),
+                        CompanyRelationshipEnum.COMPANY,
+                        "Youco24 Business Center",
+                    )
+                ],
+            },
+        ),
+        (
+            {
+                "location": Location(
+                    "Iserlohn", "c/o Youco24 Business Center, Abc Str.", "42", "58644"
+                ),
+                "relationships": [],
+            },
+            {
+                "location": Location("Iserlohn", "Abc Str.", "42", "58644"),
+                "relationships": [
+                    CompanyToCompanyRelationship(
+                        RelationshipRoleEnum.CARE_OF,  # type: ignore
+                        Location("Iserlohn", "Abc Str.", "42", "58644"),
+                        CompanyRelationshipEnum.COMPANY,
+                        "Youco24 Business Center",
+                    )
+                ],
+            },
+        ),
+        (
+            {
+                "location": Location(
+                    "Iserlohn", "Abc Str., c/o Youco24 Business Center", "42", "58644"
+                ),
+                "relationships": [],
+            },
+            {
+                "location": Location("Iserlohn", "Abc Str.", "42", "58644"),
+                "relationships": [
+                    CompanyToCompanyRelationship(
+                        RelationshipRoleEnum.CARE_OF,  # type: ignore
+                        Location("Iserlohn", "Abc Str.", "42", "58644"),
+                        CompanyRelationshipEnum.COMPANY,
+                        "Youco24 Business Center",
+                    )
+                ],
+            },
+        ),
+        (
+            {
+                "location": Location("Iserlohn", "Abc Str., c/o", "42", "58644"),
+                "relationships": [],
+            },
+            {
+                "location": Location("Iserlohn", "Abc Str.", "42", "58644"),
+                "relationships": [],
+            },
+        ),
+    ],
+)
+def test_map_co_relation(value: dict, expected_result: dict) -> None:
+    result = transform.map_co_relation(value)
+    assert result == expected_result
+
+
+@patch(
+    "aki_prj23_transparenzregister.utils.data_extraction.unternehmensregister.transform.map_co_relation"
+)
 @patch(
     "aki_prj23_transparenzregister.utils.data_extraction.unternehmensregister.transform.map_company_id"
 )
@@ -746,6 +826,7 @@ def test_map_unternehmensregister_json(  # noqa: PLR0913
     mock_loc_from_beteiligung: Mock,
     mock_map_name_from_beteiligung: Mock,
     mock_map_company_id: Mock,
+    mock_map_co_relation: Mock,
 ) -> None:
     expected_result = Company(
         **{  # type: ignore
@@ -770,6 +851,7 @@ def test_map_unternehmensregister_json(  # noqa: PLR0913
     mock_map_business_purpose.return_value = expected_result.business_purpose
     mock_map_founding_date.return_value = expected_result.founding_date
     mock_map_parse_stakeholder.return_value = expected_result.relationships[0]
+    mock_map_co_relation.side_effect = lambda x: x
 
     data: dict = {
         "XJustiz_Daten": {