diff --git a/src/aki_prj23_transparenzregister/utils/data_extraction/unternehmensregister/transform/common.py b/src/aki_prj23_transparenzregister/utils/data_extraction/unternehmensregister/transform/common.py index 6d30ad6..8a75843 100644 --- a/src/aki_prj23_transparenzregister/utils/data_extraction/unternehmensregister/transform/common.py +++ b/src/aki_prj23_transparenzregister/utils/data_extraction/unternehmensregister/transform/common.py @@ -1,11 +1,17 @@ """Common functions for data transformation.""" +import abc import re import typing from collections.abc import Sequence from aki_prj23_transparenzregister.models.company import ( + Capital, + Company, + CompanyID, + CompanyRelationship, CompanyRelationshipEnum, CompanyToCompanyRelationship, + CompanyTypeEnum, Location, RelationshipRoleEnum, ) @@ -121,3 +127,130 @@ def map_co_relation(data: dict) -> dict: ) data["relationships"].append(relation) return data + + +class BaseTransformer(metaclass=abc.ABCMeta): + """Generic abstract class for data transformation between Unternehmensregister and Transparenzregister API.""" + + @abc.abstractmethod + def parse_date_of_birth(self, data: dict) -> str | None: + """Retreives the date of birth from a stakeholder entry if possible. + + Args: + data (dict): Stakeholder data + + Returns: + str | None: date of birth or None if not found + """ + + @abc.abstractmethod + def parse_stakeholder(self, data: dict) -> CompanyRelationship | None: + """Extract the company stakeholder/relation from a single "Beteiligung". + + Args: + data (dict): Data export + + Returns: + CompanyRelationship | None: Relationship if it could be processed + """ + + @abc.abstractmethod + def loc_from_beteiligung(self, data: dict) -> Location: + """Extract the company location from the first relationship in the export. + + Args: + data (dict): Data export + + Returns: + Location: location + """ + + @abc.abstractmethod + def name_from_beteiligung(self, data: dict) -> str: + """Extract the Company name from an Unternehmensregister export by using the first relationship found. + + Args: + data (dict): Data export + + Returns: + str: Company name + """ + + @abc.abstractmethod + def map_rechtsform(self, company_name: str, data: dict) -> CompanyTypeEnum | None: + """Extracts the company type from a given Unternehmensregister export. + + Args: + company_name (str): Name of the company as a fallback solution + data (dict): Data export + + Returns: + CompanyTypeEnum | None: Company type if found + """ + + @abc.abstractmethod + def map_capital(self, data: dict, company_type: CompanyTypeEnum) -> Capital | None: + """Extracts the company capital from the given Unternehmensregister export. + + Args: + data (dict): Data export + company_type (CompanyTypeEnum): Type of company (e.g., 'Gesellschaft mit beschränkter Haftung') + + Returns: + Capital | None: Company Capital if found + """ + + @abc.abstractmethod + def map_business_purpose(self, data: dict) -> str | None: + """Extracts the "Geschäftszweck" from a given Unternehmensregister export. + + Args: + data (dict): Data export + + Returns: + str | None: Business purpose if found + """ + + @abc.abstractmethod + def map_founding_date(self, data: dict) -> str | None: + """Extracts the founding date from a given Unternehmensregister export. + + Args: + data (dict): Data export + + Returns: + str | None: Founding date if found + """ + + @abc.abstractmethod + def map_company_id(self, data: dict) -> CompanyID: + """Retrieve Company ID from export. + + Args: + data (dict): Data export + + Returns: + CompanyID: ID of the company + """ + + @abc.abstractmethod + def map_last_update(self, data: dict) -> str: + """Extract last update date from export. + + Args: + data (dict): Unternehmensregister export + + Returns: + str: Last update date + """ + + @abc.abstractmethod + def map_unternehmensregister_json(self, data: dict) -> Company: + """Processes the Unternehmensregister structured export to a Company by using several helper methods. + + Args: + data (dict): Data export + + Returns: + Company: Transformed data + """ diff --git a/src/aki_prj23_transparenzregister/utils/data_extraction/unternehmensregister/transform/main.py b/src/aki_prj23_transparenzregister/utils/data_extraction/unternehmensregister/transform/main.py index 452e620..6459311 100644 --- a/src/aki_prj23_transparenzregister/utils/data_extraction/unternehmensregister/transform/main.py +++ b/src/aki_prj23_transparenzregister/utils/data_extraction/unternehmensregister/transform/main.py @@ -4,13 +4,15 @@ import glob import json import os import sys -import typing import xmltodict from loguru import logger from tqdm import tqdm from aki_prj23_transparenzregister.models.company import Company +from aki_prj23_transparenzregister.utils.data_extraction.unternehmensregister.transform.common import ( + BaseTransformer, +) from aki_prj23_transparenzregister.utils.data_extraction.unternehmensregister.transform.v1 import ( v1, ) @@ -42,7 +44,7 @@ def transform_xml_to_json(source_dir: str, target_dir: str) -> None: logger.error(e) -def determine_version(data: dict) -> typing.Any: +def determine_version(data: dict) -> BaseTransformer: """Determine Unternehmensregister data API version of given entry. Args: @@ -56,9 +58,9 @@ def determine_version(data: dict) -> typing.Any: """ if "XJustiz_Daten" in data: # TODO consider class inheritance for version modules - return v1 + return v1.V1_Transformer() if "tns:nachrichtenkopf" in data[list(data.keys())[0]]: - return v3 + return v3.V3_Transformer() raise ValueError("Could not determine Unternehmensregister version.") @@ -77,6 +79,7 @@ def map_unternehmensregister_json(data: dict) -> Company: if __name__ == "__main__": base_path = "./Jupyter/API-tests/Unternehmensregister/data/Unternehmensregister" + # TODO Adapt to new structure with different versions for file in tqdm(glob.glob1(f"{base_path}/export", "*.json")): path = os.path.join(f"{base_path}/export", file) with open(path, encoding="utf-8") as file_object: diff --git a/src/aki_prj23_transparenzregister/utils/data_extraction/unternehmensregister/transform/v1/v1.py b/src/aki_prj23_transparenzregister/utils/data_extraction/unternehmensregister/transform/v1/v1.py index 834b1e5..77993d2 100644 --- a/src/aki_prj23_transparenzregister/utils/data_extraction/unternehmensregister/transform/v1/v1.py +++ b/src/aki_prj23_transparenzregister/utils/data_extraction/unternehmensregister/transform/v1/v1.py @@ -18,6 +18,7 @@ from aki_prj23_transparenzregister.models.company import ( RelationshipRoleEnum, ) from aki_prj23_transparenzregister.utils.data_extraction.unternehmensregister.transform.common import ( + BaseTransformer, extract_date_from_string, map_co_relation, normalize_street, @@ -28,41 +29,81 @@ from aki_prj23_transparenzregister.utils.string_tools import ( ) -def parse_date_of_birth(data: dict) -> str | None: - """Retreives the date of birth from a stakeholder entry if possible. +class V1_Transformer(BaseTransformer): # noqa: N801 + """Transformer for data exports from Unternehmensregister (v1).""" - Args: - data (dict): Stakeholder data + def parse_date_of_birth(self, data: dict) -> str | None: + """Retreives the date of birth from a stakeholder entry if possible. - Returns: - str | None: date of birth or None if not found - """ - if "Geburt" in (base := data["Beteiligter"]["Natuerliche_Person"]): - base = base["Geburt"]["Geburtsdatum"] - if isinstance(base, str): - return base - return None + Args: + data (dict): Stakeholder data + Returns: + str | None: date of birth or None if not found + """ + if "Geburt" in (base := data["Beteiligter"]["Natuerliche_Person"]): + base = base["Geburt"]["Geburtsdatum"] + if isinstance(base, str): + return base + return None -def parse_stakeholder(data: dict) -> CompanyRelationship | None: - """Extract the company stakeholder/relation from a single "Beteiligung". + def parse_stakeholder(self, data: dict) -> CompanyRelationship | None: + """Extract the company stakeholder/relation from a single "Beteiligung". - Args: - data (dict): Data export + Args: + data (dict): Data export - Returns: - CompanyRelationship | None: Relationship if it could be processed - """ - if "Natuerliche_Person" in data["Beteiligter"]: - # It's a Company serving as a "Kommanditist" or similar - if data["Beteiligter"]["Natuerliche_Person"]["Voller_Name"]["Vorname"] is None: - return CompanyToCompanyRelationship( + Returns: + CompanyRelationship | None: Relationship if it could be processed + """ + if "Natuerliche_Person" in data["Beteiligter"]: + # It's a Company serving as a "Kommanditist" or similar + if ( + data["Beteiligter"]["Natuerliche_Person"]["Voller_Name"]["Vorname"] + is None + ): + return CompanyToCompanyRelationship( + **{ # type: ignore + "name": remove_traling_and_leading_quotes( + data["Beteiligter"]["Natuerliche_Person"]["Voller_Name"][ + "Nachname" + ] + ), + "location": Location( + **{ + "city": data["Beteiligter"]["Natuerliche_Person"][ + "Anschrift" + ][-1]["Ort"] + if isinstance( + data["Beteiligter"]["Natuerliche_Person"][ + "Anschrift" + ], + list, + ) + else data["Beteiligter"]["Natuerliche_Person"][ + "Anschrift" + ]["Ort"] + } + ), + "role": RelationshipRoleEnum( + data["Rolle"]["Rollenbezeichnung"]["content"] + ), + "type": CompanyRelationshipEnum.COMPANY, + } + ) + return PersonToCompanyRelationship( **{ # type: ignore - "name": remove_traling_and_leading_quotes( - data["Beteiligter"]["Natuerliche_Person"]["Voller_Name"][ - "Nachname" - ] + "name": PersonName( + **{ + "firstname": data["Beteiligter"]["Natuerliche_Person"][ + "Voller_Name" + ]["Vorname"], + "lastname": data["Beteiligter"]["Natuerliche_Person"][ + "Voller_Name" + ]["Nachname"], + } ), + "date_of_birth": self.parse_date_of_birth(data), "location": Location( **{ "city": data["Beteiligter"]["Natuerliche_Person"][ @@ -80,372 +121,339 @@ def parse_stakeholder(data: dict) -> CompanyRelationship | None: "role": RelationshipRoleEnum( data["Rolle"]["Rollenbezeichnung"]["content"] ), + "type": CompanyRelationshipEnum.PERSON, + } + ) + if "Organisation" in data["Beteiligter"]: + return CompanyToCompanyRelationship( + **{ # type: ignore + "role": RelationshipRoleEnum( + data["Rolle"]["Rollenbezeichnung"]["content"] + ), + "name": remove_traling_and_leading_quotes( + data["Beteiligter"]["Organisation"]["Bezeichnung"][ + "Bezeichnung_Aktuell" + ] + ), + "location": Location( + **{ + "city": data["Beteiligter"]["Organisation"]["Anschrift"][ + "Ort" + ], + "street": data["Beteiligter"]["Organisation"]["Anschrift"][ + "Strasse" + ] + if "Strasse" + in data["Beteiligter"]["Organisation"]["Anschrift"] + else None, + "house_number": data["Beteiligter"]["Organisation"][ + "Anschrift" + ]["Hausnummer"] + if "Hausnummer" + in data["Beteiligter"]["Organisation"]["Anschrift"] + else None, + "zip_code": data["Beteiligter"]["Organisation"][ + "Anschrift" + ]["Postleitzahl"] + if "Postleitzahl" + in data["Beteiligter"]["Organisation"]["Anschrift"] + else None, + } + ), "type": CompanyRelationshipEnum.COMPANY, } ) - return PersonToCompanyRelationship( - **{ # type: ignore - "name": PersonName( - **{ - "firstname": data["Beteiligter"]["Natuerliche_Person"][ - "Voller_Name" - ]["Vorname"], - "lastname": data["Beteiligter"]["Natuerliche_Person"][ - "Voller_Name" - ]["Nachname"], - } - ), - "date_of_birth": parse_date_of_birth(data), - "location": Location( - **{ - "city": data["Beteiligter"]["Natuerliche_Person"]["Anschrift"][ - -1 - ]["Ort"] - if isinstance( - data["Beteiligter"]["Natuerliche_Person"]["Anschrift"], list - ) - else data["Beteiligter"]["Natuerliche_Person"]["Anschrift"][ - "Ort" - ] - } - ), - "role": RelationshipRoleEnum( - data["Rolle"]["Rollenbezeichnung"]["content"] - ), - "type": CompanyRelationshipEnum.PERSON, - } - ) - if "Organisation" in data["Beteiligter"]: - return CompanyToCompanyRelationship( - **{ # type: ignore - "role": RelationshipRoleEnum( - data["Rolle"]["Rollenbezeichnung"]["content"] - ), - "name": remove_traling_and_leading_quotes( - data["Beteiligter"]["Organisation"]["Bezeichnung"][ - "Bezeichnung_Aktuell" - ] - ), - "location": Location( - **{ - "city": data["Beteiligter"]["Organisation"]["Anschrift"]["Ort"], - "street": data["Beteiligter"]["Organisation"]["Anschrift"][ - "Strasse" - ] - if "Strasse" in data["Beteiligter"]["Organisation"]["Anschrift"] - else None, - "house_number": data["Beteiligter"]["Organisation"][ - "Anschrift" - ]["Hausnummer"] - if "Hausnummer" - in data["Beteiligter"]["Organisation"]["Anschrift"] - else None, - "zip_code": data["Beteiligter"]["Organisation"]["Anschrift"][ - "Postleitzahl" - ] - if "Postleitzahl" - in data["Beteiligter"]["Organisation"]["Anschrift"] - else None, - } - ), - "type": CompanyRelationshipEnum.COMPANY, - } - ) - return None - - -def loc_from_beteiligung(data: dict) -> Location: - """Extract the company location from the first relationship in the export. - - Args: - data (dict): Data export - - Returns: - Location: location - """ - base = data["XJustiz_Daten"]["Grunddaten"]["Verfahrensdaten"]["Beteiligung"][0][ - "Beteiligter" - ]["Organisation"]["Anschrift"] - - house_number = None - street = None - if "Strasse" in base: - regex = r".(\d+)$" - hits = re.findall(regex, base["Strasse"]) - if len(hits) == 1: - house_number = hits[0] - street = base["Strasse"][: (-1 * len(house_number))] - if "Hausnummer" in base: - house_number = house_number + base["Hausnummer"] - else: - if "Hausnummer" in base: - house_number = base["Hausnummer"] - street = base["Strasse"] - return Location( - **{ - "city": base["Ort"], - "zip_code": base["Postleitzahl"], - "street": normalize_street(street), # type: ignore - "house_number": house_number, - } - ) - - -def name_from_beteiligung(data: dict) -> str: - """Extract the Company name from an Unternehmensregister export by using the first relationship found. - - Args: - data (dict): Data export - - Returns: - str: Company name - """ - name = data["XJustiz_Daten"]["Grunddaten"]["Verfahrensdaten"]["Beteiligung"][0][ - "Beteiligter" - ]["Organisation"]["Bezeichnung"]["Bezeichnung_Aktuell"] - return remove_traling_and_leading_quotes(name) - - -def map_rechtsform(company_name: str, data: dict) -> CompanyTypeEnum | None: - """Extracts the company type from a given Unternehmensregister export. - - Args: - company_name (str): Name of the company as a fallback solution - data (dict): Data export - - Returns: - CompanyTypeEnum | None: Company type if found - """ - try: - return CompanyTypeEnum( - data["XJustiz_Daten"]["Fachdaten_Register"]["Basisdaten_Register"][ - "Rechtstraeger" - ]["Rechtsform"]["content"] - ) - except KeyError: - if ( - company_name.endswith("GmbH") - or company_name.endswith("UG") - or company_name.endswith("UG (haftungsbeschränkt)") - ): - return CompanyTypeEnum("Gesellschaft mit beschränkter Haftung") - if company_name.endswith("SE"): - return CompanyTypeEnum("Europäische Aktiengesellschaft (SE)") - if company_name.endswith("KG"): - return CompanyTypeEnum("Kommanditgesellschaft") return None + def loc_from_beteiligung(self, data: dict) -> Location: + """Extract the company location from the first relationship in the export. -def map_capital(data: dict, company_type: CompanyTypeEnum) -> Capital | None: - """Extracts the company capital from the given Unternehmensregister export. + Args: + data (dict): Data export - Args: - data (dict): Data export - company_type (CompanyTypeEnum): Type of company (e.g., 'Gesellschaft mit beschränkter Haftung') + Returns: + Location: location + """ + base = data["XJustiz_Daten"]["Grunddaten"]["Verfahrensdaten"]["Beteiligung"][0][ + "Beteiligter" + ]["Organisation"]["Anschrift"] - Returns: - Capital | None: Company Capital if found - """ - # Early return - if "Zusatzangaben" not in data["XJustiz_Daten"]["Fachdaten_Register"]: - return None - capital: dict = {"Zahl": 0.0, "Waehrung": ""} - if company_type == CompanyTypeEnum.KG: - capital_type = "Hafteinlage" - base = data["XJustiz_Daten"]["Fachdaten_Register"]["Zusatzangaben"][ - "Personengesellschaft" - ]["Zusatz_KG"]["Daten_Kommanditist"] - if isinstance(base, list): - for entry in base: - # TODO link to persons using Ref_Rollennummer then extract ["Hafteinlage"] as below - capital["Zahl"] = capital["Zahl"] + float(entry["Hafteinlage"]["Zahl"]) - capital["Waehrung"] = entry["Hafteinlage"]["Waehrung"] - elif isinstance(base, dict): - capital = base["Hafteinlage"] - elif company_type in [ - CompanyTypeEnum.GMBH, - CompanyTypeEnum.SE, - CompanyTypeEnum.AG, - CompanyTypeEnum.KGaA, - CompanyTypeEnum.AUSLAENDISCHE_RECHTSFORM, - CompanyTypeEnum.OHG, - ]: - if ( - "Kapitalgesellschaft" - not in data["XJustiz_Daten"]["Fachdaten_Register"]["Zusatzangaben"] - ): + house_number = None + street = None + if "Strasse" in base: + regex = r".(\d+)$" + hits = re.findall(regex, base["Strasse"]) + if len(hits) == 1: + house_number = hits[0] + street = base["Strasse"][: (-1 * len(house_number))] + if "Hausnummer" in base: + house_number = house_number + base["Hausnummer"] + else: + if "Hausnummer" in base: + house_number = base["Hausnummer"] + street = base["Strasse"] + return Location( + **{ + "city": base["Ort"], + "zip_code": base["Postleitzahl"], + "street": normalize_street(street), # type: ignore + "house_number": house_number, + } + ) + + def name_from_beteiligung(self, data: dict) -> str: + """Extract the Company name from an Unternehmensregister export by using the first relationship found. + + Args: + data (dict): Data export + + Returns: + str: Company name + """ + name = data["XJustiz_Daten"]["Grunddaten"]["Verfahrensdaten"]["Beteiligung"][0][ + "Beteiligter" + ]["Organisation"]["Bezeichnung"]["Bezeichnung_Aktuell"] + return remove_traling_and_leading_quotes(name) + + def map_rechtsform(self, company_name: str, data: dict) -> CompanyTypeEnum | None: + """Extracts the company type from a given Unternehmensregister export. + + Args: + company_name (str): Name of the company as a fallback solution + data (dict): Data export + + Returns: + CompanyTypeEnum | None: Company type if found + """ + try: + return CompanyTypeEnum( + data["XJustiz_Daten"]["Fachdaten_Register"]["Basisdaten_Register"][ + "Rechtstraeger" + ]["Rechtsform"]["content"] + ) + except KeyError: + if ( + company_name.endswith("GmbH") + or company_name.endswith("UG") + or company_name.endswith("UG (haftungsbeschränkt)") + ): + return CompanyTypeEnum("Gesellschaft mit beschränkter Haftung") + if company_name.endswith("SE"): + return CompanyTypeEnum("Europäische Aktiengesellschaft (SE)") + if company_name.endswith("KG"): + return CompanyTypeEnum("Kommanditgesellschaft") + return None + + def map_capital(self, data: dict, company_type: CompanyTypeEnum) -> Capital | None: + """Extracts the company capital from the given Unternehmensregister export. + + Args: + data (dict): Data export + company_type (CompanyTypeEnum): Type of company (e.g., 'Gesellschaft mit beschränkter Haftung') + + Returns: + Capital | None: Company Capital if found + """ + # Early return + if "Zusatzangaben" not in data["XJustiz_Daten"]["Fachdaten_Register"]: + return None + capital: dict = {"Zahl": 0.0, "Waehrung": ""} + if company_type == CompanyTypeEnum.KG: + capital_type = "Hafteinlage" base = data["XJustiz_Daten"]["Fachdaten_Register"]["Zusatzangaben"][ "Personengesellschaft" - ] - else: - base = data["XJustiz_Daten"]["Fachdaten_Register"]["Zusatzangaben"][ + ]["Zusatz_KG"]["Daten_Kommanditist"] + if isinstance(base, list): + for entry in base: + # TODO link to persons using Ref_Rollennummer then extract ["Hafteinlage"] as below + capital["Zahl"] = capital["Zahl"] + float( + entry["Hafteinlage"]["Zahl"] + ) + capital["Waehrung"] = entry["Hafteinlage"]["Waehrung"] + elif isinstance(base, dict): + capital = base["Hafteinlage"] + elif company_type in [ + CompanyTypeEnum.GMBH, + CompanyTypeEnum.SE, + CompanyTypeEnum.AG, + CompanyTypeEnum.KGaA, + CompanyTypeEnum.AUSLAENDISCHE_RECHTSFORM, + CompanyTypeEnum.OHG, + ]: + if ( "Kapitalgesellschaft" + not in data["XJustiz_Daten"]["Fachdaten_Register"]["Zusatzangaben"] + ): + base = data["XJustiz_Daten"]["Fachdaten_Register"]["Zusatzangaben"][ + "Personengesellschaft" + ] + else: + base = data["XJustiz_Daten"]["Fachdaten_Register"]["Zusatzangaben"][ + "Kapitalgesellschaft" + ] + if "Zusatz_GmbH" in base: + capital_type = "Stammkapital" + capital = base["Zusatz_GmbH"]["Stammkapital"] + elif "Zusatz_Aktiengesellschaft" in base: + capital_type = "Grundkapital" + capital = base["Zusatz_Aktiengesellschaft"]["Grundkapital"]["Hoehe"] + elif company_type in [ + CompanyTypeEnum.EINZELKAUFMANN, + CompanyTypeEnum.EG, + CompanyTypeEnum.PARTNERSCHAFT, + CompanyTypeEnum.PARTNERGESELLSCHAFT, + CompanyTypeEnum.PARTNERSCHAFTSGESELLSCHAFT, + None, + ]: + return None + # Catch entries having the dict but with null values + if not all(capital.values()): + return None + return Capital( + **{ # type: ignore + "value": float(capital["Zahl"]), + "currency": CurrencyEnum(capital["Waehrung"]), + "type": CapitalTypeEnum(capital_type), + } + ) + + def map_business_purpose(self, data: dict) -> str | None: + """Extracts the "Geschäftszweck" from a given Unternehmensregister export. + + Args: + data (dict): Data export + + Returns: + str | None: Business purpose if found + """ + try: + return data["XJustiz_Daten"]["Fachdaten_Register"]["Basisdaten_Register"][ + "Gegenstand_oder_Geschaeftszweck" ] - if "Zusatz_GmbH" in base: - capital_type = "Stammkapital" - capital = base["Zusatz_GmbH"]["Stammkapital"] - elif "Zusatz_Aktiengesellschaft" in base: - capital_type = "Grundkapital" - capital = base["Zusatz_Aktiengesellschaft"]["Grundkapital"]["Hoehe"] - elif company_type in [ - CompanyTypeEnum.EINZELKAUFMANN, - CompanyTypeEnum.EG, - CompanyTypeEnum.PARTNERSCHAFT, - CompanyTypeEnum.PARTNERGESELLSCHAFT, - CompanyTypeEnum.PARTNERSCHAFTSGESELLSCHAFT, - None, - ]: + except KeyError: + return None + + def map_founding_date(self, data: dict) -> str | None: + """Extracts the founding date from a given Unternehmensregister export. + + Args: + data (dict): Data export + + Returns: + str | None: Founding date if found + """ + text = str(data) + entry_date = re.findall( + r".Tag der ersten Eintragung:(\\n| )?(\d{1,2}\.\d{1,2}\.\d{2,4})", text + ) + if len(entry_date) == 1: + return transform_date_to_iso(entry_date[0][1]) + + entry_date = re.findall( + r".Gesellschaftsvertrag vom (\d{1,2}\.\d{1,2}\.\d{2,4})", text + ) + if len(entry_date) == 1: + return transform_date_to_iso(entry_date[0]) + if ( + "Gruendungsmetadaten" + in data["XJustiz_Daten"]["Fachdaten_Register"]["Basisdaten_Register"] + ): + return extract_date_from_string( + data["XJustiz_Daten"]["Fachdaten_Register"]["Basisdaten_Register"][ + "Gruendungsmetadaten" + ]["Gruendungsdatum"] + ) + # No reliable answer return None - # Catch entries having the dict but with null values - if not all(capital.values()): - return None - return Capital( - **{ # type: ignore - "value": float(capital["Zahl"]), - "currency": CurrencyEnum(capital["Waehrung"]), - "type": CapitalTypeEnum(capital_type), - } - ) + def map_company_id(self, data: dict) -> CompanyID: + """Retrieve Company ID from export. -def map_business_purpose(data: dict) -> str | None: - """Extracts the "Geschäftszweck" from a given Unternehmensregister export. + Args: + data (dict): Data export - Args: - data (dict): Data export + Returns: + CompanyID: ID of the company + """ + return CompanyID( + **{ + "hr_number": data["XJustiz_Daten"]["Grunddaten"]["Verfahrensdaten"][ + "Instanzdaten" + ]["Aktenzeichen"], + "district_court": DistrictCourt( + **{ + "name": data["XJustiz_Daten"]["Grunddaten"]["Verfahrensdaten"][ + "Beteiligung" + ][1]["Beteiligter"]["Organisation"]["Bezeichnung"][ + "Bezeichnung_Aktuell" + ] + if "Organisation" + in data["XJustiz_Daten"]["Grunddaten"]["Verfahrensdaten"][ + "Beteiligung" + ][1]["Beteiligter"] + else data["XJustiz_Daten"]["Grunddaten"]["Verfahrensdaten"][ + "Beteiligung" + ][1]["Beteiligter"]["Natuerliche_Person"]["Voller_Name"][ + "Nachname" + ], + "city": data["XJustiz_Daten"]["Grunddaten"]["Verfahrensdaten"][ + "Beteiligung" + ][1]["Beteiligter"]["Organisation"]["Sitz"]["Ort"] + if "Organisation" + in data["XJustiz_Daten"]["Grunddaten"]["Verfahrensdaten"][ + "Beteiligung" + ][1]["Beteiligter"] + else data["XJustiz_Daten"]["Grunddaten"]["Verfahrensdaten"][ + "Beteiligung" + ][1]["Beteiligter"]["Natuerliche_Person"]["Anschrift"]["Ort"], + } + ), + } + ) - Returns: - str | None: Business purpose if found - """ - try: - return data["XJustiz_Daten"]["Fachdaten_Register"]["Basisdaten_Register"][ - "Gegenstand_oder_Geschaeftszweck" + def map_last_update(self, data: dict) -> str: + """Extract last update date from export. + + Args: + data (dict): Unternehmensregister export + + Returns: + str: Last update date + """ + return data["XJustiz_Daten"]["Fachdaten_Register"]["Auszug"][ + "letzte_Eintragung" ] - except KeyError: - return None + def map_unternehmensregister_json(self, data: dict) -> Company: + """Processes the Unternehmensregister structured export to a Company by using several helper methods. -def map_founding_date(data: dict) -> str | None: - """Extracts the founding date from a given Unternehmensregister export. + Args: + data (dict): Data export - Args: - data (dict): Data export + Returns: + Company: Transformed data + """ + result: dict = {"relationships": []} - Returns: - str | None: Founding date if found - """ - text = str(data) - entry_date = re.findall( - r".Tag der ersten Eintragung:(\\n| )?(\d{1,2}\.\d{1,2}\.\d{2,4})", text - ) - if len(entry_date) == 1: - return transform_date_to_iso(entry_date[0][1]) + # TODO Refactor mapping - this is a nightmare... + result["id"] = self.map_company_id(data) + result["name"] = self.name_from_beteiligung(data) - entry_date = re.findall( - r".Gesellschaftsvertrag vom (\d{1,2}\.\d{1,2}\.\d{2,4})", text - ) - if len(entry_date) == 1: - return transform_date_to_iso(entry_date[0]) - if ( - "Gruendungsmetadaten" - in data["XJustiz_Daten"]["Fachdaten_Register"]["Basisdaten_Register"] - ): - return extract_date_from_string( - data["XJustiz_Daten"]["Fachdaten_Register"]["Basisdaten_Register"][ - "Gruendungsmetadaten" - ]["Gruendungsdatum"] - ) - # No reliable answer - return None + result["location"] = self.loc_from_beteiligung(data) + result["last_update"] = self.map_last_update(data) + result["company_type"] = self.map_rechtsform(result["name"], data) + result["capital"] = self.map_capital(data, result["company_type"]) + result["business_purpose"] = self.map_business_purpose(data) + result["founding_date"] = self.map_founding_date(data) -def map_company_id(data: dict) -> CompanyID: - """Retrieve Company ID from export. - - Args: - data (dict): Data export - - Returns: - CompanyID: ID of the company - """ - return CompanyID( - **{ - "hr_number": data["XJustiz_Daten"]["Grunddaten"]["Verfahrensdaten"][ - "Instanzdaten" - ]["Aktenzeichen"], - "district_court": DistrictCourt( - **{ - "name": data["XJustiz_Daten"]["Grunddaten"]["Verfahrensdaten"][ - "Beteiligung" - ][1]["Beteiligter"]["Organisation"]["Bezeichnung"][ - "Bezeichnung_Aktuell" - ] - if "Organisation" - in data["XJustiz_Daten"]["Grunddaten"]["Verfahrensdaten"][ - "Beteiligung" - ][1]["Beteiligter"] - else data["XJustiz_Daten"]["Grunddaten"]["Verfahrensdaten"][ - "Beteiligung" - ][1]["Beteiligter"]["Natuerliche_Person"]["Voller_Name"][ - "Nachname" - ], - "city": data["XJustiz_Daten"]["Grunddaten"]["Verfahrensdaten"][ - "Beteiligung" - ][1]["Beteiligter"]["Organisation"]["Sitz"]["Ort"] - if "Organisation" - in data["XJustiz_Daten"]["Grunddaten"]["Verfahrensdaten"][ - "Beteiligung" - ][1]["Beteiligter"] - else data["XJustiz_Daten"]["Grunddaten"]["Verfahrensdaten"][ - "Beteiligung" - ][1]["Beteiligter"]["Natuerliche_Person"]["Anschrift"]["Ort"], - } - ), - } - ) - - -def map_last_update(data: dict) -> str: - """Extract last update date from export. - - Args: - data (dict): Unternehmensregister export - - Returns: - str: Last update date - """ - return data["XJustiz_Daten"]["Fachdaten_Register"]["Auszug"]["letzte_Eintragung"] - - -def map_unternehmensregister_json(data: dict) -> Company: - """Processes the Unternehmensregister structured export to a Company by using several helper methods. - - Args: - data (dict): Data export - - Returns: - Company: Transformed data - """ - result: dict = {"relationships": []} - - # TODO Refactor mapping - this is a nightmare... - result["id"] = map_company_id(data) - result["name"] = name_from_beteiligung(data) - - result["location"] = loc_from_beteiligung(data) - result["last_update"] = map_last_update(data) - - result["company_type"] = map_rechtsform(result["name"], data) - result["capital"] = map_capital(data, result["company_type"]) - result["business_purpose"] = map_business_purpose(data) - result["founding_date"] = map_founding_date(data) - - for i in range( - 2, len(data["XJustiz_Daten"]["Grunddaten"]["Verfahrensdaten"]["Beteiligung"]) - ): - people = parse_stakeholder( - data["XJustiz_Daten"]["Grunddaten"]["Verfahrensdaten"]["Beteiligung"][i] - ) - result["relationships"].append(people) - result = map_co_relation(result) - return Company(**result) + for i in range( + 2, + len(data["XJustiz_Daten"]["Grunddaten"]["Verfahrensdaten"]["Beteiligung"]), + ): + people = self.parse_stakeholder( + data["XJustiz_Daten"]["Grunddaten"]["Verfahrensdaten"]["Beteiligung"][i] + ) + result["relationships"].append(people) + result = map_co_relation(result) + return Company(**result) diff --git a/src/aki_prj23_transparenzregister/utils/data_extraction/unternehmensregister/transform/v3/v3.py b/src/aki_prj23_transparenzregister/utils/data_extraction/unternehmensregister/transform/v3/v3.py index 240231a..70f97cb 100644 --- a/src/aki_prj23_transparenzregister/utils/data_extraction/unternehmensregister/transform/v3/v3.py +++ b/src/aki_prj23_transparenzregister/utils/data_extraction/unternehmensregister/transform/v3/v3.py @@ -19,6 +19,7 @@ from aki_prj23_transparenzregister.models.company import ( RelationshipRoleEnum, ) from aki_prj23_transparenzregister.utils.data_extraction.unternehmensregister.transform.common import ( + BaseTransformer, map_co_relation, normalize_street, traversal, @@ -32,63 +33,106 @@ from aki_prj23_transparenzregister.utils.string_tools import ( ) -def parse_date_of_birth(data: dict) -> str | None: - """Retreives the date of birth from a stakeholder entry if possible. +class V3_Transformer(BaseTransformer): # noqa: N801 + """Transformer for data exports from Unternehmensregister (v3).""" - Args: - data (dict): Stakeholder data + def parse_date_of_birth(self, data: dict) -> str | None: + """Retreives the date of birth from a stakeholder entry if possible. - Returns: - str | None: date of birth or None if not found - """ - if "tns:geburt" in ( - base := data["tns:beteiligter"]["tns:auswahl_beteiligter"][ - "tns:natuerlichePerson" - ] - ): - base = base["tns:geburt"]["tns:geburtsdatum"] - if isinstance(base, str): - return base - return None + Args: + data (dict): Stakeholder data - -def map_role_id_to_enum(role_id: str) -> RelationshipRoleEnum: - """Map Unternehmensregister role ID to RelationshipRoleEnum. - - Args: - role_id (str): Unternehmensregister role ID - - Returns: - RelationshipRoleEnum: Role enum - """ - mapper = RoleMapper.mapper() - return mapper.get(role_id) - - -def parse_stakeholder(data: dict) -> CompanyRelationship | None: - """Extract the company stakeholder/relation from a single "Beteiligung". - - Args: - data (dict): Data export - - Returns: - CompanyRelationship | None: Relationship if it could be processed - """ - if "tns:natuerlichePerson" in data["tns:beteiligter"]["tns:auswahl_beteiligter"]: - # It's a Company serving as a "Kommanditist" or similar - if ( - "tns:vorname" - not in data["tns:beteiligter"]["tns:auswahl_beteiligter"][ + Returns: + str | None: date of birth or None if not found + """ + if "tns:geburt" in ( + base := data["tns:beteiligter"]["tns:auswahl_beteiligter"][ "tns:natuerlichePerson" - ]["tns:vollerName"] + ] ): - return CompanyToCompanyRelationship( + base = base["tns:geburt"]["tns:geburtsdatum"] + if isinstance(base, str): + return base + return None + + def map_role_id_to_enum(self, role_id: str) -> RelationshipRoleEnum: + """Map Unternehmensregister role ID to RelationshipRoleEnum. + + Args: + role_id (str): Unternehmensregister role ID + + Returns: + RelationshipRoleEnum: Role enum + """ + mapper = RoleMapper.mapper() + return mapper.get(role_id) + + def parse_stakeholder(self, data: dict) -> CompanyRelationship | None: + """Extract the company stakeholder/relation from a single "Beteiligung". + + Args: + data (dict): Data export + + Returns: + CompanyRelationship | None: Relationship if it could be processed + """ + if ( + "tns:natuerlichePerson" + in data["tns:beteiligter"]["tns:auswahl_beteiligter"] + ): + # It's a Company serving as a "Kommanditist" or similar + if ( + "tns:vorname" + not in data["tns:beteiligter"]["tns:auswahl_beteiligter"][ + "tns:natuerlichePerson" + ]["tns:vollerName"] + ): + return CompanyToCompanyRelationship( + **{ # type: ignore + "name": remove_traling_and_leading_quotes( + data["tns:beteiligter"]["tns:auswahl_beteiligter"][ + "tns:natuerlichePerson" + ]["tns:vollerName"]["tns:nachname"] + ), + "location": Location( + **{ + "city": data["tns:beteiligter"][ + "tns:auswahl_beteiligter" + ]["tns:natuerlichePerson"]["tns:anschrift"][-1][ + "tns:ort" + ] + if isinstance( + data["tns:beteiligter"]["tns:auswahl_beteiligter"][ + "tns:natuerlichePerson" + ]["tns:anschrift"], + list, + ) + else data["tns:beteiligter"]["tns:auswahl_beteiligter"][ + "tns:natuerlichePerson" + ]["tns:anschrift"]["tns:ort"] + } + ), + "role": self.map_role_id_to_enum( + data["tns:rolle"]["tns:rollenbezeichnung"]["code"] + ), + "type": CompanyRelationshipEnum.COMPANY, + } + ) + return PersonToCompanyRelationship( **{ # type: ignore - "name": remove_traling_and_leading_quotes( - data["tns:beteiligter"]["tns:auswahl_beteiligter"][ - "tns:natuerlichePerson" - ]["tns:vollerName"]["tns:nachname"] + "name": PersonName( + **{ + "firstname": data["tns:beteiligter"][ + "tns:auswahl_beteiligter" + ]["tns:natuerlichePerson"]["tns:vollerName"]["tns:vorname"], + "lastname": data["tns:beteiligter"][ + "tns:auswahl_beteiligter" + ]["tns:natuerlichePerson"]["tns:vollerName"][ + "tns:nachname" + ], + } ), + "date_of_birth": self.parse_date_of_birth(data), "location": Location( **{ "city": data["tns:beteiligter"]["tns:auswahl_beteiligter"][ @@ -105,452 +149,414 @@ def parse_stakeholder(data: dict) -> CompanyRelationship | None: ]["tns:anschrift"]["tns:ort"] } ), - "role": map_role_id_to_enum( + "role": self.map_role_id_to_enum( data["tns:rolle"]["tns:rollenbezeichnung"]["code"] ), + "type": CompanyRelationshipEnum.PERSON, + } + ) + if "tns:organisation" in data["tns:beteiligter"]["tns:auswahl_beteiligter"]: + base = data["tns:beteiligter"]["tns:auswahl_beteiligter"][ + "tns:organisation" + ] + + location = None + if "tns:anschrift" in base: + location = Location( + **{ + "city": base["tns:anschrift"]["tns:ort"], + "street": base["tns:anschrift"]["tns:strasse"] + if "tns:strasse" in base["tns:anschrift"] + else None, + "house_number": base["tns:anschrift"]["tns:hausnummer"] + if "tns:hausnummer" in base["tns:anschrift"] + else None, + "zip_code": base["tns:anschrift"]["tns:postleitzahl"] + if "tns:postleitzahl" in base["tns:anschrift"] + else None, + } + ) + else: + location = Location( + **{ + "city": base["tns:sitz"]["tns:ort"], + "street": base["tns:sitz"]["tns:strasse"] + if "tns:strasse" in base["tns:sitz"] + else None, + "house_number": base["tns:sitz"]["tns:hausnummer"] + if "tns:hausnummer" in base["tns:sitz"] + else None, + "zip_code": base["tns:sitz"]["tns:postleitzahl"] + if "tns:postleitzahl" in base["tns:sitz"] + else None, + } + ) + + return CompanyToCompanyRelationship( + **{ # type: ignore + "role": self.map_role_id_to_enum( + data["tns:rolle"]["tns:rollenbezeichnung"]["code"] + ), + "name": remove_traling_and_leading_quotes( + base["tns:bezeichnung"]["tns:bezeichnung.aktuell"] + ), + "location": location, "type": CompanyRelationshipEnum.COMPANY, } ) - return PersonToCompanyRelationship( - **{ # type: ignore - "name": PersonName( - **{ - "firstname": data["tns:beteiligter"]["tns:auswahl_beteiligter"][ - "tns:natuerlichePerson" - ]["tns:vollerName"]["tns:vorname"], - "lastname": data["tns:beteiligter"]["tns:auswahl_beteiligter"][ - "tns:natuerlichePerson" - ]["tns:vollerName"]["tns:nachname"], - } - ), - "date_of_birth": parse_date_of_birth(data), - "location": Location( - **{ - "city": data["tns:beteiligter"]["tns:auswahl_beteiligter"][ - "tns:natuerlichePerson" - ]["tns:anschrift"][-1]["tns:ort"] - if isinstance( - data["tns:beteiligter"]["tns:auswahl_beteiligter"][ - "tns:natuerlichePerson" - ]["tns:anschrift"], - list, - ) - else data["tns:beteiligter"]["tns:auswahl_beteiligter"][ - "tns:natuerlichePerson" - ]["tns:anschrift"]["tns:ort"] - } - ), - "role": map_role_id_to_enum( - data["tns:rolle"]["tns:rollenbezeichnung"]["code"] - ), - "type": CompanyRelationshipEnum.PERSON, - } - ) - if "tns:organisation" in data["tns:beteiligter"]["tns:auswahl_beteiligter"]: - base = data["tns:beteiligter"]["tns:auswahl_beteiligter"]["tns:organisation"] + return None - location = None - if "tns:anschrift" in base: - location = Location( - **{ - "city": base["tns:anschrift"]["tns:ort"], - "street": base["tns:anschrift"]["tns:strasse"] - if "tns:strasse" in base["tns:anschrift"] - else None, - "house_number": base["tns:anschrift"]["tns:hausnummer"] - if "tns:hausnummer" in base["tns:anschrift"] - else None, - "zip_code": base["tns:anschrift"]["tns:postleitzahl"] - if "tns:potsleitzahl" in base["tns:anschrift"] - else None, - } - ) - else: - location = Location( - **{ - "city": base["tns:sitz"]["tns:ort"], - "street": base["tns:sitz"]["tns:strasse"] - if "tns:strasse" in base["tns:sitz"] - else None, - "house_number": base["tns:sitz"]["tns:hausnummer"] - if "tns:hausnummer" in base["tns:sitz"] - else None, - "zip_code": base["tns:sitz"]["tns:postleitzahl"] - if "tns:potsleitzahl" in base["tns:sitz"] - else None, - } - ) + def loc_from_beteiligung(self, data: dict) -> Location: + """Extract the company location from the first relationship in the export. - return CompanyToCompanyRelationship( - **{ # type: ignore - "role": map_role_id_to_enum( - data["tns:rolle"]["tns:rollenbezeichnung"]["code"] - ), - "name": remove_traling_and_leading_quotes( - base["tns:bezeichnung"]["tns:bezeichnung.aktuell"] - ), - "location": location, - "type": CompanyRelationshipEnum.COMPANY, - } - ) - return None + Args: + data (dict): Data export - -def loc_from_beteiligung(data: dict) -> Location: - """Extract the company location from the first relationship in the export. - - Args: - data (dict): Data export - - Returns: - Location: location - """ - base_path = [ - "tns:grunddaten", - "tns:verfahrensdaten", - "tns:beteiligung", - 0, - "tns:beteiligter", - "tns:auswahl_beteiligter", - "tns:organisation", - ] - base = traversal(data, base_path) - base = base["tns:anschrift"] if "tns:anschrift" in base else base["tns:sitz"] - - if isinstance(base, list): - base = base[0] - house_number = None - street = None - if "tns:strasse" in base: - regex = r".(\d+)$" - hits = re.findall(regex, base["tns:strasse"]) - if len(hits) == 1: - house_number = hits[0] - street = base["tns:strasse"][: (-1 * len(house_number))] - if "tns:hausnummer" in base: - house_number = house_number + base["tns:hausnummer"] - else: - if "tns:hausnummer" in base: - house_number = base["tns:hausnummer"] - street = base["tns:strasse"] - return Location( - **{ - "city": base["tns:ort"], - "zip_code": base["tns:postleitzahl"], - "street": normalize_street(street), # type: ignore - "house_number": house_number, - } - ) - - -def name_from_beteiligung(data: dict) -> str: - """Extract the Company name from an Unternehmensregister export by using the first relationship found. - - Args: - data (dict): Data export - - Returns: - str: Company name - """ - path = [ - "tns:grunddaten", - "tns:verfahrensdaten", - "tns:beteiligung", - 0, - "tns:beteiligter", - "tns:auswahl_beteiligter", - "tns:organisation", - "tns:bezeichnung", - "tns:bezeichnung.aktuell", - ] - name = traversal(data, path) - return remove_traling_and_leading_quotes(name) - - -def map_rechtsform(company_name: str, data: dict) -> CompanyTypeEnum | None: - """Extracts the company type from a given Unternehmensregister export. - - Args: - company_name (str): Name of the company as a fallback solution - data (dict): Data export - - Returns: - CompanyTypeEnum | None: Company type if found - """ - try: - path = [ - "tns:fachdatenRegister", - "tns:basisdatenRegister", - "tns:rechtstraeger", - "tns:angabenZurRechtsform", - "tns:rechtsform", - "code", + Returns: + Location: location + """ + base_path = [ + "tns:grunddaten", + "tns:verfahrensdaten", + "tns:beteiligung", + 0, + "tns:beteiligter", + "tns:auswahl_beteiligter", + "tns:organisation", ] - return CompanyTypeEnum(traversal(data, path)) - except Exception: - if ( - company_name.endswith("GmbH") - or company_name.endswith("UG") - or company_name.endswith("UG (haftungsbeschränkt)") - ): - return CompanyTypeEnum("Gesellschaft mit beschränkter Haftung") - if company_name.endswith("SE"): - return CompanyTypeEnum("Europäische Aktiengesellschaft (SE)") - if company_name.endswith("KG"): - return CompanyTypeEnum("Kommanditgesellschaft") - return None + base = traversal(data, base_path) + base = base["tns:anschrift"] if "tns:anschrift" in base else base["tns:sitz"] - -def map_capital( # noqa: PLR0912 - data: dict, company_type: CompanyTypeEnum -) -> Capital | None: - """Extracts the company capital from the given Unternehmensregister export. - - Args: - data (dict): Data export - company_type (CompanyTypeEnum): Type of company (e.g., 'Gesellschaft mit beschränkter Haftung') - - Returns: - Capital | None: Company Capital if found - """ - # Early return - if "tns:auswahl_zusatzangaben" not in data["tns:fachdatenRegister"]: - return None - capital: dict = {"tns:zahl": 0.0, "tns:waehrung": {"code": None}} - if ( - company_type == CompanyTypeEnum.KG - and "tns:personengesellschaft" - in data["tns:fachdatenRegister"]["tns:auswahl_zusatzangaben"] - ): - capital_type = "Hafteinlage" - base = data["tns:fachdatenRegister"]["tns:auswahl_zusatzangaben"][ - "tns:personengesellschaft" - ]["tns:zusatzKG"]["tns:datenKommanditist"] if isinstance(base, list): - for entry in base: - # TODO link to persons using Ref_Rollennummer then extract ["Hafteinlage"] as below - capital["tns:zahl"] = capital["tns:zahl"] + float( - entry["tns:hafteinlage"]["tns:zahl"] - ) - capital["tns:waehrung"]["code"] = entry["tns:hafteinlage"][ - "tns:waehrung" - ]["code"] - elif isinstance(base, dict): - capital = base["tns:hafteinlage"] - elif company_type in [ - CompanyTypeEnum.GMBH, - CompanyTypeEnum.SE, - CompanyTypeEnum.AG, - CompanyTypeEnum.KGaA, - CompanyTypeEnum.AUSLAENDISCHE_RECHTSFORM, - CompanyTypeEnum.OHG, - ]: + base = base[0] + house_number = None + street = None + if "tns:strasse" in base: + regex = r".(\d+)$" + hits = re.findall(regex, base["tns:strasse"]) + if len(hits) == 1: + house_number = hits[0] + street = base["tns:strasse"][: (-1 * len(house_number))] + if "tns:hausnummer" in base: + house_number = house_number + base["tns:hausnummer"] + else: + if "tns:hausnummer" in base: + house_number = base["tns:hausnummer"] + street = base["tns:strasse"] + return Location( + **{ + "city": base["tns:ort"], + "zip_code": base["tns:postleitzahl"], + "street": normalize_street(street), # type: ignore + "house_number": house_number, + } + ) + + def name_from_beteiligung(self, data: dict) -> str: + """Extract the Company name from an Unternehmensregister export by using the first relationship found. + + Args: + data (dict): Data export + + Returns: + str: Company name + """ + path = [ + "tns:grunddaten", + "tns:verfahrensdaten", + "tns:beteiligung", + 0, + "tns:beteiligter", + "tns:auswahl_beteiligter", + "tns:organisation", + "tns:bezeichnung", + "tns:bezeichnung.aktuell", + ] + name = traversal(data, path) + return remove_traling_and_leading_quotes(name) + + def map_rechtsform(self, company_name: str, data: dict) -> CompanyTypeEnum | None: + """Extracts the company type from a given Unternehmensregister export. + + Args: + company_name (str): Name of the company as a fallback solution + data (dict): Data export + + Returns: + CompanyTypeEnum | None: Company type if found + """ + try: + path = [ + "tns:fachdatenRegister", + "tns:basisdatenRegister", + "tns:rechtstraeger", + "tns:angabenZurRechtsform", + "tns:rechtsform", + "code", + ] + return CompanyTypeEnum(traversal(data, path)) + except Exception: + if ( + company_name.endswith("GmbH") + or company_name.endswith("UG") + or company_name.endswith("UG (haftungsbeschränkt)") + ): + return CompanyTypeEnum("Gesellschaft mit beschränkter Haftung") + if company_name.endswith("SE"): + return CompanyTypeEnum("Europäische Aktiengesellschaft (SE)") + if company_name.endswith("KG"): + return CompanyTypeEnum("Kommanditgesellschaft") + return None + + def map_capital( # noqa: PLR0912 + self, data: dict, company_type: CompanyTypeEnum + ) -> Capital | None: + """Extracts the company capital from the given Unternehmensregister export. + + Args: + data (dict): Data export + company_type (CompanyTypeEnum): Type of company (e.g., 'Gesellschaft mit beschränkter Haftung') + + Returns: + Capital | None: Company Capital if found + """ + # Early return + if "tns:auswahl_zusatzangaben" not in data["tns:fachdatenRegister"]: + return None + capital: dict = {"tns:zahl": 0.0, "tns:waehrung": {"code": None}} if ( - "tns:kapitalgesellschaft" - not in data["tns:fachdatenRegister"]["tns:auswahl_zusatzangaben"] + company_type == CompanyTypeEnum.KG + and "tns:personengesellschaft" + in data["tns:fachdatenRegister"]["tns:auswahl_zusatzangaben"] ): + capital_type = "Hafteinlage" base = data["tns:fachdatenRegister"]["tns:auswahl_zusatzangaben"][ "tns:personengesellschaft" - ] - else: - base = data["tns:fachdatenRegister"]["tns:auswahl_zusatzangaben"][ + ]["tns:zusatzKG"]["tns:datenKommanditist"] + if isinstance(base, list): + for entry in base: + # TODO link to persons using Ref_Rollennummer then extract ["Hafteinlage"] as below + capital["tns:zahl"] = capital["tns:zahl"] + float( + entry["tns:hafteinlage"]["tns:zahl"] + ) + capital["tns:waehrung"]["code"] = entry["tns:hafteinlage"][ + "tns:waehrung" + ]["code"] + elif isinstance(base, dict): + capital = base["tns:hafteinlage"] + elif company_type in [ + CompanyTypeEnum.GMBH, + CompanyTypeEnum.SE, + CompanyTypeEnum.AG, + CompanyTypeEnum.KGaA, + CompanyTypeEnum.AUSLAENDISCHE_RECHTSFORM, + CompanyTypeEnum.OHG, + ]: + if ( "tns:kapitalgesellschaft" - ] - if "tns:zusatzGmbH" in base: - capital_type = "Stammkapital" - capital = base["tns:zusatzGmbH"]["tns:stammkapital"] - elif "tns:zusatzAktiengesellschaft" in base: - capital_type = "Grundkapital" - capital = base["tns:zusatzAktiengesellschaft"]["tns:grundkapital"][ - "tns:hoehe" - ] - elif company_type in [ - CompanyTypeEnum.EINZELKAUFMANN, - CompanyTypeEnum.EG, - CompanyTypeEnum.PARTNERSCHAFT, - CompanyTypeEnum.PARTNERGESELLSCHAFT, - CompanyTypeEnum.PARTNERSCHAFTSGESELLSCHAFT, - None, - ]: - return None - # Catch entries having the dict but with null values - if isinstance(capital, list): - capital = capital[0] - if not all(capital.values()): - return None - return Capital( - **{ # type: ignore - "value": float(capital["tns:zahl"]), - "currency": CurrencyEnum(capital["tns:waehrung"]["code"]), - "type": CapitalTypeEnum(capital_type), - } - ) - - -def map_business_purpose(data: dict) -> str | None: - """Extracts the "Geschäftszweck" from a given Unternehmensregister export. - - Args: - data (dict): Data export - - Returns: - str | None: Business purpose if found - """ - try: - path = ["tns:fachdatenRegister", "tns:basisdatenRegister", "tns:gegenstand"] - return traversal(data, path) - except KeyError: - return None - - -def map_founding_date(data: dict) -> str | None: - """Extracts the founding date from a given Unternehmensregister export. - - Args: - data (dict): Data export - - Returns: - str | None: Founding date if found - """ - text = str(data) - entry_date = re.findall( - r".Tag der ersten Eintragung:(\\n| )?(\d{1,2}\.\d{1,2}\.\d{2,4})", text - ) - if len(entry_date) == 1: - return transform_date_to_iso(entry_date[0][1]) - - entry_date = re.findall( - r".Gesellschaftsvertrag vom (\d{1,2}\.\d{1,2}\.\d{2,4})", text - ) - if len(entry_date) == 1: - return transform_date_to_iso(entry_date[0]) - if "tns:satzungsdatum" in data["tns:fachdatenRegister"]["tns:basisdatenRegister"]: - path = [ - "tns:fachdatenRegister", - "tns:basisdatenRegister", - "tns:satzungsdatum", - ] - base = traversal(data, path) - if "tns:aktuellesSatzungsdatum" in base: - return base["tns:aktuellesSatzungsdatum"] - # No reliable answer - return None - - -def map_hr_number(data: dict) -> str: - """Extract the HR number from a given Unternehmensregister export. - - Args: - data (dict): Data export - - Raises: - KeyError: If key not found - - Returns: - str: HR number - """ - base = data["tns:grunddaten"]["tns:verfahrensdaten"]["tns:instanzdaten"][ - "tns:aktenzeichen" - ]["tns:auswahl_aktenzeichen"] - if "tns:aktenzeichen.strukturiert" in base: - hr_prefix = base["tns:aktenzeichen.strukturiert"]["tns:register"]["code"] - hr_number = base["tns:aktenzeichen.strukturiert"]["tns:laufendeNummer"] - return f"{hr_prefix} {hr_number}" - if "tns:aktenzeichen.freitext" in base: - return base["tns:aktenzeichen.freitext"] - raise KeyError("Could not find HR number") - - -def map_district_court(data: dict) -> DistrictCourt: - """Extract the district court from a given Unternehmensregister export. - - Args: - data (dict): Data export - - Returns: - DistrictCourt: District court - """ - base_path = [ - "tns:grunddaten", - "tns:verfahrensdaten", - "tns:beteiligung", - 1, - "tns:beteiligter", - "tns:auswahl_beteiligter", - "tns:organisation", - ] - path = [*base_path, "tns:bezeichnung", "tns:bezeichnung.aktuell"] - name = traversal(data, path) - path = [*base_path, "tns:anschrift", "tns:ort"] - city = traversal(data, path) - return DistrictCourt(name=name, city=city) - - -def map_company_id(data: dict) -> CompanyID: - """Retrieve Company ID from export. - - Args: - data (dict): Data export - - Returns: - CompanyID: ID of the company - """ - try: - return CompanyID(map_hr_number(data), map_district_court(data)) # type: ignore - except KeyError: - hr_number = data["tns:grunddaten"]["tns:verfahrensdaten"]["tns:beteiligung"][0][ - "tns:beteiligter" - ]["tns:auswahl_beteiligter"]["tns:organisation"]["tns:registereintragung"][ - "tns:registernummer" - ] - district_court = map_district_court(data) - return CompanyID(hr_number=hr_number, district_court=district_court) - - -def map_last_update(data: dict) -> str: - """Extract last update date from export. - - Args: - data (dict): Unternehmensregister export - - Returns: - str: Last update date - """ - path = ["tns:fachdatenRegister", "tns:auszug", "tns:letzteEintragung"] - return traversal(data, path) - - -# TODO class model with inheritance - only difference: Determine root in __init__ -def map_unternehmensregister_json(data: dict) -> Company: - """Processes the Unternehmensregister structured export to a Company by using several helper methods. - - Args: - data (dict): Data export - - Returns: - Company: Transformed data - """ - root_key = list(data.keys())[0] - data = data[root_key] - result: dict = {"relationships": []} - - result["id"] = map_company_id(data) - result["name"] = name_from_beteiligung(data) - - result["location"] = loc_from_beteiligung(data) - result["last_update"] = map_last_update(data) - - result["company_type"] = map_rechtsform(result["name"], data) - result["capital"] = map_capital(data, result["company_type"]) - result["business_purpose"] = map_business_purpose(data) - result["founding_date"] = map_founding_date(data) - - for i in range( - 2, len(data["tns:grunddaten"]["tns:verfahrensdaten"]["tns:beteiligung"]) - ): - people = parse_stakeholder( - data["tns:grunddaten"]["tns:verfahrensdaten"]["tns:beteiligung"][i] + not in data["tns:fachdatenRegister"]["tns:auswahl_zusatzangaben"] + ): + base = data["tns:fachdatenRegister"]["tns:auswahl_zusatzangaben"][ + "tns:personengesellschaft" + ] + else: + base = data["tns:fachdatenRegister"]["tns:auswahl_zusatzangaben"][ + "tns:kapitalgesellschaft" + ] + if "tns:zusatzGmbH" in base: + capital_type = "Stammkapital" + capital = base["tns:zusatzGmbH"]["tns:stammkapital"] + elif "tns:zusatzAktiengesellschaft" in base: + capital_type = "Grundkapital" + capital = base["tns:zusatzAktiengesellschaft"]["tns:grundkapital"][ + "tns:hoehe" + ] + elif company_type in [ + CompanyTypeEnum.EINZELKAUFMANN, + CompanyTypeEnum.EG, + CompanyTypeEnum.PARTNERSCHAFT, + CompanyTypeEnum.PARTNERGESELLSCHAFT, + CompanyTypeEnum.PARTNERSCHAFTSGESELLSCHAFT, + None, + ]: + return None + # Catch entries having the dict but with null values + if isinstance(capital, list): + capital = capital[0] + if not all(capital.values()): + return None + return Capital( + **{ # type: ignore + "value": float(capital["tns:zahl"]), + "currency": CurrencyEnum(capital["tns:waehrung"]["code"]), + "type": CapitalTypeEnum(capital_type), + } ) - result["relationships"].append(people) - result = map_co_relation(result) - return Company(**result) + + def map_business_purpose(self, data: dict) -> str | None: + """Extracts the "Geschäftszweck" from a given Unternehmensregister export. + + Args: + data (dict): Data export + + Returns: + str | None: Business purpose if found + """ + try: + path = ["tns:fachdatenRegister", "tns:basisdatenRegister", "tns:gegenstand"] + return traversal(data, path) + except KeyError: + return None + + def map_founding_date(self, data: dict) -> str | None: + """Extracts the founding date from a given Unternehmensregister export. + + Args: + data (dict): Data export + + Returns: + str | None: Founding date if found + """ + text = str(data) + entry_date = re.findall( + r".Tag der ersten Eintragung:(\\n| )?(\d{1,2}\.\d{1,2}\.\d{2,4})", text + ) + if len(entry_date) == 1: + return transform_date_to_iso(entry_date[0][1]) + + entry_date = re.findall( + r".Gesellschaftsvertrag vom (\d{1,2}\.\d{1,2}\.\d{2,4})", text + ) + if len(entry_date) == 1: + return transform_date_to_iso(entry_date[0]) + if ( + "tns:satzungsdatum" + in data["tns:fachdatenRegister"]["tns:basisdatenRegister"] + ): + path = [ + "tns:fachdatenRegister", + "tns:basisdatenRegister", + "tns:satzungsdatum", + ] + base = traversal(data, path) + if "tns:aktuellesSatzungsdatum" in base: + return base["tns:aktuellesSatzungsdatum"] + # No reliable answer + return None + + def map_hr_number(self, data: dict) -> str: + """Extract the HR number from a given Unternehmensregister export. + + Args: + data (dict): Data export + + Raises: + KeyError: If key not found + + Returns: + str: HR number + """ + base = data["tns:grunddaten"]["tns:verfahrensdaten"]["tns:instanzdaten"][ + "tns:aktenzeichen" + ]["tns:auswahl_aktenzeichen"] + if "tns:aktenzeichen.strukturiert" in base: + hr_prefix = base["tns:aktenzeichen.strukturiert"]["tns:register"]["code"] + hr_number = base["tns:aktenzeichen.strukturiert"]["tns:laufendeNummer"] + return f"{hr_prefix} {hr_number}" + if "tns:aktenzeichen.freitext" in base: + return base["tns:aktenzeichen.freitext"] + raise KeyError("Could not find HR number") + + def map_district_court(self, data: dict) -> DistrictCourt: + """Extract the district court from a given Unternehmensregister export. + + Args: + data (dict): Data export + + Returns: + DistrictCourt: District court + """ + base_path = [ + "tns:grunddaten", + "tns:verfahrensdaten", + "tns:beteiligung", + 1, + "tns:beteiligter", + "tns:auswahl_beteiligter", + "tns:organisation", + ] + path = [*base_path, "tns:bezeichnung", "tns:bezeichnung.aktuell"] + name = traversal(data, path) + + path = [*base_path, "tns:anschrift", "tns:ort"] + city = traversal(data, path) + return DistrictCourt(name=name, city=city) + + def map_company_id(self, data: dict) -> CompanyID: + """Retrieve Company ID from export. + + Args: + data (dict): Data export + + Returns: + CompanyID: ID of the company + """ + try: + return CompanyID(hr_number=self.map_hr_number(data), district_court=self.map_district_court(data)) # type: ignore + except KeyError: + hr_number = data["tns:grunddaten"]["tns:verfahrensdaten"][ + "tns:beteiligung" + ][0]["tns:beteiligter"]["tns:auswahl_beteiligter"]["tns:organisation"][ + "tns:registereintragung" + ][ + "tns:registernummer" + ] + district_court = self.map_district_court(data) + return CompanyID(hr_number=hr_number, district_court=district_court) + + def map_last_update(self, data: dict) -> str: + """Extract last update date from export. + + Args: + data (dict): Unternehmensregister export + + Returns: + str: Last update date + """ + path = ["tns:fachdatenRegister", "tns:auszug", "tns:letzteEintragung"] + return traversal(data, path) + + # TODO class model with inheritance - only difference: Determine root in __init__ + def map_unternehmensregister_json(self, data: dict) -> Company: + """Processes the Unternehmensregister structured export to a Company by using several helper methods. + + Args: + data (dict): Data export + + Returns: + Company: Transformed data + """ + root_key = list(data.keys())[0] + data = data[root_key] + result: dict = {"relationships": []} + + result["id"] = self.map_company_id(data) + result["name"] = self.name_from_beteiligung(data) + + result["location"] = self.loc_from_beteiligung(data) + result["last_update"] = self.map_last_update(data) + + result["company_type"] = self.map_rechtsform(result["name"], data) + result["capital"] = self.map_capital(data, result["company_type"]) + result["business_purpose"] = self.map_business_purpose(data) + result["founding_date"] = self.map_founding_date(data) + + for i in range( + 2, len(data["tns:grunddaten"]["tns:verfahrensdaten"]["tns:beteiligung"]) + ): + people = self.parse_stakeholder( + data["tns:grunddaten"]["tns:verfahrensdaten"]["tns:beteiligung"][i] + ) + result["relationships"].append(people) + result = map_co_relation(result) + return Company(**result) diff --git a/tests/utils/data_extraction/unternehmensregister/transform/v1_test.py b/tests/utils/data_extraction/unternehmensregister/transform/v1_test.py index 34b8ead..4c89d35 100644 --- a/tests/utils/data_extraction/unternehmensregister/transform/v1_test.py +++ b/tests/utils/data_extraction/unternehmensregister/transform/v1_test.py @@ -16,10 +16,12 @@ from aki_prj23_transparenzregister.models.company import ( PersonToCompanyRelationship, RelationshipRoleEnum, ) -from aki_prj23_transparenzregister.utils.data_extraction.unternehmensregister.transform.v1 import ( - v1 as transform, +from aki_prj23_transparenzregister.utils.data_extraction.unternehmensregister.transform.v1.v1 import ( + V1_Transformer, ) +transform = V1_Transformer() + def test_parse_stakeholder_org_hidden_in_person() -> None: data = { @@ -656,31 +658,31 @@ def test_map_last_update() -> None: "aki_prj23_transparenzregister.utils.data_extraction.unternehmensregister.transform.v1.v1.map_co_relation" ) @patch( - "aki_prj23_transparenzregister.utils.data_extraction.unternehmensregister.transform.v1.v1.map_company_id" + "aki_prj23_transparenzregister.utils.data_extraction.unternehmensregister.transform.v1.v1.V1_Transformer.map_company_id" ) @patch( - "aki_prj23_transparenzregister.utils.data_extraction.unternehmensregister.transform.v1.v1.name_from_beteiligung" + "aki_prj23_transparenzregister.utils.data_extraction.unternehmensregister.transform.v1.v1.V1_Transformer.name_from_beteiligung" ) @patch( - "aki_prj23_transparenzregister.utils.data_extraction.unternehmensregister.transform.v1.v1.loc_from_beteiligung" + "aki_prj23_transparenzregister.utils.data_extraction.unternehmensregister.transform.v1.v1.V1_Transformer.loc_from_beteiligung" ) @patch( - "aki_prj23_transparenzregister.utils.data_extraction.unternehmensregister.transform.v1.v1.map_last_update" + "aki_prj23_transparenzregister.utils.data_extraction.unternehmensregister.transform.v1.v1.V1_Transformer.map_last_update" ) @patch( - "aki_prj23_transparenzregister.utils.data_extraction.unternehmensregister.transform.v1.v1.map_rechtsform" + "aki_prj23_transparenzregister.utils.data_extraction.unternehmensregister.transform.v1.v1.V1_Transformer.map_rechtsform" ) @patch( - "aki_prj23_transparenzregister.utils.data_extraction.unternehmensregister.transform.v1.v1.map_capital" + "aki_prj23_transparenzregister.utils.data_extraction.unternehmensregister.transform.v1.v1.V1_Transformer.map_capital" ) @patch( - "aki_prj23_transparenzregister.utils.data_extraction.unternehmensregister.transform.v1.v1.map_business_purpose" + "aki_prj23_transparenzregister.utils.data_extraction.unternehmensregister.transform.v1.v1.V1_Transformer.map_business_purpose" ) @patch( - "aki_prj23_transparenzregister.utils.data_extraction.unternehmensregister.transform.v1.v1.map_founding_date" + "aki_prj23_transparenzregister.utils.data_extraction.unternehmensregister.transform.v1.v1.V1_Transformer.map_founding_date" ) @patch( - "aki_prj23_transparenzregister.utils.data_extraction.unternehmensregister.transform.v1.v1.parse_stakeholder" + "aki_prj23_transparenzregister.utils.data_extraction.unternehmensregister.transform.v1.v1.V1_Transformer.parse_stakeholder" ) def test_map_unternehmensregister_json( # noqa: PLR0913 mock_map_parse_stakeholder: Mock, diff --git a/tests/utils/data_extraction/unternehmensregister/transform/v3_test.py b/tests/utils/data_extraction/unternehmensregister/transform/v3_test.py new file mode 100644 index 0000000..d23e048 --- /dev/null +++ b/tests/utils/data_extraction/unternehmensregister/transform/v3_test.py @@ -0,0 +1,731 @@ +"""Testing utils/data_extraction/unternehmensregister/transform.py.""" +from unittest.mock import Mock, patch + +from aki_prj23_transparenzregister.models.company import ( + Capital, + CapitalTypeEnum, + Company, + CompanyID, + CompanyRelationshipEnum, + CompanyToCompanyRelationship, + CompanyTypeEnum, + CurrencyEnum, + DistrictCourt, + Location, + PersonName, + PersonToCompanyRelationship, + RelationshipRoleEnum, +) +from aki_prj23_transparenzregister.utils.data_extraction.unternehmensregister.transform.v3.v3 import ( + V3_Transformer, +) + +transform = V3_Transformer() + + +def test_parse_stakeholder_org_hidden_in_person() -> None: + data = { + "tns:beteiligter": { + "tns:auswahl_beteiligter": { + "tns:natuerlichePerson": { + "tns:vollerName": {"tns:nachname": '"Some Company KG'}, + "tns:anschrift": {"tns:ort": "Area 51"}, + } + } + }, + "tns:rolle": {"tns:rollenbezeichnung": {"code": "275"}}, + } + expected_result = CompanyToCompanyRelationship( + role=RelationshipRoleEnum.KOMMANDITIST, # type: ignore + name="Some Company KG", + type=CompanyRelationshipEnum.COMPANY, + location=Location(**{"city": "Area 51"}), + ) + assert transform.parse_stakeholder(data) == expected_result + + +def test_parse_stakeholder_person() -> None: + data = { + "tns:beteiligter": { + "tns:auswahl_beteiligter": { + "tns:natuerlichePerson": { + "tns:vollerName": { + "tns:vorname": "Stephen", + "tns:nachname": "King", + }, + "tns:anschrift": {"tns:ort": "Maine"}, + "tns:geburt": {"tns:geburtsdatum": "1947-09-21"}, + } + } + }, + "tns:rolle": {"tns:rollenbezeichnung": {"code": "269"}}, + } + expected_result = PersonToCompanyRelationship( + role=RelationshipRoleEnum.GESCHAEFTSLEITER, # type: ignore + date_of_birth="1947-09-21", + name=PersonName(**{"firstname": "Stephen", "lastname": "King"}), + type=CompanyRelationshipEnum.PERSON, + location=Location(**{"city": "Maine"}), + ) + assert transform.parse_stakeholder(data) == expected_result + + +def test_parse_stakeholder_person_missing_date_of_birth() -> None: + data = { + "tns:beteiligter": { + "tns:auswahl_beteiligter": { + "tns:natuerlichePerson": { + "tns:vollerName": { + "tns:vorname": "Stephen", + "tns:nachname": "King", + }, + "tns:anschrift": {"tns:ort": "Maine"}, + } + } + }, + "tns:rolle": {"tns:rollenbezeichnung": {"code": "269"}}, + } + expected_result = PersonToCompanyRelationship( + role=RelationshipRoleEnum.GESCHAEFTSLEITER, # type: ignore + date_of_birth=None, + name=PersonName(**{"firstname": "Stephen", "lastname": "King"}), + type=CompanyRelationshipEnum.PERSON, + location=Location(**{"city": "Maine"}), + ) + assert transform.parse_stakeholder(data) == expected_result + + +def test_parse_stakeholder_org() -> None: + data = { + "tns:beteiligter": { + "tns:auswahl_beteiligter": { + "tns:organisation": { + "tns:bezeichnung": { + "tns:bezeichnung.aktuell": "Transparenzregister kG" + }, + "tns:anschrift": { + "tns:ort": "Iserlohn", + "tns:strasse": "Hauptstrasse", + "tns:hausnummer": "42", + "tns:postleitzahl": "58636", + }, + } + } + }, + "tns:rolle": {"tns:rollenbezeichnung": {"code": "268"}}, + } + expected_result = CompanyToCompanyRelationship( + name="Transparenzregister kG", + role=RelationshipRoleEnum.DIREKTOR, # type: ignore + type=CompanyRelationshipEnum.COMPANY, + location=Location( + **{ + "city": "Iserlohn", + "zip_code": "58636", + "house_number": "42", + "street": "Hauptstrasse", + } + ), + ) + assert transform.parse_stakeholder(data) == expected_result + + +def test_parse_stakeholder_org_loc_from_sitz() -> None: + data = { + "tns:beteiligter": { + "tns:auswahl_beteiligter": { + "tns:organisation": { + "tns:bezeichnung": { + "tns:bezeichnung.aktuell": "Transparenzregister kG" + }, + "tns:sitz": { + "tns:ort": "Iserlohn", + "tns:strasse": "Hauptstrasse", + "tns:hausnummer": "42", + "tns:postleitzahl": "58636", + }, + } + } + }, + "tns:rolle": {"tns:rollenbezeichnung": {"code": "268"}}, + } + expected_result = CompanyToCompanyRelationship( + name="Transparenzregister kG", + role=RelationshipRoleEnum.DIREKTOR, # type: ignore + type=CompanyRelationshipEnum.COMPANY, + location=Location( + **{ + "city": "Iserlohn", + "zip_code": "58636", + "house_number": "42", + "street": "Hauptstrasse", + } + ), + ) + assert transform.parse_stakeholder(data) == expected_result + + +def test_parse_stakeholder_no_result() -> None: + data: dict = {"tns:beteiligter": {"tns:auswahl_beteiligter": {}}} # type: ignore + assert transform.parse_stakeholder(data) is None + + +def test_loc_from_beteiligung() -> None: + data = { + "tns:grunddaten": { + "tns:verfahrensdaten": { + "tns:beteiligung": [ + { + "tns:beteiligter": { + "tns:auswahl_beteiligter": { + "tns:organisation": { + "tns:anschrift": { + "tns:strasse": "Gewerbestraße", + "tns:hausnummer": "8", + "tns:postleitzahl": "72535", + "tns:ort": "Heroldstatt", + }, + }, + } + } + }, + ] + } + } + } + + expected_result = Location( + city="Heroldstatt", house_number="8", street="Gewerbestraße", zip_code="72535" + ) + assert transform.loc_from_beteiligung(data) == expected_result + + +def test_loc_from_beteiligung_number_contained_in_street() -> None: + data = { + "tns:grunddaten": { + "tns:verfahrensdaten": { + "tns:beteiligung": [ + { + "tns:beteiligter": { + "tns:auswahl_beteiligter": { + "tns:organisation": { + "tns:anschrift": { + "tns:strasse": "Gewerbestraße8", + "tns:postleitzahl": "72535", + "tns:ort": "Heroldstatt", + }, + }, + } + } + }, + ] + } + } + } + + expected_result = Location( + city="Heroldstatt", house_number="8", street="Gewerbestraße", zip_code="72535" + ) + assert transform.loc_from_beteiligung(data) == expected_result + + +def test_loc_from_beteiligung_no_result() -> None: + data = { + "tns:grunddaten": { + "tns:verfahrensdaten": { + "tns:beteiligung": [ + { + "tns:beteiligter": { + "tns:auswahl_beteiligter": { + "tns:organisation": { + "tns:anschrift": { + "tns:postleitzahl": "72535", + "tns:ort": "Heroldstatt", + }, + }, + } + } + }, + ] + } + } + } + + expected_result = Location( + city="Heroldstatt", house_number=None, street=None, zip_code="72535" + ) + assert transform.loc_from_beteiligung(data) == expected_result + + +def test_loc_from_beteiligung_combine() -> None: + data = { + "tns:grunddaten": { + "tns:verfahrensdaten": { + "tns:beteiligung": [ + { + "tns:beteiligter": { + "tns:auswahl_beteiligter": { + "tns:organisation": { + "tns:anschrift": { + "tns:postleitzahl": "72535", + "tns:strasse": "Pliangenserstr. 40", + "tns:hausnummer": "a", + "tns:ort": "Heroldstatt", + }, + }, + } + } + }, + ] + } + } + } + + expected_result = Location( + city="Heroldstatt", + house_number="40a", + street="Pliangenserstraße", + zip_code="72535", + ) + assert transform.loc_from_beteiligung(data) == expected_result + + +def test_name_from_beteiligung() -> None: + data = { + "tns:grunddaten": { + "tns:verfahrensdaten": { + "tns:beteiligung": [ + { + "tns:beteiligter": { + "tns:auswahl_beteiligter": { + "tns:organisation": { + "tns:bezeichnung": { + "tns:bezeichnung.aktuell": "1 A Autenrieth Kunststofftechnik GmbH & Co. KG" + }, + }, + } + }, + } + ] + } + } + } + + expected_result = "1 A Autenrieth Kunststofftechnik GmbH & Co. KG" + assert transform.name_from_beteiligung(data) == expected_result + + +def test_name_from_beteiligung_remove_quotes() -> None: + data = { + "tns:grunddaten": { + "tns:verfahrensdaten": { + "tns:beteiligung": [ + { + "tns:beteiligter": { + "tns:auswahl_beteiligter": { + "tns:organisation": { + "tns:bezeichnung": { + "tns:bezeichnung.aktuell": '"Siemes Verwaltungs-GmbH"' + }, + }, + } + }, + } + ] + } + } + } + + expected_result = "Siemes Verwaltungs-GmbH" + assert transform.name_from_beteiligung(data) == expected_result + + +def test_map_rechtsform() -> None: + data = { + "tns:fachdatenRegister": { + "tns:basisdatenRegister": { + "tns:rechtstraeger": { + "tns:angabenZurRechtsform": { + "tns:rechtsform": { + "code": "Gesellschaft mit beschränkter Haftung" + }, + } + }, + } + } + } + expected_result = CompanyTypeEnum.GMBH + assert transform.map_rechtsform("", data) == expected_result + + +def test_map_rechtsform_from_name() -> None: + data = [ + ("GEA Farm Technologies GmbH", "Gesellschaft mit beschränkter Haftung"), + ("Atos SE", "Europäische Aktiengesellschaft (SE)"), + ("Bilkenroth KG", "Kommanditgesellschaft"), + ("jfoiahfo8sah 98548902 öhz ö", None), + ] + + for company_name, expected_result in data: + assert transform.map_rechtsform(company_name, {}) == expected_result + + +def test_map_capital_kg_single() -> None: + capital = Capital( + currency=CurrencyEnum.EURO, value=69000, type=CapitalTypeEnum.HAFTEINLAGE # type: ignore + ) + data = { + "tns:fachdatenRegister": { + "tns:auswahl_zusatzangaben": { + "tns:personengesellschaft": { + "tns:zusatzKG": { + "tns:datenKommanditist": { + "tns:hafteinlage": { + "tns:zahl": str(capital.value), + "tns:waehrung": {"code": capital.currency}, + }, + } + } + } + } + } + } + + result = transform.map_capital(data, CompanyTypeEnum.KG) # type: ignore + assert result == capital + + +def test_map_capital_kg_sum() -> None: + capital = Capital( + currency=CurrencyEnum.EURO, value=20000, type=CapitalTypeEnum.HAFTEINLAGE # type: ignore + ) + data = { + "tns:fachdatenRegister": { + "tns:auswahl_zusatzangaben": { + "tns:personengesellschaft": { + "tns:zusatzKG": { + "tns:datenKommanditist": [ + { + "tns:hafteinlage": { + "tns:zahl": str(10000), + "tns:waehrung": {"code": capital.currency}, + } + }, + { + "tns:hafteinlage": { + "tns:zahl": str(10000), + "tns:waehrung": {"code": capital.currency}, + }, + }, + ] + } + } + } + } + } + + result = transform.map_capital(data, CompanyTypeEnum.KG) # type: ignore + assert result == capital + + +def test_map_capital_no_fachdaten() -> None: + data: dict = {"tns:fachdatenRegister": {}} + + result = transform.map_capital(data, CompanyTypeEnum.KG) # type: ignore + assert result is None + + +def test_map_capital_gmbh() -> None: + capital = Capital( + currency=CurrencyEnum.DEUTSCHE_MARK, value=42, type=CapitalTypeEnum.STAMMKAPITAL # type: ignore + ) + data = { + "tns:fachdatenRegister": { + "tns:auswahl_zusatzangaben": { + "tns:kapitalgesellschaft": { + "tns:zusatzGmbH": { + "tns:stammkapital": { + "tns:zahl": str(capital.value), + "tns:waehrung": {"code": capital.currency}, + }, + } + } + } + } + } + + result = transform.map_capital(data, CompanyTypeEnum.GMBH) # type: ignore + assert result == capital + + +def test_map_capital_ag() -> None: + capital = Capital( + currency=CurrencyEnum.DEUTSCHE_MARK, value=42, type=CapitalTypeEnum.GRUNDKAPITAL # type: ignore + ) + data = { + "tns:fachdatenRegister": { + "tns:auswahl_zusatzangaben": { + "tns:kapitalgesellschaft": { + "tns:zusatzAktiengesellschaft": { + "tns:grundkapital": { + "tns:hoehe": { + "tns:zahl": str(capital.value), + "tns:waehrung": {"code": capital.currency}, + } + }, + } + } + } + } + } + + result = transform.map_capital(data, CompanyTypeEnum.SE) # type: ignore + assert result == capital + + +def test_map_capital_personengesellschaft() -> None: + capital = Capital( + currency=CurrencyEnum.DEUTSCHE_MARK, value=42, type=CapitalTypeEnum.STAMMKAPITAL # type: ignore + ) + data = { + "tns:fachdatenRegister": { + "tns:auswahl_zusatzangaben": { + "tns:personengesellschaft": { + "tns:zusatzGmbH": { + "tns:stammkapital": { + "tns:zahl": str(capital.value), + "tns:waehrung": {"code": capital.currency}, + }, + } + } + } + } + } + + result = transform.map_capital(data, CompanyTypeEnum.OHG) # type: ignore + assert result == capital + + +def test_map_capital_einzelkaufmann() -> None: + capital = Capital( + currency=CurrencyEnum.DEUTSCHE_MARK, value=42, type=CapitalTypeEnum.STAMMKAPITAL # type: ignore + ) + data = { + "tns:fachdatenRegister": { + "tns:auswahl_zusatzangaben": { + "Personengesellschaft": { + "tns:zusatzGmbH": { + "tns:stammkapital": { + "tns:zahl": str(capital.value), + "tns:waehrung": {"code": capital.currency}, + }, + } + } + } + } + } + + result = transform.map_capital(data, CompanyTypeEnum.EINZELKAUFMANN) # type: ignore + assert result is None + + +def test_map_capital_partial_null_values() -> None: + capital = Capital( + currency=CurrencyEnum.DEUTSCHE_MARK, value=42, type=CapitalTypeEnum.STAMMKAPITAL # type: ignore + ) + data = { + "tns:fachdatenRegister": { + "tns:auswahl_zusatzangaben": { + "tns:personengesellschaft": { + "tns:zusatzGmbH": { + "tns:stammkapital": { + "tns:zahl": None, + "tns:waehrung": {"code": capital.currency}, + }, + } + } + } + } + } + + result = transform.map_capital(data, CompanyTypeEnum.OHG) # type: ignore + assert result is None + + +def test_map_business_purpose() -> None: + business_purpose = "Handel mit Betäubungsmitteln aller Art" + data = { + "tns:fachdatenRegister": { + "tns:basisdatenRegister": {"tns:gegenstand": business_purpose} + } + } + + result = transform.map_business_purpose(data) + assert result == business_purpose + + +def test_map_business_purpose_no_result() -> None: + data: dict = {} + + result = transform.map_business_purpose(data) + assert result is None + + +def test_map_founding_date_from_tag_der_ersten_eintragung() -> None: + data = { + "some entry": "Tag der ersten Eintragung: 01.05.2004", + "some other entry": "hfjdoöiashföahöf iodsazo8 5z4o fdsha8oü gfdsö", + } + expected_result = "2004-05-01" + result = transform.map_founding_date(data) + assert result == expected_result + + +def test_map_founding_date_from_gesellschaftsvertrag() -> None: + data = { + "some entry": "hfjdoöiashföahöf iodsazo8 5z4o fdsha8oü gfdsö", + "some other entry": "Das Wesen der Rekursion ist der Selbstaufruf Gesellschaftsvertrag vom 22.12.1996 Hallo Welt", + } + expected_result = "1996-12-22" + result = transform.map_founding_date(data) + assert result == expected_result + + +def test_map_founding_date_from_gruendungsdatum() -> None: + data = { + "tns:fachdatenRegister": { + "tns:basisdatenRegister": { + "tns:satzungsdatum": {"tns:aktuellesSatzungsdatum": "1998-01-01"} + } + } + } + expected_result = "1998-01-01" + result = transform.map_founding_date(data) + assert result == expected_result + + +def test_map_founding_date_no_result() -> None: + data: dict = {"tns:fachdatenRegister": {"tns:basisdatenRegister": {}}} + result = transform.map_founding_date(data) + assert result is None + + +def test_map_company_id() -> None: + district_court = DistrictCourt("Amtsgericht Ulm", "Ulm") + company_id = CompanyID(district_court, "HRA 4711") + data = { + "tns:grunddaten": { + "tns:verfahrensdaten": { + "tns:instanzdaten": { + "tns:aktenzeichen": { + "tns:auswahl_aktenzeichen": { + "tns:aktenzeichen.freitext": company_id.hr_number + } + }, + }, + "tns:beteiligung": [ + {}, + { + "tns:beteiligter": { + "tns:auswahl_beteiligter": { + "tns:organisation": { + "tns:bezeichnung": { + "tns:bezeichnung.aktuell": district_court.name + }, + "tns:anschrift": { + "tns:ort": district_court.city, + }, + } + } + }, + }, + ], + }, + }, + } + result = transform.map_company_id(data) + assert result == company_id + + +def test_map_last_update() -> None: + date = "2024-01-01" + data = {"tns:fachdatenRegister": {"tns:auszug": {"tns:letzteEintragung": date}}} + result = transform.map_last_update(data) + assert result == date + + +@patch( + "aki_prj23_transparenzregister.utils.data_extraction.unternehmensregister.transform.v3.v3.map_co_relation" +) +@patch( + "aki_prj23_transparenzregister.utils.data_extraction.unternehmensregister.transform.v3.v3.V3_Transformer.map_company_id" +) +@patch( + "aki_prj23_transparenzregister.utils.data_extraction.unternehmensregister.transform.v3.v3.V3_Transformer.name_from_beteiligung" +) +@patch( + "aki_prj23_transparenzregister.utils.data_extraction.unternehmensregister.transform.v3.v3.V3_Transformer.loc_from_beteiligung" +) +@patch( + "aki_prj23_transparenzregister.utils.data_extraction.unternehmensregister.transform.v3.v3.V3_Transformer.map_last_update" +) +@patch( + "aki_prj23_transparenzregister.utils.data_extraction.unternehmensregister.transform.v3.v3.V3_Transformer.map_rechtsform" +) +@patch( + "aki_prj23_transparenzregister.utils.data_extraction.unternehmensregister.transform.v3.v3.V3_Transformer.map_capital" +) +@patch( + "aki_prj23_transparenzregister.utils.data_extraction.unternehmensregister.transform.v3.v3.V3_Transformer.map_business_purpose" +) +@patch( + "aki_prj23_transparenzregister.utils.data_extraction.unternehmensregister.transform.v3.v3.V3_Transformer.map_founding_date" +) +@patch( + "aki_prj23_transparenzregister.utils.data_extraction.unternehmensregister.transform.v3.v3.V3_Transformer.parse_stakeholder" +) +def test_map_unternehmensregister_json( # noqa: PLR0913 + mock_map_parse_stakeholder: Mock, + mock_map_founding_date: Mock, + mock_map_business_purpose: Mock, + mock_map_capital: Mock, + mock_map_rechtsform: Mock, + mock_map_last_update: Mock, + mock_loc_from_beteiligung: Mock, + mock_map_name_from_beteiligung: Mock, + mock_map_company_id: Mock, + mock_map_co_relation: Mock, +) -> None: + expected_result = Company( + **{ # type: ignore + "id": Mock(), + "name": Mock(), + "location": Mock(), + "last_update": Mock(), + "company_type": Mock(), + "capital": Mock(), + "business_purpose": Mock(), + "founding_date": Mock(), + "relationships": [Mock()], + } + ) + + mock_map_company_id.return_value = expected_result.id + mock_map_name_from_beteiligung.return_value = expected_result.name + mock_loc_from_beteiligung.return_value = expected_result.location + mock_map_last_update.return_value = expected_result.last_update + mock_map_rechtsform.return_value = expected_result.company_type + mock_map_capital.return_value = expected_result.capital + mock_map_business_purpose.return_value = expected_result.business_purpose + mock_map_founding_date.return_value = expected_result.founding_date + mock_map_parse_stakeholder.return_value = expected_result.relationships[0] + mock_map_co_relation.side_effect = lambda x: x + + data: dict = { + "rootLayerWithSomeStuipStringNooneCaresAbout": { + "tns:grunddaten": {"tns:verfahrensdaten": {"tns:beteiligung": [{}, {}, {}]}} + } + } + + result = transform.map_unternehmensregister_json(data) + assert result == expected_result