def work(company_name: str, config_provider: ConfigProvider) -> None:
    """Scrape, transform and load a single missing company.

    The company is searched in the Unternehmensregister, the downloaded XML
    export is converted to JSON and mapped to the internal company model, the
    result is loaded into MongoDB and, if at least one file was loaded, the
    matching ``MissingCompany`` row is flagged as ``searched_for``.

    Errors during scraping, mapping or the SQL update are logged and end the
    processing of this company without raising.

    Args:
        company_name (str): Name of the company to search for
        config_provider (ConfigProvider): ConfigProvider
    """
    with tempfile.TemporaryDirectory() as tmp_dir:
        xml_dir = os.path.join(tmp_dir, "xml")
        json_dir = os.path.join(tmp_dir, "json")
        output_path = os.path.join(tmp_dir, "transformed")
        for directory in (xml_dir, json_dir, output_path):
            os.makedirs(directory, exist_ok=True)

        try:
            extract.scrape(  # type: ignore
                company_name, xml_dir, full_match=True, early_stopping=True
            )
        except Exception as e:
            logger.error(e)
            return

        transform.transform_xml_to_json(xml_dir, json_dir)

        # glob.glob1 is an undocumented helper that is deprecated since
        # Python 3.13; glob.glob with root_dir yields the same bare file names.
        for file in tqdm(glob.glob("*.json", root_dir=json_dir)):
            try:
                path = os.path.join(json_dir, file)
                with open(path, encoding="utf-8") as file_object:
                    company_mapped = transform.map_unternehmensregister_json(
                        json.load(file_object)
                    )

                # File name: alphanumeric characters of the company name only.
                name = "".join(e for e in company_mapped.name if e.isalnum())[:50]

                with open(
                    os.path.join(output_path, f"{name}.json"),
                    "w+",
                    encoding="utf-8",
                ) as export_file:
                    json.dump(
                        dataclasses.asdict(company_mapped),
                        export_file,
                        ensure_ascii=False,
                    )
            except Exception as e:
                logger.error(e)
                return

        mongo_connector = MongoConnector(config_provider.get_mongo_connection_string())
        try:
            num_processed = load.load_directory_to_mongo(
                output_path, CompanyMongoService(mongo_connector)
            )
        finally:
            # Release the connection even when loading fails.
            mongo_connector.client.close()

        if num_processed <= 0:
            return
        try:
            with connector.get_session(config_provider) as session:
                company = (
                    session.query(entities.MissingCompany)  # type: ignore
                    .where(entities.MissingCompany.name == company_name)
                    .first()
                )
                if company is None:
                    # first() returns None when no row matches the name.
                    logger.error(f"No MissingCompany entry found for {company_name}")
                    return
                company.searched_for = True  # type: ignore
                session.commit()
                logger.info(f"Processed {company_name}")
        except Exception as e:
            logger.error(e)
            return


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description=(
            "Scrapes companies flagged as missing from the Unternehmensregister "
            "and loads them into MongoDB."
        ),
        epilog="Example: %(prog)s ENV --log-level ERROR --log-path print.log",
    )
    parser.add_argument(
        "config",
        metavar="config",
        # Without nargs="?" a positional argument is mandatory and the default
        # below could never take effect.
        nargs="?",
        default="ENV",
    )
    add_logger_options_to_argparse(parser)

    parsed = parser.parse_args(sys.argv[1:])
    configer_logger(namespace=parsed)
    config_provider = get_config_provider(parsed.config)

    # Read the names and close the session before any worker process is
    # started, so no open database connection is carried into the workers.
    with connector.get_session(config_provider) as session:
        missing_company_names = [
            company.name
            for company in session.query(entities.MissingCompany)
            .where(entities.MissingCompany.searched_for == False)  # noqa
            .all()
        ]

    batch_size = 5
    # One (company name, config provider) argument tuple per call of work()
    params = [(name, config_provider) for name in missing_company_names]
    with multiprocessing.Pool(processes=batch_size) as pool:
        # Scrape data from unternehmensregister; blocks until all tasks are done
        pool.starmap(work, params)
        # Close the Pool to prevent any more tasks from being submitted
        pool.close()
        # Wait for all the processes to complete
        pool.join()
results from Unternehmensregister for given query. Args: query (str): Search Query (RegEx supported) download_dir (list[str]): Directory to place output files in + full_match (bool, optional): Only scrape first result. Defaults to False. + early_stopping (bool, optional): Stop scraping after first page. Defaults to False. """ - download_path = os.path.join(str(Path.cwd()), *download_dir) + # download_path = os.path.join(str(Path.cwd()), *download_dir) + download_path = download_dir options = webdriver.ChromeOptions() preferences = { "profile.default_content_settings.popups": 0, @@ -34,6 +41,7 @@ def scrape(query: str, download_dir: list[str]) -> None: } options.add_argument("--headless=new") options.add_experimental_option("prefs", preferences) + options.add_experimental_option("excludeSwitches", ["enable-logging"]) driver = webdriver.Chrome(options=options) @@ -73,7 +81,9 @@ def scrape(query: str, download_dir: list[str]) -> None: ] for index, company_link in enumerate(companies_tab): company_name = company_names[index] - if company_name in processed_companies: + if company_name in processed_companies or ( + full_match is True and company_name != query + ): continue # Go to intermediary page company_link.click() @@ -107,7 +117,7 @@ def scrape(query: str, download_dir: list[str]) -> None: try: wait.until( - lambda: wait_for_download_condition(download_path, num_files) # type: ignore + lambda x: wait_for_download_condition(download_path, num_files) # type: ignore ) file_name = "".join(e for e in company_name if e.isalnum()) + ".xml" rename_latest_file( @@ -120,6 +130,10 @@ def scrape(query: str, download_dir: list[str]) -> None: finally: for _ in range(6): driver.back() + if company_name == query and full_match is True: + break # noqa: B012 + if early_stopping is True: + break driver.find_element(By.XPATH, '//*[@class="fas fa-angle-right"]').click() driver.close() diff --git a/src/aki_prj23_transparenzregister/utils/data_extraction/unternehmensregister/load.py 
def load_directory_to_mongo(base_path: str, service: CompanyMongoService) -> int:
    """Load all json files in a directory to MongoDB company collection.

    Only files directly inside ``base_path`` are considered. Each file is
    parsed, turned into a ``Company`` and handed to
    ``service.migrations_of_base_data``.

    Args:
        base_path (str): Directory to scan
        service (CompanyMongoService): MongoDB service

    Returns:
        int: Number of processed files
    """
    num_processed = 0
    # glob.glob1 is an undocumented helper that is deprecated since
    # Python 3.13; glob.glob with root_dir yields the same bare file names.
    for file in tqdm(glob.glob("*.json", root_dir=base_path)):
        path = os.path.join(base_path, file)
        with open(path, encoding="utf-8") as file_object:
            data = json.load(file_object)
        company: Company = Company(**data)

        service.migrations_of_base_data(company)
        num_processed += 1
    return num_processed
82a8028..0000000 --- a/src/aki_prj23_transparenzregister/utils/data_extraction/unternehmensregister/transform.py +++ /dev/null @@ -1,590 +0,0 @@ -"""Transform raw Unternehmensregister export (*.xml) to processed .json files for loading.""" -import dataclasses -import glob -import json -import os -import re -import sys - -import xmltodict -from tqdm import tqdm - -from aki_prj23_transparenzregister.models.company import ( - Capital, - CapitalTypeEnum, - Company, - CompanyID, - CompanyRelationship, - CompanyRelationshipEnum, - CompanyToCompanyRelationship, - CompanyTypeEnum, - CurrencyEnum, - DistrictCourt, - Location, - PersonName, - PersonToCompanyRelationship, - RelationshipRoleEnum, -) -from aki_prj23_transparenzregister.utils.string_tools import ( - remove_traling_and_leading_quotes, - transform_date_to_iso, -) - - -def transform_xml_to_json(source_dir: str, target_dir: str) -> None: - """Convert all xml files in a directory to json files. - - Args: - source_dir (str): Directory hosting the xml files - target_dir (str): Target directory to move json files to - """ - for source_path in [ - os.path.normpath(i) for i in glob.glob(source_dir + "**/*.xml", recursive=True) - ]: - target_path = os.path.join( - target_dir, source_path.split(os.sep)[-1].replace(".xml", ".json") - ) - - with open(source_path, encoding="utf-8") as source_file: - # deepcode ignore HandleUnicode: Weird XML format no other solution - data = xmltodict.parse(source_file.read().encode()) - with open(target_path, "w", encoding="utf-8") as json_file: - json_file.write(json.dumps(data)) - - -def parse_date_of_birth(data: dict) -> str | None: - """Retreives the date of birth from a stakeholder entry if possible. 
- - Args: - data (dict): Stakeholder data - - Returns: - str | None: date of birth or None if not found - """ - if "Geburt" in (base := data["Beteiligter"]["Natuerliche_Person"]): - base = base["Geburt"]["Geburtsdatum"] - if isinstance(base, str): - return base - return None - - -def parse_stakeholder(data: dict) -> CompanyRelationship | None: - """Extract the company stakeholder/relation from a single "Beteiligung". - - Args: - data (dict): Data export - - Returns: - CompanyRelationship | None: Relationship if it could be processed - """ - if "Natuerliche_Person" in data["Beteiligter"]: - # It's a Company serving as a "Kommanditist" or similar - if data["Beteiligter"]["Natuerliche_Person"]["Voller_Name"]["Vorname"] is None: - return CompanyToCompanyRelationship( - **{ # type: ignore - "name": remove_traling_and_leading_quotes( - data["Beteiligter"]["Natuerliche_Person"]["Voller_Name"][ - "Nachname" - ] - ), - "location": Location( - **{ - "city": data["Beteiligter"]["Natuerliche_Person"][ - "Anschrift" - ][-1]["Ort"] - if isinstance( - data["Beteiligter"]["Natuerliche_Person"]["Anschrift"], - list, - ) - else data["Beteiligter"]["Natuerliche_Person"]["Anschrift"][ - "Ort" - ] - } - ), - "role": RelationshipRoleEnum( - data["Rolle"]["Rollenbezeichnung"]["content"] - ), - "type": CompanyRelationshipEnum.COMPANY, - } - ) - return PersonToCompanyRelationship( - **{ # type: ignore - "name": PersonName( - **{ - "firstname": data["Beteiligter"]["Natuerliche_Person"][ - "Voller_Name" - ]["Vorname"], - "lastname": data["Beteiligter"]["Natuerliche_Person"][ - "Voller_Name" - ]["Nachname"], - } - ), - "date_of_birth": parse_date_of_birth(data), - "location": Location( - **{ - "city": data["Beteiligter"]["Natuerliche_Person"]["Anschrift"][ - -1 - ]["Ort"] - if isinstance( - data["Beteiligter"]["Natuerliche_Person"]["Anschrift"], list - ) - else data["Beteiligter"]["Natuerliche_Person"]["Anschrift"][ - "Ort" - ] - } - ), - "role": RelationshipRoleEnum( - 
data["Rolle"]["Rollenbezeichnung"]["content"] - ), - "type": CompanyRelationshipEnum.PERSON, - } - ) - if "Organisation" in data["Beteiligter"]: - return CompanyToCompanyRelationship( - **{ # type: ignore - "role": RelationshipRoleEnum( - data["Rolle"]["Rollenbezeichnung"]["content"] - ), - "name": remove_traling_and_leading_quotes( - data["Beteiligter"]["Organisation"]["Bezeichnung"][ - "Bezeichnung_Aktuell" - ] - ), - "location": Location( - **{ - "city": data["Beteiligter"]["Organisation"]["Anschrift"]["Ort"], - "street": data["Beteiligter"]["Organisation"]["Anschrift"][ - "Strasse" - ] - if "Strasse" in data["Beteiligter"]["Organisation"]["Anschrift"] - else None, - "house_number": data["Beteiligter"]["Organisation"][ - "Anschrift" - ]["Hausnummer"] - if "Hausnummer" - in data["Beteiligter"]["Organisation"]["Anschrift"] - else None, - "zip_code": data["Beteiligter"]["Organisation"]["Anschrift"][ - "Postleitzahl" - ] - if "Postleitzahl" - in data["Beteiligter"]["Organisation"]["Anschrift"] - else None, - } - ), - "type": CompanyRelationshipEnum.COMPANY, - } - ) - return None - - -def normalize_street(street: str) -> str: - """Normalize street names by extending them to `Straße` or `straße`. - - Args: - street (str): Name of street - - Returns: - str: Normalized street name - """ - if street is None: - return None - regex = r"(Str\.|Strasse)" - street = re.sub(regex, "Straße", street) - regex = r"(str\.|strasse)" - street = re.sub(regex, "straße", street) - return street.strip() - - -def loc_from_beteiligung(data: dict) -> Location: - """Extract the company location from the first relationship in the export. 
- - Args: - data (dict): Data export - - Returns: - Location: location - """ - base = data["XJustiz_Daten"]["Grunddaten"]["Verfahrensdaten"]["Beteiligung"][0][ - "Beteiligter" - ]["Organisation"]["Anschrift"] - - house_number = None - street = None - if "Strasse" in base: - regex = r".(\d+)$" - hits = re.findall(regex, base["Strasse"]) - if len(hits) == 1: - house_number = hits[0] - street = base["Strasse"][: (-1 * len(house_number))] - if "Hausnummer" in base: - house_number = house_number + base["Hausnummer"] - else: - if "Hausnummer" in base: - house_number = base["Hausnummer"] - street = base["Strasse"] - return Location( - **{ - "city": base["Ort"], - "zip_code": base["Postleitzahl"], - "street": normalize_street(street), # type: ignore - "house_number": house_number, - } - ) - - -def name_from_beteiligung(data: dict) -> str: - """Extract the Company name from an Unternehmensregister export by using the first relationship found. - - Args: - data (dict): Data export - - Returns: - str: Company name - """ - name = data["XJustiz_Daten"]["Grunddaten"]["Verfahrensdaten"]["Beteiligung"][0][ - "Beteiligter" - ]["Organisation"]["Bezeichnung"]["Bezeichnung_Aktuell"] - return remove_traling_and_leading_quotes(name) - - -def map_rechtsform(company_name: str, data: dict) -> CompanyTypeEnum | None: - """Extracts the company type from a given Unternehmensregister export. 
- - Args: - company_name (str): Name of the company as a fallback solution - data (dict): Data export - - Returns: - CompanyTypeEnum | None: Company type if found - """ - try: - return CompanyTypeEnum( - data["XJustiz_Daten"]["Fachdaten_Register"]["Basisdaten_Register"][ - "Rechtstraeger" - ]["Rechtsform"]["content"] - ) - except KeyError: - if ( - company_name.endswith("GmbH") - or company_name.endswith("UG") - or company_name.endswith("UG (haftungsbeschränkt)") - ): - return CompanyTypeEnum("Gesellschaft mit beschränkter Haftung") - if company_name.endswith("SE"): - return CompanyTypeEnum("Europäische Aktiengesellschaft (SE)") - if company_name.endswith("KG"): - return CompanyTypeEnum("Kommanditgesellschaft") - return None - - -def map_capital(data: dict, company_type: CompanyTypeEnum) -> Capital | None: - """Extracts the company capital from the given Unternehmensregister export. - - Args: - data (dict): Data export - company_type (CompanyTypeEnum): Type of company (e.g., 'Gesellschaft mit beschränkter Haftung') - - Returns: - Capital | None: Company Capital if found - """ - # Early return - if "Zusatzangaben" not in data["XJustiz_Daten"]["Fachdaten_Register"]: - return None - capital: dict = {"Zahl": 0.0, "Waehrung": ""} - if company_type == CompanyTypeEnum.KG: - capital_type = "Hafteinlage" - base = data["XJustiz_Daten"]["Fachdaten_Register"]["Zusatzangaben"][ - "Personengesellschaft" - ]["Zusatz_KG"]["Daten_Kommanditist"] - if isinstance(base, list): - for entry in base: - # TODO link to persons using Ref_Rollennummer then extract ["Hafteinlage"] as below - capital["Zahl"] = capital["Zahl"] + float(entry["Hafteinlage"]["Zahl"]) - capital["Waehrung"] = entry["Hafteinlage"]["Waehrung"] - elif isinstance(base, dict): - capital = base["Hafteinlage"] - elif company_type in [ - CompanyTypeEnum.GMBH, - CompanyTypeEnum.SE, - CompanyTypeEnum.AG, - CompanyTypeEnum.KGaA, - CompanyTypeEnum.AUSLAENDISCHE_RECHTSFORM, - CompanyTypeEnum.OHG, - ]: - if ( - 
"Kapitalgesellschaft" - not in data["XJustiz_Daten"]["Fachdaten_Register"]["Zusatzangaben"] - ): - base = data["XJustiz_Daten"]["Fachdaten_Register"]["Zusatzangaben"][ - "Personengesellschaft" - ] - else: - base = data["XJustiz_Daten"]["Fachdaten_Register"]["Zusatzangaben"][ - "Kapitalgesellschaft" - ] - if "Zusatz_GmbH" in base: - capital_type = "Stammkapital" - capital = base["Zusatz_GmbH"]["Stammkapital"] - elif "Zusatz_Aktiengesellschaft" in base: - capital_type = "Grundkapital" - capital = base["Zusatz_Aktiengesellschaft"]["Grundkapital"]["Hoehe"] - elif company_type in [ - CompanyTypeEnum.EINZELKAUFMANN, - CompanyTypeEnum.EG, - CompanyTypeEnum.PARTNERSCHAFT, - CompanyTypeEnum.PARTNERGESELLSCHAFT, - CompanyTypeEnum.PARTNERSCHAFTSGESELLSCHAFT, - None, - ]: - return None - # Catch entries having the dict but with null values - if not all(capital.values()): - return None - return Capital( - **{ # type: ignore - "value": float(capital["Zahl"]), - "currency": CurrencyEnum(capital["Waehrung"]), - "type": CapitalTypeEnum(capital_type), - } - ) - - -def map_business_purpose(data: dict) -> str | None: - """Extracts the "Geschäftszweck" from a given Unternehmensregister export. - - Args: - data (dict): Data export - - Returns: - str | None: Business purpose if found - """ - try: - return data["XJustiz_Daten"]["Fachdaten_Register"]["Basisdaten_Register"][ - "Gegenstand_oder_Geschaeftszweck" - ] - except KeyError: - return None - - -def extract_date_from_string(value: str) -> str | None: - """Extract a date in ISO format from the given string if possible. 
- - Args: - value (str): Input text - - Returns: - str | None: Date in ISO format, None if not found - """ - date_regex = [ # type: ignore - {"regex": r"\d{1,2}\.\d{1,2}\.\d{4}", "mapper": transform_date_to_iso}, - {"regex": r"\d{4}-\d{1,2}-\d{1,2}", "mapper": None}, - ] - results = [] - for regex in date_regex: - result = re.findall(regex["regex"], value) # type: ignore - if len(result) == 1: - relevant_data = result[0] - if regex["mapper"] is not None: # type: ignore - results.append(regex["mapper"](relevant_data)) # type: ignore - else: - results.append(relevant_data) - if len(results) != 1: - return None - return results[0] - - -def map_founding_date(data: dict) -> str | None: - """Extracts the founding date from a given Unternehmensregister export. - - Args: - data (dict): Data export - - Returns: - str | None: Founding date if found - """ - text = str(data) - entry_date = re.findall( - r".Tag der ersten Eintragung:(\\n| )?(\d{1,2}\.\d{1,2}\.\d{2,4})", text - ) - if len(entry_date) == 1: - return transform_date_to_iso(entry_date[0][1]) - - entry_date = re.findall( - r".Gesellschaftsvertrag vom (\d{1,2}\.\d{1,2}\.\d{2,4})", text - ) - if len(entry_date) == 1: - return transform_date_to_iso(entry_date[0]) - if ( - "Gruendungsmetadaten" - in data["XJustiz_Daten"]["Fachdaten_Register"]["Basisdaten_Register"] - ): - return extract_date_from_string( - data["XJustiz_Daten"]["Fachdaten_Register"]["Basisdaten_Register"][ - "Gruendungsmetadaten" - ]["Gruendungsdatum"] - ) - # No reliable answer - return None - - -def map_company_id(data: dict) -> CompanyID: - """Retrieve Company ID from export. 
- - Args: - data (dict): Data export - - Returns: - CompanyID: ID of the company - """ - return CompanyID( - **{ - "hr_number": data["XJustiz_Daten"]["Grunddaten"]["Verfahrensdaten"][ - "Instanzdaten" - ]["Aktenzeichen"], - "district_court": DistrictCourt( - **{ - "name": data["XJustiz_Daten"]["Grunddaten"]["Verfahrensdaten"][ - "Beteiligung" - ][1]["Beteiligter"]["Organisation"]["Bezeichnung"][ - "Bezeichnung_Aktuell" - ] - if "Organisation" - in data["XJustiz_Daten"]["Grunddaten"]["Verfahrensdaten"][ - "Beteiligung" - ][1]["Beteiligter"] - else data["XJustiz_Daten"]["Grunddaten"]["Verfahrensdaten"][ - "Beteiligung" - ][1]["Beteiligter"]["Natuerliche_Person"]["Voller_Name"][ - "Nachname" - ], - "city": data["XJustiz_Daten"]["Grunddaten"]["Verfahrensdaten"][ - "Beteiligung" - ][1]["Beteiligter"]["Organisation"]["Sitz"]["Ort"] - if "Organisation" - in data["XJustiz_Daten"]["Grunddaten"]["Verfahrensdaten"][ - "Beteiligung" - ][1]["Beteiligter"] - else data["XJustiz_Daten"]["Grunddaten"]["Verfahrensdaten"][ - "Beteiligung" - ][1]["Beteiligter"]["Natuerliche_Person"]["Anschrift"]["Ort"], - } - ), - } - ) - - -def map_last_update(data: dict) -> str: - """Extract last update date from export. - - Args: - data (dict): Unternehmensregister export - - Returns: - str: Last update date - """ - return data["XJustiz_Daten"]["Fachdaten_Register"]["Auszug"]["letzte_Eintragung"] - - -def map_co_relation(data: dict) -> dict: - """Search for and map the c/o relation from location.street if possible. 
- - Args: - data (dict): Company dict - - Returns: - dict: Modified Company dict - """ - street = data["location"].street - if street is None: - return data - parts = street.split(",") - co_company = None - co_company_index = None - for index, part in enumerate(parts): - trimmed_part = part.strip() - result = re.findall(r"^c\/o(.*)$", trimmed_part) - if len(result) == 1: - co_company = result[0].strip() - co_company_index = index - if co_company_index is not None: - del parts[co_company_index] - street = "".join(parts).strip() - data["location"].street = street - - if co_company is not None and co_company != "": - relation = CompanyToCompanyRelationship( - RelationshipRoleEnum.CARE_OF, # type: ignore - Location( - data["location"].city, - street, - data["location"].house_number, - data["location"].zip_code, - ), - CompanyRelationshipEnum.COMPANY, # type: ignore - co_company, - ) - data["relationships"].append(relation) - return data - - -def map_unternehmensregister_json(data: dict) -> Company: - """Processes the Unternehmensregister structured export to a Company by using several helper methods. - - Args: - data (dict): Data export - - Returns: - Company: Transformed data - """ - result: dict = {"relationships": []} - - # TODO Refactor mapping - this is a nightmare... 
- result["id"] = map_company_id(data) - result["name"] = name_from_beteiligung(data) - - result["location"] = loc_from_beteiligung(data) - result["last_update"] = map_last_update(data) - - result["company_type"] = map_rechtsform(result["name"], data) - result["capital"] = map_capital(data, result["company_type"]) - result["business_purpose"] = map_business_purpose(data) - result["founding_date"] = map_founding_date(data) - - for i in range( - 2, len(data["XJustiz_Daten"]["Grunddaten"]["Verfahrensdaten"]["Beteiligung"]) - ): - people = parse_stakeholder( - data["XJustiz_Daten"]["Grunddaten"]["Verfahrensdaten"]["Beteiligung"][i] - ) - result["relationships"].append(people) - result = map_co_relation(result) - return Company(**result) - - -if __name__ == "__main__": - from loguru import logger - - base_path = "./Jupyter/API-tests/Unternehmensregister/data/Unternehmensregister" - for file in tqdm(glob.glob1(f"{base_path}/export", "*.json")): - path = os.path.join(f"{base_path}/export", file) - with open(path, encoding="utf-8") as file_object: - try: - company: Company = map_unternehmensregister_json( - json.loads(file_object.read()) - ) - - name = "".join(e for e in company.name if e.isalnum())[:50] - - with open( - f"{base_path}/transformed/{name}.json", - "w+", - encoding="utf-8", - ) as export_file: - json.dump( - dataclasses.asdict(company), export_file, ensure_ascii=False - ) - except Exception as e: - logger.error(e) - logger.error(f"Error in processing {path}") - sys.exit(1) diff --git a/src/aki_prj23_transparenzregister/utils/data_extraction/unternehmensregister/transform/__init__.py b/src/aki_prj23_transparenzregister/utils/data_extraction/unternehmensregister/transform/__init__.py new file mode 100644 index 0000000..e6ede79 --- /dev/null +++ b/src/aki_prj23_transparenzregister/utils/data_extraction/unternehmensregister/transform/__init__.py @@ -0,0 +1 @@ +"""Transform Unternehmensregister data to Transparenzregister API.""" diff --git 
a/src/aki_prj23_transparenzregister/utils/data_extraction/unternehmensregister/transform/common.py b/src/aki_prj23_transparenzregister/utils/data_extraction/unternehmensregister/transform/common.py new file mode 100644 index 0000000..8a75843 --- /dev/null +++ b/src/aki_prj23_transparenzregister/utils/data_extraction/unternehmensregister/transform/common.py @@ -0,0 +1,256 @@ +"""Common functions for data transformation.""" +import abc +import re +import typing +from collections.abc import Sequence + +from aki_prj23_transparenzregister.models.company import ( + Capital, + Company, + CompanyID, + CompanyRelationship, + CompanyRelationshipEnum, + CompanyToCompanyRelationship, + CompanyTypeEnum, + Location, + RelationshipRoleEnum, +) +from aki_prj23_transparenzregister.utils.string_tools import ( + transform_date_to_iso, +) + + +def traversal(data: dict, path: Sequence[str | int | object]) -> typing.Any: + """Traverse a dict using list of keys. + + Args: + data (dict): Data export + path (Sequence[str | int | object]): List of keys + + Raises: + KeyError: If key not found + + Returns: + any: Value at the end of the path + """ + current = data + for key in path: + try: + current = current[key] + except KeyError as e: + raise KeyError(f"Key {key} not found") from e + return current + + +def normalize_street(street: str) -> str: + """Normalize street names by extending them to `Straße` or `straße`. + + Args: + street (str): Name of street + + Returns: + str: Normalized street name + """ + if street is None: + return None + regex = r"(Str\.|Strasse)" + street = re.sub(regex, "Straße", street) + regex = r"(str\.|strasse)" + street = re.sub(regex, "straße", street) + return street.strip() + + +def extract_date_from_string(value: str) -> str | None: + """Extract a date in ISO format from the given string if possible. 
def map_co_relation(data: dict) -> dict:
    """Search for and map the c/o relation from location.street if possible.

    The street is split at commas. A part starting with ``c/o`` is removed
    from the street and, if it names a company, added to
    ``data["relationships"]`` as a ``CARE_OF`` relationship. The remaining
    parts keep their comma separators, so a street without a c/o part is left
    unchanged apart from surrounding whitespace.

    Args:
        data (dict): Company dict

    Returns:
        dict: Modified Company dict
    """
    street = data["location"].street
    if street is None:
        return data
    parts = street.split(",")
    co_company = None
    co_company_index = None
    for index, part in enumerate(parts):
        trimmed_part = part.strip()
        result = re.findall(r"^c\/o(.*)$", trimmed_part)
        if len(result) == 1:
            # If several parts match, the last one wins.
            co_company = result[0].strip()
            co_company_index = index
    if co_company_index is not None:
        del parts[co_company_index]
    # Re-join with the separator the street was split on; joining with ""
    # silently dropped every comma of the address.
    street = ",".join(parts).strip()
    data["location"].street = street

    if co_company is not None and co_company != "":
        relation = CompanyToCompanyRelationship(
            RelationshipRoleEnum.CARE_OF,  # type: ignore
            Location(
                data["location"].city,
                street,
                data["location"].house_number,
                data["location"].zip_code,
            ),
            CompanyRelationshipEnum.COMPANY,  # type: ignore
            co_company,
        )
        data["relationships"].append(relation)
    return data
stakeholder entry if possible. + + Args: + data (dict): Stakeholder data + + Returns: + str | None: date of birth or None if not found + """ + + @abc.abstractmethod + def parse_stakeholder(self, data: dict) -> CompanyRelationship | None: + """Extract the company stakeholder/relation from a single "Beteiligung". + + Args: + data (dict): Data export + + Returns: + CompanyRelationship | None: Relationship if it could be processed + """ + + @abc.abstractmethod + def loc_from_beteiligung(self, data: dict) -> Location: + """Extract the company location from the first relationship in the export. + + Args: + data (dict): Data export + + Returns: + Location: location + """ + + @abc.abstractmethod + def name_from_beteiligung(self, data: dict) -> str: + """Extract the Company name from an Unternehmensregister export by using the first relationship found. + + Args: + data (dict): Data export + + Returns: + str: Company name + """ + + @abc.abstractmethod + def map_rechtsform(self, company_name: str, data: dict) -> CompanyTypeEnum | None: + """Extracts the company type from a given Unternehmensregister export. + + Args: + company_name (str): Name of the company as a fallback solution + data (dict): Data export + + Returns: + CompanyTypeEnum | None: Company type if found + """ + + @abc.abstractmethod + def map_capital(self, data: dict, company_type: CompanyTypeEnum) -> Capital | None: + """Extracts the company capital from the given Unternehmensregister export. + + Args: + data (dict): Data export + company_type (CompanyTypeEnum): Type of company (e.g., 'Gesellschaft mit beschränkter Haftung') + + Returns: + Capital | None: Company Capital if found + """ + + @abc.abstractmethod + def map_business_purpose(self, data: dict) -> str | None: + """Extracts the "Geschäftszweck" from a given Unternehmensregister export. 
+ + Args: + data (dict): Data export + + Returns: + str | None: Business purpose if found + """ + + @abc.abstractmethod + def map_founding_date(self, data: dict) -> str | None: + """Extracts the founding date from a given Unternehmensregister export. + + Args: + data (dict): Data export + + Returns: + str | None: Founding date if found + """ + + @abc.abstractmethod + def map_company_id(self, data: dict) -> CompanyID: + """Retrieve Company ID from export. + + Args: + data (dict): Data export + + Returns: + CompanyID: ID of the company + """ + + @abc.abstractmethod + def map_last_update(self, data: dict) -> str: + """Extract last update date from export. + + Args: + data (dict): Unternehmensregister export + + Returns: + str: Last update date + """ + + @abc.abstractmethod + def map_unternehmensregister_json(self, data: dict) -> Company: + """Processes the Unternehmensregister structured export to a Company by using several helper methods. + + Args: + data (dict): Data export + + Returns: + Company: Transformed data + """ diff --git a/src/aki_prj23_transparenzregister/utils/data_extraction/unternehmensregister/transform/main.py b/src/aki_prj23_transparenzregister/utils/data_extraction/unternehmensregister/transform/main.py new file mode 100644 index 0000000..dc57093 --- /dev/null +++ b/src/aki_prj23_transparenzregister/utils/data_extraction/unternehmensregister/transform/main.py @@ -0,0 +1,102 @@ +"""Transform raw Unternehmensregister export (*.xml) to processed .json files for loading.""" +import dataclasses +import glob +import json +import os +import sys + +import xmltodict +from loguru import logger +from tqdm import tqdm + +from aki_prj23_transparenzregister.models.company import Company +from aki_prj23_transparenzregister.utils.data_extraction.unternehmensregister.transform.common import ( + BaseTransformer, +) +from aki_prj23_transparenzregister.utils.data_extraction.unternehmensregister.transform.v1 import ( + v1, +) +from 
def transform_xml_to_json(source_dir: str, target_dir: str) -> None:
    """Convert every xml file below a directory into a json file.

    ``source_dir`` is searched recursively. Each file found is parsed with
    ``xmltodict`` and written to ``target_dir`` under its own base name with
    the extension replaced by ``.json``. A file that cannot be read, parsed or
    written is logged and skipped so that one broken export does not abort
    the whole run.

    Args:
        source_dir (str): Directory hosting the xml files
        target_dir (str): Existing directory the json files are written to
    """
    # The wildcard has to be joined as separate path components. Appended
    # directly to a directory name without a trailing separator, "**" is not a
    # component of its own: the search then neither recurses nor stays inside
    # ``source_dir`` (it also matches sibling directories sharing the prefix).
    pattern = os.path.join(source_dir, "**", "*.xml")
    for source_path in map(os.path.normpath, glob.glob(pattern, recursive=True)):
        # Swap the extension only; str.replace would also rewrite a ".xml"
        # occurring in the middle of the file name.
        stem = os.path.splitext(os.path.basename(source_path))[0]
        target_path = os.path.join(target_dir, f"{stem}.json")
        try:
            with open(source_path, encoding="utf-8") as source_file:
                # deepcode ignore HandleUnicode: Weird XML format no other solution
                data = xmltodict.parse(source_file.read().encode())
            with open(target_path, "w", encoding="utf-8") as json_file:
                json_file.write(json.dumps(data))
        except Exception as e:
            # Best effort by design: report the failure and go on with the next file.
            logger.error(e)


def determine_version(data: dict) -> BaseTransformer:
    """Determine Unternehmensregister data API version of given entry.

    An export with a top-level ``XJustiz_Daten`` key is handled by the v1
    transformer. Otherwise the value stored under the first top-level key is
    inspected, and the v3 transformer is chosen when it contains
    ``tns:nachrichtenkopf``.

    Args:
        data (dict): Unternehmensregister data

    Raises:
        ValueError: If version could not be determined

    Returns:
        BaseTransformer: Transformer instance matching the export's version
    """
    # Anything that is not a mapping cannot be an export at all. Report it
    # with the documented ValueError rather than an AttributeError from .keys().
    if not isinstance(data, dict):
        raise ValueError("Could not determine Unternehmensregister version.")
    if "XJustiz_Daten" in data:
        return v1.V1_Transformer()
    # next(..., None) copes with an empty export; the isinstance check copes
    # with a root element that carries no children (None or plain text).
    root = next(iter(data.values()), None)
    if isinstance(root, dict) and "tns:nachrichtenkopf" in root:
        return v3.V3_Transformer()
    raise ValueError("Could not determine Unternehmensregister version.")


def map_unternehmensregister_json(data: dict) -> Company:
    """Processes the Unternehmensregister structured export to a Company by using several helper methods.

    The export version is detected first and the mapping is then delegated to
    the transformer for that version.

    Args:
        data (dict): Data export

    Raises:
        ValueError: If the export version could not be determined

    Returns:
        Company: Transformed data
    """
    transformer = determine_version(data)
    return transformer.map_unternehmensregister_json(data)


if __name__ == "__main__":
    # Development helper: transform every exported json file below a fixed
    # local data directory and stop at the first file that fails.
    base_path = "./Jupyter/API-tests/Unternehmensregister/data/Unternehmensregister"
    export_dir = f"{base_path}/export"
    # glob.glob with a joined pattern yields the same paths the undocumented
    # glob.glob1 + os.path.join combination produced.
    for path in tqdm(glob.glob(os.path.join(export_dir, "*.json"))):
        with open(path, encoding="utf-8") as file_object:
            try:
                data = json.loads(file_object.read())
                transformer: BaseTransformer = determine_version(data)
                company: Company = transformer.map_unternehmensregister_json(data)

                # File name: alphanumeric characters of the company name, max. 50.
                name = "".join(e for e in company.name if e.isalnum())[:50]

                with open(
                    f"{base_path}/transformed/{name}.json",
                    "w+",
                    encoding="utf-8",
                ) as export_file:
                    json.dump(
                        dataclasses.asdict(company), export_file, ensure_ascii=False
                    )
            except Exception as e:
                logger.error(e)
                logger.error(f"Error in processing {path}")
                sys.exit(1)
class V1_Transformer(BaseTransformer):  # noqa: N801
    """Transformer for data exports from Unternehmensregister (v1).

    A v1 export is a dictionary rooted at ``XJustiz_Daten``. The participant
    list under ``Grunddaten -> Verfahrensdaten -> Beteiligung`` is read by
    position: entry 0 supplies the company name and address, entry 1 the
    district court, and every later entry is parsed as a stakeholder.
    """

    @staticmethod
    def _proceedings(data: dict) -> dict:
        """Return the ``Verfahrensdaten`` section of an export."""
        return data["XJustiz_Daten"]["Grunddaten"]["Verfahrensdaten"]

    @staticmethod
    def _register_data(data: dict) -> dict:
        """Return the ``Fachdaten_Register`` section of an export."""
        return data["XJustiz_Daten"]["Fachdaten_Register"]

    @staticmethod
    def _dig(node: object, *keys: str) -> object:
        """Follow ``keys`` through nested dicts, returning None once a step is missing."""
        for key in keys:
            if not isinstance(node, dict) or key not in node:
                return None
            node = node[key]
        return node

    @staticmethod
    def _person_city(person: dict) -> str:
        """Return the city of a ``Natuerliche_Person``; the last address wins if several are listed."""
        address = person["Anschrift"]
        if isinstance(address, list):
            address = address[-1]
        return address["Ort"]

    @staticmethod
    def _role(data: dict) -> RelationshipRoleEnum:
        """Return the role a single "Beteiligung" entry declares."""
        return RelationshipRoleEnum(data["Rolle"]["Rollenbezeichnung"]["content"])

    def parse_date_of_birth(self, data: dict) -> str | None:
        """Retrieves the date of birth from a stakeholder entry if possible.

        Args:
            data (dict): Stakeholder data

        Returns:
            str | None: date of birth or None if not found
        """
        person = data["Beteiligter"]["Natuerliche_Person"]
        if "Geburt" in person:
            birth_date = person["Geburt"]["Geburtsdatum"]
            # Only a plain string is accepted; any other value counts as "not found".
            if isinstance(birth_date, str):
                return birth_date
        return None

    def parse_stakeholder(self, data: dict) -> CompanyRelationship | None:
        """Extract the company stakeholder/relation from a single "Beteiligung".

        Args:
            data (dict): Data export

        Returns:
            CompanyRelationship | None: Relationship if it could be processed
        """
        participant = data["Beteiligter"]
        if "Natuerliche_Person" in participant:
            person = participant["Natuerliche_Person"]
            full_name = person["Voller_Name"]
            # It's a Company serving as a "Kommanditist" or similar
            if full_name["Vorname"] is None:
                return CompanyToCompanyRelationship(
                    name=remove_traling_and_leading_quotes(full_name["Nachname"]),
                    location=Location(city=self._person_city(person)),
                    role=self._role(data),
                    type=CompanyRelationshipEnum.COMPANY,
                )
            return PersonToCompanyRelationship(
                name=PersonName(
                    firstname=full_name["Vorname"],
                    lastname=full_name["Nachname"],
                ),
                date_of_birth=self.parse_date_of_birth(data),
                location=Location(city=self._person_city(person)),
                role=self._role(data),
                type=CompanyRelationshipEnum.PERSON,
            )
        if "Organisation" in participant:
            organisation = participant["Organisation"]
            address = organisation["Anschrift"]
            return CompanyToCompanyRelationship(
                role=self._role(data),
                name=remove_traling_and_leading_quotes(
                    organisation["Bezeichnung"]["Bezeichnung_Aktuell"]
                ),
                # Only the city is mandatory; the other address parts are optional.
                location=Location(
                    city=address["Ort"],
                    street=address.get("Strasse"),
                    house_number=address.get("Hausnummer"),
                    zip_code=address.get("Postleitzahl"),
                ),
                type=CompanyRelationshipEnum.COMPANY,
            )
        # Neither a natural person nor an organisation: nothing to map.
        return None

    def loc_from_beteiligung(self, data: dict) -> Location:
        """Extract the company location from the first relationship in the export.

        A number at the end of the street field is split off as the house
        number; a separate ``Hausnummer`` field is appended to it, or used on
        its own when the street carries no trailing number.

        Args:
            data (dict): Data export

        Returns:
            Location: location
        """
        address = self._proceedings(data)["Beteiligung"][0]["Beteiligter"][
            "Organisation"
        ]["Anschrift"]

        house_number = None
        street = None
        # NOTE(review): a "Hausnummer" without any "Strasse" is ignored here, as
        # in the previous implementation - confirm that this is intended.
        if "Strasse" in address:
            raw_street = address["Strasse"]
            trailing_number = re.search(r".(\d+)$", raw_street)
            if trailing_number:
                house_number = trailing_number.group(1)
                street = raw_street[: -len(house_number)]
                if "Hausnummer" in address:
                    house_number = house_number + address["Hausnummer"]
            else:
                if "Hausnummer" in address:
                    house_number = address["Hausnummer"]
                street = raw_street
        return Location(
            city=address["Ort"],
            zip_code=address["Postleitzahl"],
            street=normalize_street(street),  # type: ignore
            house_number=house_number,
        )

    def name_from_beteiligung(self, data: dict) -> str:
        """Extract the Company name from an Unternehmensregister export by using the first relationship found.

        Args:
            data (dict): Data export

        Returns:
            str: Company name
        """
        name = self._proceedings(data)["Beteiligung"][0]["Beteiligter"][
            "Organisation"
        ]["Bezeichnung"]["Bezeichnung_Aktuell"]
        return remove_traling_and_leading_quotes(name)

    def map_rechtsform(self, company_name: str, data: dict) -> CompanyTypeEnum | None:
        """Extracts the company type from a given Unternehmensregister export.

        When the export carries no legal form, the type is guessed from the
        suffix of the company name.

        Args:
            company_name (str): Name of the company as a fallback solution
            data (dict): Data export

        Returns:
            CompanyTypeEnum | None: Company type if found
        """
        try:
            return CompanyTypeEnum(
                self._register_data(data)["Basisdaten_Register"]["Rechtstraeger"][
                    "Rechtsform"
                ]["content"]
            )
        except KeyError:
            if company_name.endswith(("GmbH", "UG", "UG (haftungsbeschränkt)")):
                return CompanyTypeEnum("Gesellschaft mit beschränkter Haftung")
            if company_name.endswith("SE"):
                return CompanyTypeEnum("Europäische Aktiengesellschaft (SE)")
            if company_name.endswith("KG"):
                return CompanyTypeEnum("Kommanditgesellschaft")
        return None

    def map_capital(self, data: dict, company_type: CompanyTypeEnum) -> Capital | None:
        """Extracts the company capital from the given Unternehmensregister export.

        For a KG the "Hafteinlage" of all limited partners is summed up. For
        the capital-company types the "Stammkapital" or "Grundkapital" entry is
        used. Every other company type, and every export in which the
        expected section is missing, empty or holds null values, yields None.

        Args:
            data (dict): Data export
            company_type (CompanyTypeEnum): Type of company (e.g., 'Gesellschaft mit beschränkter Haftung')

        Returns:
            Capital | None: Company Capital if found
        """
        additions = self._register_data(data).get("Zusatzangaben")
        # Early return: section absent or an empty element
        if not isinstance(additions, dict):
            return None

        capital: object = None
        capital_type: str | None = None
        if company_type == CompanyTypeEnum.KG:
            capital_type = "Hafteinlage"
            limited_partners = self._dig(
                additions, "Personengesellschaft", "Zusatz_KG", "Daten_Kommanditist"
            )
            if isinstance(limited_partners, list):
                total = 0.0
                currency = ""
                for entry in limited_partners:
                    # TODO link to persons using Ref_Rollennummer then extract ["Hafteinlage"] as below
                    total += float(entry["Hafteinlage"]["Zahl"])
                    # The currency of the last entry is the one reported.
                    currency = entry["Hafteinlage"]["Waehrung"]
                capital = {"Zahl": total, "Waehrung": currency}
            elif isinstance(limited_partners, dict):
                capital = limited_partners.get("Hafteinlage")
        elif company_type in [
            CompanyTypeEnum.GMBH,
            CompanyTypeEnum.SE,
            CompanyTypeEnum.AG,
            CompanyTypeEnum.KGaA,
            CompanyTypeEnum.AUSLAENDISCHE_RECHTSFORM,
            CompanyTypeEnum.OHG,
        ]:
            # "Kapitalgesellschaft" is preferred, "Personengesellschaft" is the fallback.
            section = (
                additions["Kapitalgesellschaft"]
                if "Kapitalgesellschaft" in additions
                else additions.get("Personengesellschaft")
            )
            if isinstance(section, dict):
                if "Zusatz_GmbH" in section:
                    capital_type = "Stammkapital"
                    capital = self._dig(section, "Zusatz_GmbH", "Stammkapital")
                elif "Zusatz_Aktiengesellschaft" in section:
                    capital_type = "Grundkapital"
                    capital = self._dig(
                        section, "Zusatz_Aktiengesellschaft", "Grundkapital", "Hoehe"
                    )
        # All remaining company types (including None) carry no capital entry
        # that is read here; capital_type stays None for them.

        # Catch missing sections as well as entries having the dict but with null values
        if (
            capital_type is None
            or not isinstance(capital, dict)
            or not all(capital.values())
        ):
            return None
        return Capital(
            value=float(capital["Zahl"]),
            currency=CurrencyEnum(capital["Waehrung"]),
            type=CapitalTypeEnum(capital_type),
        )

    def map_business_purpose(self, data: dict) -> str | None:
        """Extracts the "Geschäftszweck" from a given Unternehmensregister export.

        Args:
            data (dict): Data export

        Returns:
            str | None: Business purpose if found
        """
        try:
            return self._register_data(data)["Basisdaten_Register"][
                "Gegenstand_oder_Geschaeftszweck"
            ]
        except KeyError:
            return None

    def map_founding_date(self, data: dict) -> str | None:
        """Extracts the founding date from a given Unternehmensregister export.

        The string form of the whole export is searched for, in this order,
        "Tag der ersten Eintragung" and "Gesellschaftsvertrag vom". A phrase is
        only trusted when it occurs exactly once. The "Gruendungsmetadaten"
        entry is the last resort.

        Args:
            data (dict): Data export

        Returns:
            str | None: Founding date if found
        """
        text = str(data)
        first_entries = re.findall(
            r".Tag der ersten Eintragung:(\\n| )?(\d{1,2}\.\d{1,2}\.\d{2,4})", text
        )
        if len(first_entries) == 1:
            # Each hit is a (separator, date) tuple.
            return transform_date_to_iso(first_entries[0][1])

        contracts = re.findall(
            r".Gesellschaftsvertrag vom (\d{1,2}\.\d{1,2}\.\d{2,4})", text
        )
        if len(contracts) == 1:
            return transform_date_to_iso(contracts[0])

        base_data = self._register_data(data)["Basisdaten_Register"]
        if "Gruendungsmetadaten" in base_data:
            return extract_date_from_string(
                base_data["Gruendungsmetadaten"]["Gruendungsdatum"]
            )
        # No reliable answer
        return None

    def map_company_id(self, data: dict) -> CompanyID:
        """Retrieve Company ID from export.

        The district court is taken from the second "Beteiligung" entry, which
        may be given as an organisation or as a natural person.

        Args:
            data (dict): Data export

        Returns:
            CompanyID: ID of the company
        """
        proceedings = self._proceedings(data)
        hr_number = proceedings["Instanzdaten"]["Aktenzeichen"]
        court = proceedings["Beteiligung"][1]["Beteiligter"]
        if "Organisation" in court:
            court_name = court["Organisation"]["Bezeichnung"]["Bezeichnung_Aktuell"]
            court_city = court["Organisation"]["Sitz"]["Ort"]
        else:
            person = court["Natuerliche_Person"]
            court_name = person["Voller_Name"]["Nachname"]
            # Same address handling as for stakeholders: the address may be a list.
            court_city = self._person_city(person)
        return CompanyID(
            hr_number=hr_number,
            district_court=DistrictCourt(name=court_name, city=court_city),
        )

    def map_last_update(self, data: dict) -> str:
        """Extract last update date from export.

        Args:
            data (dict): Unternehmensregister export

        Returns:
            str: Last update date
        """
        return self._register_data(data)["Auszug"]["letzte_Eintragung"]

    def map_unternehmensregister_json(self, data: dict) -> Company:
        """Processes the Unternehmensregister structured export to a Company by using several helper methods.

        Args:
            data (dict): Data export

        Returns:
            Company: Transformed data
        """
        result: dict = {"relationships": []}

        result["id"] = self.map_company_id(data)
        result["name"] = self.name_from_beteiligung(data)

        result["location"] = self.loc_from_beteiligung(data)
        result["last_update"] = self.map_last_update(data)

        result["company_type"] = self.map_rechtsform(result["name"], data)
        result["capital"] = self.map_capital(data, result["company_type"])
        result["business_purpose"] = self.map_business_purpose(data)
        result["founding_date"] = self.map_founding_date(data)

        # Entries 0 and 1 are the company itself and the district court.
        for participation in self._proceedings(data)["Beteiligung"][2:]:
            relationship = self.parse_stakeholder(participation)
            # Entries that could not be processed must not end up as None
            # placeholders in the relationship list.
            if relationship is not None:
                result["relationships"].append(relationship)
        result = map_co_relation(result)
        return Company(**result)
elektronischen Rechtsverkehr + XJustiz + xjustiz + urn:xoev-de:blk-ag-it-standards:standard:xjustiz + XJustiz ist der bundesweit einheitliche Standard für den Austausch strukturierter elektronischer Informationen mit der Justiz. + + + 3.3.1 + XJustiz beschreibt ein standardisiertes Datenaustauschformat für die elektronische Kommunikation innerhalb und mit der Justizverwaltung. + 1.7.1 + 2.3.0 + 3.0.1 + 19.0 SP3 + MagicDraw + + + + + + + + + GDS.Rollenbezeichnung + GDS.Rollenbezeichnung + gds.rollenbezeichnung + urn:xoev-de:xjustiz:codeliste:gds.rollenbezeichnung + Codeliste der verschiedenen Rollenbezeichnungen. + BLK-AG IT-Standards in der Justiz + AG IT-Standards + + + 3.3 + 1.1 + + + + Schlüssel + string + true + required + true + + + Wert + string + false + required + false + + + Aufgeführte Fachmodule nutzen ausschließlich die für sie gekennzeichneten Werte + string + false + optional + false + + + code + + + + + + + + + + + + + + + + + GDS.Rollenbezeichnung + GDS.Rollenbezeichnung + gds.rollenbezeichnung + urn:xoev-de:xjustiz:codeliste:gds.rollenbezeichnung + Codeliste der verschiedenen Rollenbezeichnungen. 
+ BLK-AG IT-Standards in der Justiz + AG IT-Standards + + + 3.3 + 1.1 + + + + Schlüssel + string + true + required + true + + + Wert + string + false + required + false + + + Aufgeführte Fachmodule nutzen ausschließlich die für sie gekennzeichneten Werte + string + false + optional + false + + + + + + + + + Abwesenheitspfleger(in) + INSO + + + + + + + Aliasidentität + STRAF + + + + + + + Angehörige(r) + STRAF + + + + + + + Angeklagte(r) + STRAF + + + + + + + Angeschuldigte(r) + STRAF + + + + + + + Annehmende(r) + + + + + + + Anschlussberufungsbeklagte(r) + + + + + + + Anschlussberufungskläger(in) + + + + + + + Anschlussbeschwerdeführer(in) + + + + + + + Anschlussbeschwerdegegner(in) + + + + + + + Anschlussrechtsbeschwerdeführer(in) + + + + + + + Anschlussrechtsbeschwerdegegner(in) + + + + + + + Anschlussrevisionsbeklagte(r) + + + + + + + Anschlussrevisionskläger(in) + + + + + + + Antragsgegner(in) + INSO,VAG,ZSSR,STRAF + + + + + + + Antragsteller(in) + INSO,VAG,ZSSR,STRAF + + + + + + + Anzeigeerstatter(in) + STRAF + + + + + + + Anzunehmende(r) + + + + + + + Arrestgläubiger(in) + + + + + + + Arrestschuldner(in) + + + + + + + Aufsichtsbehörde + + + + + + + Ausschlagende(r) + + + + + + + Beamter (Beamtin) + + + + + + + Behörde + INSO,STRAF + + + + + + + Beigeladene(r) + + + + + + + Beistand + INSO,STRAF + + + + + + + Bekannte(r) + + + + + + + Beklagte(r) + STRAF + + + + + + + Berufungsbeklagte(r) + + + + + + + Berufungskläger(in) + + + + + + + Beschuldigte(r) + STRAF + + + + + + + Beschwerdeführer(in) + STRAF + + + + + + + Beschwerdegegner(in) + STRAF + + + + + + + Besucher(in) + + + + + + + Betreibende(r) Gläubige(r) + + + + + + + Betreuer(in) + INSO,STRAF + + + + + + + Betreute(r) + INSO,STRAF + + + + + + + Betreuungsbehörde + + + + + + + Betroffene(r) + EHUG,STRAF + + + + + + + Bevollmächtigte(r) + EHUG,INSO + + + + + + + Bewährungshelfer(in) + STRAF + + + + + + + Beweisanwalt (-anwältin) + + + + + + + Bruder (Schwester) + + + + + + + Bundeswehrdisziplinaranwalt 
(-anwältin) + + + + + + + Bußgeldempfänger(in) + STRAF + + + + + + + Cousin(e) + + + + + + + Dienstvorgesetzte(r) + + + + + + + director + INSO + + + + + + + Dolmetscher(in) + STRAF + + + + + + + Dritte(r) + + + + + + + Drittschuldner(in) + INSO,STRAF + + + + + + + Drittwiderbeklagte(r) + + + + + + + Drittwiderkläger(in) + + + + + + + Ehemann (Ehefrau) + + + + + + + Eigentümer(in) + + + + + + + Eingetragene(r) Lebenspartner(in) + STRAF + + + + + + + Einleitungsbehörde + STRAF + + + + + + + Eltern + INSO + + + + + + + Elternteil + INSO + + + + + + + Enkel(in) + + + + + + + Erbe (Erbin) + INSO + + + + + + + Erbe (Erbin) (ausschlagend) + + + + + + + Erbe (Erbin) (vorverstorben) + + + + + + + Erblasser(in) + INSO + + + + + + + Ergänzungspfleger(in) + INSO,STRAF + + + + + + + Erinnerungsführer(in) + INSO + + + + + + + Erinnerungsgegner(in) + INSO + + + + + + + Ersatzbetreuer(in) + INSO + + + + + + + Ersteher(in) + + + + + + + Erwerber(in) + + + + + + + Erziehungsberechtigte(r) + INSO,STRAF + + + + + + + Frühere(r) Ehegatte (Ehegattin) + + + + + + + Frühere(r) Beklagte(r) + + + + + + + Frühere(r) Beteiligte(r) + + + + + + + Frühere(r) Gläubiger(in) + + + + + + + Frühere(r) Kläger(in) + + + + + + + Frühere(r) Soldat(in) + + + + + + + Gegenvormund + + + + + + + Generalbundesanwalt (-anwältin) + STRAF + + + + + + + Gericht + STRAF + + + + + + + Gerichtsvollzieher(in) + ZPO,STRAF + + + + + + + Geschädigte(r) + STRAF + + + + + + + Geschäftsführende(r) Gesellschafter(in) + INSO,STRAF + + + + + + + Geschäftsführer(in) + INSO,STRAF + + + + + + + Gesetzliche(r) Erbe (Erbin) + + + + + + + Gesetzliche(r) Vertreter(in) + EHUG,INSO,ZSSR,STRAF + + + + + + + Gläubiger(in) + EZOLL,INSO,ZPO + + + + + + + Großeltern + + + + + + + Großvater (Großmutter) + + + + + + + Hauptbevollmächtigte(r) + EHUG,INSO + + + + + + + Hoferbe (Hoferbin) + + + + + + + Inhaber(in) der Firma + + + + + + + Insolvenzverwalter(in) + INSO,STRAF + + + + + + + Jugendamt + + + + + + + Kammer + + + + + + + 
Kammermitglied + + + + + + + Kind + + + + + + + Kläger(in) + STRAF + + + + + + + Kontrollbetreuer(in) + + + + + + + Korrespondenzanwalt (-anwältin) + + + + + + + Kostenschuldner(in) + STRAF + + + + + + + Landwirtschaftsrichter(in) + + + + + + + Lebenspartner(in) + + + + + + + Liquidator(in) + INSO + + + + + + + Minderjährige(r) + + + + + + + Mitvormund + + + + + + + Mündel + + + + + + + Nachbesserungsgläubiger(in) + + + + + + + Nachlasspfleger(in) + INSO + + + + + + + Nachlassverwalter(in) + INSO + + + + + + + Nebenkläger(in) + STRAF + + + + + + + Neffe (Nichte) + + + + + + + Nicht verwandt + + + + + + + Onkel (Tante) + + + + + + + Opfer + STRAF + + + + + + + Pächter(in) + + + + + + + Pflegeeltern + + + + + + + Pfleger(in) + INSO + + + + + + + Pfleger(in) für das Sammelvermögen + + + + + + + Pfleger(in) für die Leibesfrucht + + + + + + + Pflegevater (Pflegemutter) des Mündels + + + + + + + Pflegling + + + + + + + Pflichtverteidiger(in) + STRAF + + + + + + + Polizei + STRAF + + + + + + + Privatbeklagte(r) + + + + + + + Privatkläger(in) + + + + + + + Prozessbevollmächtigte(r) + EHUG,INSO,ZSSR,STRAF + + + + + + + Prozesskostenhilfe-Anwalt (-Anwältin) + INSO + + + + + + + Prozesskostenhilfe-Korrespondenzanwalt (-anwältin) + INSO + + + + + + + Rechtsanwalt (-anwältin) + INSO,STRAF + + + + + + + Rechtsbeistand + INSO,STRAF + + + + + + + Rechtsbeschwerdeführer(in) + + + + + + + Rechtsbeschwerdegegner(in) + + + + + + + Revisionsbeklagte(r) + + + + + + + Revisionskläger(in) + + + + + + + Sachbearbeiter(in) + INSO + + + + + + + Sachverständige(r) + INSO,STRAF + + + + + + + Schuldner(in) + EZOLL,INSO,VSTR,ZPO + + + + + + + Schwager (Schwägerin) + + + + + + + Schwiegersohn (Schwiegertochter) + + + + + + + Schwiegervater (Schwiegermutter) + + + + + + + Sohn (Tochter) + + + + + + + Soldat(in) + + + + + + + Sonstige(r) Beteiligte(r) + + + + + + + Sonstige(r) Vertreter(in) + EHUG,INSO + + + + + + + Staatsanwaltschaft + STRAF + + + + + + + Stiefeltern + + + + + + + Stiefvater 
(Stiefmutter) + + + + + + + Streithelfer(in) Beklagte(r) + + + + + + + Streithelfer(in) Kläger(in) + + + + + + + Streitverkündete(r) Beklagte(r) + + + + + + + Streitverkündete(r) Kläger(in) + + + + + + + Terminsbevollmächtigte(r) + + + + + + + Testamentsvollstrecker(in) + + + + + + + Testator(in) + + + + + + + Übernehmer(in) + + + + + + + Unterbevollmächtigte(r) + EHUG,INSO + + + + + + + Ur-Enkel(in) + + + + + + + Vater (Mutter) + INSO + + + + + + + Veräußerer (Veräußerin) + + + + + + + Verfahrensbevollmächtigte(r) + EHUG,INSO + + + + + + + Verfahrenskostenhilfe-Anwalt(-Anwältin) + INSO + + + + + + + Verfahrenskostenhilfe-Korrespondenzanwalt (-anwältin) + INSO + + + + + + + Verfahrenspfleger(in) + INSO + + + + + + + Verfahrensvertreter(in) (§787 ZPO) + EHUG,INSO + + + + + + + Verfügungsbeklagte(r) + + + + + + + Verfügungskläger(in) + + + + + + + Verkehrsanwalt (-anwältin) + + + + + + + Verlobte(r) + + + + + + + Vermächtnisnehmer(in) + + + + + + + Vermieter(in) + + + + + + + Verpächter(in) + + + + + + + Versorgungsträger(in) + VAG + + + + + + + Verteidiger(in) + + + + + + + Vertreter(in) der Interessen des Ausgleichsfonds + + + + + + + Vertreter(in) der Staatskasse + + + + + + + Vertreter(in) des Bundesinteresses beim Bundesverwaltungsgericht + + + + + + + Vertreter(in) des öffentlichen Interesses + + + + + + + Verurteilte(r) + STRAF + + + + + + + Verwalter(in) der Wohnungseigentümergemeinschaft + INSO + + + + + + + Verwaltungsbehörde + STRAF + + + + + + + Vollstreckungsgläubiger(in) + INSO + + + + + + + Vollstreckungsschuldner(in) + INSO + + + + + + + Vorläufige(r) Betreuer(in) + INSO + + + + + + + Vormund + INSO + + + + + + + Vorstand + INSO + + + + + + + Vorsorgebevollmächtigte(r) + + + + + + + Wahlverteidiger(in) + STRAF + + + + + + + Widerbeklagte(r) + + + + + + + Widerkläger(in) + + + + + + + Wiederaufnahmebeklagte(r) + + + + + + + Wiederaufnahmekläger(in) + + + + + + + Zahlungs- und Auflagenempfänger(in) + + + + + + + Zeuge (Zeugin) + INSO,STRAF + + + + + + + 
Zeugenbeistand + + + + + + + Zulassungsantragsgegner(in) + + + + + + + Zulassungsantragsteller(in) + + + + + + + Zustellungsbevollmächtigte(r) + EHUG,INSO,STRAF + + + + + + + Zustellungsvertreter(in) (§6 ZVG) + EHUG + + + + + + + Notar(in) + + + + + + + Auskunftsempfänger(in) + + + + + + + Melder(in) + + + + + + + Verwahrstelle + + + + + + + Aussteller(in) + + + + + + + Berechtigte(r) + + + + + + + Berechtigte(r) an einem Recht + + + + + + + Einreicher(in) + + + + + + + Erbbauberechtigte(r) + + + + + + + Finanzamt + + + + + + + Grundbuchvertreter(in) + + + + + + + Insolvenzgericht + + + + + + + Mitteilungsempfänger(in) + + + + + + + Nacherbe (Nacherbin) + + + + + + + Rechnungsempfänger(in) + + + + + + + Veranlasser(in) + + + + + + + Versteigerungsabteilung + + + + + + + Vertretungsberechtigte(r) + EHUG + + + + + + + Zweitschuldner(in) + INSO + + + + + + + Vertreter(in) + EHUG,INSO + + + + + + + Arbeitgeber(in) + EZOLL,ZPO + + + + + + + RV-Träger(in) + EZOLL,ZPO + + + + + + + Vollstreckungsstelle + EZOLL + + + + + + + Abkömmling + + + + + + + Kreditnehmer(in) + + + + + + + Neu vorzutragende(r) Eigentümer(in) + + + + + + + Notariatsverwalter(in) + + + + + + + Notarvertreter(in) + + + + + + + Partei kraft Amtes + INSO + + + + + + + Sequester + INSO + + + + + + + Treuhänder(in) + INSO + + + + + + + Zustimmende(r) + + + + + + + Gläubigervertreter(in) + INSO + + + + + + + Schuldnervertreter(in) + + + + + + + Zahlungsempfänger(in) + + + + + + + Anteilsinhaber(in) + INSO + + + + + + + Antragsteller(in) -Eröffnung + INSO + + + + + + + Debitor(in) + INSO + + + + + + + Gesellschafter(in) + INSO + + + + + + + Handlungsbevollmächtigte(r) + INSO + + + + + + + Mitglied einer Gesamt-Anteilsinhaberschaft + INSO + + + + + + + Mitglied einer Gläubigergemeinschaft + INSO + + + + + + + Mitreeder(in) + INSO + + + + + + + Partner(in) + INSO + + + + + + + Persönlich haftende(r) Gesellschafter(in) + INSO + + + + + + + Prozesspfleger(in) + + + + + + + Sachwalter(in) + INSO + + + + + + + 
Treuhänder(in) (Wohlverhaltensperiode) + INSO + + + + + + + Vermögensträger(in) + INSO + + + + + + + Vorläufige(r) Insolvenzverwalter(in) + INSO + + + + + + + Vorläufige(r) Treuhänder(in) + INSO + + + + + + + Sondersachwalter(in) + INSO + + + + + + + Sonderinsolvenzverwalter(in) + INSO + + + + + + + Vorläufige(r) Sachwalter(in) + INSO + + + + + + + Abwickler(in) + INSO + + + + + + + Übernehmender Rechtsträger + + + + + + + Aufsichtsrat (-rätin) + + + + + + + Besondere(r) Vertreter(in) nach § 30 BGB + EHUG + + + + + + + Betriebsleiter(in) + + + + + + + Empfangsberechtigte(r) + + + + + + + Geschäftsführende(r) Direktor(in) + + + + + + + Geschäftsleiter(in) + + + + + + + Gründer(in) + + + + + + + Gründungsprüfer(in) + + + + + + + Hauptniederlassung + + + + + + + Inhaber(in) + + + + + + + Kommanditist(in) + INSO + + + + + + + Konkursverwalter(in) + + + + + + + Mitglied des Leitungsorgans + + + + + + + Mitglied EWIV + + + + + + + Nachgründungsprüfer(in) + + + + + + + Nachtragsabwickler(in) + + + + + + + Nachtragsliquidator(in) + INSO + + + + + + + Notgeschäftsführer(in) + INSO + + + + + + + Notliquidator(in) + INSO + + + + + + + Notvorstand + INSO + + + + + + + Prokurist(in) + + + + + + + Rechtsträger(in) + + + + + + + Registergericht + + + + + + + Sacheinlagenprüfer(in) + + + + + + + Ständige(r) Vertreter(in) für die Zweigniederlassung + INSO + + + + + + + Übertragender Rechtsträger + + + + + + + Vergleichsverwalter(in) + + + + + + + Verwaltungsrat (-rätin) + + + + + + + Vorstandsvorsitzende(r) + + + + + + + Zweigniederlassung + + + + + + + Vertreter(in) des Klägers/der Klägerin + + + + + + + Vertreter(in) des/der Beklagten + + + + + + + Bewährungshilfe + STRAF + + + + + + + Gerichtshilfe + STRAF + + + + + + + Justizvollzug + STRAF + + + + + + + Pseudoname + STRAF + + + + + + + Gesetzliche(r) Vertreter(in) des Gläubigers/der Gläubigerin + + + + + + + Gesetzliche(r) Vertreter(in) des Schuldners/der Schuldnerin + + + + + + + Bevollmächtigte(r) des Gläubigers/der 
Gläubigerin + + + + + + + Bevollmächtigte(r) des Schuldners/der Schuldnerin + + + + + + + Fahrzeughalter(in) + STRAF + + + + + + + Frühere(r) Lebenspartner(in) + + + + + + + Sicherungsverwalter(in) + + + + + + + Zwangsverwalter(in) + + + + + + + Mieter(in) + + + + + + + Bürge (Bürgin) + + + + + + + Meistbietende(r) + + + + + + + Abschlussprüfer(in) + + + + + + + Antragstellervertreter(in) + + + + + + + Aufsichtsratsvorsitzende(r) + + + + + + + Berufskammer + + + + + + + Betroffenenvertreter(in) + + + + + + + Bürgermeister(in) + + + + + + + Eingliederungsbeteiligte(r) + + + + + + + Formwechselnder Rechtsträger + + + + + + + Gewerbeamt + + + + + + + Inhaber(in) (nicht eingetragen) + + + + + + + Kostenempfänger(in) + + + + + + + Nachlassgericht + + + + + + + Sonderprüfer(in) + + + + + + + Sonstige(r) gerichtlich bestellte(r) Vertreter(in) + + + + + + + Sonstige(r) gesetzliche(r) Vertreter(in) BGB + + + + + + + Sonstige(r) organschaftliche(r) Vertreter(in) HRB + + + + + + + Standardkostenschuldner(in) + + + + + + + Übernahmeschuldner(in) + + + + + + + Unternehmensvertragsbeteiligte(r) + + + + + + + Vertreter(in) des persönlich haftenden Gesellschafters + + + + + + + Werkleiter(in) + + + + + + + Mehrfachsitz + + + + + + + Mitglied VR + + + + + + + Mitglied e. BGB-Gesellschaft als Abwicklerin + + + + + + + Mitglied e. BGB-Gesellschaft als ges. Vertreterin + + + + + + + Mitglied e. BGB-Gesellschaft als Kommanditistin + + + + + + + Mitglied e. BGB-Gesellschaft als Liquidatorin + + + + + + + Mitglied e. BGB-Gesellschaft als Mitglied e. EWIV + + + + + + + Mitglied e. BGB-Gesellschaft als phG + + + + + + + Mitglied e. Erbengemeinschaft als ges. Vertreterin + + + + + + + Mitglied e. Erbengemeinschaft als Inhaberin + + + + + + + Mitglied e. Erbengemeinschaft als Kommanditistin + + + + + + + Mitglied e. Erbengemeinschaft als Mitglied e. 
class RoleMapper:
    """Translate Unternehmensregister v3 role codes into ``RelationshipRoleEnum`` members.

    The code table is built once per instance from the XJustiz code list (XSD)
    located in the ``assets`` directory next to this module.
    """

    # Shared instance, created on first use by ``mapper()``.
    singleton = None

    def __init__(self) -> None:
        """Read the XSD code list and build the code -> label lookup table."""
        # TODO Automated file retrieval
        schema_file = os.path.join(
            os.path.dirname(Path(__file__)),
            "assets",
            "xjustiz_0040_cl_rollenbezeichnung_3_3.xsd",
        )
        with open(schema_file, encoding="utf-8") as handle:
            schema = xmltodict.parse(handle.read())

        enumerations = schema["xs:schema"]["xs:simpleType"]["xs:restriction"][
            "xs:enumeration"
        ]
        # Each enumeration entry carries the role code in "@value" and its
        # label in the nested "wert" element.
        self.dictionary = {
            item["@value"]: item["xs:annotation"]["xs:appinfo"]["wert"]
            for item in enumerations
        }

    @staticmethod
    def mapper() -> "RoleMapper":
        """Return the shared RoleMapper, creating it on the first call.

        Returns:
            RoleMapper: Singleton instance
        """
        instance = RoleMapper.singleton
        if instance is None:
            instance = RoleMapper()
            RoleMapper.singleton = instance
        return instance

    def get(self, key: str) -> RelationshipRoleEnum:
        """Look up the role enum member for a role code.

        Args:
            key (str): Role code as found in the export (e.g. "285")

        Returns:
            RelationshipRoleEnum: Mapped value
        """
        role_label = self.dictionary[key]
        return RelationshipRoleEnum(role_label)
def _role_of_stakeholder(self, data: dict) -> RelationshipRoleEnum:
    """Map the role code of a "Beteiligung" entry to its RelationshipRoleEnum."""
    return self.map_role_id_to_enum(
        data["tns:rolle"]["tns:rollenbezeichnung"]["code"]
    )

def _city_of_person(self, person: dict) -> str:
    """Return the city of a natural person's address (the last one if several are listed)."""
    address = person["tns:anschrift"]
    if isinstance(address, list):
        address = address[-1]
    return address["tns:ort"]

def _location_of_organisation(self, organisation: dict) -> Location:
    """Build an organisation's location from its address, falling back to its seat."""
    address = (
        organisation["tns:anschrift"]
        if "tns:anschrift" in organisation
        else organisation["tns:sitz"]
    )
    return Location(
        **{
            "city": address["tns:ort"],
            "street": address["tns:strasse"] if "tns:strasse" in address else None,
            "house_number": address["tns:hausnummer"]
            if "tns:hausnummer" in address
            else None,
            "zip_code": address["tns:postleitzahl"]
            if "tns:postleitzahl" in address
            else None,
        }
    )

def parse_stakeholder(self, data: dict) -> CompanyRelationship | None:
    """Extract the company stakeholder/relation from a single "Beteiligung".

    Three shapes are handled:

    * a natural person without a first name, which is treated as a company
      (e.g. a company serving as "Kommanditist"),
    * a natural person with first and last name,
    * an organisation.

    Args:
        data (dict): A single "Beteiligung" entry of the data export

    Returns:
        CompanyRelationship | None: Relationship if it could be processed,
            None if the entry is neither a natural person nor an organisation
    """
    participant = data["tns:beteiligter"]["tns:auswahl_beteiligter"]

    if "tns:natuerlichePerson" in participant:
        person = participant["tns:natuerlichePerson"]
        full_name = person["tns:vollerName"]

        # It's a Company serving as a "Kommanditist" or similar
        if "tns:vorname" not in full_name:
            return CompanyToCompanyRelationship(
                **{  # type: ignore
                    "name": remove_traling_and_leading_quotes(
                        full_name["tns:nachname"]
                    ),
                    "location": Location(**{"city": self._city_of_person(person)}),
                    "role": self._role_of_stakeholder(data),
                    "type": CompanyRelationshipEnum.COMPANY,
                }
            )

        return PersonToCompanyRelationship(
            **{  # type: ignore
                "name": PersonName(
                    **{
                        "firstname": full_name["tns:vorname"],
                        "lastname": full_name["tns:nachname"],
                    }
                ),
                "date_of_birth": self.parse_date_of_birth(data),
                "location": Location(**{"city": self._city_of_person(person)}),
                "role": self._role_of_stakeholder(data),
                "type": CompanyRelationshipEnum.PERSON,
            }
        )

    if "tns:organisation" in participant:
        organisation = participant["tns:organisation"]
        # Build the location before the relationship so that lookup errors
        # surface in the same order as before the refactoring.
        location = self._location_of_organisation(organisation)
        return CompanyToCompanyRelationship(
            **{  # type: ignore
                "role": self._role_of_stakeholder(data),
                "name": remove_traling_and_leading_quotes(
                    organisation["tns:bezeichnung"]["tns:bezeichnung.aktuell"]
                ),
                "location": location,
                "type": CompanyRelationshipEnum.COMPANY,
            }
        )

    return None
+ + Args: + data (dict): Data export + + Returns: + str: Company name + """ + path = [ + "tns:grunddaten", + "tns:verfahrensdaten", + "tns:beteiligung", + 0, + "tns:beteiligter", + "tns:auswahl_beteiligter", + "tns:organisation", + "tns:bezeichnung", + "tns:bezeichnung.aktuell", + ] + name = traversal(data, path) + return remove_traling_and_leading_quotes(name) + + def map_rechtsform(self, company_name: str, data: dict) -> CompanyTypeEnum | None: + """Extracts the company type from a given Unternehmensregister export. + + Args: + company_name (str): Name of the company as a fallback solution + data (dict): Data export + + Returns: + CompanyTypeEnum | None: Company type if found + """ + try: + path = [ + "tns:fachdatenRegister", + "tns:basisdatenRegister", + "tns:rechtstraeger", + "tns:angabenZurRechtsform", + "tns:rechtsform", + "code", + ] + return CompanyTypeEnum(traversal(data, path)) + except Exception: + if ( + company_name.endswith("GmbH") + or company_name.endswith("UG") + or company_name.endswith("UG (haftungsbeschränkt)") + ): + return CompanyTypeEnum("Gesellschaft mit beschränkter Haftung") + if company_name.endswith("SE"): + return CompanyTypeEnum("Europäische Aktiengesellschaft (SE)") + if company_name.endswith("KG"): + return CompanyTypeEnum("Kommanditgesellschaft") + return None + + def map_capital( # noqa: PLR0912 + self, data: dict, company_type: CompanyTypeEnum + ) -> Capital | None: + """Extracts the company capital from the given Unternehmensregister export. 
+ + Args: + data (dict): Data export + company_type (CompanyTypeEnum): Type of company (e.g., 'Gesellschaft mit beschränkter Haftung') + + Returns: + Capital | None: Company Capital if found + """ + # Early return + if "tns:auswahl_zusatzangaben" not in data["tns:fachdatenRegister"]: + return None + capital: dict = {"tns:zahl": 0.0, "tns:waehrung": {"code": None}} + if ( + company_type == CompanyTypeEnum.KG + and "tns:personengesellschaft" + in data["tns:fachdatenRegister"]["tns:auswahl_zusatzangaben"] + ): + capital_type = "Hafteinlage" + base = data["tns:fachdatenRegister"]["tns:auswahl_zusatzangaben"][ + "tns:personengesellschaft" + ]["tns:zusatzKG"]["tns:datenKommanditist"] + if isinstance(base, list): + for entry in base: + # TODO link to persons using Ref_Rollennummer then extract ["Hafteinlage"] as below + capital["tns:zahl"] = capital["tns:zahl"] + float( + entry["tns:hafteinlage"]["tns:zahl"] + ) + capital["tns:waehrung"]["code"] = entry["tns:hafteinlage"][ + "tns:waehrung" + ]["code"] + elif isinstance(base, dict): + capital = base["tns:hafteinlage"] + elif company_type in [ + CompanyTypeEnum.GMBH, + CompanyTypeEnum.SE, + CompanyTypeEnum.AG, + CompanyTypeEnum.KGaA, + CompanyTypeEnum.AUSLAENDISCHE_RECHTSFORM, + CompanyTypeEnum.OHG, + ]: + if ( + "tns:kapitalgesellschaft" + not in data["tns:fachdatenRegister"]["tns:auswahl_zusatzangaben"] + ): + base = data["tns:fachdatenRegister"]["tns:auswahl_zusatzangaben"][ + "tns:personengesellschaft" + ] + else: + base = data["tns:fachdatenRegister"]["tns:auswahl_zusatzangaben"][ + "tns:kapitalgesellschaft" + ] + if "tns:zusatzGmbH" in base: + capital_type = "Stammkapital" + capital = base["tns:zusatzGmbH"]["tns:stammkapital"] + elif "tns:zusatzAktiengesellschaft" in base: + capital_type = "Grundkapital" + capital = base["tns:zusatzAktiengesellschaft"]["tns:grundkapital"][ + "tns:hoehe" + ] + elif company_type in [ + CompanyTypeEnum.EINZELKAUFMANN, + CompanyTypeEnum.EG, + CompanyTypeEnum.PARTNERSCHAFT, + 
def map_hr_number(self, data: dict) -> str:
    """Read the commercial register (HR) number from an Unternehmensregister export.

    A structured file reference is rendered as "<register code> <running number>";
    otherwise the free-text file reference is returned unchanged.

    Args:
        data (dict): Data export

    Raises:
        KeyError: If neither a structured nor a free-text file reference exists

    Returns:
        str: HR number
    """
    file_reference = data
    for key in (
        "tns:grunddaten",
        "tns:verfahrensdaten",
        "tns:instanzdaten",
        "tns:aktenzeichen",
        "tns:auswahl_aktenzeichen",
    ):
        file_reference = file_reference[key]

    if "tns:aktenzeichen.strukturiert" not in file_reference:
        if "tns:aktenzeichen.freitext" not in file_reference:
            raise KeyError("Could not find HR number")
        return file_reference["tns:aktenzeichen.freitext"]

    structured = file_reference["tns:aktenzeichen.strukturiert"]
    register_code = structured["tns:register"]["code"]
    running_number = structured["tns:laufendeNummer"]
    return f"{register_code} {running_number}"
def map_unternehmensregister_json(self, data: dict) -> Company:
    """Transform a structured Unternehmensregister export into a Company.

    The export is unwrapped from its single root element, the base data is
    collected by the ``map_*`` / ``*_from_beteiligung`` helpers, and the
    "Beteiligung" entries from index 2 onwards are parsed as relationships
    (entries 0 and 1 are read by the helpers for the company itself and the
    district court).

    Args:
        data (dict): Data export

    Returns:
        Company: Transformed data
    """
    root_key = list(data.keys())[0]
    data = data[root_key]
    result: dict = {"relationships": []}

    result["id"] = self.map_company_id(data)
    result["name"] = self.name_from_beteiligung(data)

    result["location"] = self.loc_from_beteiligung(data)
    result["last_update"] = self.map_last_update(data)

    result["company_type"] = self.map_rechtsform(result["name"], data)
    result["capital"] = self.map_capital(data, result["company_type"])
    result["business_purpose"] = self.map_business_purpose(data)
    result["founding_date"] = self.map_founding_date(data)

    stakeholders = data["tns:grunddaten"]["tns:verfahrensdaten"]["tns:beteiligung"]
    for entry in stakeholders[2:]:
        relationship = self.parse_stakeholder(entry)
        # parse_stakeholder returns None for entries it cannot interpret;
        # do not store those as (null) relationships.
        if relationship is not None:
            result["relationships"].append(relationship)
    result = map_co_relation(result)
    return Company(**result)
b/tests/apps/find_missing_companies_test.py new file mode 100644 index 0000000..f985763 --- /dev/null +++ b/tests/apps/find_missing_companies_test.py @@ -0,0 +1,31 @@ +"""Testing find_missing_companies.py.""" +from unittest.mock import Mock, patch + +from aki_prj23_transparenzregister.apps import find_missing_companies + + +def test_import_find_missing_companies() -> None: + assert find_missing_companies + + +@patch("aki_prj23_transparenzregister.apps.find_missing_companies.MongoConnector") +@patch("aki_prj23_transparenzregister.apps.find_missing_companies.CompanyMongoService") +@patch( + "aki_prj23_transparenzregister.apps.find_missing_companies.load.load_directory_to_mongo" +) +@patch("aki_prj23_transparenzregister.apps.find_missing_companies.connector") +def test_work( + connector_mock: Mock, + load_directory_to_mongo_mock: Mock, + company_mongo_service_mock: Mock, + mongo_connector_mock: Mock, +) -> None: + config_provider_mock = Mock() + config_provider_mock.session.return_value = Mock() + + load_directory_to_mongo_mock.return_value = 42 + + find_missing_companies.work( + "Atos IT-Dienstleistung und Beratung GmbH", config_provider_mock + ) + assert True diff --git a/tests/utils/data_extraction/unternehmensregister/extract_test.py b/tests/utils/data_extraction/unternehmensregister/extract_test.py index ca2fca5..0e13273 100644 --- a/tests/utils/data_extraction/unternehmensregister/extract_test.py +++ b/tests/utils/data_extraction/unternehmensregister/extract_test.py @@ -86,4 +86,4 @@ def test_wait_for_download_condition() -> None: def test_scrape() -> None: with TemporaryDirectory(dir="./") as temp_dir: - extract.scrape("GEA Farm Technologies GmbH", [temp_dir]) + extract.scrape("GEA Farm Technologies GmbH", temp_dir) diff --git a/tests/utils/data_extraction/unternehmensregister/load_test.py b/tests/utils/data_extraction/unternehmensregister/load_test.py index 6f6b58b..dd71859 100644 --- a/tests/utils/data_extraction/unternehmensregister/load_test.py +++ 
b/tests/utils/data_extraction/unternehmensregister/load_test.py @@ -1,4 +1,8 @@ """Test load utils from Unternehmensregister.""" +import json +import tempfile +from unittest.mock import Mock, patch + from aki_prj23_transparenzregister.utils.data_extraction.unternehmensregister import ( load, ) @@ -6,3 +10,37 @@ from aki_prj23_transparenzregister.utils.data_extraction.unternehmensregister im def test_smoke() -> None: assert load + + +@patch( + "aki_prj23_transparenzregister.utils.data_extraction.unternehmensregister.load.CompanyMongoService" +) +def test_load_directory_to_mongo(mock_company_service: Mock) -> None: + mock_company_service.migration_of_base_data.return_value = None + with tempfile.TemporaryDirectory() as tmp_dir: + with open(f"{tmp_dir}/test.json", "w") as f: + mock_company = { + "id": { + "district_court": { + "name": "Amtsgericht Hamburg", + "city": "Hamburg", + }, + "hr_number": "HRB 47899", + }, + "location": { + "city": "Hamburg", + "street": "Heußweg", + "house_number": "35", + "zip_code": "20255", + }, + "name": "Aurelius Immo GmbH", + "last_update": "2021-07-05", + "relationships": [], + "business_purpose": "Erwerb und Verwaltung von Immobilien; Geschäftsführung von Immobilienfonds und anderen Gesellschaften; Dienstleistungen in diesem Zusammenhang.", + "capital": {"value": 50000, "currency": "DM", "type": "Stammkapital"}, + "company_type": "Gesellschaft mit beschränkter Haftung", + "founding_date": "1977-03-03", + } + json.dump(mock_company, f) + result = load.load_directory_to_mongo(tmp_dir, mock_company_service) + assert result == 1 diff --git a/tests/utils/data_extraction/unternehmensregister/transform/common_test.py b/tests/utils/data_extraction/unternehmensregister/transform/common_test.py new file mode 100644 index 0000000..3c62864 --- /dev/null +++ b/tests/utils/data_extraction/unternehmensregister/transform/common_test.py @@ -0,0 +1,140 @@ +"""Testing data_extraction/unternehmensregister/transform/common.py.""" +import pytest + +from 
aki_prj23_transparenzregister.models.company import ( + CompanyRelationshipEnum, + CompanyToCompanyRelationship, + Location, + RelationshipRoleEnum, +) +from aki_prj23_transparenzregister.utils.data_extraction.unternehmensregister.transform import ( + common, +) + + +def test_import_common() -> None: + assert common + + +def test_traversal() -> None: + data = {"a": {"b": {"c": "d"}}} + assert common.traversal(data, ["a", "b", "c"]) == "d" + + +def test_traversal_raises_key_error() -> None: + data = {"a": {"b": {"c": "d"}}} + with pytest.raises(KeyError): + common.traversal(data, ["a", "b", "d"]) + + +@pytest.mark.parametrize( + ("value", "expected_result"), + [ + (None, None), + ("Ludwig-Ganghofer-Str.", "Ludwig-Ganghofer-Straße"), + ("Ludwig-Ganghofer-Strasse", "Ludwig-Ganghofer-Straße"), + ("Str. des Tests", "Straße des Tests"), + ], +) +def test_normalize_street(value: str, expected_result: str) -> None: + result = common.normalize_street(value) + assert result == expected_result + + +@pytest.mark.parametrize( + ("value", "expected_result"), + [ + ("", None), + ("Tag der ersten Eintragung: 01.05.2004", "2004-05-01"), + ("Tag der ersten Eintragung: 1.05.2004", "2004-05-01"), + ("Tag der ersten Eintragung: 1.5.2004", "2004-05-01"), + ("Tag der ersten Eintragung: 01.5.2004", "2004-05-01"), + ("Gesellschaftsvertrag vom 06.04.2016 Hallo Welt", "2016-04-06"), + ("Str. 
des Tests vom 1999-04-05", "1999-04-05"), + ("Once upon a midnight dreary while I pondered weak and weary...", None), + ( + "This company was first founded in 2016-06-10 and then again on 1.5.2004", + None, + ), + ], +) +def test_extract_date_from_string(value: str, expected_result: str) -> None: + result = common.extract_date_from_string(value) + assert result == expected_result + + +@pytest.mark.parametrize( + ("value", "expected_result"), + [ + ( + { + "location": Location( + "", "c/o Youco24 Business Center, Abc ffda", None, None + ), + "relationships": [], + }, + { + "location": Location("", "Abc ffda", None, None), + "relationships": [ + CompanyToCompanyRelationship( + RelationshipRoleEnum.CARE_OF, # type: ignore + Location("", "Abc ffda", None, None), + CompanyRelationshipEnum.COMPANY, + "Youco24 Business Center", + ) + ], + }, + ), + ( + { + "location": Location( + "Iserlohn", "c/o Youco24 Business Center, Abc Str.", "42", "58644" + ), + "relationships": [], + }, + { + "location": Location("Iserlohn", "Abc Str.", "42", "58644"), + "relationships": [ + CompanyToCompanyRelationship( + RelationshipRoleEnum.CARE_OF, # type: ignore + Location("Iserlohn", "Abc Str.", "42", "58644"), + CompanyRelationshipEnum.COMPANY, + "Youco24 Business Center", + ) + ], + }, + ), + ( + { + "location": Location( + "Iserlohn", "Abc Str., c/o Youco24 Business Center", "42", "58644" + ), + "relationships": [], + }, + { + "location": Location("Iserlohn", "Abc Str.", "42", "58644"), + "relationships": [ + CompanyToCompanyRelationship( + RelationshipRoleEnum.CARE_OF, # type: ignore + Location("Iserlohn", "Abc Str.", "42", "58644"), + CompanyRelationshipEnum.COMPANY, + "Youco24 Business Center", + ) + ], + }, + ), + ( + { + "location": Location("Iserlohn", "Abc Str., c/o", "42", "58644"), + "relationships": [], + }, + { + "location": Location("Iserlohn", "Abc Str.", "42", "58644"), + "relationships": [], + }, + ), + ], +) +def test_map_co_relation(value: dict, expected_result: dict) -> 
None: + result = common.map_co_relation(value) + assert result == expected_result diff --git a/tests/utils/data_extraction/unternehmensregister/transform/main_test.py b/tests/utils/data_extraction/unternehmensregister/transform/main_test.py new file mode 100644 index 0000000..a9c4799 --- /dev/null +++ b/tests/utils/data_extraction/unternehmensregister/transform/main_test.py @@ -0,0 +1,24 @@ +"""Testing main.py.""" +import json +import os +from tempfile import TemporaryDirectory + +from aki_prj23_transparenzregister.utils.data_extraction.unternehmensregister.transform import ( + main, +) + + +def test_transform_xml_to_json() -> None: + with TemporaryDirectory(dir="./") as temp_source_dir: + with open(os.path.join(temp_source_dir, "test.xml"), "w") as file: + xml_input = """ + + Hello World! + + """ + file.write(xml_input) + with TemporaryDirectory(dir="./") as temp_target_dir: + main.transform_xml_to_json(temp_source_dir, temp_target_dir) + with open(os.path.join(temp_target_dir, "test.json")) as file: + json_output = json.load(file) + assert json_output == {"test": {"message": "Hello World!"}} diff --git a/tests/utils/data_extraction/unternehmensregister/transform/role_mapper_test.py b/tests/utils/data_extraction/unternehmensregister/transform/role_mapper_test.py new file mode 100644 index 0000000..f94f205 --- /dev/null +++ b/tests/utils/data_extraction/unternehmensregister/transform/role_mapper_test.py @@ -0,0 +1,13 @@ +"""Test role_mapper.py.""" +from aki_prj23_transparenzregister.models.company import RelationshipRoleEnum +from aki_prj23_transparenzregister.utils.data_extraction.unternehmensregister.transform.v3.role_mapper import ( + RoleMapper, +) + + +def test_init() -> None: + assert isinstance(RoleMapper.mapper(), RoleMapper) + + +def test_map_role() -> None: + assert RoleMapper.mapper().get("285") == RelationshipRoleEnum.PROKURIST diff --git a/tests/utils/data_extraction/unternehmensregister/transform_test.py 
b/tests/utils/data_extraction/unternehmensregister/transform/v1_test.py similarity index 82% rename from tests/utils/data_extraction/unternehmensregister/transform_test.py rename to tests/utils/data_extraction/unternehmensregister/transform/v1_test.py index 08f24a7..4c89d35 100644 --- a/tests/utils/data_extraction/unternehmensregister/transform_test.py +++ b/tests/utils/data_extraction/unternehmensregister/transform/v1_test.py @@ -1,11 +1,6 @@ """Testing utils/data_extraction/unternehmensregister/transform.py.""" -import json -import os -from tempfile import TemporaryDirectory from unittest.mock import Mock, patch -import pytest - from aki_prj23_transparenzregister.models.company import ( Capital, CapitalTypeEnum, @@ -21,25 +16,11 @@ from aki_prj23_transparenzregister.models.company import ( PersonToCompanyRelationship, RelationshipRoleEnum, ) -from aki_prj23_transparenzregister.utils.data_extraction.unternehmensregister import ( - transform, +from aki_prj23_transparenzregister.utils.data_extraction.unternehmensregister.transform.v1.v1 import ( + V1_Transformer, ) - -def test_transform_xml_to_json() -> None: - with TemporaryDirectory(dir="./") as temp_source_dir: - with open(os.path.join(temp_source_dir, "test.xml"), "w") as file: - xml_input = """ - - Hello World! 
- - """ - file.write(xml_input) - with TemporaryDirectory(dir="./") as temp_target_dir: - transform.transform_xml_to_json(temp_source_dir, temp_target_dir) - with open(os.path.join(temp_target_dir, "test.json")) as file: - json_output = json.load(file) - assert json_output == {"test": {"message": "Hello World!"}} +transform = V1_Transformer() def test_parse_stakeholder_org_hidden_in_person() -> None: @@ -285,20 +266,6 @@ def test_loc_from_beteiligung_combine() -> None: assert transform.loc_from_beteiligung(data) == expected_result -@pytest.mark.parametrize( - ("value", "expected_result"), - [ - (None, None), - ("Ludwig-Ganghofer-Str.", "Ludwig-Ganghofer-Straße"), - ("Ludwig-Ganghofer-Strasse", "Ludwig-Ganghofer-Straße"), - ("Str. des Tests", "Straße des Tests"), - ], -) -def test_normalize_street(value: str, expected_result: str) -> None: - result = transform.normalize_street(value) - assert result == expected_result - - def test_name_from_beteiligung() -> None: data = { "XJustiz_Daten": { @@ -601,28 +568,6 @@ def test_map_business_purpose_no_result() -> None: assert result is None -@pytest.mark.parametrize( - ("value", "expected_result"), - [ - ("", None), - ("Tag der ersten Eintragung: 01.05.2004", "2004-05-01"), - ("Tag der ersten Eintragung: 1.05.2004", "2004-05-01"), - ("Tag der ersten Eintragung: 1.5.2004", "2004-05-01"), - ("Tag der ersten Eintragung: 01.5.2004", "2004-05-01"), - ("Gesellschaftsvertrag vom 06.04.2016 Hallo Welt", "2016-04-06"), - ("Str. 
des Tests vom 1999-04-05", "1999-04-05"), - ("Once upon a midnight dreary while I pondered weak and weary...", None), - ( - "This company was first founded in 2016-06-10 and then again on 1.5.2004", - None, - ), - ], -) -def test_extract_date_from_string(value: str, expected_result: str) -> None: - result = transform.extract_date_from_string(value) - assert result == expected_result - - def test_map_founding_date_from_tag_der_ersten_eintragung() -> None: data = { "some entry": "Tag der ersten Eintragung: 01.05.2004", @@ -709,112 +654,35 @@ def test_map_last_update() -> None: assert result == date -@pytest.mark.parametrize( - ("value", "expected_result"), - [ - ( - { - "location": Location( - "", "c/o Youco24 Business Center, Abc ffda", None, None - ), - "relationships": [], - }, - { - "location": Location("", "Abc ffda", None, None), - "relationships": [ - CompanyToCompanyRelationship( - RelationshipRoleEnum.CARE_OF, # type: ignore - Location("", "Abc ffda", None, None), - CompanyRelationshipEnum.COMPANY, - "Youco24 Business Center", - ) - ], - }, - ), - ( - { - "location": Location( - "Iserlohn", "c/o Youco24 Business Center, Abc Str.", "42", "58644" - ), - "relationships": [], - }, - { - "location": Location("Iserlohn", "Abc Str.", "42", "58644"), - "relationships": [ - CompanyToCompanyRelationship( - RelationshipRoleEnum.CARE_OF, # type: ignore - Location("Iserlohn", "Abc Str.", "42", "58644"), - CompanyRelationshipEnum.COMPANY, - "Youco24 Business Center", - ) - ], - }, - ), - ( - { - "location": Location( - "Iserlohn", "Abc Str., c/o Youco24 Business Center", "42", "58644" - ), - "relationships": [], - }, - { - "location": Location("Iserlohn", "Abc Str.", "42", "58644"), - "relationships": [ - CompanyToCompanyRelationship( - RelationshipRoleEnum.CARE_OF, # type: ignore - Location("Iserlohn", "Abc Str.", "42", "58644"), - CompanyRelationshipEnum.COMPANY, - "Youco24 Business Center", - ) - ], - }, - ), - ( - { - "location": Location("Iserlohn", "Abc Str., c/o", 
"42", "58644"), - "relationships": [], - }, - { - "location": Location("Iserlohn", "Abc Str.", "42", "58644"), - "relationships": [], - }, - ), - ], -) -def test_map_co_relation(value: dict, expected_result: dict) -> None: - result = transform.map_co_relation(value) - assert result == expected_result - - @patch( - "aki_prj23_transparenzregister.utils.data_extraction.unternehmensregister.transform.map_co_relation" + "aki_prj23_transparenzregister.utils.data_extraction.unternehmensregister.transform.v1.v1.map_co_relation" ) @patch( - "aki_prj23_transparenzregister.utils.data_extraction.unternehmensregister.transform.map_company_id" + "aki_prj23_transparenzregister.utils.data_extraction.unternehmensregister.transform.v1.v1.V1_Transformer.map_company_id" ) @patch( - "aki_prj23_transparenzregister.utils.data_extraction.unternehmensregister.transform.name_from_beteiligung" + "aki_prj23_transparenzregister.utils.data_extraction.unternehmensregister.transform.v1.v1.V1_Transformer.name_from_beteiligung" ) @patch( - "aki_prj23_transparenzregister.utils.data_extraction.unternehmensregister.transform.loc_from_beteiligung" + "aki_prj23_transparenzregister.utils.data_extraction.unternehmensregister.transform.v1.v1.V1_Transformer.loc_from_beteiligung" ) @patch( - "aki_prj23_transparenzregister.utils.data_extraction.unternehmensregister.transform.map_last_update" + "aki_prj23_transparenzregister.utils.data_extraction.unternehmensregister.transform.v1.v1.V1_Transformer.map_last_update" ) @patch( - "aki_prj23_transparenzregister.utils.data_extraction.unternehmensregister.transform.map_rechtsform" + "aki_prj23_transparenzregister.utils.data_extraction.unternehmensregister.transform.v1.v1.V1_Transformer.map_rechtsform" ) @patch( - "aki_prj23_transparenzregister.utils.data_extraction.unternehmensregister.transform.map_capital" + "aki_prj23_transparenzregister.utils.data_extraction.unternehmensregister.transform.v1.v1.V1_Transformer.map_capital" ) @patch( - 
"aki_prj23_transparenzregister.utils.data_extraction.unternehmensregister.transform.map_business_purpose" + "aki_prj23_transparenzregister.utils.data_extraction.unternehmensregister.transform.v1.v1.V1_Transformer.map_business_purpose" ) @patch( - "aki_prj23_transparenzregister.utils.data_extraction.unternehmensregister.transform.map_founding_date" + "aki_prj23_transparenzregister.utils.data_extraction.unternehmensregister.transform.v1.v1.V1_Transformer.map_founding_date" ) @patch( - "aki_prj23_transparenzregister.utils.data_extraction.unternehmensregister.transform.parse_stakeholder" + "aki_prj23_transparenzregister.utils.data_extraction.unternehmensregister.transform.v1.v1.V1_Transformer.parse_stakeholder" ) def test_map_unternehmensregister_json( # noqa: PLR0913 mock_map_parse_stakeholder: Mock, diff --git a/tests/utils/data_extraction/unternehmensregister/transform/v3_test.py b/tests/utils/data_extraction/unternehmensregister/transform/v3_test.py new file mode 100644 index 0000000..d23e048 --- /dev/null +++ b/tests/utils/data_extraction/unternehmensregister/transform/v3_test.py @@ -0,0 +1,731 @@ +"""Testing utils/data_extraction/unternehmensregister/transform.py.""" +from unittest.mock import Mock, patch + +from aki_prj23_transparenzregister.models.company import ( + Capital, + CapitalTypeEnum, + Company, + CompanyID, + CompanyRelationshipEnum, + CompanyToCompanyRelationship, + CompanyTypeEnum, + CurrencyEnum, + DistrictCourt, + Location, + PersonName, + PersonToCompanyRelationship, + RelationshipRoleEnum, +) +from aki_prj23_transparenzregister.utils.data_extraction.unternehmensregister.transform.v3.v3 import ( + V3_Transformer, +) + +transform = V3_Transformer() + + +def test_parse_stakeholder_org_hidden_in_person() -> None: + data = { + "tns:beteiligter": { + "tns:auswahl_beteiligter": { + "tns:natuerlichePerson": { + "tns:vollerName": {"tns:nachname": '"Some Company KG'}, + "tns:anschrift": {"tns:ort": "Area 51"}, + } + } + }, + "tns:rolle": 
{"tns:rollenbezeichnung": {"code": "275"}}, + } + expected_result = CompanyToCompanyRelationship( + role=RelationshipRoleEnum.KOMMANDITIST, # type: ignore + name="Some Company KG", + type=CompanyRelationshipEnum.COMPANY, + location=Location(**{"city": "Area 51"}), + ) + assert transform.parse_stakeholder(data) == expected_result + + +def test_parse_stakeholder_person() -> None: + data = { + "tns:beteiligter": { + "tns:auswahl_beteiligter": { + "tns:natuerlichePerson": { + "tns:vollerName": { + "tns:vorname": "Stephen", + "tns:nachname": "King", + }, + "tns:anschrift": {"tns:ort": "Maine"}, + "tns:geburt": {"tns:geburtsdatum": "1947-09-21"}, + } + } + }, + "tns:rolle": {"tns:rollenbezeichnung": {"code": "269"}}, + } + expected_result = PersonToCompanyRelationship( + role=RelationshipRoleEnum.GESCHAEFTSLEITER, # type: ignore + date_of_birth="1947-09-21", + name=PersonName(**{"firstname": "Stephen", "lastname": "King"}), + type=CompanyRelationshipEnum.PERSON, + location=Location(**{"city": "Maine"}), + ) + assert transform.parse_stakeholder(data) == expected_result + + +def test_parse_stakeholder_person_missing_date_of_birth() -> None: + data = { + "tns:beteiligter": { + "tns:auswahl_beteiligter": { + "tns:natuerlichePerson": { + "tns:vollerName": { + "tns:vorname": "Stephen", + "tns:nachname": "King", + }, + "tns:anschrift": {"tns:ort": "Maine"}, + } + } + }, + "tns:rolle": {"tns:rollenbezeichnung": {"code": "269"}}, + } + expected_result = PersonToCompanyRelationship( + role=RelationshipRoleEnum.GESCHAEFTSLEITER, # type: ignore + date_of_birth=None, + name=PersonName(**{"firstname": "Stephen", "lastname": "King"}), + type=CompanyRelationshipEnum.PERSON, + location=Location(**{"city": "Maine"}), + ) + assert transform.parse_stakeholder(data) == expected_result + + +def test_parse_stakeholder_org() -> None: + data = { + "tns:beteiligter": { + "tns:auswahl_beteiligter": { + "tns:organisation": { + "tns:bezeichnung": { + "tns:bezeichnung.aktuell": "Transparenzregister 
kG" + }, + "tns:anschrift": { + "tns:ort": "Iserlohn", + "tns:strasse": "Hauptstrasse", + "tns:hausnummer": "42", + "tns:postleitzahl": "58636", + }, + } + } + }, + "tns:rolle": {"tns:rollenbezeichnung": {"code": "268"}}, + } + expected_result = CompanyToCompanyRelationship( + name="Transparenzregister kG", + role=RelationshipRoleEnum.DIREKTOR, # type: ignore + type=CompanyRelationshipEnum.COMPANY, + location=Location( + **{ + "city": "Iserlohn", + "zip_code": "58636", + "house_number": "42", + "street": "Hauptstrasse", + } + ), + ) + assert transform.parse_stakeholder(data) == expected_result + + +def test_parse_stakeholder_org_loc_from_sitz() -> None: + data = { + "tns:beteiligter": { + "tns:auswahl_beteiligter": { + "tns:organisation": { + "tns:bezeichnung": { + "tns:bezeichnung.aktuell": "Transparenzregister kG" + }, + "tns:sitz": { + "tns:ort": "Iserlohn", + "tns:strasse": "Hauptstrasse", + "tns:hausnummer": "42", + "tns:postleitzahl": "58636", + }, + } + } + }, + "tns:rolle": {"tns:rollenbezeichnung": {"code": "268"}}, + } + expected_result = CompanyToCompanyRelationship( + name="Transparenzregister kG", + role=RelationshipRoleEnum.DIREKTOR, # type: ignore + type=CompanyRelationshipEnum.COMPANY, + location=Location( + **{ + "city": "Iserlohn", + "zip_code": "58636", + "house_number": "42", + "street": "Hauptstrasse", + } + ), + ) + assert transform.parse_stakeholder(data) == expected_result + + +def test_parse_stakeholder_no_result() -> None: + data: dict = {"tns:beteiligter": {"tns:auswahl_beteiligter": {}}} # type: ignore + assert transform.parse_stakeholder(data) is None + + +def test_loc_from_beteiligung() -> None: + data = { + "tns:grunddaten": { + "tns:verfahrensdaten": { + "tns:beteiligung": [ + { + "tns:beteiligter": { + "tns:auswahl_beteiligter": { + "tns:organisation": { + "tns:anschrift": { + "tns:strasse": "Gewerbestraße", + "tns:hausnummer": "8", + "tns:postleitzahl": "72535", + "tns:ort": "Heroldstatt", + }, + }, + } + } + }, + ] + } + } + } + + 
expected_result = Location( + city="Heroldstatt", house_number="8", street="Gewerbestraße", zip_code="72535" + ) + assert transform.loc_from_beteiligung(data) == expected_result + + +def test_loc_from_beteiligung_number_contained_in_street() -> None: + data = { + "tns:grunddaten": { + "tns:verfahrensdaten": { + "tns:beteiligung": [ + { + "tns:beteiligter": { + "tns:auswahl_beteiligter": { + "tns:organisation": { + "tns:anschrift": { + "tns:strasse": "Gewerbestraße8", + "tns:postleitzahl": "72535", + "tns:ort": "Heroldstatt", + }, + }, + } + } + }, + ] + } + } + } + + expected_result = Location( + city="Heroldstatt", house_number="8", street="Gewerbestraße", zip_code="72535" + ) + assert transform.loc_from_beteiligung(data) == expected_result + + +def test_loc_from_beteiligung_no_result() -> None: + data = { + "tns:grunddaten": { + "tns:verfahrensdaten": { + "tns:beteiligung": [ + { + "tns:beteiligter": { + "tns:auswahl_beteiligter": { + "tns:organisation": { + "tns:anschrift": { + "tns:postleitzahl": "72535", + "tns:ort": "Heroldstatt", + }, + }, + } + } + }, + ] + } + } + } + + expected_result = Location( + city="Heroldstatt", house_number=None, street=None, zip_code="72535" + ) + assert transform.loc_from_beteiligung(data) == expected_result + + +def test_loc_from_beteiligung_combine() -> None: + data = { + "tns:grunddaten": { + "tns:verfahrensdaten": { + "tns:beteiligung": [ + { + "tns:beteiligter": { + "tns:auswahl_beteiligter": { + "tns:organisation": { + "tns:anschrift": { + "tns:postleitzahl": "72535", + "tns:strasse": "Pliangenserstr. 
40", + "tns:hausnummer": "a", + "tns:ort": "Heroldstatt", + }, + }, + } + } + }, + ] + } + } + } + + expected_result = Location( + city="Heroldstatt", + house_number="40a", + street="Pliangenserstraße", + zip_code="72535", + ) + assert transform.loc_from_beteiligung(data) == expected_result + + +def test_name_from_beteiligung() -> None: + data = { + "tns:grunddaten": { + "tns:verfahrensdaten": { + "tns:beteiligung": [ + { + "tns:beteiligter": { + "tns:auswahl_beteiligter": { + "tns:organisation": { + "tns:bezeichnung": { + "tns:bezeichnung.aktuell": "1 A Autenrieth Kunststofftechnik GmbH & Co. KG" + }, + }, + } + }, + } + ] + } + } + } + + expected_result = "1 A Autenrieth Kunststofftechnik GmbH & Co. KG" + assert transform.name_from_beteiligung(data) == expected_result + + +def test_name_from_beteiligung_remove_quotes() -> None: + data = { + "tns:grunddaten": { + "tns:verfahrensdaten": { + "tns:beteiligung": [ + { + "tns:beteiligter": { + "tns:auswahl_beteiligter": { + "tns:organisation": { + "tns:bezeichnung": { + "tns:bezeichnung.aktuell": '"Siemes Verwaltungs-GmbH"' + }, + }, + } + }, + } + ] + } + } + } + + expected_result = "Siemes Verwaltungs-GmbH" + assert transform.name_from_beteiligung(data) == expected_result + + +def test_map_rechtsform() -> None: + data = { + "tns:fachdatenRegister": { + "tns:basisdatenRegister": { + "tns:rechtstraeger": { + "tns:angabenZurRechtsform": { + "tns:rechtsform": { + "code": "Gesellschaft mit beschränkter Haftung" + }, + } + }, + } + } + } + expected_result = CompanyTypeEnum.GMBH + assert transform.map_rechtsform("", data) == expected_result + + +def test_map_rechtsform_from_name() -> None: + data = [ + ("GEA Farm Technologies GmbH", "Gesellschaft mit beschränkter Haftung"), + ("Atos SE", "Europäische Aktiengesellschaft (SE)"), + ("Bilkenroth KG", "Kommanditgesellschaft"), + ("jfoiahfo8sah 98548902 öhz ö", None), + ] + + for company_name, expected_result in data: + assert transform.map_rechtsform(company_name, {}) == 
expected_result + + +def test_map_capital_kg_single() -> None: + capital = Capital( + currency=CurrencyEnum.EURO, value=69000, type=CapitalTypeEnum.HAFTEINLAGE # type: ignore + ) + data = { + "tns:fachdatenRegister": { + "tns:auswahl_zusatzangaben": { + "tns:personengesellschaft": { + "tns:zusatzKG": { + "tns:datenKommanditist": { + "tns:hafteinlage": { + "tns:zahl": str(capital.value), + "tns:waehrung": {"code": capital.currency}, + }, + } + } + } + } + } + } + + result = transform.map_capital(data, CompanyTypeEnum.KG) # type: ignore + assert result == capital + + +def test_map_capital_kg_sum() -> None: + capital = Capital( + currency=CurrencyEnum.EURO, value=20000, type=CapitalTypeEnum.HAFTEINLAGE # type: ignore + ) + data = { + "tns:fachdatenRegister": { + "tns:auswahl_zusatzangaben": { + "tns:personengesellschaft": { + "tns:zusatzKG": { + "tns:datenKommanditist": [ + { + "tns:hafteinlage": { + "tns:zahl": str(10000), + "tns:waehrung": {"code": capital.currency}, + } + }, + { + "tns:hafteinlage": { + "tns:zahl": str(10000), + "tns:waehrung": {"code": capital.currency}, + }, + }, + ] + } + } + } + } + } + + result = transform.map_capital(data, CompanyTypeEnum.KG) # type: ignore + assert result == capital + + +def test_map_capital_no_fachdaten() -> None: + data: dict = {"tns:fachdatenRegister": {}} + + result = transform.map_capital(data, CompanyTypeEnum.KG) # type: ignore + assert result is None + + +def test_map_capital_gmbh() -> None: + capital = Capital( + currency=CurrencyEnum.DEUTSCHE_MARK, value=42, type=CapitalTypeEnum.STAMMKAPITAL # type: ignore + ) + data = { + "tns:fachdatenRegister": { + "tns:auswahl_zusatzangaben": { + "tns:kapitalgesellschaft": { + "tns:zusatzGmbH": { + "tns:stammkapital": { + "tns:zahl": str(capital.value), + "tns:waehrung": {"code": capital.currency}, + }, + } + } + } + } + } + + result = transform.map_capital(data, CompanyTypeEnum.GMBH) # type: ignore + assert result == capital + + +def test_map_capital_ag() -> None: + capital = 
Capital( + currency=CurrencyEnum.DEUTSCHE_MARK, value=42, type=CapitalTypeEnum.GRUNDKAPITAL # type: ignore + ) + data = { + "tns:fachdatenRegister": { + "tns:auswahl_zusatzangaben": { + "tns:kapitalgesellschaft": { + "tns:zusatzAktiengesellschaft": { + "tns:grundkapital": { + "tns:hoehe": { + "tns:zahl": str(capital.value), + "tns:waehrung": {"code": capital.currency}, + } + }, + } + } + } + } + } + + result = transform.map_capital(data, CompanyTypeEnum.SE) # type: ignore + assert result == capital + + +def test_map_capital_personengesellschaft() -> None: + capital = Capital( + currency=CurrencyEnum.DEUTSCHE_MARK, value=42, type=CapitalTypeEnum.STAMMKAPITAL # type: ignore + ) + data = { + "tns:fachdatenRegister": { + "tns:auswahl_zusatzangaben": { + "tns:personengesellschaft": { + "tns:zusatzGmbH": { + "tns:stammkapital": { + "tns:zahl": str(capital.value), + "tns:waehrung": {"code": capital.currency}, + }, + } + } + } + } + } + + result = transform.map_capital(data, CompanyTypeEnum.OHG) # type: ignore + assert result == capital + + +def test_map_capital_einzelkaufmann() -> None: + capital = Capital( + currency=CurrencyEnum.DEUTSCHE_MARK, value=42, type=CapitalTypeEnum.STAMMKAPITAL # type: ignore + ) + data = { + "tns:fachdatenRegister": { + "tns:auswahl_zusatzangaben": { + "Personengesellschaft": { + "tns:zusatzGmbH": { + "tns:stammkapital": { + "tns:zahl": str(capital.value), + "tns:waehrung": {"code": capital.currency}, + }, + } + } + } + } + } + + result = transform.map_capital(data, CompanyTypeEnum.EINZELKAUFMANN) # type: ignore + assert result is None + + +def test_map_capital_partial_null_values() -> None: + capital = Capital( + currency=CurrencyEnum.DEUTSCHE_MARK, value=42, type=CapitalTypeEnum.STAMMKAPITAL # type: ignore + ) + data = { + "tns:fachdatenRegister": { + "tns:auswahl_zusatzangaben": { + "tns:personengesellschaft": { + "tns:zusatzGmbH": { + "tns:stammkapital": { + "tns:zahl": None, + "tns:waehrung": {"code": capital.currency}, + }, + } + } + } + 
} + } + + result = transform.map_capital(data, CompanyTypeEnum.OHG) # type: ignore + assert result is None + + +def test_map_business_purpose() -> None: + business_purpose = "Handel mit Betäubungsmitteln aller Art" + data = { + "tns:fachdatenRegister": { + "tns:basisdatenRegister": {"tns:gegenstand": business_purpose} + } + } + + result = transform.map_business_purpose(data) + assert result == business_purpose + + +def test_map_business_purpose_no_result() -> None: + data: dict = {} + + result = transform.map_business_purpose(data) + assert result is None + + +def test_map_founding_date_from_tag_der_ersten_eintragung() -> None: + data = { + "some entry": "Tag der ersten Eintragung: 01.05.2004", + "some other entry": "hfjdoöiashföahöf iodsazo8 5z4o fdsha8oü gfdsö", + } + expected_result = "2004-05-01" + result = transform.map_founding_date(data) + assert result == expected_result + + +def test_map_founding_date_from_gesellschaftsvertrag() -> None: + data = { + "some entry": "hfjdoöiashföahöf iodsazo8 5z4o fdsha8oü gfdsö", + "some other entry": "Das Wesen der Rekursion ist der Selbstaufruf Gesellschaftsvertrag vom 22.12.1996 Hallo Welt", + } + expected_result = "1996-12-22" + result = transform.map_founding_date(data) + assert result == expected_result + + +def test_map_founding_date_from_gruendungsdatum() -> None: + data = { + "tns:fachdatenRegister": { + "tns:basisdatenRegister": { + "tns:satzungsdatum": {"tns:aktuellesSatzungsdatum": "1998-01-01"} + } + } + } + expected_result = "1998-01-01" + result = transform.map_founding_date(data) + assert result == expected_result + + +def test_map_founding_date_no_result() -> None: + data: dict = {"tns:fachdatenRegister": {"tns:basisdatenRegister": {}}} + result = transform.map_founding_date(data) + assert result is None + + +def test_map_company_id() -> None: + district_court = DistrictCourt("Amtsgericht Ulm", "Ulm") + company_id = CompanyID(district_court, "HRA 4711") + data = { + "tns:grunddaten": { + 
"tns:verfahrensdaten": { + "tns:instanzdaten": { + "tns:aktenzeichen": { + "tns:auswahl_aktenzeichen": { + "tns:aktenzeichen.freitext": company_id.hr_number + } + }, + }, + "tns:beteiligung": [ + {}, + { + "tns:beteiligter": { + "tns:auswahl_beteiligter": { + "tns:organisation": { + "tns:bezeichnung": { + "tns:bezeichnung.aktuell": district_court.name + }, + "tns:anschrift": { + "tns:ort": district_court.city, + }, + } + } + }, + }, + ], + }, + }, + } + result = transform.map_company_id(data) + assert result == company_id + + +def test_map_last_update() -> None: + date = "2024-01-01" + data = {"tns:fachdatenRegister": {"tns:auszug": {"tns:letzteEintragung": date}}} + result = transform.map_last_update(data) + assert result == date + + +@patch( + "aki_prj23_transparenzregister.utils.data_extraction.unternehmensregister.transform.v3.v3.map_co_relation" +) +@patch( + "aki_prj23_transparenzregister.utils.data_extraction.unternehmensregister.transform.v3.v3.V3_Transformer.map_company_id" +) +@patch( + "aki_prj23_transparenzregister.utils.data_extraction.unternehmensregister.transform.v3.v3.V3_Transformer.name_from_beteiligung" +) +@patch( + "aki_prj23_transparenzregister.utils.data_extraction.unternehmensregister.transform.v3.v3.V3_Transformer.loc_from_beteiligung" +) +@patch( + "aki_prj23_transparenzregister.utils.data_extraction.unternehmensregister.transform.v3.v3.V3_Transformer.map_last_update" +) +@patch( + "aki_prj23_transparenzregister.utils.data_extraction.unternehmensregister.transform.v3.v3.V3_Transformer.map_rechtsform" +) +@patch( + "aki_prj23_transparenzregister.utils.data_extraction.unternehmensregister.transform.v3.v3.V3_Transformer.map_capital" +) +@patch( + "aki_prj23_transparenzregister.utils.data_extraction.unternehmensregister.transform.v3.v3.V3_Transformer.map_business_purpose" +) +@patch( + "aki_prj23_transparenzregister.utils.data_extraction.unternehmensregister.transform.v3.v3.V3_Transformer.map_founding_date" +) +@patch( + 
"aki_prj23_transparenzregister.utils.data_extraction.unternehmensregister.transform.v3.v3.V3_Transformer.parse_stakeholder" +) +def test_map_unternehmensregister_json( # noqa: PLR0913 + mock_map_parse_stakeholder: Mock, + mock_map_founding_date: Mock, + mock_map_business_purpose: Mock, + mock_map_capital: Mock, + mock_map_rechtsform: Mock, + mock_map_last_update: Mock, + mock_loc_from_beteiligung: Mock, + mock_map_name_from_beteiligung: Mock, + mock_map_company_id: Mock, + mock_map_co_relation: Mock, +) -> None: + expected_result = Company( + **{ # type: ignore + "id": Mock(), + "name": Mock(), + "location": Mock(), + "last_update": Mock(), + "company_type": Mock(), + "capital": Mock(), + "business_purpose": Mock(), + "founding_date": Mock(), + "relationships": [Mock()], + } + ) + + mock_map_company_id.return_value = expected_result.id + mock_map_name_from_beteiligung.return_value = expected_result.name + mock_loc_from_beteiligung.return_value = expected_result.location + mock_map_last_update.return_value = expected_result.last_update + mock_map_rechtsform.return_value = expected_result.company_type + mock_map_capital.return_value = expected_result.capital + mock_map_business_purpose.return_value = expected_result.business_purpose + mock_map_founding_date.return_value = expected_result.founding_date + mock_map_parse_stakeholder.return_value = expected_result.relationships[0] + mock_map_co_relation.side_effect = lambda x: x + + data: dict = { + "rootLayerWithSomeStuipStringNooneCaresAbout": { + "tns:grunddaten": {"tns:verfahrensdaten": {"tns:beteiligung": [{}, {}, {}]}} + } + } + + result = transform.map_unternehmensregister_json(data) + assert result == expected_result