From 9d7bb07989ffbf837a81121eb38cfa14eda0b4a2 Mon Sep 17 00:00:00 2001 From: TrisNol Date: Sun, 29 Oct 2023 14:46:06 +0100 Subject: [PATCH] checkpoint: Adapt data transformation to new structure --- .gitignore | 4 + .../apps/find_missing_companies.py | 7 +- tmp/transform.py | 645 ++++++++++++++++++ tmp/transformation.ipynb | 90 +++ 4 files changed, 743 insertions(+), 3 deletions(-) create mode 100644 tmp/transform.py create mode 100644 tmp/transformation.ipynb diff --git a/.gitignore b/.gitignore index 38bc337..4e8f59e 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,7 @@ +# Data blobs +**/*.xml +**/*.json + # LaTeX temp files **/*.aux **/*-blx.bib diff --git a/src/aki_prj23_transparenzregister/apps/find_missing_companies.py b/src/aki_prj23_transparenzregister/apps/find_missing_companies.py index 513b256..d4cf188 100644 --- a/src/aki_prj23_transparenzregister/apps/find_missing_companies.py +++ b/src/aki_prj23_transparenzregister/apps/find_missing_companies.py @@ -43,13 +43,14 @@ if __name__ == "__main__": configer_logger(namespace=parsed) config = parsed.config session = connector.get_session(get_config_provider(config)) - missing_companies = session.query(entities.MissingCompany).all() + # missing_companies = session.query(entities.MissingCompany).all() + missing_companies = ["GEA Farm Technologies"] counter = 0 # Scrape data from unternehmensregister for company in missing_companies: - print(company.name) - extract.scrape(company.name, ["tmp", "xml"]) + print(company) + extract.scrape(company, ["tmp", "xml"]) counter = counter + 1 if counter == 5: break diff --git a/tmp/transform.py b/tmp/transform.py new file mode 100644 index 0000000..b876d41 --- /dev/null +++ b/tmp/transform.py @@ -0,0 +1,645 @@ +"""Transform raw Unternehmensregister export (*.xml) to processed .json files for loading.""" +import dataclasses +import glob +import json +import os +import re +import sys + +import xmltodict +from tqdm import tqdm + +from aki_prj23_transparenzregister.models.company import ( + Capital, + CapitalTypeEnum, + Company, + CompanyID, + CompanyRelationship, + CompanyRelationshipEnum, + CompanyToCompanyRelationship, + CompanyTypeEnum, + CurrencyEnum, + DistrictCourt, + Location, + PersonName, + PersonToCompanyRelationship, + RelationshipRoleEnum, +) +from aki_prj23_transparenzregister.utils.string_tools import ( + remove_traling_and_leading_quotes, + transform_date_to_iso, +) + + +def transform_xml_to_json(source_dir: str, target_dir: str) -> None: + """Convert all xml files in a directory to json files. + + Args: + source_dir (str): Directory hosting the xml files + target_dir (str): Target directory to move json files to + """ + if not os.path.exists(target_dir): + os.makedirs(target_dir) + for source_path in [ + os.path.normpath(i) for i in glob.glob(source_dir + "**/*.xml", recursive=True) + ]: + target_path = os.path.join( + target_dir, source_path.split(os.sep)[-1].replace(".xml", ".json") + ) + + with open(source_path, encoding="utf-8") as source_file: + # deepcode ignore HandleUnicode: Weird XML format no other solution + data = xmltodict.parse(source_file.read().encode()) + with open(target_path, "w", encoding="utf-8") as json_file: + json_file.write(json.dumps(data)) + + +def parse_date_of_birth(data: dict) -> str | None: + """Retreives the date of birth from a stakeholder entry if possible. + + Args: + data (dict): Stakeholder data + + Returns: + str | None: date of birth or None if not found + """ + if "tns:geburt" in (base := data["tns:beteiligter"]["tns:auswahl_beteililgter"]["tns:natuerlichePerson"]): + base = base["tns:geburt"]["tns:geburtsdatum"] + if isinstance(base, str): + return base + return None + +# def map_role_id_to_enum(role_id: str) -> RelationshipRoleEnum: + + +def parse_stakeholder(data: dict) -> CompanyRelationship | None: + """Extract the company stakeholder/relation from a single "Beteiligung". + + Args: + data (dict): Data export + + Returns: + CompanyRelationship | None: Relationship if it could be processed + """ + if "tns:natuerlichePerson" in data["tns:beteiligter"]["tns:auswahl_beteiligter"]: + # It's a Company serving as a "Kommanditist" or similar + # if data["Beteiligter"]["Natuerliche_Person"]["Voller_Name"]["Vorname"] is None: + # return CompanyToCompanyRelationship( + # **{ # type: ignore + # "name": remove_traling_and_leading_quotes( + # data["Beteiligter"]["Natuerliche_Person"]["Voller_Name"][ + # "Nachname" + # ] + # ), + # "location": Location( + # **{ + # "city": data["Beteiligter"]["Natuerliche_Person"][ + # "Anschrift" + # ][-1]["Ort"] + # if isinstance( + # data["Beteiligter"]["Natuerliche_Person"]["Anschrift"], + # list, + # ) + # else data["Beteiligter"]["Natuerliche_Person"]["Anschrift"][ + # "Ort" + # ] + # } + # ), + # "role": RelationshipRoleEnum( + # data["Rolle"]["Rollenbezeichnung"]["content"] + # ), + # "type": CompanyRelationshipEnum.COMPANY, + # } + # ) + return PersonToCompanyRelationship( + **{ # type: ignore + "name": PersonName( + **{ + "firstname": data["tns:beteiligter"]["tns:auswahl_beteiligter"]["tns:natuerlichePerson"][ + "tns:vollerName" + ]["tns:vorname"], + "lastname": data["tns:beteiligter"]["tns:auswahl_beteiligter"]["tns:natuerlichePerson"][ + "tns:vollerName" + ]["tns:nachname"], + } + ), + "date_of_birth": parse_date_of_birth(data), + "location": Location( + **{ + "city": data["tns:beteiligter"]["tns:auswahl_beteiligter"]["tns:natuerlichePerson"]["tns:anschrift"][ + -1 + ]["tns:ort"] + if isinstance( + data["tns:beteiligter"]["tns:auswahl_beteiligter"]["tns:natuerlichePerson"]["tns:anschrift"], list + ) + else data["tns:beteiligter"]["tns:auswahl_beteiligter"]["tns:natuerlichePerson"]["tns:anschrift"][ + "tns:ort" + ] + } + ), + # TODO get role via ID + "role": RelationshipRoleEnum( + data["Rolle"]["Rollenbezeichnung"]["content"] + ), + "type": CompanyRelationshipEnum.PERSON, + } + ) + if "Organisation" in data["Beteiligter"]: + return CompanyToCompanyRelationship( + **{ # type: ignore + "role": RelationshipRoleEnum( + data["Rolle"]["Rollenbezeichnung"]["content"] + ), + "name": remove_traling_and_leading_quotes( + data["Beteiligter"]["Organisation"]["Bezeichnung"][ + "Bezeichnung_Aktuell" + ] + ), + "location": Location( + **{ + "city": data["Beteiligter"]["Organisation"]["Anschrift"]["Ort"], + "street": data["Beteiligter"]["Organisation"]["Anschrift"][ + "Strasse" + ] + if "Strasse" in data["Beteiligter"]["Organisation"]["Anschrift"] + else None, + "house_number": data["Beteiligter"]["Organisation"][ + "Anschrift" + ]["Hausnummer"] + if "Hausnummer" + in data["Beteiligter"]["Organisation"]["Anschrift"] + else None, + "zip_code": data["Beteiligter"]["Organisation"]["Anschrift"][ + "Postleitzahl" + ] + if "Postleitzahl" + in data["Beteiligter"]["Organisation"]["Anschrift"] + else None, + } + ), + "type": CompanyRelationshipEnum.COMPANY, + } + ) + return None + + +def normalize_street(street: str) -> str: + """Normalize street names by extending them to `Straße` or `straße`. + + Args: + street (str): Name of street + + Returns: + str: Normalized street name + """ + if street is None: + return None + regex = r"(Str\.|Strasse)" + street = re.sub(regex, "Straße", street) + regex = r"(str\.|strasse)" + street = re.sub(regex, "straße", street) + return street.strip() + + +def loc_from_beteiligung(data: dict) -> Location: + """Extract the company location from the first relationship in the export. + + Args: + data (dict): Data export + + Returns: + Location: location + """ + base_path = [ + "tns:grunddaten", + "tns:verfahrensdaten", + "tns:beteiligung", + 0, + "tns:beteiligter", + "tns:auswahl_beteiligter", + "tns:organisation", + "tns:anschrift" + ] + base = traversal(data, base_path) + + house_number = None + street = None + if "tns:strasse" in base: + regex = r".(\d+)$" + hits = re.findall(regex, base["tns:strasse"]) + if len(hits) == 1: + house_number = hits[0] + street = base["tns:strasse"][: (-1 * len(house_number))] + if "tns:hausnummer" in base: + house_number = house_number + base["tns:hausnummer"] + else: + if "tns:hausnummer" in base: + house_number = base["tns:hausnummer"] + street = base["tns:strasse"] + return Location( + **{ + "city": base["tns:ort"], + "zip_code": base["tns:postleitzahl"], + "street": normalize_street(street), # type: ignore + "house_number": house_number, + } + ) + + +def name_from_beteiligung(data: dict) -> str: + """Extract the Company name from an Unternehmensregister export by using the first relationship found. + + Args: + data (dict): Data export + + Returns: + str: Company name + """ + path = [ + "tns:grunddaten", + "tns:verfahrensdaten", + "tns:beteiligung", + 0, + "tns:beteiligter", + "tns:auswahl_beteiligter", + "tns:organisation", + "tns:bezeichnung", + "tns:bezeichnung.aktuell" + ] + name = traversal(data, path) + return remove_traling_and_leading_quotes(name) + + +def map_rechtsform(company_name: str, data: dict) -> CompanyTypeEnum | None: + """Extracts the company type from a given Unternehmensregister export. + + Args: + company_name (str): Name of the company as a fallback solution + data (dict): Data export + + Returns: + CompanyTypeEnum | None: Company type if found + """ + try: + path = [ + "tns:fachdatenRegister", + "tns:basisdatenRegister", + "tns:rechtstraeger", + "tns:angabenZurRechtsform", + "tns:rechtsform", + "code" + ] + return CompanyTypeEnum( + traversal(data, path) + ) + except Exception: + if ( + company_name.endswith("GmbH") + or company_name.endswith("UG") + or company_name.endswith("UG (haftungsbeschränkt)") + ): + return CompanyTypeEnum("Gesellschaft mit beschränkter Haftung") + if company_name.endswith("SE"): + return CompanyTypeEnum("Europäische Aktiengesellschaft (SE)") + if company_name.endswith("KG"): + return CompanyTypeEnum("Kommanditgesellschaft") + return None + + +def map_capital(data: dict, company_type: CompanyTypeEnum) -> Capital | None: + """Extracts the company capital from the given Unternehmensregister export. + + Args: + data (dict): Data export + company_type (CompanyTypeEnum): Type of company (e.g., 'Gesellschaft mit beschränkter Haftung') + + Returns: + Capital | None: Company Capital if found + """ + # Early return + if "tns:auswahl_zusatzangaben" not in data["tns:fachdatenRegister"]: + return None + capital: dict = {"Zahl": 0.0, "Waehrung": ""} + if company_type == CompanyTypeEnum.KG: + capital_type = "Hafteinlage" + base = data["tns:fachdatenRegister"]["tns:auswahl_zusatzangaben"][ + "tns:personengesellschaft" + ]["tns:zusatzKG"]["tns:datenKommanditist"] + if isinstance(base, list): + for entry in base: + # TODO link to persons using Ref_Rollennummer then extract ["Hafteinlage"] as below + capital["Zahl"] = capital["Zahl"] + float(entry["Hafteinlage"]["Zahl"]) + capital["Waehrung"] = entry["Hafteinlage"]["Waehrung"] + elif isinstance(base, dict): + capital = base["Hafteinlage"] + elif company_type in [ + CompanyTypeEnum.GMBH, + CompanyTypeEnum.SE, + CompanyTypeEnum.AG, + CompanyTypeEnum.KGaA, + CompanyTypeEnum.AUSLAENDISCHE_RECHTSFORM, + CompanyTypeEnum.OHG, + ]: + if ( + "tns:kapitalgesellschaft" + not in data["tns:fachdatenRegister"]["tns:auswahl_zusatzangaben"] + ): + base = data["tns:fachdatenRegister"]["tns:auswahl_zusatzangaben"][ + "tns:personengesellschaft" + ] + else: + base = data["tns:fachdatenRegister"]["tns:auswahl_zusatzangaben"][ + "tns:kapitalgesellschaft" + ] + if "tns:zusatzGmbH" in base: + capital_type = "Stammkapital" + capital = base["tns:zusatzGmbH"]["tns:stammkapital"] + elif "tns:zusatzAktiengesellschaft" in base: + capital_type = "Grundkapital" + capital = base["tns:zusatzAktiengesellschaft"]["tns:grundkapital"]["tns:zahl"] + elif company_type in [ + CompanyTypeEnum.EINZELKAUFMANN, + CompanyTypeEnum.EG, + CompanyTypeEnum.PARTNERSCHAFT, + CompanyTypeEnum.PARTNERGESELLSCHAFT, + CompanyTypeEnum.PARTNERSCHAFTSGESELLSCHAFT, + None, + ]: + return None + # Catch entries having the dict but with null values + if not all(capital.values()): + return None + return Capital( + **{ # type: ignore + "value": float(capital["tns:zahl"]), + "currency": CurrencyEnum(capital["tns:waehrung"]["code"]), + "type": CapitalTypeEnum(capital_type), + } + ) + + +def map_business_purpose(data: dict) -> str | None: + """Extracts the "Geschäftszweck" from a given Unternehmensregister export. + + Args: + data (dict): Data export + + Returns: + str | None: Business purpose if found + """ + try: + path = [ + "tns:fachdatenRegister", + "tns:basisdatenRegister", + "tns:gegenstand" + ] + return traversal(data, path) + except KeyError: + return None + + +def extract_date_from_string(value: str) -> str | None: + """Extract a date in ISO format from the given string if possible. + + Args: + value (str): Input text + + Returns: + str | None: Date in ISO format, None if not found + """ + date_regex = [ # type: ignore + {"regex": r"\d{1,2}\.\d{1,2}\.\d{4}", "mapper": transform_date_to_iso}, + {"regex": r"\d{4}-\d{1,2}-\d{1,2}", "mapper": None}, + ] + results = [] + for regex in date_regex: + result = re.findall(regex["regex"], value) # type: ignore + if len(result) == 1: + relevant_data = result[0] + if regex["mapper"] is not None: # type: ignore + results.append(regex["mapper"](relevant_data)) # type: ignore + else: + results.append(relevant_data) + if len(results) != 1: + return None + return results[0] + + +def map_founding_date(data: dict) -> str | None: + """Extracts the founding date from a given Unternehmensregister export. + + Args: + data (dict): Data export + + Returns: + str | None: Founding date if found + """ + text = str(data) + entry_date = re.findall( + r".Tag der ersten Eintragung:(\\n| )?(\d{1,2}\.\d{1,2}\.\d{2,4})", text + ) + if len(entry_date) == 1: + return transform_date_to_iso(entry_date[0][1]) + + entry_date = re.findall( + r".Gesellschaftsvertrag vom (\d{1,2}\.\d{1,2}\.\d{2,4})", text + ) + if len(entry_date) == 1: + return transform_date_to_iso(entry_date[0]) + if ( + "tns:satzungsdatum" + in data["tns:fachdatenRegister"]["tns:basisdatenRegister"] + ): + path = [ + "tns:fachdatenRegister", + "tns:basisdatenRegister", + "tns:satzungsdatum", + "tns:aktuellesSatzungsdatum" + ] + return traversal(data, path) + # No reliable answer + return None + +def traversal(data: dict, path: list[str | int]) -> any: + current = data + for key in path: + try: + current = current[key] + except: + raise KeyError(f"Key {key} not found") + return current + + +def map_hr_number(data: dict) -> str: + hr_prefix = data["tns:grunddaten"]["tns:verfahrensdaten"]["tns:instanzdaten"][ + "tns:aktenzeichen" + ]["tns:auswahl_aktenzeichen"]["tns:aktenzeichen.strukturiert"]["tns:register"][ + "code" + ] + hr_number = data["tns:grunddaten"]["tns:verfahrensdaten"]["tns:instanzdaten"][ + "tns:aktenzeichen" + ]["tns:auswahl_aktenzeichen"]["tns:aktenzeichen.strukturiert"]["tns:laufendeNummer"] + hr_full = f"{hr_prefix} {hr_number}" + return hr_full + +def map_district_court(data: dict) -> DistrictCourt: + base_path = [ + "tns:grunddaten", + "tns:verfahrensdaten", + "tns:beteiligung", + 1, + "tns:beteiligter", + "tns:auswahl_beteiligter", + "tns:organisation" + ] + path = [*base_path, + "tns:bezeichnung", + "tns:bezeichnung.aktuell" + ] + name = traversal(data, path) + path = [*base_path, + "tns:sitz", + "tns:ort" + ] + city = traversal(data, path) + return DistrictCourt(name=name, city=city) + + +def map_company_id(data: dict) -> CompanyID: + """Retrieve Company ID from export. + + Args: + data (dict): Data export + + Returns: + CompanyID: ID of the company + """ + return CompanyID( + **{ + "hr_number": map_hr_number(data), + "district_court": map_district_court(data) + } + ) + + +def map_last_update(data: dict) -> str: + """Extract last update date from export. + + Args: + data (dict): Unternehmensregister export + + Returns: + str: Last update date + """ + path = [ + "tns:fachdatenRegister", + "tns:auszug", + "tns:letzteEintragung" + ] + return traversal(data, path) + + +def map_co_relation(data: dict) -> dict: + """Search for and map the c/o relation from location.street if possible. + + Args: + data (dict): Company dict + + Returns: + dict: Modified Company dict + """ + street = data["location"].street + if street is None: + return data + parts = street.split(",") + co_company = None + co_company_index = None + for index, part in enumerate(parts): + trimmed_part = part.strip() + result = re.findall(r"^c\/o(.*)$", trimmed_part) + if len(result) == 1: + co_company = result[0].strip() + co_company_index = index + if co_company_index is not None: + del parts[co_company_index] + street = "".join(parts).strip() + data["location"].street = street + + if co_company is not None and co_company != "": + relation = CompanyToCompanyRelationship( + RelationshipRoleEnum.CARE_OF, # type: ignore + Location( + data["location"].city, + street, + data["location"].house_number, + data["location"].zip_code, + ), + CompanyRelationshipEnum.COMPANY, # type: ignore + co_company, + ) + data["relationships"].append(relation) + return data + + +def map_unternehmensregister_json(data: dict) -> Company: + """Processes the Unternehmensregister structured export to a Company by using several helper methods. + + Args: + data (dict): Data export + + Returns: + Company: Transformed data + """ + root_key = list(data.keys())[0] + data = data[root_key] + result: dict = {"relationships": []} + + result["id"] = map_company_id(data) + result["name"] = name_from_beteiligung(data) + + result["location"] = loc_from_beteiligung(data) + result["last_update"] = map_last_update(data) + + result["company_type"] = map_rechtsform(result["name"], data) + result["capital"] = map_capital(data, result["company_type"]) + result["business_purpose"] = map_business_purpose(data) + result["founding_date"] = map_founding_date(data) + + # TODO adapt... + # for i in range( + # 2, len(data["tns:grunddaten"]["tns:verfahrensdaten"]["tns:beteiligung"]) + # ): + # people = parse_stakeholder( + # data["tns:grunddaten"]["tns:verfahrensdaten"]["tns:beteiligung"][i] + # ) + # result["relationships"].append(people) + result = map_co_relation(result) + return Company(**result) + + +if __name__ == "__main__": + from loguru import logger + + base_path = "./Jupyter/API-tests/Unternehmensregister/data/Unternehmensregister" + for file in tqdm(glob.glob1(f"{base_path}/export", "*.json")): + path = os.path.join(f"{base_path}/export", file) + with open(path, encoding="utf-8") as file_object: + try: + company: Company = map_unternehmensregister_json( + json.loads(file_object.read()) + ) + + name = "".join(e for e in company.name if e.isalnum())[:50] + + with open( + f"{base_path}/transformed/{name}.json", + "w+", + encoding="utf-8", + ) as export_file: + json.dump( + dataclasses.asdict(company), export_file, ensure_ascii=False + ) + except Exception as e: + logger.error(e) + logger.error(f"Error in processing {path}") + sys.exit(1) diff --git a/tmp/transformation.ipynb b/tmp/transformation.ipynb new file mode 100644 index 0000000..6fd84ae --- /dev/null +++ b/tmp/transformation.ipynb @@ -0,0 +1,90 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "ename": "KeyError", + "evalue": "'Beteiligter'", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mKeyError\u001b[0m Traceback (most recent call last)", + "\u001b[1;32mc:\\Users\\trist\\Documents\\Code\\M.Sc\\aki_prj23_transparenzregister\\tmp\\transformation.ipynb Cell 1\u001b[0m line \u001b[0;36m6\n\u001b[0;32m 4\u001b[0m \u001b[39mwith\u001b[39;00m \u001b[39mopen\u001b[39m(\u001b[39m'\u001b[39m\u001b[39m../tmp/json/GEAFarmTechnologiesGmbH.json\u001b[39m\u001b[39m'\u001b[39m, \u001b[39m\"\u001b[39m\u001b[39mr\u001b[39m\u001b[39m\"\u001b[39m) \u001b[39mas\u001b[39;00m file:\n\u001b[0;32m 5\u001b[0m content \u001b[39m=\u001b[39m json\u001b[39m.\u001b[39mload(file)\n\u001b[1;32m----> 6\u001b[0m company_data \u001b[39m=\u001b[39m map_unternehmensregister_json(content)\n", + "File \u001b[1;32mc:\\Users\\trist\\Documents\\Code\\M.Sc\\aki_prj23_transparenzregister\\tmp\\transform.py:609\u001b[0m, in \u001b[0;36mmap_unternehmensregister_json\u001b[1;34m(data)\u001b[0m\n\u001b[0;32m 605\u001b[0m \u001b[39m# TODO adapt...\u001b[39;00m\n\u001b[0;32m 606\u001b[0m \u001b[39mfor\u001b[39;00m i \u001b[39min\u001b[39;00m \u001b[39mrange\u001b[39m(\n\u001b[0;32m 607\u001b[0m \u001b[39m2\u001b[39m, \u001b[39mlen\u001b[39m(data[\u001b[39m\"\u001b[39m\u001b[39mtns:grunddaten\u001b[39m\u001b[39m\"\u001b[39m][\u001b[39m\"\u001b[39m\u001b[39mtns:verfahrensdaten\u001b[39m\u001b[39m\"\u001b[39m][\u001b[39m\"\u001b[39m\u001b[39mtns:beteiligung\u001b[39m\u001b[39m\"\u001b[39m])\n\u001b[0;32m 608\u001b[0m ):\n\u001b[1;32m--> 609\u001b[0m people \u001b[39m=\u001b[39m parse_stakeholder(\n\u001b[0;32m 610\u001b[0m data[\u001b[39m\"\u001b[39;49m\u001b[39mtns:grunddaten\u001b[39;49m\u001b[39m\"\u001b[39;49m][\u001b[39m\"\u001b[39;49m\u001b[39mtns:verfahrensdaten\u001b[39;49m\u001b[39m\"\u001b[39;49m][\u001b[39m\"\u001b[39;49m\u001b[39mtns:beteiligung\u001b[39;49m\u001b[39m\"\u001b[39;49m][i]\n\u001b[0;32m 611\u001b[0m )\n\u001b[0;32m 612\u001b[0m result[\u001b[39m\"\u001b[39m\u001b[39mrelationships\u001b[39m\u001b[39m\"\u001b[39m]\u001b[39m.\u001b[39mappend(people)\n\u001b[0;32m 613\u001b[0m result \u001b[39m=\u001b[39m map_co_relation(result)\n", + "File \u001b[1;32mc:\\Users\\trist\\Documents\\Code\\M.Sc\\aki_prj23_transparenzregister\\tmp\\transform.py:82\u001b[0m, in \u001b[0;36mparse_stakeholder\u001b[1;34m(data)\u001b[0m\n\u001b[0;32m 73\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mparse_stakeholder\u001b[39m(data: \u001b[39mdict\u001b[39m) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m CompanyRelationship \u001b[39m|\u001b[39m \u001b[39mNone\u001b[39;00m:\n\u001b[0;32m 74\u001b[0m \u001b[39m \u001b[39m\u001b[39m\"\"\"Extract the company stakeholder/relation from a single \"Beteiligung\".\u001b[39;00m\n\u001b[0;32m 75\u001b[0m \n\u001b[0;32m 76\u001b[0m \u001b[39m Args:\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 80\u001b[0m \u001b[39m CompanyRelationship | None: Relationship if it could be processed\u001b[39;00m\n\u001b[0;32m 81\u001b[0m \u001b[39m \"\"\"\u001b[39;00m\n\u001b[1;32m---> 82\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39m\"\u001b[39m\u001b[39mNatuerliche_Person\u001b[39m\u001b[39m\"\u001b[39m \u001b[39min\u001b[39;00m data[\u001b[39m\"\u001b[39;49m\u001b[39mBeteiligter\u001b[39;49m\u001b[39m\"\u001b[39;49m]:\n\u001b[0;32m 83\u001b[0m \u001b[39m# It's a Company serving as a \"Kommanditist\" or similar\u001b[39;00m\n\u001b[0;32m 84\u001b[0m \u001b[39mif\u001b[39;00m data[\u001b[39m\"\u001b[39m\u001b[39mBeteiligter\u001b[39m\u001b[39m\"\u001b[39m][\u001b[39m\"\u001b[39m\u001b[39mNatuerliche_Person\u001b[39m\u001b[39m\"\u001b[39m][\u001b[39m\"\u001b[39m\u001b[39mVoller_Name\u001b[39m\u001b[39m\"\u001b[39m][\u001b[39m\"\u001b[39m\u001b[39mVorname\u001b[39m\u001b[39m\"\u001b[39m] \u001b[39mis\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[0;32m 85\u001b[0m \u001b[39mreturn\u001b[39;00m CompanyToCompanyRelationship(\n\u001b[0;32m 86\u001b[0m \u001b[39m*\u001b[39m\u001b[39m*\u001b[39m{ \u001b[39m# type: ignore\u001b[39;00m\n\u001b[0;32m 87\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mname\u001b[39m\u001b[39m\"\u001b[39m: remove_traling_and_leading_quotes(\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 110\u001b[0m }\n\u001b[0;32m 111\u001b[0m )\n", + "\u001b[1;31mKeyError\u001b[0m: 'Beteiligter'" + ] + } + ], + "source": [ + "import json\n", + "from transform import map_unternehmensregister_json\n", + "\n", + "with open('../tmp/json/GEAFarmTechnologiesGmbH.json', \"r\") as file:\n", + " content = json.load(file)\n", + " company_data = map_unternehmensregister_json(content)\n", + " print(company_data)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Test" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "ename": "AssertionError", + "evalue": "", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mAssertionError\u001b[0m Traceback (most recent call last)", + "\u001b[1;32mc:\\Users\\trist\\Documents\\Code\\M.Sc\\aki_prj23_transparenzregister\\tmp\\transformation.ipynb Cell 1\u001b[0m line \u001b[0;36m9\n\u001b[0;32m 7\u001b[0m \u001b[39mwith\u001b[39;00m \u001b[39mopen\u001b[39m(\u001b[39m'\u001b[39m\u001b[39m../tmp/tests/GEAFarmTechnologiesGmbH.json\u001b[39m\u001b[39m'\u001b[39m, \u001b[39m\"\u001b[39m\u001b[39mr\u001b[39m\u001b[39m\"\u001b[39m) \u001b[39mas\u001b[39;00m file:\n\u001b[0;32m 8\u001b[0m expected_result \u001b[39m=\u001b[39m json\u001b[39m.\u001b[39mload(file)\n\u001b[1;32m----> 9\u001b[0m \u001b[39massert\u001b[39;00m result \u001b[39m==\u001b[39m expected_result\n", + "\u001b[1;31mAssertionError\u001b[0m: " + ] + } + ], + "source": [ + "import json\n", + "\n", + "result = None\n", + "expected_result = None\n", + "with open('../tmp/transformed/GEAFarmTechnologiesGmbH.json', 'r') as file_a:\n", + " result = json.load(file_a)\n", + "with open('../tmp/tests/GEAFarmTechnologiesGmbH.json', \"r\") as file:\n", + " expected_result = json.load(file)\n", + " assert result == expected_result" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "aki-prj23-transparenzregister-jVJfu35g-py3.11", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}