From 9d7bb07989ffbf837a81121eb38cfa14eda0b4a2 Mon Sep 17 00:00:00 2001
From: TrisNol <tristan.nolde@yahoo.de>
Date: Sun, 29 Oct 2023 14:46:06 +0100
Subject: [PATCH] checkpoint: Adapt data transformation to new structure

---
 .gitignore                                    |   4 +
 .../apps/find_missing_companies.py            |   7 +-
 tmp/transform.py                              | 645 ++++++++++++++++++
 tmp/transformation.ipynb                      |  90 +++
 4 files changed, 743 insertions(+), 3 deletions(-)
 create mode 100644 tmp/transform.py
 create mode 100644 tmp/transformation.ipynb

diff --git a/.gitignore b/.gitignore
index 38bc337..4e8f59e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,7 @@
+# Data blobs
+**/*.xml
+**/*.json
+
 # LaTeX temp files
 **/*.aux
 **/*-blx.bib
diff --git a/src/aki_prj23_transparenzregister/apps/find_missing_companies.py b/src/aki_prj23_transparenzregister/apps/find_missing_companies.py
index 513b256..d4cf188 100644
--- a/src/aki_prj23_transparenzregister/apps/find_missing_companies.py
+++ b/src/aki_prj23_transparenzregister/apps/find_missing_companies.py
@@ -43,13 +43,14 @@ if __name__ == "__main__":
     configer_logger(namespace=parsed)
     config = parsed.config
     session = connector.get_session(get_config_provider(config))
-    missing_companies = session.query(entities.MissingCompany).all()
+    # missing_companies = session.query(entities.MissingCompany).all()
+    missing_companies = ["GEA Farm Technologies"]
 
     counter = 0
     # Scrape data from unternehmensregister
     for company in missing_companies:
-        print(company.name)
-        extract.scrape(company.name, ["tmp", "xml"])
+        print(company)
+        extract.scrape(company, ["tmp", "xml"])
         counter = counter + 1
         if counter == 5:
             break
diff --git a/tmp/transform.py b/tmp/transform.py
new file mode 100644
index 0000000..b876d41
--- /dev/null
+++ b/tmp/transform.py
@@ -0,0 +1,645 @@
+"""Transform raw Unternehmensregister export (*.xml) to processed .json files for loading."""
+import dataclasses
+import glob
+import json
+import os
+import re
+import sys
+
+import xmltodict
+from tqdm import tqdm
+
+from aki_prj23_transparenzregister.models.company import (
+    Capital,
+    CapitalTypeEnum,
+    Company,
+    CompanyID,
+    CompanyRelationship,
+    CompanyRelationshipEnum,
+    CompanyToCompanyRelationship,
+    CompanyTypeEnum,
+    CurrencyEnum,
+    DistrictCourt,
+    Location,
+    PersonName,
+    PersonToCompanyRelationship,
+    RelationshipRoleEnum,
+)
+from aki_prj23_transparenzregister.utils.string_tools import (
+    remove_traling_and_leading_quotes,
+    transform_date_to_iso,
+)
+
+
+def transform_xml_to_json(source_dir: str, target_dir: str) -> None:
+    """Convert all xml files below a directory to json files.
+
+    Args:
+        source_dir (str): Directory hosting the xml files (searched recursively)
+        target_dir (str): Directory receiving the json files; created if missing
+    """
+    os.makedirs(target_dir, exist_ok=True)
+    # Join the pattern properly: plain concatenation only works if source_dir
+    # already ends with a path separator.
+    pattern = os.path.join(source_dir, "**", "*.xml")
+    for source_path in map(os.path.normpath, glob.glob(pattern, recursive=True)):
+        # Swap only the file extension, not every ".xml" inside the file name
+        stem = os.path.splitext(os.path.basename(source_path))[0]
+        target_path = os.path.join(target_dir, stem + ".json")
+
+        with open(source_path, encoding="utf-8") as source_file:
+            # deepcode ignore HandleUnicode: Weird XML format no other solution
+            data = xmltodict.parse(source_file.read().encode())
+        with open(target_path, "w", encoding="utf-8") as json_file:
+            json.dump(data, json_file)
+
+
+def parse_date_of_birth(data: dict) -> str | None:
+    """Retrieves the date of birth from a stakeholder entry if possible.
+
+    Args:
+        data (dict): Stakeholder data
+
+    Returns:
+        str | None: date of birth or None if not found
+    """
+    # Same "tns:auswahl_beteiligter" spelling as in parse_stakeholder (was misspelled here)
+    person = data["tns:beteiligter"]["tns:auswahl_beteiligter"]["tns:natuerlichePerson"]
+    birth_date = (person.get("tns:geburt") or {}).get("tns:geburtsdatum")
+    # Only plain text values are usable, anything else counts as "not found"
+    return birth_date if isinstance(birth_date, str) else None
+
+# TODO: add map_role_id_to_enum(role_id: str) -> RelationshipRoleEnum to resolve roles by their ID
+
+
+def parse_stakeholder(data: dict) -> CompanyRelationship | None:
+    """Extract the company stakeholder/relation from a single "Beteiligung".
+
+    Args:
+        data (dict): Single "Beteiligung" entry of the data export
+
+    Returns:
+        CompanyRelationship | None: Person or company relationship, None if the entry is neither
+    """
+    if "tns:natuerlichePerson" in data["tns:beteiligter"]["tns:auswahl_beteiligter"]:
+        # NOTE(review): the "Rolle"/"Beteiligter" lookups further down look like
+        # leftovers of the old export structure (no "tns:" prefix) - TODO confirm and adapt.
+        # Disabled old-structure branch: it's a Company serving as a "Kommanditist" or similar
+        # if data["Beteiligter"]["Natuerliche_Person"]["Voller_Name"]["Vorname"] is None:
+        #     return CompanyToCompanyRelationship(
+        #         **{  # type: ignore
+        #             "name": remove_traling_and_leading_quotes(
+        #                 data["Beteiligter"]["Natuerliche_Person"]["Voller_Name"]["Nachname"]
+        #             ),
+        #             "location": Location(
+        #                 **{
+        #                     "city": data["Beteiligter"]["Natuerliche_Person"][
+        #                         "Anschrift"
+        #                     ][-1]["Ort"]
+        #                     if isinstance(
+        #                         data["Beteiligter"]["Natuerliche_Person"]["Anschrift"],
+        #                         list,
+        #                     )
+        #                     else data["Beteiligter"]["Natuerliche_Person"]["Anschrift"][
+        #                         "Ort"
+        #                     ]
+        #                 }
+        #             ),
+        #             "role": RelationshipRoleEnum(
+        #                 data["Rolle"]["Rollenbezeichnung"]["content"]
+        #             ),
+        #             "type": CompanyRelationshipEnum.COMPANY,
+        #         }
+        #     )
+        return PersonToCompanyRelationship(
+            **{  # type: ignore
+                "name": PersonName(
+                    **{
+                        "firstname": data["tns:beteiligter"]["tns:auswahl_beteiligter"]["tns:natuerlichePerson"][
+                            "tns:vollerName"
+                        ]["tns:vorname"],
+                        "lastname": data["tns:beteiligter"]["tns:auswahl_beteiligter"]["tns:natuerlichePerson"][
+                            "tns:vollerName"
+                        ]["tns:nachname"],
+                    }
+                ),
+                "date_of_birth": parse_date_of_birth(data),
+                "location": Location(
+                    **{
+                        "city": data["tns:beteiligter"]["tns:auswahl_beteiligter"]["tns:natuerlichePerson"]["tns:anschrift"][
+                            -1
+                        ]["tns:ort"]
+                        if isinstance(
+                            data["tns:beteiligter"]["tns:auswahl_beteiligter"]["tns:natuerlichePerson"]["tns:anschrift"], list
+                        )
+                        else data["tns:beteiligter"]["tns:auswahl_beteiligter"]["tns:natuerlichePerson"]["tns:anschrift"][
+                            "tns:ort"
+                        ]
+                    }
+                ),
+                # TODO get role via ID; NOTE(review): "Rolle" below is still an old-structure key
+                "role": RelationshipRoleEnum(
+                    data["Rolle"]["Rollenbezeichnung"]["content"]
+                ),
+                "type": CompanyRelationshipEnum.PERSON,
+            }
+        )
+    if "Organisation" in data["Beteiligter"]:
+        return CompanyToCompanyRelationship(
+            **{  # type: ignore
+                "role": RelationshipRoleEnum(
+                    data["Rolle"]["Rollenbezeichnung"]["content"]
+                ),
+                "name": remove_traling_and_leading_quotes(
+                    data["Beteiligter"]["Organisation"]["Bezeichnung"][
+                        "Bezeichnung_Aktuell"
+                    ]
+                ),
+                "location": Location(
+                    **{
+                        "city": data["Beteiligter"]["Organisation"]["Anschrift"]["Ort"],
+                        "street": data["Beteiligter"]["Organisation"]["Anschrift"][
+                            "Strasse"
+                        ]
+                        if "Strasse" in data["Beteiligter"]["Organisation"]["Anschrift"]
+                        else None,
+                        "house_number": data["Beteiligter"]["Organisation"][
+                            "Anschrift"
+                        ]["Hausnummer"]
+                        if "Hausnummer"
+                        in data["Beteiligter"]["Organisation"]["Anschrift"]
+                        else None,
+                        "zip_code": data["Beteiligter"]["Organisation"]["Anschrift"][
+                            "Postleitzahl"
+                        ]
+                        if "Postleitzahl"
+                        in data["Beteiligter"]["Organisation"]["Anschrift"]
+                        else None,
+                    }
+                ),
+                "type": CompanyRelationshipEnum.COMPANY,
+            }
+        )
+    return None
+
+
+def normalize_street(street: str) -> str:
+    """Expand abbreviated or ASCII street suffixes to `Straße` / `straße`.
+
+    Args:
+        street (str): Name of street, may be None
+
+    Returns:
+        str: Normalized and stripped street name, None if no street was given
+    """
+    if street is None:
+        return None
+    # Capitalised variants are replaced first, then the lower-case ones
+    replacements = {r"(Str\.|Strasse)": "Straße", r"(str\.|strasse)": "straße"}
+    for pattern, replacement in replacements.items():
+        street = re.sub(pattern, replacement, street)
+    return street.strip()
+
+
+def loc_from_beteiligung(data: dict) -> Location:
+    """Build the company location from the first "Beteiligung" of the export.
+
+    A house number written at the end of the street text is split off; a separate
+    "tns:hausnummer" entry is appended to it (or used on its own).
+
+    Args:
+        data (dict): Data export
+
+    Returns:
+        Location: location
+    """
+    address = traversal(
+        data,
+        [
+            "tns:grunddaten",
+            "tns:verfahrensdaten",
+            "tns:beteiligung",
+            0,
+            "tns:beteiligter",
+            "tns:auswahl_beteiligter",
+            "tns:organisation",
+            "tns:anschrift",
+        ],
+    )
+    street = None
+    house_number = None
+    if "tns:strasse" in address:
+        street = address["tns:strasse"]
+        trailing_digits = re.search(r".(\d+)$", street)
+        if trailing_digits is not None:
+            house_number = trailing_digits.group(1)
+            street = street[: -len(house_number)]
+            if "tns:hausnummer" in address:
+                house_number += address["tns:hausnummer"]
+        elif "tns:hausnummer" in address:
+            house_number = address["tns:hausnummer"]
+    return Location(
+        city=address["tns:ort"],
+        zip_code=address["tns:postleitzahl"],
+        street=normalize_street(street),  # type: ignore
+        house_number=house_number,
+    )
+
+
+def name_from_beteiligung(data: dict) -> str:
+    """Read the company name from an Unternehmensregister export.
+
+    The name is taken from the organisation of the first relationship found.
+
+    Args:
+        data (dict): Data export
+
+    Returns:
+        str: Company name as returned by remove_traling_and_leading_quotes
+    """
+    organisation_path = [
+        "tns:grunddaten",
+        "tns:verfahrensdaten",
+        "tns:beteiligung",
+        0,
+        "tns:beteiligter",
+        "tns:auswahl_beteiligter",
+        "tns:organisation",
+    ]
+    name_path = [*organisation_path, "tns:bezeichnung", "tns:bezeichnung.aktuell"]
+    return remove_traling_and_leading_quotes(traversal(data, name_path))
+
+
+def map_rechtsform(company_name: str, data: dict) -> CompanyTypeEnum | None:
+    """Determine the company type of a given Unternehmensregister export.
+
+    The legal form code is preferred; the company name suffix is the fallback.
+
+    Args:
+        company_name (str): Name of the company as a fallback solution
+        data (dict): Data export
+
+    Returns:
+        CompanyTypeEnum | None: Company type if found
+    """
+    fallback_by_suffix = {
+        "GmbH": "Gesellschaft mit beschränkter Haftung",
+        "UG": "Gesellschaft mit beschränkter Haftung",
+        "UG (haftungsbeschränkt)": "Gesellschaft mit beschränkter Haftung",
+        "SE": "Europäische Aktiengesellschaft (SE)",
+        "KG": "Kommanditgesellschaft",
+    }
+    rechtsform_path = [
+        "tns:fachdatenRegister",
+        "tns:basisdatenRegister",
+        "tns:rechtstraeger",
+        "tns:angabenZurRechtsform",
+        "tns:rechtsform",
+        "code",
+    ]
+    try:
+        return CompanyTypeEnum(traversal(data, rechtsform_path))
+    except Exception:
+        for suffix, type_name in fallback_by_suffix.items():
+            if company_name.endswith(suffix):
+                return CompanyTypeEnum(type_name)
+        return None
+
+
+def map_capital(data: dict, company_type: CompanyTypeEnum) -> Capital | None:
+    """Extracts the company capital from the given Unternehmensregister export.
+
+    Args:
+        data (dict): Data export
+        company_type (CompanyTypeEnum): Type of company (e.g., 'Gesellschaft mit beschränkter Haftung')
+
+    Returns:
+        Capital | None: Company Capital if found
+    """
+    # Early return
+    if "tns:auswahl_zusatzangaben" not in data["tns:fachdatenRegister"]:
+        return None
+    additional_data = data["tns:fachdatenRegister"]["tns:auswahl_zusatzangaben"]
+    capital: dict = {"Zahl": 0.0, "Waehrung": ""}
+    if company_type == CompanyTypeEnum.KG:
+        # NOTE(review): this branch still reads the old-structure keys
+        # ("Hafteinlage", "Zahl", "Waehrung") although the result below is
+        # built from "tns:zahl"/"tns:waehrung" - TODO adapt once the new key
+        # names of the KG data are confirmed.
+        capital_type = "Hafteinlage"
+        base = additional_data["tns:personengesellschaft"]["tns:zusatzKG"][
+            "tns:datenKommanditist"
+        ]
+        if isinstance(base, list):
+            for entry in base:
+                # TODO link to persons using Ref_Rollennummer then extract ["Hafteinlage"] as below
+                capital["Zahl"] = capital["Zahl"] + float(entry["Hafteinlage"]["Zahl"])
+                capital["Waehrung"] = entry["Hafteinlage"]["Waehrung"]
+        elif isinstance(base, dict):
+            capital = base["Hafteinlage"]
+    elif company_type in [
+        CompanyTypeEnum.GMBH,
+        CompanyTypeEnum.SE,
+        CompanyTypeEnum.AG,
+        CompanyTypeEnum.KGaA,
+        CompanyTypeEnum.AUSLAENDISCHE_RECHTSFORM,
+        CompanyTypeEnum.OHG,
+    ]:
+        if "tns:kapitalgesellschaft" in additional_data:
+            base = additional_data["tns:kapitalgesellschaft"]
+        else:
+            base = additional_data["tns:personengesellschaft"]
+        if "tns:zusatzGmbH" in base:
+            capital_type = "Stammkapital"
+            capital = base["tns:zusatzGmbH"]["tns:stammkapital"]
+        elif "tns:zusatzAktiengesellschaft" in base:
+            capital_type = "Grundkapital"
+            # Keep the whole mapping (amount and currency) just like the GmbH
+            # branch; the return statement reads "tns:zahl" and "tns:waehrung".
+            capital = base["tns:zusatzAktiengesellschaft"]["tns:grundkapital"]
+    elif company_type in [
+        CompanyTypeEnum.EINZELKAUFMANN,
+        CompanyTypeEnum.EG,
+        CompanyTypeEnum.PARTNERSCHAFT,
+        CompanyTypeEnum.PARTNERGESELLSCHAFT,
+        CompanyTypeEnum.PARTNERSCHAFTSGESELLSCHAFT,
+        None,
+    ]:
+        return None
+    # Catch entries having the dict but with null values (also the untouched default)
+    if not all(capital.values()):
+        return None
+    return Capital(
+        **{  # type: ignore
+            "value": float(capital["tns:zahl"]),
+            "currency": CurrencyEnum(capital["tns:waehrung"]["code"]),
+            "type": CapitalTypeEnum(capital_type),
+        }
+    )
+
+
+def map_business_purpose(data: dict) -> str | None:
+    """Read the "Geschäftszweck" from a given Unternehmensregister export.
+
+    A missing entry is not treated as an error.
+
+    Args:
+        data (dict): Data export
+
+    Returns:
+        str | None: Business purpose if found
+    """
+    purpose_path = ["tns:fachdatenRegister", "tns:basisdatenRegister", "tns:gegenstand"]
+    try:
+        purpose = traversal(data, purpose_path)
+    except KeyError:
+        # traversal reports every unresolved path step as a KeyError
+        return None
+    return purpose
+
+
+def extract_date_from_string(value: str) -> str | None:
+    """Find the single date contained in a text and return it in ISO format.
+
+    Args:
+        value (str): Input text
+
+    Returns:
+        str | None: Date in ISO format, None unless exactly one notation matched exactly once
+    """
+    # Dotted dates (e.g. 1.2.2020) are converted, dashed year-first dates are kept as found
+    patterns = (
+        (r"\d{1,2}\.\d{1,2}\.\d{4}", transform_date_to_iso),
+        (r"\d{4}-\d{1,2}-\d{1,2}", None),
+    )
+    candidates = []
+    for pattern, converter in patterns:
+        matches = re.findall(pattern, value)
+        if len(matches) != 1:
+            # A notation only counts if it occurs exactly once
+            continue
+        found = matches[0]
+        candidates.append(found if converter is None else converter(found))
+    if len(candidates) == 1:
+        return candidates[0]
+    return None
+
+
+def map_founding_date(data: dict) -> str | None:
+    """Determine the founding date from a given Unternehmensregister export.
+
+    The text form of the export is searched for two phrases first; the structured
+    "tns:satzungsdatum" entry is only used if neither phrase occurs exactly once.
+
+    Args:
+        data (dict): Data export
+
+    Returns:
+        str | None: Founding date if found
+    """
+    export_text = str(data)
+    # 1st choice: "Tag der ersten Eintragung"
+    first_entry = re.findall(
+        r".Tag der ersten Eintragung:(\\n| )?(\d{1,2}\.\d{1,2}\.\d{2,4})", export_text
+    )
+    if len(first_entry) == 1:
+        # Each hit is a (separator, date) tuple
+        return transform_date_to_iso(first_entry[0][1])
+
+    # 2nd choice: "Gesellschaftsvertrag vom"
+    contract_date = re.findall(
+        r".Gesellschaftsvertrag vom (\d{1,2}\.\d{1,2}\.\d{2,4})", export_text
+    )
+    if len(contract_date) == 1:
+        return transform_date_to_iso(contract_date[0])
+
+    # 3rd choice: structured date from the base data
+    base_data = data["tns:fachdatenRegister"]["tns:basisdatenRegister"]
+    if "tns:satzungsdatum" not in base_data:
+        # No reliable answer
+        return None
+    return traversal(base_data, ["tns:satzungsdatum", "tns:aktuellesSatzungsdatum"])
+
+def traversal(data: dict, path: list[str | int]) -> any:
+    """Walk data along path; raises KeyError if any step cannot be resolved."""
+    for key in path:
+        try:
+            data = data[key]
+        except (KeyError, IndexError, TypeError) as error:
+            raise KeyError(f"Key {key} not found") from error
+    return data
+
+
+def map_hr_number(data: dict) -> str:
+    """Compose the register number "<register code> <running number>" from the export.
+
+    Args:
+        data (dict): Data export
+    """
+    instance_data = data["tns:grunddaten"]["tns:verfahrensdaten"]["tns:instanzdaten"]
+    file_number = instance_data["tns:aktenzeichen"]["tns:auswahl_aktenzeichen"]
+    structured = file_number["tns:aktenzeichen.strukturiert"]
+    register_code = structured["tns:register"]["code"]
+    return f"{register_code} {structured['tns:laufendeNummer']}"
+
+def map_district_court(data: dict) -> DistrictCourt:
+    """Read the district court from the second "Beteiligung" of the export.
+
+    Args:
+        data (dict): Data export
+
+    Returns:
+        DistrictCourt: Name and city of the court
+    """
+    organisation_path = [
+        "tns:grunddaten",
+        "tns:verfahrensdaten",
+        "tns:beteiligung",
+        1,
+        "tns:beteiligter",
+        "tns:auswahl_beteiligter",
+        "tns:organisation",
+    ]
+    organisation = traversal(data, organisation_path)
+    name = traversal(organisation, ["tns:bezeichnung", "tns:bezeichnung.aktuell"])
+    return DistrictCourt(name=name, city=traversal(organisation, ["tns:sitz", "tns:ort"]))
+
+
+def map_company_id(data: dict) -> CompanyID:
+    """Assemble the Company ID from an export.
+
+    The ID combines the register number with the district court.
+
+    Args:
+        data (dict): Data export
+
+    Returns:
+        CompanyID: ID of the company
+    """
+    return CompanyID(
+        hr_number=map_hr_number(data),
+        district_court=map_district_court(data),
+    )
+
+
+def map_last_update(data: dict) -> str:
+    """Read the "tns:letzteEintragung" entry (last update) from an export.
+
+    A missing entry surfaces as the KeyError raised by traversal.
+
+    Args:
+        data (dict): Unternehmensregister export
+
+    Returns:
+        str: Last update date
+    """
+    return traversal(
+        data,
+        ["tns:fachdatenRegister", "tns:auszug", "tns:letzteEintragung"],
+    )
+
+
+def map_co_relation(data: dict) -> dict:
+    """Move a "c/o" entry from location.street into a dedicated relationship.
+
+    Args:
+        data (dict): Company dict
+
+    Returns:
+        dict: Modified Company dict
+    """
+    location = data["location"]
+    if location.street is None:
+        return data
+    parts = location.street.split(",")
+    # Remember the last comma separated part that starts with "c/o"
+    co_index = None
+    co_company = None
+    for index, part in enumerate(parts):
+        hit = re.search(r"^c\/o(.*)$", part.strip())
+        if hit is not None:
+            co_index, co_company = index, hit.group(1).strip()
+    if co_index is None:
+        return data
+    # Drop the c/o part from the street
+    parts.pop(co_index)
+    street = "".join(parts).strip()
+    location.street = street
+    if co_company not in (None, ""):
+        relation = CompanyToCompanyRelationship(
+            RelationshipRoleEnum.CARE_OF,  # type: ignore
+            Location(
+                location.city,
+                street,
+                location.house_number,
+                location.zip_code,
+            ),
+            CompanyRelationshipEnum.COMPANY,  # type: ignore
+            co_company,
+        )
+        data["relationships"].append(relation)
+    return data
+
+
+def map_unternehmensregister_json(data: dict) -> Company:
+    """Turn the Unternehmensregister structured export into a Company via the helper methods.
+
+    The export content is taken from below the first key of ``data``.
+
+    Args:
+        data (dict): Data export
+
+    Returns:
+        Company: Transformed data
+    """
+    export = data[list(data)[0]]
+    # The company type needs the name, the capital needs the company type
+    company_id = map_company_id(export)
+    name = name_from_beteiligung(export)
+    location = loc_from_beteiligung(export)
+    last_update = map_last_update(export)
+    company_type = map_rechtsform(name, export)
+    result: dict = {
+        "relationships": [],
+        "id": company_id,
+        "name": name,
+        "location": location,
+        "last_update": last_update,
+        "company_type": company_type,
+        "capital": map_capital(export, company_type),
+        "business_purpose": map_business_purpose(export),
+        "founding_date": map_founding_date(export),
+    }
+
+    # TODO adapt parse_stakeholder to the new structure, then append its result
+    # for every "tns:beteiligung" entry from index 2 onwards (below
+    # "tns:grunddaten" / "tns:verfahrensdaten") to result["relationships"].
+    return Company(**map_co_relation(result))
+
+
+if __name__ == "__main__":
+    from loguru import logger
+
+    base_path = "./Jupyter/API-tests/Unternehmensregister/data/Unternehmensregister"
+    export_dir = f"{base_path}/export"
+    # glob.glob1 is undocumented and deprecated since Python 3.13; glob.glob with
+    # root_dir returns the same directory-relative file names.
+    for file in tqdm(glob.glob("*.json", root_dir=export_dir)):
+        path = os.path.join(export_dir, file)
+        with open(path, encoding="utf-8") as file_object:
+            try:
+                company: Company = map_unternehmensregister_json(
+                    json.load(file_object)
+                )
+
+                # File name: alphanumeric characters of the company name, at most 50
+                name = "".join(e for e in company.name if e.isalnum())[:50]
+                target = f"{base_path}/transformed/{name}.json"
+                with open(target, "w+", encoding="utf-8") as export_file:
+                    json.dump(
+                        dataclasses.asdict(company), export_file, ensure_ascii=False
+                    )
+            except Exception as e:
+                logger.error(e)
+                logger.error(f"Error in processing {path}")
+                sys.exit(1)
diff --git a/tmp/transformation.ipynb b/tmp/transformation.ipynb
new file mode 100644
index 0000000..6fd84ae
--- /dev/null
+++ b/tmp/transformation.ipynb
@@ -0,0 +1,90 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "ename": "KeyError",
+     "evalue": "'Beteiligter'",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[1;31mKeyError\u001b[0m                                  Traceback (most recent call last)",
+      "\u001b[1;32mc:\\Users\\trist\\Documents\\Code\\M.Sc\\aki_prj23_transparenzregister\\tmp\\transformation.ipynb Cell 1\u001b[0m line \u001b[0;36m6\n\u001b[0;32m      <a href='vscode-notebook-cell:/c%3A/Users/trist/Documents/Code/M.Sc/aki_prj23_transparenzregister/tmp/transformation.ipynb#W1sZmlsZQ%3D%3D?line=3'>4</a>\u001b[0m \u001b[39mwith\u001b[39;00m \u001b[39mopen\u001b[39m(\u001b[39m'\u001b[39m\u001b[39m../tmp/json/GEAFarmTechnologiesGmbH.json\u001b[39m\u001b[39m'\u001b[39m, \u001b[39m\"\u001b[39m\u001b[39mr\u001b[39m\u001b[39m\"\u001b[39m) \u001b[39mas\u001b[39;00m file:\n\u001b[0;32m      <a href='vscode-notebook-cell:/c%3A/Users/trist/Documents/Code/M.Sc/aki_prj23_transparenzregister/tmp/transformation.ipynb#W1sZmlsZQ%3D%3D?line=4'>5</a>\u001b[0m     content \u001b[39m=\u001b[39m json\u001b[39m.\u001b[39mload(file)\n\u001b[1;32m----> <a href='vscode-notebook-cell:/c%3A/Users/trist/Documents/Code/M.Sc/aki_prj23_transparenzregister/tmp/transformation.ipynb#W1sZmlsZQ%3D%3D?line=5'>6</a>\u001b[0m     company_data \u001b[39m=\u001b[39m map_unternehmensregister_json(content)\n",
+      "File \u001b[1;32mc:\\Users\\trist\\Documents\\Code\\M.Sc\\aki_prj23_transparenzregister\\tmp\\transform.py:609\u001b[0m, in \u001b[0;36mmap_unternehmensregister_json\u001b[1;34m(data)\u001b[0m\n\u001b[0;32m    605\u001b[0m \u001b[39m# TODO adapt...\u001b[39;00m\n\u001b[0;32m    606\u001b[0m \u001b[39mfor\u001b[39;00m i \u001b[39min\u001b[39;00m \u001b[39mrange\u001b[39m(\n\u001b[0;32m    607\u001b[0m     \u001b[39m2\u001b[39m, \u001b[39mlen\u001b[39m(data[\u001b[39m\"\u001b[39m\u001b[39mtns:grunddaten\u001b[39m\u001b[39m\"\u001b[39m][\u001b[39m\"\u001b[39m\u001b[39mtns:verfahrensdaten\u001b[39m\u001b[39m\"\u001b[39m][\u001b[39m\"\u001b[39m\u001b[39mtns:beteiligung\u001b[39m\u001b[39m\"\u001b[39m])\n\u001b[0;32m    608\u001b[0m ):\n\u001b[1;32m--> 609\u001b[0m     people \u001b[39m=\u001b[39m parse_stakeholder(\n\u001b[0;32m    610\u001b[0m         data[\u001b[39m\"\u001b[39;49m\u001b[39mtns:grunddaten\u001b[39;49m\u001b[39m\"\u001b[39;49m][\u001b[39m\"\u001b[39;49m\u001b[39mtns:verfahrensdaten\u001b[39;49m\u001b[39m\"\u001b[39;49m][\u001b[39m\"\u001b[39;49m\u001b[39mtns:beteiligung\u001b[39;49m\u001b[39m\"\u001b[39;49m][i]\n\u001b[0;32m    611\u001b[0m     )\n\u001b[0;32m    612\u001b[0m     result[\u001b[39m\"\u001b[39m\u001b[39mrelationships\u001b[39m\u001b[39m\"\u001b[39m]\u001b[39m.\u001b[39mappend(people)\n\u001b[0;32m    613\u001b[0m result \u001b[39m=\u001b[39m map_co_relation(result)\n",
+      "File \u001b[1;32mc:\\Users\\trist\\Documents\\Code\\M.Sc\\aki_prj23_transparenzregister\\tmp\\transform.py:82\u001b[0m, in \u001b[0;36mparse_stakeholder\u001b[1;34m(data)\u001b[0m\n\u001b[0;32m     73\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mparse_stakeholder\u001b[39m(data: \u001b[39mdict\u001b[39m) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m CompanyRelationship \u001b[39m|\u001b[39m \u001b[39mNone\u001b[39;00m:\n\u001b[0;32m     74\u001b[0m \u001b[39m    \u001b[39m\u001b[39m\"\"\"Extract the company stakeholder/relation from a single \"Beteiligung\".\u001b[39;00m\n\u001b[0;32m     75\u001b[0m \n\u001b[0;32m     76\u001b[0m \u001b[39m    Args:\u001b[39;00m\n\u001b[1;32m   (...)\u001b[0m\n\u001b[0;32m     80\u001b[0m \u001b[39m        CompanyRelationship | None: Relationship if it could be processed\u001b[39;00m\n\u001b[0;32m     81\u001b[0m \u001b[39m    \"\"\"\u001b[39;00m\n\u001b[1;32m---> 82\u001b[0m     \u001b[39mif\u001b[39;00m \u001b[39m\"\u001b[39m\u001b[39mNatuerliche_Person\u001b[39m\u001b[39m\"\u001b[39m \u001b[39min\u001b[39;00m data[\u001b[39m\"\u001b[39;49m\u001b[39mBeteiligter\u001b[39;49m\u001b[39m\"\u001b[39;49m]:\n\u001b[0;32m     83\u001b[0m         \u001b[39m# It's a Company serving as a \"Kommanditist\" or similar\u001b[39;00m\n\u001b[0;32m     84\u001b[0m         \u001b[39mif\u001b[39;00m data[\u001b[39m\"\u001b[39m\u001b[39mBeteiligter\u001b[39m\u001b[39m\"\u001b[39m][\u001b[39m\"\u001b[39m\u001b[39mNatuerliche_Person\u001b[39m\u001b[39m\"\u001b[39m][\u001b[39m\"\u001b[39m\u001b[39mVoller_Name\u001b[39m\u001b[39m\"\u001b[39m][\u001b[39m\"\u001b[39m\u001b[39mVorname\u001b[39m\u001b[39m\"\u001b[39m] \u001b[39mis\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[0;32m     85\u001b[0m             \u001b[39mreturn\u001b[39;00m CompanyToCompanyRelationship(\n\u001b[0;32m     86\u001b[0m                 \u001b[39m*\u001b[39m\u001b[39m*\u001b[39m{  \u001b[39m# type: ignore\u001b[39;00m\n\u001b[0;32m     87\u001b[0m                     
\u001b[39m\"\u001b[39m\u001b[39mname\u001b[39m\u001b[39m\"\u001b[39m: remove_traling_and_leading_quotes(\n\u001b[1;32m   (...)\u001b[0m\n\u001b[0;32m    110\u001b[0m                 }\n\u001b[0;32m    111\u001b[0m             )\n",
+      "\u001b[1;31mKeyError\u001b[0m: 'Beteiligter'"
+     ]
+    }
+   ],
+   "source": [
+    "import json\n",
+    "from transform import map_unternehmensregister_json\n",
+    "\n",
+    "with open('../tmp/json/GEAFarmTechnologiesGmbH.json', \"r\") as file:\n",
+    "    content = json.load(file)\n",
+    "    company_data = map_unternehmensregister_json(content)\n",
+    "    print(company_data)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Test"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "ename": "AssertionError",
+     "evalue": "",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[1;31mAssertionError\u001b[0m                            Traceback (most recent call last)",
+      "\u001b[1;32mc:\\Users\\trist\\Documents\\Code\\M.Sc\\aki_prj23_transparenzregister\\tmp\\transformation.ipynb Cell 1\u001b[0m line \u001b[0;36m9\n\u001b[0;32m      <a href='vscode-notebook-cell:/c%3A/Users/trist/Documents/Code/M.Sc/aki_prj23_transparenzregister/tmp/transformation.ipynb#W0sZmlsZQ%3D%3D?line=6'>7</a>\u001b[0m \u001b[39mwith\u001b[39;00m \u001b[39mopen\u001b[39m(\u001b[39m'\u001b[39m\u001b[39m../tmp/tests/GEAFarmTechnologiesGmbH.json\u001b[39m\u001b[39m'\u001b[39m, \u001b[39m\"\u001b[39m\u001b[39mr\u001b[39m\u001b[39m\"\u001b[39m) \u001b[39mas\u001b[39;00m file:\n\u001b[0;32m      <a href='vscode-notebook-cell:/c%3A/Users/trist/Documents/Code/M.Sc/aki_prj23_transparenzregister/tmp/transformation.ipynb#W0sZmlsZQ%3D%3D?line=7'>8</a>\u001b[0m     expected_result \u001b[39m=\u001b[39m json\u001b[39m.\u001b[39mload(file)\n\u001b[1;32m----> <a href='vscode-notebook-cell:/c%3A/Users/trist/Documents/Code/M.Sc/aki_prj23_transparenzregister/tmp/transformation.ipynb#W0sZmlsZQ%3D%3D?line=8'>9</a>\u001b[0m     \u001b[39massert\u001b[39;00m result \u001b[39m==\u001b[39m expected_result\n",
+      "\u001b[1;31mAssertionError\u001b[0m: "
+     ]
+    }
+   ],
+   "source": [
+    "import json\n",
+    "\n",
+    "result = None\n",
+    "expected_result = None\n",
+    "with open('../tmp/transformed/GEAFarmTechnologiesGmbH.json', 'r') as file_a:\n",
+    "    result = json.load(file_a)\n",
+    "with open('../tmp/tests/GEAFarmTechnologiesGmbH.json', \"r\") as file:\n",
+    "    expected_result = json.load(file)\n",
+    "    assert result == expected_result"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "aki-prj23-transparenzregister-jVJfu35g-py3.11",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}