From 2d9e3f19f9281770cda977db14a0de67bef56f3f Mon Sep 17 00:00:00 2001 From: TrisNol Date: Sun, 29 Oct 2023 20:11:09 +0100 Subject: [PATCH] checkpoint: First iteration of fixed mapping --- tmp/transform.py | 29 ++++++++++++++++++----------- tmp/transformation.ipynb | 25 +++++++++++-------------- 2 files changed, 29 insertions(+), 25 deletions(-) diff --git a/tmp/transform.py b/tmp/transform.py index b876d41..9178805 100644 --- a/tmp/transform.py +++ b/tmp/transform.py @@ -63,13 +63,20 @@ def parse_date_of_birth(data: dict) -> str | None: Returns: str | None: date of birth or None if not found """ - if "tns:geburt" in (base := data["tns:beteiligter"]["tns:auswahl_beteililgter"]["tns:natuerlichePerson"]): + if "tns:geburt" in (base := data["tns:beteiligter"]["tns:auswahl_beteiligter"]["tns:natuerlichePerson"]): base = base["tns:geburt"]["tns:geburtsdatum"] if isinstance(base, str): return base return None -# def map_role_id_to_enum(role_id: str) -> RelationshipRoleEnum: +def map_role_id_to_enum(role_id: str) -> RelationshipRoleEnum: + match role_id: + case "086": + return RelationshipRoleEnum.GESCHAEFTSFUEHRER + case "285": + return RelationshipRoleEnum.PROKURIST + case _: + raise KeyError(f'Uknown role_id: {role_id}') def parse_stakeholder(data: dict) -> CompanyRelationship | None: @@ -138,8 +145,8 @@ def parse_stakeholder(data: dict) -> CompanyRelationship | None: } ), # TODO get role via ID - "role": RelationshipRoleEnum( - data["Rolle"]["Rollenbezeichnung"]["content"] + "role": map_role_id_to_enum( + data["tns:rolle"]["tns:rollenbezeichnung"]["code"] ), "type": CompanyRelationshipEnum.PERSON, } @@ -606,13 +613,13 @@ def map_unternehmensregister_json(data: dict) -> Company: result["founding_date"] = map_founding_date(data) # TODO adapt... - # for i in range( - # 2, len(data["tns:grunddaten"]["tns:verfahrensdaten"]["tns:beteiligung"]) - # ): - # people = parse_stakeholder( - # data["tns:grunddaten"]["tns:verfahrensdaten"]["tns:beteiligung"][i] - # ) - # result["relationships"].append(people) + for i in range( + 2, len(data["tns:grunddaten"]["tns:verfahrensdaten"]["tns:beteiligung"]) + ): + people = parse_stakeholder( + data["tns:grunddaten"]["tns:verfahrensdaten"]["tns:beteiligung"][i] + ) + result["relationships"].append(people) result = map_co_relation(result) return Company(**result) diff --git a/tmp/transformation.ipynb b/tmp/transformation.ipynb index 6fd84ae..915b440 100644 --- a/tmp/transformation.ipynb +++ b/tmp/transformation.ipynb @@ -2,31 +2,28 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": 8, "metadata": {}, "outputs": [ { - "ename": "KeyError", - "evalue": "'Beteiligter'", - "output_type": "error", - "traceback": [ - "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[1;31mKeyError\u001b[0m Traceback (most recent call last)", - "\u001b[1;32mc:\\Users\\trist\\Documents\\Code\\M.Sc\\aki_prj23_transparenzregister\\tmp\\transformation.ipynb Cell 1\u001b[0m line \u001b[0;36m6\n\u001b[0;32m 4\u001b[0m \u001b[39mwith\u001b[39;00m \u001b[39mopen\u001b[39m(\u001b[39m'\u001b[39m\u001b[39m../tmp/json/GEAFarmTechnologiesGmbH.json\u001b[39m\u001b[39m'\u001b[39m, \u001b[39m\"\u001b[39m\u001b[39mr\u001b[39m\u001b[39m\"\u001b[39m) \u001b[39mas\u001b[39;00m file:\n\u001b[0;32m 5\u001b[0m content \u001b[39m=\u001b[39m json\u001b[39m.\u001b[39mload(file)\n\u001b[1;32m----> 6\u001b[0m company_data \u001b[39m=\u001b[39m map_unternehmensregister_json(content)\n", - "File \u001b[1;32mc:\\Users\\trist\\Documents\\Code\\M.Sc\\aki_prj23_transparenzregister\\tmp\\transform.py:609\u001b[0m, in \u001b[0;36mmap_unternehmensregister_json\u001b[1;34m(data)\u001b[0m\n\u001b[0;32m 605\u001b[0m \u001b[39m# TODO adapt...\u001b[39;00m\n\u001b[0;32m 606\u001b[0m \u001b[39mfor\u001b[39;00m i \u001b[39min\u001b[39;00m \u001b[39mrange\u001b[39m(\n\u001b[0;32m 607\u001b[0m \u001b[39m2\u001b[39m, \u001b[39mlen\u001b[39m(data[\u001b[39m\"\u001b[39m\u001b[39mtns:grunddaten\u001b[39m\u001b[39m\"\u001b[39m][\u001b[39m\"\u001b[39m\u001b[39mtns:verfahrensdaten\u001b[39m\u001b[39m\"\u001b[39m][\u001b[39m\"\u001b[39m\u001b[39mtns:beteiligung\u001b[39m\u001b[39m\"\u001b[39m])\n\u001b[0;32m 608\u001b[0m ):\n\u001b[1;32m--> 609\u001b[0m people \u001b[39m=\u001b[39m parse_stakeholder(\n\u001b[0;32m 610\u001b[0m data[\u001b[39m\"\u001b[39;49m\u001b[39mtns:grunddaten\u001b[39;49m\u001b[39m\"\u001b[39;49m][\u001b[39m\"\u001b[39;49m\u001b[39mtns:verfahrensdaten\u001b[39;49m\u001b[39m\"\u001b[39;49m][\u001b[39m\"\u001b[39;49m\u001b[39mtns:beteiligung\u001b[39;49m\u001b[39m\"\u001b[39;49m][i]\n\u001b[0;32m 611\u001b[0m )\n\u001b[0;32m 612\u001b[0m result[\u001b[39m\"\u001b[39m\u001b[39mrelationships\u001b[39m\u001b[39m\"\u001b[39m]\u001b[39m.\u001b[39mappend(people)\n\u001b[0;32m 613\u001b[0m result \u001b[39m=\u001b[39m map_co_relation(result)\n", - "File \u001b[1;32mc:\\Users\\trist\\Documents\\Code\\M.Sc\\aki_prj23_transparenzregister\\tmp\\transform.py:82\u001b[0m, in \u001b[0;36mparse_stakeholder\u001b[1;34m(data)\u001b[0m\n\u001b[0;32m 73\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mparse_stakeholder\u001b[39m(data: \u001b[39mdict\u001b[39m) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m CompanyRelationship \u001b[39m|\u001b[39m \u001b[39mNone\u001b[39;00m:\n\u001b[0;32m 74\u001b[0m \u001b[39m \u001b[39m\u001b[39m\"\"\"Extract the company stakeholder/relation from a single \"Beteiligung\".\u001b[39;00m\n\u001b[0;32m 75\u001b[0m \n\u001b[0;32m 76\u001b[0m \u001b[39m Args:\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 80\u001b[0m \u001b[39m CompanyRelationship | None: Relationship if it could be processed\u001b[39;00m\n\u001b[0;32m 81\u001b[0m \u001b[39m \"\"\"\u001b[39;00m\n\u001b[1;32m---> 82\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39m\"\u001b[39m\u001b[39mNatuerliche_Person\u001b[39m\u001b[39m\"\u001b[39m \u001b[39min\u001b[39;00m data[\u001b[39m\"\u001b[39;49m\u001b[39mBeteiligter\u001b[39;49m\u001b[39m\"\u001b[39;49m]:\n\u001b[0;32m 83\u001b[0m \u001b[39m# It's a Company serving as a \"Kommanditist\" or similar\u001b[39;00m\n\u001b[0;32m 84\u001b[0m \u001b[39mif\u001b[39;00m data[\u001b[39m\"\u001b[39m\u001b[39mBeteiligter\u001b[39m\u001b[39m\"\u001b[39m][\u001b[39m\"\u001b[39m\u001b[39mNatuerliche_Person\u001b[39m\u001b[39m\"\u001b[39m][\u001b[39m\"\u001b[39m\u001b[39mVoller_Name\u001b[39m\u001b[39m\"\u001b[39m][\u001b[39m\"\u001b[39m\u001b[39mVorname\u001b[39m\u001b[39m\"\u001b[39m] \u001b[39mis\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[0;32m 85\u001b[0m \u001b[39mreturn\u001b[39;00m CompanyToCompanyRelationship(\n\u001b[0;32m 86\u001b[0m \u001b[39m*\u001b[39m\u001b[39m*\u001b[39m{ \u001b[39m# type: ignore\u001b[39;00m\n\u001b[0;32m 87\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mname\u001b[39m\u001b[39m\"\u001b[39m: remove_traling_and_leading_quotes(\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 110\u001b[0m }\n\u001b[0;32m 111\u001b[0m )\n", - "\u001b[1;31mKeyError\u001b[0m: 'Beteiligter'" + "name": "stdout", + "output_type": "stream", + "text": [ + "Company(id=CompanyID(district_court=DistrictCourt(name='Amtsgericht Hamm', city='Hamm'), hr_number='HRB 5363'), location=Location(city='Bönen', street='Siemensstraße', house_number='25-27', zip_code='59199'), name='GEA Farm Technologies GmbH', last_update='2023-10-27', relationships=[PersonToCompanyRelationship(role=, location=Location(city='Oelde', street=None, house_number=None, zip_code=None), type=, name=PersonName(firstname='Reinhard', lastname='Gebing'), date_of_birth='1964-04-26'), PersonToCompanyRelationship(role=, location=Location(city='Wetter', street=None, house_number=None, zip_code=None), type=, name=PersonName(firstname='Markus', lastname='Kreft'), date_of_birth='1966-04-03'), PersonToCompanyRelationship(role=, location=Location(city='Holzminden', street=None, house_number=None, zip_code=None), type=, name=PersonName(firstname='Kai', lastname='Luntz'), date_of_birth='1970-12-04'), PersonToCompanyRelationship(role=, location=Location(city='Rheda-Wiedenbrück', street=None, house_number=None, zip_code=None), type=, name=PersonName(firstname='Thomas', lastname='Mader'), date_of_birth='1972-05-24'), PersonToCompanyRelationship(role=, location=Location(city='Düsseldorf', street=None, house_number=None, zip_code=None), type=, name=PersonName(firstname='Peter', lastname='Lauwers'), date_of_birth='1970-03-26'), PersonToCompanyRelationship(role=, location=Location(city='Erkrath', street=None, house_number=None, zip_code=None), type=, name=PersonName(firstname='Erkul', lastname='Basaran'), date_of_birth='1977-05-06'), PersonToCompanyRelationship(role=, location=Location(city='Bochum', street=None, house_number=None, zip_code=None), type=, name=PersonName(firstname='Henrik', lastname='Böttner'), date_of_birth='1982-11-07'), PersonToCompanyRelationship(role=, location=Location(city='Oelde', street=None, house_number=None, zip_code=None), type=, name=PersonName(firstname='Ulrich', lastname='Raßenhövel'), date_of_birth='1969-04-16'), PersonToCompanyRelationship(role=, location=Location(city='Herdecke', street=None, house_number=None, zip_code=None), type=, name=PersonName(firstname='Andreas', lastname='Naroska'), date_of_birth='1967-03-23'), PersonToCompanyRelationship(role=, location=Location(city='Witten', street=None, house_number=None, zip_code=None), type=, name=PersonName(firstname='Mark', lastname='Kramps'), date_of_birth='1967-09-04'), PersonToCompanyRelationship(role=, location=Location(city='Dortmund', street=None, house_number=None, zip_code=None), type=, name=PersonName(firstname='Ralf', lastname='Barkmeyer'), date_of_birth='1974-02-28'), PersonToCompanyRelationship(role=, location=Location(city='Tönnisvorst', street=None, house_number=None, zip_code=None), type=, name=PersonName(firstname='Holger', lastname='Siegwarth'), date_of_birth='1967-05-13'), PersonToCompanyRelationship(role=, location=Location(city='Herne', street=None, house_number=None, zip_code=None), type=, name=PersonName(firstname='Oliver', lastname='Liß'), date_of_birth='1981-04-13'), PersonToCompanyRelationship(role=, location=Location(city='Göppingen', street=None, house_number=None, zip_code=None), type=, name=PersonName(firstname='Liang', lastname='Cheng'), date_of_birth='1980-12-29'), PersonToCompanyRelationship(role=, location=Location(city='Beckum', street=None, house_number=None, zip_code=None), type=, name=PersonName(firstname='Astrid', lastname='Dörner-Rodeheger'), date_of_birth='1968-12-24'), PersonToCompanyRelationship(role=, location=Location(city='Dortmund', street=None, house_number=None, zip_code=None), type=, name=PersonName(firstname='Jon', lastname='Lange'), date_of_birth='1978-04-25'), PersonToCompanyRelationship(role=, location=Location(city='Werne', street=None, house_number=None, zip_code=None), type=, name=PersonName(firstname='Ralf', lastname='Frombach'), date_of_birth='1977-01-25'), PersonToCompanyRelationship(role=, location=Location(city='Berlin', street=None, house_number=None, zip_code=None), type=, name=PersonName(firstname='Sven', lastname='Hommel'), date_of_birth='1979-04-22'), PersonToCompanyRelationship(role=, location=Location(city='Oberhausen', street=None, house_number=None, zip_code=None), type=, name=PersonName(firstname='Matthias', lastname='Peters'), date_of_birth='1973-08-28')], company_type=, capital=Capital(value=5115000.0, currency=, type=), business_purpose='Entwicklung, Herstellung und der Vertrieb von Landtechnik, insbesondere von Komponenten und Anlagen (a) zur Gewinnung, Kühlung, Behandlung und Lagerung von Milch; (b) für das Milchvieh-Herdenmanagement; (c) zur Tierhygiene und Sicherung der Milchqualität und (d) zur Aufstallung von Tieren; sowie die Herstellung und der Vertrieb von Anlagen und Fahrzeugen zur Aufbereitung und zum Transport von Gülle.', founding_date='1995-04-25')\n" ] } ], "source": [ "import json\n", + "import dataclasses\n", "from transform import map_unternehmensregister_json\n", "\n", "with open('../tmp/json/GEAFarmTechnologiesGmbH.json', \"r\") as file:\n", " content = json.load(file)\n", " company_data = map_unternehmensregister_json(content)\n", - " print(company_data)" + " print(company_data)\n", + " with open('../tmp/transformed/GEAFarmTechnologiesGmbH.json', \"w+\", encoding=\"utf-8\") as file:\n", + " json.dump(dataclasses.asdict(company_data), file, ensure_ascii=False)" ] }, { @@ -38,7 +35,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 9, "metadata": {}, "outputs": [ { @@ -48,7 +45,7 @@ "traceback": [ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[1;31mAssertionError\u001b[0m Traceback (most recent call last)", - "\u001b[1;32mc:\\Users\\trist\\Documents\\Code\\M.Sc\\aki_prj23_transparenzregister\\tmp\\transformation.ipynb Cell 1\u001b[0m line \u001b[0;36m9\n\u001b[0;32m 7\u001b[0m \u001b[39mwith\u001b[39;00m \u001b[39mopen\u001b[39m(\u001b[39m'\u001b[39m\u001b[39m../tmp/tests/GEAFarmTechnologiesGmbH.json\u001b[39m\u001b[39m'\u001b[39m, \u001b[39m\"\u001b[39m\u001b[39mr\u001b[39m\u001b[39m\"\u001b[39m) \u001b[39mas\u001b[39;00m file:\n\u001b[0;32m 8\u001b[0m expected_result \u001b[39m=\u001b[39m json\u001b[39m.\u001b[39mload(file)\n\u001b[1;32m----> 9\u001b[0m \u001b[39massert\u001b[39;00m result \u001b[39m==\u001b[39m expected_result\n", + "\u001b[1;32mc:\\Users\\trist\\Documents\\Code\\M.Sc\\aki_prj23_transparenzregister\\tmp\\transformation.ipynb Cell 3\u001b[0m line \u001b[0;36m9\n\u001b[0;32m 7\u001b[0m \u001b[39mwith\u001b[39;00m \u001b[39mopen\u001b[39m(\u001b[39m'\u001b[39m\u001b[39m../tmp/tests/GEAFarmTechnologiesGmbH.json\u001b[39m\u001b[39m'\u001b[39m, \u001b[39m\"\u001b[39m\u001b[39mr\u001b[39m\u001b[39m\"\u001b[39m) \u001b[39mas\u001b[39;00m file:\n\u001b[0;32m 8\u001b[0m expected_result \u001b[39m=\u001b[39m json\u001b[39m.\u001b[39mload(file)\n\u001b[1;32m----> 9\u001b[0m \u001b[39massert\u001b[39;00m result \u001b[39m==\u001b[39m expected_result\n", "\u001b[1;31mAssertionError\u001b[0m: " ] }