diff --git a/tmp/transform.py b/tmp/transform.py
index b876d41..9178805 100644
--- a/tmp/transform.py
+++ b/tmp/transform.py
@@ -63,13 +63,20 @@ def parse_date_of_birth(data: dict) -> str | None:
Returns:
str | None: date of birth or None if not found
"""
- if "tns:geburt" in (base := data["tns:beteiligter"]["tns:auswahl_beteililgter"]["tns:natuerlichePerson"]):
+ if "tns:geburt" in (base := data["tns:beteiligter"]["tns:auswahl_beteiligter"]["tns:natuerlichePerson"]):
base = base["tns:geburt"]["tns:geburtsdatum"]
if isinstance(base, str):
return base
return None
-# def map_role_id_to_enum(role_id: str) -> RelationshipRoleEnum:
+def map_role_id_to_enum(role_id: str) -> RelationshipRoleEnum:  # translate a role code string into its RelationshipRoleEnum member
+    match role_id:
+        case "086":
+            return RelationshipRoleEnum.GESCHAEFTSFUEHRER
+        case "285":
+            return RelationshipRoleEnum.PROKURIST
+        case _:  # every other code is rejected with a KeyError instead of being guessed
+            raise KeyError(f"Unknown role_id: {role_id}")
def parse_stakeholder(data: dict) -> CompanyRelationship | None:
@@ -138,8 +145,8 @@ def parse_stakeholder(data: dict) -> CompanyRelationship | None:
}
),
# TODO get role via ID
- "role": RelationshipRoleEnum(
- data["Rolle"]["Rollenbezeichnung"]["content"]
+ "role": map_role_id_to_enum(
+ data["tns:rolle"]["tns:rollenbezeichnung"]["code"]
),
"type": CompanyRelationshipEnum.PERSON,
}
@@ -606,13 +613,13 @@ def map_unternehmensregister_json(data: dict) -> Company:
result["founding_date"] = map_founding_date(data)
# TODO adapt...
- # for i in range(
- # 2, len(data["tns:grunddaten"]["tns:verfahrensdaten"]["tns:beteiligung"])
- # ):
- # people = parse_stakeholder(
- # data["tns:grunddaten"]["tns:verfahrensdaten"]["tns:beteiligung"][i]
- # )
- # result["relationships"].append(people)
+ for i in range(
+ 2, len(data["tns:grunddaten"]["tns:verfahrensdaten"]["tns:beteiligung"])
+ ):
+ people = parse_stakeholder(
+ data["tns:grunddaten"]["tns:verfahrensdaten"]["tns:beteiligung"][i]
+ )
+ result["relationships"].append(people)
result = map_co_relation(result)
return Company(**result)
diff --git a/tmp/transformation.ipynb b/tmp/transformation.ipynb
index 6fd84ae..915b440 100644
--- a/tmp/transformation.ipynb
+++ b/tmp/transformation.ipynb
@@ -2,31 +2,28 @@
"cells": [
{
"cell_type": "code",
- "execution_count": 1,
+ "execution_count": 8,
"metadata": {},
"outputs": [
{
- "ename": "KeyError",
- "evalue": "'Beteiligter'",
- "output_type": "error",
- "traceback": [
- "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
- "\u001b[1;31mKeyError\u001b[0m Traceback (most recent call last)",
- "\u001b[1;32mc:\\Users\\trist\\Documents\\Code\\M.Sc\\aki_prj23_transparenzregister\\tmp\\transformation.ipynb Cell 1\u001b[0m line \u001b[0;36m6\n\u001b[0;32m 4\u001b[0m \u001b[39mwith\u001b[39;00m \u001b[39mopen\u001b[39m(\u001b[39m'\u001b[39m\u001b[39m../tmp/json/GEAFarmTechnologiesGmbH.json\u001b[39m\u001b[39m'\u001b[39m, \u001b[39m\"\u001b[39m\u001b[39mr\u001b[39m\u001b[39m\"\u001b[39m) \u001b[39mas\u001b[39;00m file:\n\u001b[0;32m 5\u001b[0m content \u001b[39m=\u001b[39m json\u001b[39m.\u001b[39mload(file)\n\u001b[1;32m----> 6\u001b[0m company_data \u001b[39m=\u001b[39m map_unternehmensregister_json(content)\n",
- "File \u001b[1;32mc:\\Users\\trist\\Documents\\Code\\M.Sc\\aki_prj23_transparenzregister\\tmp\\transform.py:609\u001b[0m, in \u001b[0;36mmap_unternehmensregister_json\u001b[1;34m(data)\u001b[0m\n\u001b[0;32m 605\u001b[0m \u001b[39m# TODO adapt...\u001b[39;00m\n\u001b[0;32m 606\u001b[0m \u001b[39mfor\u001b[39;00m i \u001b[39min\u001b[39;00m \u001b[39mrange\u001b[39m(\n\u001b[0;32m 607\u001b[0m \u001b[39m2\u001b[39m, \u001b[39mlen\u001b[39m(data[\u001b[39m\"\u001b[39m\u001b[39mtns:grunddaten\u001b[39m\u001b[39m\"\u001b[39m][\u001b[39m\"\u001b[39m\u001b[39mtns:verfahrensdaten\u001b[39m\u001b[39m\"\u001b[39m][\u001b[39m\"\u001b[39m\u001b[39mtns:beteiligung\u001b[39m\u001b[39m\"\u001b[39m])\n\u001b[0;32m 608\u001b[0m ):\n\u001b[1;32m--> 609\u001b[0m people \u001b[39m=\u001b[39m parse_stakeholder(\n\u001b[0;32m 610\u001b[0m data[\u001b[39m\"\u001b[39;49m\u001b[39mtns:grunddaten\u001b[39;49m\u001b[39m\"\u001b[39;49m][\u001b[39m\"\u001b[39;49m\u001b[39mtns:verfahrensdaten\u001b[39;49m\u001b[39m\"\u001b[39;49m][\u001b[39m\"\u001b[39;49m\u001b[39mtns:beteiligung\u001b[39;49m\u001b[39m\"\u001b[39;49m][i]\n\u001b[0;32m 611\u001b[0m )\n\u001b[0;32m 612\u001b[0m result[\u001b[39m\"\u001b[39m\u001b[39mrelationships\u001b[39m\u001b[39m\"\u001b[39m]\u001b[39m.\u001b[39mappend(people)\n\u001b[0;32m 613\u001b[0m result \u001b[39m=\u001b[39m map_co_relation(result)\n",
- "File \u001b[1;32mc:\\Users\\trist\\Documents\\Code\\M.Sc\\aki_prj23_transparenzregister\\tmp\\transform.py:82\u001b[0m, in \u001b[0;36mparse_stakeholder\u001b[1;34m(data)\u001b[0m\n\u001b[0;32m 73\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mparse_stakeholder\u001b[39m(data: \u001b[39mdict\u001b[39m) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m CompanyRelationship \u001b[39m|\u001b[39m \u001b[39mNone\u001b[39;00m:\n\u001b[0;32m 74\u001b[0m \u001b[39m \u001b[39m\u001b[39m\"\"\"Extract the company stakeholder/relation from a single \"Beteiligung\".\u001b[39;00m\n\u001b[0;32m 75\u001b[0m \n\u001b[0;32m 76\u001b[0m \u001b[39m Args:\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 80\u001b[0m \u001b[39m CompanyRelationship | None: Relationship if it could be processed\u001b[39;00m\n\u001b[0;32m 81\u001b[0m \u001b[39m \"\"\"\u001b[39;00m\n\u001b[1;32m---> 82\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39m\"\u001b[39m\u001b[39mNatuerliche_Person\u001b[39m\u001b[39m\"\u001b[39m \u001b[39min\u001b[39;00m data[\u001b[39m\"\u001b[39;49m\u001b[39mBeteiligter\u001b[39;49m\u001b[39m\"\u001b[39;49m]:\n\u001b[0;32m 83\u001b[0m \u001b[39m# It's a Company serving as a \"Kommanditist\" or similar\u001b[39;00m\n\u001b[0;32m 84\u001b[0m \u001b[39mif\u001b[39;00m data[\u001b[39m\"\u001b[39m\u001b[39mBeteiligter\u001b[39m\u001b[39m\"\u001b[39m][\u001b[39m\"\u001b[39m\u001b[39mNatuerliche_Person\u001b[39m\u001b[39m\"\u001b[39m][\u001b[39m\"\u001b[39m\u001b[39mVoller_Name\u001b[39m\u001b[39m\"\u001b[39m][\u001b[39m\"\u001b[39m\u001b[39mVorname\u001b[39m\u001b[39m\"\u001b[39m] \u001b[39mis\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[0;32m 85\u001b[0m \u001b[39mreturn\u001b[39;00m CompanyToCompanyRelationship(\n\u001b[0;32m 86\u001b[0m \u001b[39m*\u001b[39m\u001b[39m*\u001b[39m{ \u001b[39m# type: ignore\u001b[39;00m\n\u001b[0;32m 87\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mname\u001b[39m\u001b[39m\"\u001b[39m: remove_traling_and_leading_quotes(\n\u001b[1;32m 
(...)\u001b[0m\n\u001b[0;32m 110\u001b[0m }\n\u001b[0;32m 111\u001b[0m )\n",
- "\u001b[1;31mKeyError\u001b[0m: 'Beteiligter'"
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Company(id=CompanyID(district_court=DistrictCourt(name='Amtsgericht Hamm', city='Hamm'), hr_number='HRB 5363'), location=Location(city='Bönen', street='Siemensstraße', house_number='25-27', zip_code='59199'), name='GEA Farm Technologies GmbH', last_update='2023-10-27', relationships=[PersonToCompanyRelationship(role=, location=Location(city='Oelde', street=None, house_number=None, zip_code=None), type=, name=PersonName(firstname='Reinhard', lastname='Gebing'), date_of_birth='1964-04-26'), PersonToCompanyRelationship(role=, location=Location(city='Wetter', street=None, house_number=None, zip_code=None), type=, name=PersonName(firstname='Markus', lastname='Kreft'), date_of_birth='1966-04-03'), PersonToCompanyRelationship(role=, location=Location(city='Holzminden', street=None, house_number=None, zip_code=None), type=, name=PersonName(firstname='Kai', lastname='Luntz'), date_of_birth='1970-12-04'), PersonToCompanyRelationship(role=, location=Location(city='Rheda-Wiedenbrück', street=None, house_number=None, zip_code=None), type=, name=PersonName(firstname='Thomas', lastname='Mader'), date_of_birth='1972-05-24'), PersonToCompanyRelationship(role=, location=Location(city='Düsseldorf', street=None, house_number=None, zip_code=None), type=, name=PersonName(firstname='Peter', lastname='Lauwers'), date_of_birth='1970-03-26'), PersonToCompanyRelationship(role=, location=Location(city='Erkrath', street=None, house_number=None, zip_code=None), type=, name=PersonName(firstname='Erkul', lastname='Basaran'), date_of_birth='1977-05-06'), PersonToCompanyRelationship(role=, location=Location(city='Bochum', street=None, house_number=None, zip_code=None), type=, name=PersonName(firstname='Henrik', lastname='Böttner'), date_of_birth='1982-11-07'), PersonToCompanyRelationship(role=, location=Location(city='Oelde', street=None, house_number=None, zip_code=None), type=, name=PersonName(firstname='Ulrich', lastname='Raßenhövel'), date_of_birth='1969-04-16'), 
PersonToCompanyRelationship(role=, location=Location(city='Herdecke', street=None, house_number=None, zip_code=None), type=, name=PersonName(firstname='Andreas', lastname='Naroska'), date_of_birth='1967-03-23'), PersonToCompanyRelationship(role=, location=Location(city='Witten', street=None, house_number=None, zip_code=None), type=, name=PersonName(firstname='Mark', lastname='Kramps'), date_of_birth='1967-09-04'), PersonToCompanyRelationship(role=, location=Location(city='Dortmund', street=None, house_number=None, zip_code=None), type=, name=PersonName(firstname='Ralf', lastname='Barkmeyer'), date_of_birth='1974-02-28'), PersonToCompanyRelationship(role=, location=Location(city='Tönnisvorst', street=None, house_number=None, zip_code=None), type=, name=PersonName(firstname='Holger', lastname='Siegwarth'), date_of_birth='1967-05-13'), PersonToCompanyRelationship(role=, location=Location(city='Herne', street=None, house_number=None, zip_code=None), type=, name=PersonName(firstname='Oliver', lastname='Liß'), date_of_birth='1981-04-13'), PersonToCompanyRelationship(role=, location=Location(city='Göppingen', street=None, house_number=None, zip_code=None), type=, name=PersonName(firstname='Liang', lastname='Cheng'), date_of_birth='1980-12-29'), PersonToCompanyRelationship(role=, location=Location(city='Beckum', street=None, house_number=None, zip_code=None), type=, name=PersonName(firstname='Astrid', lastname='Dörner-Rodeheger'), date_of_birth='1968-12-24'), PersonToCompanyRelationship(role=, location=Location(city='Dortmund', street=None, house_number=None, zip_code=None), type=, name=PersonName(firstname='Jon', lastname='Lange'), date_of_birth='1978-04-25'), PersonToCompanyRelationship(role=, location=Location(city='Werne', street=None, house_number=None, zip_code=None), type=, name=PersonName(firstname='Ralf', lastname='Frombach'), date_of_birth='1977-01-25'), PersonToCompanyRelationship(role=, location=Location(city='Berlin', street=None, house_number=None, 
zip_code=None), type=, name=PersonName(firstname='Sven', lastname='Hommel'), date_of_birth='1979-04-22'), PersonToCompanyRelationship(role=, location=Location(city='Oberhausen', street=None, house_number=None, zip_code=None), type=, name=PersonName(firstname='Matthias', lastname='Peters'), date_of_birth='1973-08-28')], company_type=, capital=Capital(value=5115000.0, currency=, type=), business_purpose='Entwicklung, Herstellung und der Vertrieb von Landtechnik, insbesondere von Komponenten und Anlagen (a) zur Gewinnung, Kühlung, Behandlung und Lagerung von Milch; (b) für das Milchvieh-Herdenmanagement; (c) zur Tierhygiene und Sicherung der Milchqualität und (d) zur Aufstallung von Tieren; sowie die Herstellung und der Vertrieb von Anlagen und Fahrzeugen zur Aufbereitung und zum Transport von Gülle.', founding_date='1995-04-25')\n"
]
}
],
"source": [
"import json\n",
+ "import dataclasses\n",
"from transform import map_unternehmensregister_json\n",
"\n",
"with open('../tmp/json/GEAFarmTechnologiesGmbH.json', \"r\") as file:\n",
" content = json.load(file)\n",
" company_data = map_unternehmensregister_json(content)\n",
- " print(company_data)"
+ " print(company_data)\n",
+ " with open('../tmp/transformed/GEAFarmTechnologiesGmbH.json', \"w+\", encoding=\"utf-8\") as file:\n",
+ " json.dump(dataclasses.asdict(company_data), file, ensure_ascii=False)"
]
},
{
@@ -38,7 +35,7 @@
},
{
"cell_type": "code",
- "execution_count": 5,
+ "execution_count": 9,
"metadata": {},
"outputs": [
{
@@ -48,7 +45,7 @@
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mAssertionError\u001b[0m Traceback (most recent call last)",
- "\u001b[1;32mc:\\Users\\trist\\Documents\\Code\\M.Sc\\aki_prj23_transparenzregister\\tmp\\transformation.ipynb Cell 1\u001b[0m line \u001b[0;36m9\n\u001b[0;32m 7\u001b[0m \u001b[39mwith\u001b[39;00m \u001b[39mopen\u001b[39m(\u001b[39m'\u001b[39m\u001b[39m../tmp/tests/GEAFarmTechnologiesGmbH.json\u001b[39m\u001b[39m'\u001b[39m, \u001b[39m\"\u001b[39m\u001b[39mr\u001b[39m\u001b[39m\"\u001b[39m) \u001b[39mas\u001b[39;00m file:\n\u001b[0;32m 8\u001b[0m expected_result \u001b[39m=\u001b[39m json\u001b[39m.\u001b[39mload(file)\n\u001b[1;32m----> 9\u001b[0m \u001b[39massert\u001b[39;00m result \u001b[39m==\u001b[39m expected_result\n",
+ "\u001b[1;32mc:\\Users\\trist\\Documents\\Code\\M.Sc\\aki_prj23_transparenzregister\\tmp\\transformation.ipynb Cell 3\u001b[0m line \u001b[0;36m9\n\u001b[0;32m 7\u001b[0m \u001b[39mwith\u001b[39;00m \u001b[39mopen\u001b[39m(\u001b[39m'\u001b[39m\u001b[39m../tmp/tests/GEAFarmTechnologiesGmbH.json\u001b[39m\u001b[39m'\u001b[39m, \u001b[39m\"\u001b[39m\u001b[39mr\u001b[39m\u001b[39m\"\u001b[39m) \u001b[39mas\u001b[39;00m file:\n\u001b[0;32m 8\u001b[0m expected_result \u001b[39m=\u001b[39m json\u001b[39m.\u001b[39mload(file)\n\u001b[1;32m----> 9\u001b[0m \u001b[39massert\u001b[39;00m result \u001b[39m==\u001b[39m expected_result\n",
"\u001b[1;31mAssertionError\u001b[0m: "
]
}