From 5b96bb7e3e05dbf44cdc9dbc690dfde32e8ff12b Mon Sep 17 00:00:00 2001 From: TrisNol Date: Fri, 16 Jun 2023 18:00:11 +0200 Subject: [PATCH] adding company ID as well as compatible dataclasses --- .pre-commit-config.yaml | 2 +- .vscode/settings.json | 3 + .../Unternehmensregister/models/Company.py | 37 +++ .../Unternehmensregister/models/__init__.py | 0 .../Unternehmensregister/notebook.ipynb | 230 ++++++++++++++---- 5 files changed, 220 insertions(+), 52 deletions(-) create mode 100644 .vscode/settings.json create mode 100644 Jupyter/API-tests/Unternehmensregister/models/Company.py create mode 100644 Jupyter/API-tests/Unternehmensregister/models/__init__.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 27df19b..18edde8 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -53,7 +53,7 @@ repos: rev: 6.0.0 hooks: - id: flake8 - args: [--config=tox.ini] + # args: [--config=tox.ini] - repo: https://github.com/pre-commit/mirrors-mypy rev: v1.2.0 diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..7e6882b --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,3 @@ +{ + "files.eol": "\n" +} diff --git a/Jupyter/API-tests/Unternehmensregister/models/Company.py b/Jupyter/API-tests/Unternehmensregister/models/Company.py new file mode 100644 index 0000000..dbddee3 --- /dev/null +++ b/Jupyter/API-tests/Unternehmensregister/models/Company.py @@ -0,0 +1,37 @@ +from abc import ABC +from dataclasses import dataclass +from enum import Enum + + +class RelationshipRoleEnum(Enum): + STAKEHOLDER = "" + ORGANISATION = "ORGANISATION" + + +@dataclass +class CompayID: + district_court: str + hr_number: str + + +@dataclass +class Location: + city: str + street: str | None = None + house_number: str | None = None + zip_code: str | None = None + + +@dataclass +class CompanyRelationship(ABC): + role: RelationshipRoleEnum + location: Location + + +@dataclass +class Company: + id: CompayID + location: Location + name: str + last_update: str + relationships: list[CompanyRelationship] diff --git a/Jupyter/API-tests/Unternehmensregister/models/__init__.py b/Jupyter/API-tests/Unternehmensregister/models/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/Jupyter/API-tests/Unternehmensregister/notebook.ipynb b/Jupyter/API-tests/Unternehmensregister/notebook.ipynb index de91308..38ac9be 100644 --- a/Jupyter/API-tests/Unternehmensregister/notebook.ipynb +++ b/Jupyter/API-tests/Unternehmensregister/notebook.ipynb @@ -18,7 +18,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -28,7 +28,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -50,7 +50,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 5, "metadata": {}, "outputs": [ { @@ -67,7 +67,7 @@ "traceback": [ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[1;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", - "\u001b[1;32mc:\\Users\\trist\\Documents\\Code\\M.Sc\\aki_prj23_transparenzregister\\Jupyter\\API-tests\\Unternehmensregister\\notebook.ipynb Cell 5\u001b[0m in \u001b[0;36m3\n\u001b[0;32m 27\u001b[0m options\u001b[39m.\u001b[39madd_experimental_option(\u001b[39m\"\u001b[39m\u001b[39mprefs\u001b[39m\u001b[39m\"\u001b[39m, preferences)\n\u001b[0;32m 29\u001b[0m driver \u001b[39m=\u001b[39m webdriver\u001b[39m.\u001b[39mChrome(options\u001b[39m=\u001b[39moptions)\n\u001b[1;32m---> 31\u001b[0m driver\u001b[39m.\u001b[39;49mget(\u001b[39m\"\u001b[39;49m\u001b[39mhttps://www.unternehmensregister.de/ureg/\u001b[39;49m\u001b[39m\"\u001b[39;49m)\n\u001b[0;32m 32\u001b[0m \u001b[39m# Accept Cookies\u001b[39;00m\n\u001b[0;32m 33\u001b[0m driver\u001b[39m.\u001b[39mfind_elements(\n\u001b[0;32m 34\u001b[0m By\u001b[39m.\u001b[39mXPATH, \u001b[39m'\u001b[39m\u001b[39m//button[text()=\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mNur technisch notwendige Cookies akzeptieren\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m]\u001b[39m\u001b[39m'\u001b[39m\n\u001b[0;32m 35\u001b[0m )[\u001b[39m0\u001b[39m]\u001b[39m.\u001b[39mclick()\n", + "\u001b[1;32mc:\\Users\\trist\\Documents\\Code\\M.Sc\\aki_prj23_transparenzregister\\Jupyter\\API-tests\\Unternehmensregister\\notebook.ipynb Cell 5\u001b[0m in \u001b[0;36m3\n\u001b[0;32m 27\u001b[0m options\u001b[39m.\u001b[39madd_experimental_option(\u001b[39m\"\u001b[39m\u001b[39mprefs\u001b[39m\u001b[39m\"\u001b[39m, preferences)\n\u001b[0;32m 29\u001b[0m driver \u001b[39m=\u001b[39m webdriver\u001b[39m.\u001b[39mChrome(options\u001b[39m=\u001b[39moptions)\n\u001b[1;32m---> 31\u001b[0m driver\u001b[39m.\u001b[39;49mget(\u001b[39m\"\u001b[39;49m\u001b[39mhttps://www.unternehmensregister.de/ureg/\u001b[39;49m\u001b[39m\"\u001b[39;49m)\n\u001b[0;32m 32\u001b[0m \u001b[39m# Accept Cookies\u001b[39;00m\n\u001b[0;32m 33\u001b[0m driver\u001b[39m.\u001b[39mfind_elements(\n\u001b[0;32m 34\u001b[0m By\u001b[39m.\u001b[39mXPATH, \u001b[39m'\u001b[39m\u001b[39m//button[text()=\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mNur technisch notwendige Cookies akzeptieren\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m]\u001b[39m\u001b[39m'\u001b[39m\n\u001b[0;32m 35\u001b[0m )[\u001b[39m0\u001b[39m]\u001b[39m.\u001b[39mclick()\n", "File \u001b[1;32mc:\\Python310\\lib\\site-packages\\selenium\\webdriver\\remote\\webdriver.py:355\u001b[0m, in \u001b[0;36mWebDriver.get\u001b[1;34m(self, url)\u001b[0m\n\u001b[0;32m 353\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mget\u001b[39m(\u001b[39mself\u001b[39m, url: \u001b[39mstr\u001b[39m) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m \u001b[39mNone\u001b[39;00m:\n\u001b[0;32m 354\u001b[0m \u001b[39m \u001b[39m\u001b[39m\"\"\"Loads a web page in the current browser session.\"\"\"\u001b[39;00m\n\u001b[1;32m--> 355\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mexecute(Command\u001b[39m.\u001b[39;49mGET, {\u001b[39m\"\u001b[39;49m\u001b[39murl\u001b[39;49m\u001b[39m\"\u001b[39;49m: url})\n", "File \u001b[1;32mc:\\Python310\\lib\\site-packages\\selenium\\webdriver\\remote\\webdriver.py:344\u001b[0m, in \u001b[0;36mWebDriver.execute\u001b[1;34m(self, driver_command, params)\u001b[0m\n\u001b[0;32m 341\u001b[0m \u001b[39melif\u001b[39;00m \u001b[39m\"\u001b[39m\u001b[39msessionId\u001b[39m\u001b[39m\"\u001b[39m \u001b[39mnot\u001b[39;00m \u001b[39min\u001b[39;00m params:\n\u001b[0;32m 342\u001b[0m params[\u001b[39m\"\u001b[39m\u001b[39msessionId\u001b[39m\u001b[39m\"\u001b[39m] \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39msession_id\n\u001b[1;32m--> 344\u001b[0m response \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mcommand_executor\u001b[39m.\u001b[39;49mexecute(driver_command, params)\n\u001b[0;32m 345\u001b[0m \u001b[39mif\u001b[39;00m response:\n\u001b[0;32m 346\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39merror_handler\u001b[39m.\u001b[39mcheck_response(response)\n", "File \u001b[1;32mc:\\Python310\\lib\\site-packages\\selenium\\webdriver\\remote\\remote_connection.py:290\u001b[0m, in \u001b[0;36mRemoteConnection.execute\u001b[1;34m(self, command, params)\u001b[0m\n\u001b[0;32m 288\u001b[0m data \u001b[39m=\u001b[39m utils\u001b[39m.\u001b[39mdump_json(params)\n\u001b[0;32m 289\u001b[0m url \u001b[39m=\u001b[39m \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m{\u001b[39;00m\u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_url\u001b[39m}\u001b[39;00m\u001b[39m{\u001b[39;00mpath\u001b[39m}\u001b[39;00m\u001b[39m\"\u001b[39m\n\u001b[1;32m--> 290\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_request(command_info[\u001b[39m0\u001b[39;49m], url, body\u001b[39m=\u001b[39;49mdata)\n", @@ -215,134 +215,252 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "['A&A_Amini_Art_GmbH.xml',\n", + "['A&A_Amini_Art_GmbH.json',\n", + " 'A&A_Amini_Art_GmbH.xml',\n", + " 'A&A_Immo_GmbH.json',\n", " 'A&A_Immo_GmbH.xml',\n", + " 'A&P_AUDITING_GmbH_Wirtschaftsprüfungsgesellschaft.json',\n", " 'A&P_AUDITING_GmbH_Wirtschaftsprüfungsgesellschaft.xml',\n", + " 'A&QUA_gemeinnützige_Gesellschaft_für_Arbeit_u._Qualifizierung_mbH.json',\n", " 'A&QUA_gemeinnützige_Gesellschaft_für_Arbeit_u._Qualifizierung_mbH.xml',\n", + " 'a+b_Asphalt-_und_Betonmischwerke_GmbH_&_Co._KG.json',\n", " 'a+b_Asphalt-_und_Betonmischwerke_GmbH_&_Co._KG.xml',\n", + " 'a+b_Verwaltungsgesellschaft_mbH.json',\n", " 'a+b_Verwaltungsgesellschaft_mbH.xml',\n", + " 'A+E_Beteiligungs-_und_Handels-GmbH.json',\n", " 'A+E_Beteiligungs-_und_Handels-GmbH.xml',\n", + " 'A+W_Systemhaus_GmbH.json',\n", " 'A+W_Systemhaus_GmbH.xml',\n", + " 'A-S-D_Kfz-Teile-Handel_GmbH.json',\n", " 'A-S-D_Kfz-Teile-Handel_GmbH.xml',\n", + " 'A-TEAM_Industrielles_Roboterschweißen_GmbH.json',\n", " 'A-TEAM_Industrielles_Roboterschweißen_GmbH.xml',\n", + " 'A.C.C._Funk_Taxi_&_Minicar_e.K..json',\n", " 'A.C.C._Funk_Taxi_&_Minicar_e.K..xml',\n", + " 'a.c.k._aqua_concept_GmbH_Karlsruhe.json',\n", " 'a.c.k._aqua_concept_GmbH_Karlsruhe.xml',\n", + " 'A.C._Weiss_GmbH_&_Co._KG.json',\n", " 'A.C._Weiss_GmbH_&_Co._KG.xml',\n", + " 'A.D.S._OHG.json',\n", " 'A.D.S._OHG.xml',\n", + " 'A.D._Glas-_und_Gebäudereinigung_e.K..json',\n", " 'A.D._Glas-_und_Gebäudereinigung_e.K..xml',\n", + " 'A.E._Z-Line_Taxi_-_und_Shuttle-Service_e.K..json',\n", " 'A.E._Z-Line_Taxi_-_und_Shuttle-Service_e.K..xml',\n", + " 'A.F.Z._Automatisierung,_Fördern,_Zuführen_GmbH.json',\n", " 'A.F.Z._Automatisierung,_Fördern,_Zuführen_GmbH.xml',\n", + " 'A.G._Zentral_Michael_Greising_e.K..json',\n", " 'A.G._Zentral_Michael_Greising_e.K..xml',\n", + " 'A.H._Steuerberatungsgesellschaft_mbH.json',\n", " 'A.H._Steuerberatungsgesellschaft_mbH.xml',\n", + " 'A.I.V._SERVICES_GmbH.json',\n", " 'A.I.V._SERVICES_GmbH.xml',\n", + " 'A.I._Kommanditist-Gesellschaft_mbH.json',\n", " 'A.I._Kommanditist-Gesellschaft_mbH.xml',\n", + " 'A.KIein_Immobilien_KG.json',\n", " 'A.KIein_Immobilien_KG.xml',\n", + " 'A.L.G._Christian_Schmelzer.json',\n", " 'A.L.G._Christian_Schmelzer.xml',\n", + " 'A.L.S._Architektonische_Licht-Systeme_GmbH.json',\n", " 'A.L.S._Architektonische_Licht-Systeme_GmbH.xml',\n", + " 'A.M.G._Motorenbau_Hans_Werner_Aufrecht.json',\n", " 'A.M.G._Motorenbau_Hans_Werner_Aufrecht.xml',\n", + " 'A.M.P._Athos_GmbH.json',\n", " 'A.M.P._Athos_GmbH.xml',\n", + " 'A.N._Gartenbau_GmbH.json',\n", " 'A.N._Gartenbau_GmbH.xml',\n", + " 'A.Q.U.A_Services_KG.json',\n", " 'A.Q.U.A_Services_KG.xml',\n", + " 'A.R.S._GmbH_Süd,_Alt_und_Reststoffverwertung.json',\n", " 'A.R.S._GmbH_Süd,_Alt_und_Reststoffverwertung.xml',\n", + " 'A.S.G._Industrielackierungen_GmbH.json',\n", " 'A.S.G._Industrielackierungen_GmbH.xml',\n", + " 'A.S.S._bikes_and_parts_GmbH.json',\n", " 'A.S.S._bikes_and_parts_GmbH.xml',\n", + " 'A.S._Baubedarfvermittlung_Gesellschaft_mit_beschränkter_Haftung.json',\n", " 'A.S._Baubedarfvermittlung_Gesellschaft_mit_beschränkter_Haftung.xml',\n", + " 'A.T.C._Automotive_GmbH.json',\n", " 'A.T.C._Automotive_GmbH.xml',\n", + " 'A._&_S._Aigner_und_Schulz_GmbH.json',\n", " 'A._&_S._Aigner_und_Schulz_GmbH.xml',\n", + " 'A._+_H._Weier_GmbH.json',\n", " 'A._+_H._Weier_GmbH.xml',\n", + " 'A._+_K._Hertkorn_OHG_Möbel_-_Innenausbau.json',\n", " 'A._+_K._Hertkorn_OHG_Möbel_-_Innenausbau.xml',\n", + " 'A._Abele_GmbH.json',\n", " 'A._Abele_GmbH.xml',\n", + " 'A._Baur_Mineralöl-Abfertigungsspedition_GmbH.json',\n", " 'A._Baur_Mineralöl-Abfertigungsspedition_GmbH.xml',\n", + " 'A._Blum_GmbH.json',\n", " 'A._Blum_GmbH.xml',\n", + " 'A._Both_GmbH.json',\n", " 'A._Both_GmbH.xml',\n", + " 'A._Both_GmbH_&_Co._KG_Werkzeugtechnik_CNC_Maschinenausrüstung.json',\n", " 'A._Both_GmbH_&_Co._KG_Werkzeugtechnik_CNC_Maschinenausrüstung.xml',\n", + " 'A._DINKIC_GMBH.json',\n", " 'A._DINKIC_GMBH.xml',\n", + " 'A._Elsbecker_GmbH.json',\n", " 'A._Elsbecker_GmbH.xml',\n", + " 'A._Erglis_GmbH.json',\n", " 'A._Erglis_GmbH.xml',\n", + " 'A._Frauenrath_Landschaftsbau_GmbH_&_Co._KG..json',\n", " 'A._Frauenrath_Landschaftsbau_GmbH_&_Co._KG..xml',\n", + " 'A._Gradmann_GmbH_&_Co._KG.json',\n", " 'A._Gradmann_GmbH_&_Co._KG.xml',\n", + " 'A._Hanhart_GmbH_&_Co._KG.json',\n", " 'A._Hanhart_GmbH_&_Co._KG.xml',\n", + " 'A._Hüglin_-_Putz_und_Stuck_-_Gesellschaft_mit_beschränkter_Haftung.json',\n", " 'A._Hüglin_-_Putz_und_Stuck_-_Gesellschaft_mit_beschränkter_Haftung.xml',\n", + " 'A._Illmann_Zahntechnik_GmbH.json',\n", " 'A._Illmann_Zahntechnik_GmbH.xml',\n", + " 'A._Junghanns_Automatisierungs_GmbH.json',\n", " 'A._Junghanns_Automatisierungs_GmbH.xml',\n", + " 'A._Jung_GmbH_&_Co._KG.json',\n", " 'A._Jung_GmbH_&_Co._KG.xml',\n", + " 'A._Kolbinger_GmbH_Versicherungs-Makler.json',\n", " 'A._Kolbinger_GmbH_Versicherungs-Makler.xml',\n", + " 'A._Kolckmann,_Weberei_und_Kunststoffbeschichtungen_GmbH.json',\n", " 'A._Kolckmann,_Weberei_und_Kunststoffbeschichtungen_GmbH.xml',\n", + " 'A._Kolckmann_GmbH_&_Co._KG.json',\n", " 'A._Kolckmann_GmbH_&_Co._KG.xml',\n", + " 'A._Kuhner_GmbH.json',\n", " 'A._Kuhner_GmbH.xml',\n", + " 'A._Lipp_GmbH.json',\n", " 'A._Lipp_GmbH.xml',\n", + " 'A._Müller_Geschäftsführungs-_GmbH.json',\n", " 'A._Müller_Geschäftsführungs-_GmbH.xml',\n", + " 'A._Müller_GmbH_&_Co._KG.json',\n", " 'A._Müller_GmbH_&_Co._KG.xml',\n", + " 'A._Nassal_GmbH.json',\n", " 'A._Nassal_GmbH.xml',\n", + " 'A._Oster_e.K..json',\n", " 'A._Oster_e.K..xml',\n", + " 'A._Pfeiffer_Zimmerei_GmbH.json',\n", " 'A._Pfeiffer_Zimmerei_GmbH.xml',\n", + " 'A._Pfingsten_KG.json',\n", " 'A._Pfingsten_KG.xml',\n", + " 'A._Pullmann_GmbH.json',\n", " 'A._Pullmann_GmbH.xml',\n", + " 'A._Randecker_Wirtschafts-_und_Steuerberatungsgesellschaft_mbH.json',\n", " 'A._Randecker_Wirtschafts-_und_Steuerberatungsgesellschaft_mbH.xml',\n", + " 'A._Reinhard_GmbH.json',\n", " 'A._Reinhard_GmbH.xml',\n", + " 'A._Ritter_GmbH.json',\n", " 'A._Ritter_GmbH.xml',\n", + " 'A._Sabadinowitsch_Verwaltung_GmbH.json',\n", " 'A._Sabadinowitsch_Verwaltung_GmbH.xml',\n", + " 'A._Sluka-Verwaltungsgesellschaft_mit_beschränkter_Haftung.json',\n", " 'A._Sluka-Verwaltungsgesellschaft_mit_beschränkter_Haftung.xml',\n", + " 'A._Sommer_Finanzdienstleistungsvermittlung_e.K..json',\n", " 'A._Sommer_Finanzdienstleistungsvermittlung_e.K..xml',\n", + " 'A._Sorg_GmbH_&_Co._KG.json',\n", " 'A._Sorg_GmbH_&_Co._KG.xml',\n", + " 'A._u._G_Sexton_GmbH.json',\n", " 'A._u._G_Sexton_GmbH.xml',\n", + " 'A._Umminger_LUM-Air,_Elektro-_und_Filtertechnik_GmbH.json',\n", " 'A._Umminger_LUM-Air,_Elektro-_und_Filtertechnik_GmbH.xml',\n", + " 'A._Wankmüller_GmbH_&_Co._KG.json',\n", " 'A._Wankmüller_GmbH_&_Co._KG.xml',\n", + " 'A._Ziemann_Gesellschaft_mit_beschränkter_Haftung.json',\n", " 'A._Ziemann_Gesellschaft_mit_beschränkter_Haftung.xml',\n", + " 'A._Zwisler_e.K..json',\n", " 'A._Zwisler_e.K..xml',\n", + " 'A_&_A_Consulting_GmbH.json',\n", " 'A_&_A_Consulting_GmbH.xml',\n", + " 'A_&_A_Gipserbetrieb_GmbH.json',\n", " 'A_&_A_Gipserbetrieb_GmbH.xml',\n", + " 'a_&_b_Beteiligungs-GmbH.json',\n", " 'a_&_b_Beteiligungs-GmbH.xml',\n", + " 'A_&_B_Gastronomie-Betriebe_GmbH.json',\n", " 'A_&_B_Gastronomie-Betriebe_GmbH.xml',\n", + " 'A_&_C_Aqua_&_Care_Limited.json',\n", " 'A_&_C_Aqua_&_Care_Limited.xml',\n", + " 'A_&_F_Lori_GmbH.json',\n", " 'A_&_F_Lori_GmbH.xml',\n", + " 'A_&_L_Engineering_Service_GmbH.json',\n", " 'A_&_L_Engineering_Service_GmbH.xml',\n", + " 'A_&_M_Stanzformzubehör_Olaf_Abendroth_GmbH.json',\n", " 'A_&_M_Stanzformzubehör_Olaf_Abendroth_GmbH.xml',\n", + " 'A_&_O_Grundstücksverwaltungs_GmbH_&_Co._KG.json',\n", " 'A_&_O_Grundstücksverwaltungs_GmbH_&_Co._KG.xml',\n", + " 'A_&_R_Textilproduktion_GmbH.json',\n", " 'A_&_R_Textilproduktion_GmbH.xml',\n", + " 'A_&_S_Bäder_GmbH_&_Co..json',\n", " 'A_&_S_Bäder_GmbH_&_Co..xml',\n", + " 'A_&_S_Vermögensverwaltungs_GmbH.json',\n", " 'A_&_S_Vermögensverwaltungs_GmbH.xml',\n", + " 'A_&_T_Roth_GmbH.json',\n", " 'A_&_T_Roth_GmbH.xml',\n", + " 'A_+_A_Aalsmeer_Blumen_-_Bräutigam_E._Kfr.,_Inh._Manuela_Bräutigam.json',\n", " 'A_+_A_Aalsmeer_Blumen_-_Bräutigam_E._Kfr.,_Inh._Manuela_Bräutigam.xml',\n", + " 'a_+_b_Wohnbau_GmbH.json',\n", " 'a_+_b_Wohnbau_GmbH.xml',\n", + " 'A_+_H_Bauträger-_und_Verwaltungsgesellschaft_mit_beschränkter_Haftung.json',\n", " 'A_+_H_Bauträger-_und_Verwaltungsgesellschaft_mit_beschränkter_Haftung.xml',\n", + " 'A_+_M_Verwaltungs-GmbH.json',\n", " 'A_+_M_Verwaltungs-GmbH.xml',\n", + " 'A_+_P_Baumaschinen_GmbH_&_Co._KG.json',\n", " 'A_+_P_Baumaschinen_GmbH_&_Co._KG.xml',\n", + " 'A_+_R_Baumaschinen_-_Mietpark_+_-Vertriebs-GmbH.json',\n", " 'A_+_R_Baumaschinen_-_Mietpark_+_-Vertriebs-GmbH.xml',\n", + " 'A_+_S_Tierbedarf_GmbH.json',\n", " 'A_+_S_Tierbedarf_GmbH.xml',\n", + " 'A_+_Te_Stabil-Bau_GmbH.json',\n", " 'A_+_Te_Stabil-Bau_GmbH.xml',\n", + " 'A_+_W._Sahm_Bedachungs-GmbH.json',\n", " 'A_+_W._Sahm_Bedachungs-GmbH.xml',\n", + " 'a_-_Vermögensverwaltungs-GmbH_&_Co._KG.json',\n", " 'a_-_Vermögensverwaltungs-GmbH_&_Co._KG.xml',\n", + " 'A_-_Z_Kreditvermittlungs-Gesellschaft_mit_beschränkter_Haftung.json',\n", " 'A_-_Z_Kreditvermittlungs-Gesellschaft_mit_beschränkter_Haftung.xml',\n", + " 'A_2000_Industrie-Elektronik_GmbH.json',\n", " 'A_2000_Industrie-Elektronik_GmbH.xml',\n", + " 'A_bis_Z_Verwaltungs_GmbH.json',\n", " 'A_bis_Z_Verwaltungs_GmbH.xml',\n", + " 'A_B_A_S_A_GmbH_Organisations_-_Planungsbüro_für_den_Innenausbau.json',\n", " 'A_B_A_S_A_GmbH_Organisations_-_Planungsbüro_für_den_Innenausbau.xml',\n", + " 'A_B_U_-_GmbH_Altlasten_Bauökologie_Umweltmanagement.json',\n", " 'A_B_U_-_GmbH_Altlasten_Bauökologie_Umweltmanagement.xml',\n", + " 'A_F_Fussbodentechnik_GmbH.json',\n", " 'A_F_Fussbodentechnik_GmbH.xml',\n", + " 'A_L_T_E_C_GmbH.json',\n", " 'A_L_T_E_C_GmbH.xml',\n", + " 'A_l_u_f_o_r_m_Alucobondverarbeitungs-GmbH.json',\n", " 'A_l_u_f_o_r_m_Alucobondverarbeitungs-GmbH.xml',\n", + " 'A_L_Z_Auto_Licht_und_Zündung_Service_Gesellschaft_mit_beschränkter_Haftung.json',\n", " 'A_L_Z_Auto_Licht_und_Zündung_Service_Gesellschaft_mit_beschränkter_Haftung.xml',\n", + " 'a_m_friseure_GmbH_Karlsruhe.json',\n", " 'a_m_friseure_GmbH_Karlsruhe.xml',\n", + " 'a_m_friseure_GmbH_Koblenz.json',\n", " 'a_m_friseure_GmbH_Koblenz.xml',\n", + " 'a_priori_GmbH.json',\n", " 'a_priori_GmbH.xml',\n", + " 'a_s_k_-_Kunststoffe_GmbH.json',\n", " 'a_s_k_-_Kunststoffe_GmbH.xml',\n", + " 'A_S_TRUCKS_e.K..json',\n", " 'A_S_TRUCKS_e.K..xml',\n", + " 'A_S_Y_S_Automatic_Systems_Beteiligungs-GmbH.json',\n", " 'A_S_Y_S_Automatic_Systems_Beteiligungs-GmbH.xml',\n", + " 'A_u_c_h_Gesellschaft_mit_beschränkter_Haftung.json',\n", " 'A_u_c_h_Gesellschaft_mit_beschränkter_Haftung.xml',\n", " 'export',\n", + " 'registerdocument-2023-06-11-12-41-30 (1).json',\n", " 'registerdocument-2023-06-11-12-41-30 (1).xml',\n", + " 'registerdocument-2023-06-11-12-41-30.json',\n", " 'registerdocument-2023-06-11-12-41-30.xml',\n", + " 'registerdocument-2023-06-11-12-52-33.json',\n", " 'registerdocument-2023-06-11-12-52-33.xml',\n", + " 'registerdocument-2023-06-11-12-52-41.json',\n", " 'registerdocument-2023-06-11-12-52-41.xml']" ] }, - "execution_count": 4, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -356,7 +474,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ @@ -380,7 +498,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ @@ -392,10 +510,13 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 19, "metadata": {}, "outputs": [], "source": [ + "from models.Company import Company\n", + "\n", + "\n", "def parse_stakeholder(data: dict) -> list:\n", " if \"Natuerliche_Person\" in data[\"Beteiligter\"]:\n", " return {\n", @@ -441,54 +562,61 @@ "\n", "\n", "def map_unternehmensregister_json(data: dict) -> dict:\n", - " result = {\"base_info\": None, \"relationships\": []}\n", + " result = {\"relationships\": []}\n", "\n", - " base_info = {\n", - " \"company_name\": data[\"XJustiz_Daten\"][\"Fachdaten_Register\"][\n", - " \"Basisdaten_Register\"\n", - " ][\"Rechtstraeger\"][\"Bezeichnung\"][\"Bezeichnung_Aktuell\"],\n", - " \"location\": {\n", - " \"city\": data[\"XJustiz_Daten\"][\"Fachdaten_Register\"][\"Basisdaten_Register\"][\n", - " \"Rechtstraeger\"\n", - " ][\"Anschrift\"][\"Ort\"],\n", - " \"zip_code\": data[\"XJustiz_Daten\"][\"Fachdaten_Register\"][\n", - " \"Basisdaten_Register\"\n", - " ][\"Rechtstraeger\"][\"Anschrift\"][\"Postleitzahl\"],\n", - " \"street\": data[\"XJustiz_Daten\"][\"Fachdaten_Register\"][\n", - " \"Basisdaten_Register\"\n", - " ][\"Rechtstraeger\"][\"Anschrift\"][\"Strasse\"]\n", - " if \"Strasse\"\n", - " in data[\"XJustiz_Daten\"][\"Fachdaten_Register\"][\"Basisdaten_Register\"][\n", - " \"Rechtstraeger\"\n", - " ][\"Anschrift\"]\n", - " else None,\n", - " \"house_number\": data[\"XJustiz_Daten\"][\"Fachdaten_Register\"][\n", - " \"Basisdaten_Register\"\n", - " ][\"Rechtstraeger\"][\"Anschrift\"][\"Hausnummer\"]\n", - " if \"Hausnummer\"\n", - " in data[\"XJustiz_Daten\"][\"Fachdaten_Register\"][\"Basisdaten_Register\"][\n", - " \"Rechtstraeger\"\n", - " ][\"Anschrift\"]\n", - " else None,\n", - " },\n", - " \"last_update\": data[\"XJustiz_Daten\"][\"Fachdaten_Register\"][\"Auszug\"][\n", - " \"letzte_Eintragung\"\n", - " ],\n", + " result[\"id\"] = {\n", + " \"hr_number\": data[\"XJustiz_Daten\"][\"Grunddaten\"][\"Verfahrensdaten\"][\n", + " \"Instanzdaten\"\n", + " ][\"Aktenzeichen\"],\n", + " \"district_court\": data[\"XJustiz_Daten\"][\"Grunddaten\"][\"Verfahrensdaten\"][\n", + " \"Beteiligung\"\n", + " ][1][\"Beteiligter\"][\"Organisation\"][\"Bezeichnung\"][\"Bezeichnung_Aktuell\"],\n", " }\n", - " result[\"base_info\"] = base_info\n", + " result[\"name\"] = data[\"XJustiz_Daten\"][\"Fachdaten_Register\"][\"Basisdaten_Register\"][\n", + " \"Rechtstraeger\"\n", + " ][\"Bezeichnung\"][\"Bezeichnung_Aktuell\"]\n", + "\n", + " result[\"location\"] = {\n", + " \"city\": data[\"XJustiz_Daten\"][\"Fachdaten_Register\"][\"Basisdaten_Register\"][\n", + " \"Rechtstraeger\"\n", + " ][\"Anschrift\"][\"Ort\"],\n", + " \"zip_code\": data[\"XJustiz_Daten\"][\"Fachdaten_Register\"][\"Basisdaten_Register\"][\n", + " \"Rechtstraeger\"\n", + " ][\"Anschrift\"][\"Postleitzahl\"],\n", + " \"street\": data[\"XJustiz_Daten\"][\"Fachdaten_Register\"][\"Basisdaten_Register\"][\n", + " \"Rechtstraeger\"\n", + " ][\"Anschrift\"][\"Strasse\"]\n", + " if \"Strasse\"\n", + " in data[\"XJustiz_Daten\"][\"Fachdaten_Register\"][\"Basisdaten_Register\"][\n", + " \"Rechtstraeger\"\n", + " ][\"Anschrift\"]\n", + " else None,\n", + " \"house_number\": data[\"XJustiz_Daten\"][\"Fachdaten_Register\"][\n", + " \"Basisdaten_Register\"\n", + " ][\"Rechtstraeger\"][\"Anschrift\"][\"Hausnummer\"]\n", + " if \"Hausnummer\"\n", + " in data[\"XJustiz_Daten\"][\"Fachdaten_Register\"][\"Basisdaten_Register\"][\n", + " \"Rechtstraeger\"\n", + " ][\"Anschrift\"]\n", + " else None,\n", + " }\n", + " result[\"last_update\"] = data[\"XJustiz_Daten\"][\"Fachdaten_Register\"][\"Auszug\"][\n", + " \"letzte_Eintragung\"\n", + " ]\n", + "\n", " for i in range(\n", - " len(data[\"XJustiz_Daten\"][\"Grunddaten\"][\"Verfahrensdaten\"][\"Beteiligung\"])\n", + " 2, len(data[\"XJustiz_Daten\"][\"Grunddaten\"][\"Verfahrensdaten\"][\"Beteiligung\"])\n", " ):\n", " people = parse_stakeholder(\n", " data[\"XJustiz_Daten\"][\"Grunddaten\"][\"Verfahrensdaten\"][\"Beteiligung\"][i]\n", " )\n", " result[\"relationships\"].append(people)\n", - " return result" + " return Company(**result)" ] }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 22, "metadata": {}, "outputs": [ { @@ -618,18 +746,18 @@ ], "source": [ "import json\n", + "import dataclasses\n", "\n", "for file in glob.glob1(\"./data/Unternehmensregister/\", \"*.json\"):\n", " path = os.path.join(\"./data/Unternehmensregister/\", file)\n", " with open(path, \"r\", encoding=\"utf-8\") as file_object:\n", " data = json.loads(file_object.read())\n", "\n", - " result = map_unternehmensregister_json(data)\n", - " print(result[\"base_info\"][\"company_name\"])\n", + " company: Company = map_unternehmensregister_json(data)\n", + " print(company.name)\n", "\n", " name = (\n", - " result[\"base_info\"][\"company_name\"]\n", - " .replace(\" \", \"_\")\n", + " company.name.replace(\" \", \"_\")\n", " .replace(\"/\", \"_\")\n", " .replace('\"', \"\")\n", " .replace(\"|\", \"_\")\n", @@ -637,7 +765,7 @@ " with open(\n", " f\"./data/Unternehmensregister/export/{name}.json\", \"w+\", encoding=\"utf-8\"\n", " ) as export_file:\n", - " json.dump(result, export_file, ensure_ascii=False)" + " json.dump(dataclasses.asdict(company), export_file, ensure_ascii=False)" ] } ],