refactor: Apply linter feedback

This commit is contained in:
TrisNol 2023-11-04 09:12:04 +01:00
parent d6b07431e7
commit 1121f26052
8 changed files with 142 additions and 1268 deletions

View File

@ -14,7 +14,17 @@ from aki_prj23_transparenzregister.utils.mongo.connector import (
MongoConnector,
)
def load_directory_to_mongo(base_path: str, service: CompanyMongoService) -> int:
"""Load all json files in a directory to MongoDB company collection.
Args:
base_path (str): Directory to scan
service (CompanyMongoService): MongoDB service
Returns:
int: Number of processed files
"""
num_processed = 0
for file in tqdm(glob.glob1(base_path, "*.json")):
path = os.path.join(base_path, file)
@ -26,10 +36,14 @@ def load_directory_to_mongo(base_path: str, service: CompanyMongoService) -> int
num_processed += 1
return num_processed
if __name__ == "__main__":
    # Script entry point: read the MongoDB connection string from the local
    # secrets file and build the company service on top of the connector.
    provider = JsonFileConfigProvider("secrets.json")
    conn_string = provider.get_mongo_connection_string()
    connector = MongoConnector(conn_string)
    service = CompanyMongoService(connector)
    # Load the transformed export directory exactly once.
    load_directory_to_mongo(
        "./Jupyter/API-tests/Unternehmensregister/data/Unternehmensregister/transformed",
        service,
    )

View File

@ -0,0 +1 @@
"""Transform Unternehmensregister data to Transparenzregister API."""

View File

@ -3,16 +3,21 @@ import dataclasses
import glob
import json
import os
import re
import sys
import typing
import xmltodict
from tqdm import tqdm
from loguru import logger
from tqdm import tqdm
from aki_prj23_transparenzregister.utils.data_extraction.unternehmensregister.transform.v1 import v1
from aki_prj23_transparenzregister.utils.data_extraction.unternehmensregister.transform.v3 import v3
from aki_prj23_transparenzregister.models.company import Company
from aki_prj23_transparenzregister.utils.data_extraction.unternehmensregister.transform.v1 import (
v1,
)
from aki_prj23_transparenzregister.utils.data_extraction.unternehmensregister.transform.v3 import (
v3,
)
def transform_xml_to_json(source_dir: str, target_dir: str) -> None:
"""Convert all xml files in a directory to json files.
@ -36,13 +41,27 @@ def transform_xml_to_json(source_dir: str, target_dir: str) -> None:
except Exception as e:
logger.error(e)
def determine_version(data: dict) -> typing.Any:
    """Determine Unternehmensregister data API version of given entry.

    Args:
        data (dict): Unternehmensregister data

    Raises:
        ValueError: If version could not be determined

    Returns:
        module: Version module
    """
    if "XJustiz_Daten" in data:
        # TODO consider class inheritance for version modules
        return v1
    # The v3 marker is looked up inside the first top-level value. Fetch that
    # value defensively so an empty export or a non-mapping root ends in the
    # documented ValueError instead of an IndexError/TypeError.
    root = next(iter(data.values()), None)
    if isinstance(root, dict) and "tns:nachrichtenkopf" in root:
        return v3
    raise ValueError("Could not determine Unternehmensregister version.")
def map_unternehmensregister_json(data: dict) -> Company:
"""Processes the Unternehmensregister structured export to a Company by using several helper methods.
@ -57,8 +76,6 @@ def map_unternehmensregister_json(data: dict) -> Company:
if __name__ == "__main__":
from loguru import logger
base_path = "./Jupyter/API-tests/Unternehmensregister/data/Unternehmensregister"
for file in tqdm(glob.glob1(f"{base_path}/export", "*.json")):
path = os.path.join(f"{base_path}/export", file)

View File

@ -1,13 +1,5 @@
"""Transform raw Unternehmensregister export (*.xml) to processed .json files for loading."""
import dataclasses
import glob
import json
import os
import re
import sys
import xmltodict
from tqdm import tqdm
from aki_prj23_transparenzregister.models.company import (
Capital,

View File

@ -0,0 +1 @@
"""Transforms data from the Unternehmensregister v3 API to the data model of the Transparenzregister API."""

View File

@ -1,34 +1,60 @@
"""RoleMapper for Unternehmensregister v3 API."""
import os
from pathlib import Path
import xmltodict
from pathlib import Path
from aki_prj23_transparenzregister.models.company import RelationshipRoleEnum
class RoleMapper:
    """RoleMapper for Unternehmensregister v3 API."""

    # Shared instance, created on first call to mapper()
    singleton: "RoleMapper | None" = None

    def __init__(self) -> None:
        """Initialize RoleMapper by ingesting XSD schema file."""
        # TODO Automated file retrieval
        base_path = os.path.dirname(Path(__file__))
        path = os.path.join(
            base_path, "assets", "xjustiz_0040_cl_rollenbezeichnung_3_3.xsd"
        )
        with open(path, encoding="utf-8") as file:
            content = file.read()
        data = xmltodict.parse(content)
        enumeration = data["xs:schema"]["xs:simpleType"]["xs:restriction"][
            "xs:enumeration"
        ]
        # xmltodict yields a single mapping instead of a list when an element
        # occurs only once; normalise so both shapes are handled.
        if isinstance(enumeration, dict):
            enumeration = [enumeration]
        # Maps each enumeration's "@value" to the "wert" text found in its
        # xs:annotation / xs:appinfo element.
        self.dictionary: dict[str, str] = {
            entry["@value"]: entry["xs:annotation"]["xs:appinfo"]["wert"]
            for entry in enumeration
        }

    @staticmethod
    def mapper() -> "RoleMapper":
        """Singleton getter for RoleMapper.

        Returns:
            RoleMapper: Singleton instance
        """
        if RoleMapper.singleton is None:
            RoleMapper.singleton = RoleMapper()
        return RoleMapper.singleton

    def get(self, key: str) -> RelationshipRoleEnum:
        """Get mapped value for given key.

        Args:
            key (str): Key to map

        Raises:
            KeyError: If the key is not part of the ingested schema

        Returns:
            RelationshipRoleEnum: Mapped value
        """
        return RelationshipRoleEnum(self.dictionary[key])
if __name__ == "__main__":
    # Manual smoke check: build a mapper from the bundled schema and log the
    # role resolved for ID 201.
    from loguru import logger

    mapper = RoleMapper()
    logger.info(f"Mapped value for role 201 - {mapper.get('201')}")

View File

@ -1,13 +1,8 @@
"""Transform raw Unternehmensregister export (*.xml) to processed .json files for loading."""
import dataclasses
import glob
import json
import os
import re
import sys
import xmltodict
from tqdm import tqdm
import re
import typing
from collections.abc import Sequence
from aki_prj23_transparenzregister.models.company import (
Capital,
@ -25,15 +20,14 @@ from aki_prj23_transparenzregister.models.company import (
PersonToCompanyRelationship,
RelationshipRoleEnum,
)
from aki_prj23_transparenzregister.utils.data_extraction.unternehmensregister.transform.v3.role_mapper import (
RoleMapper,
)
from aki_prj23_transparenzregister.utils.string_tools import (
remove_traling_and_leading_quotes,
transform_date_to_iso,
)
from aki_prj23_transparenzregister.utils.data_extraction.unternehmensregister.transform.v3.role_mapper import (
RoleMapper,
)
def parse_date_of_birth(data: dict) -> str | None:
"""Retreives the date of birth from a stakeholder entry if possible.
@ -56,6 +50,14 @@ def parse_date_of_birth(data: dict) -> str | None:
def map_role_id_to_enum(role_id: str) -> RelationshipRoleEnum:
    """Resolve an Unternehmensregister role ID to its RelationshipRoleEnum.

    Args:
        role_id (str): Unternehmensregister role ID

    Returns:
        RelationshipRoleEnum: Role enum
    """
    # Delegate to the shared RoleMapper instance.
    return RoleMapper.mapper().get(role_id)
@ -229,10 +231,7 @@ def loc_from_beteiligung(data: dict) -> Location:
# "tns:anschrift",
]
base = traversal(data, base_path)
if "tns:anschrift" in base:
base = base["tns:anschrift"]
else:
base = base["tns:sitz"]
base = base["tns:anschrift"] if "tns:anschrift" in base else base["tns:sitz"]
if isinstance(base, list):
base = base[0]
@ -318,7 +317,9 @@ def map_rechtsform(company_name: str, data: dict) -> CompanyTypeEnum | None:
return None
def map_capital(data: dict, company_type: CompanyTypeEnum) -> Capital | None:
def map_capital( # noqa: PLR0912
data: dict, company_type: CompanyTypeEnum
) -> Capital | None:
"""Extracts the company capital from the given Unternehmensregister export.
Args:
@ -332,7 +333,11 @@ def map_capital(data: dict, company_type: CompanyTypeEnum) -> Capital | None:
if "tns:auswahl_zusatzangaben" not in data["tns:fachdatenRegister"]:
return None
capital: dict = {"tns:zahl": 0.0, "tns:waehrung": {"code": None}}
if company_type == CompanyTypeEnum.KG and "tns:personengesellschaft" in data["tns:fachdatenRegister"]["tns:auswahl_zusatzangaben"]:
if (
company_type == CompanyTypeEnum.KG
and "tns:personengesellschaft"
in data["tns:fachdatenRegister"]["tns:auswahl_zusatzangaben"]
):
capital_type = "Hafteinlage"
base = data["tns:fachdatenRegister"]["tns:auswahl_zusatzangaben"][
"tns:personengesellschaft"
@ -475,17 +480,40 @@ def map_founding_date(data: dict) -> str | None:
return None
def traversal(data: dict, path: list[str | int]) -> any:
def traversal(data: dict, path: Sequence[str | int | object]) -> typing.Any:
"""Traverse a dict using list of keys.
Args:
data (dict): Data export
path (Sequence[str | int | object]): List of keys
Raises:
KeyError: If key not found
Returns:
any: Value at the end of the path
"""
current = data
for key in path:
try:
current = current[key]
except:
raise KeyError(f"Key {key} not found")
except KeyError as e:
raise KeyError(f"Key {key} not found") from e
return current
def map_hr_number(data: dict) -> str:
"""Extract the HR number from a given Unternehmensregister export.
Args:
data (dict): Data export
Raises:
KeyError: If key not found
Returns:
str: HR number
"""
base = data["tns:grunddaten"]["tns:verfahrensdaten"]["tns:instanzdaten"][
"tns:aktenzeichen"
]["tns:auswahl_aktenzeichen"]
@ -493,12 +521,20 @@ def map_hr_number(data: dict) -> str:
hr_prefix = base["tns:aktenzeichen.strukturiert"]["tns:register"]["code"]
hr_number = base["tns:aktenzeichen.strukturiert"]["tns:laufendeNummer"]
return f"{hr_prefix} {hr_number}"
elif "tns:aktenzeichen.freitext" in base:
if "tns:aktenzeichen.freitext" in base:
return base["tns:aktenzeichen.freitext"]
return hr_full
raise KeyError("Could not find HR number")
def map_district_court(data: dict) -> DistrictCourt:
"""Extract the district court from a given Unternehmensregister export.
Args:
data (dict): Data export
Returns:
DistrictCourt: District court
"""
base_path = [
"tns:grunddaten",
"tns:verfahrensdaten",
@ -525,11 +561,13 @@ def map_company_id(data: dict) -> CompanyID:
CompanyID: ID of the company
"""
try:
return CompanyID(
**{"hr_number": map_hr_number(data), "district_court": map_district_court(data)}
)
return CompanyID(map_hr_number(data), map_district_court(data)) # type: ignore
except KeyError:
hr_number = data["tns:grunddaten"]["tns:verfahrensdaten"]["tns:beteiligung"][0]["tns:beteiligter"]["tns:auswahl_beteiligter"]["tns:organisation"]["tns:registereintragung"]["tns:registernummer"]
hr_number = data["tns:grunddaten"]["tns:verfahrensdaten"]["tns:beteiligung"][0][
"tns:beteiligter"
]["tns:auswahl_beteiligter"]["tns:organisation"]["tns:registereintragung"][
"tns:registernummer"
]
district_court = map_district_court(data)
return CompanyID(hr_number=hr_number, district_court=district_court)

File diff suppressed because one or more lines are too long