mirror of
https://github.com/fhswf/aki_prj23_transparenzregister.git
synced 2025-05-13 09:58:47 +02:00
test(data-extraction): Include first unit tests
This commit is contained in:
parent
bfe50ac76d
commit
febcd59e39
File diff suppressed because one or more lines are too long
14
poetry.lock
generated
14
poetry.lock
generated
@ -1,5 +1,17 @@
|
|||||||
# This file is automatically @generated by Poetry 1.5.1 and should not be changed by hand.
|
# This file is automatically @generated by Poetry 1.5.1 and should not be changed by hand.
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "aenum"
|
||||||
|
version = "3.1.15"
|
||||||
|
description = "Advanced Enumerations (compatible with Python's stdlib Enum), NamedTuples, and NamedConstants"
|
||||||
|
optional = false
|
||||||
|
python-versions = "*"
|
||||||
|
files = [
|
||||||
|
{file = "aenum-3.1.15-py2-none-any.whl", hash = "sha256:27b1710b9d084de6e2e695dab78fe9f269de924b51ae2850170ee7e1ca6288a5"},
|
||||||
|
{file = "aenum-3.1.15-py3-none-any.whl", hash = "sha256:e0dfaeea4c2bd362144b87377e2c61d91958c5ed0b4daf89cb6f45ae23af6288"},
|
||||||
|
{file = "aenum-3.1.15.tar.gz", hash = "sha256:8cbd76cd18c4f870ff39b24284d3ea028fbe8731a58df3aa581e434c575b9559"},
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "alabaster"
|
name = "alabaster"
|
||||||
version = "0.7.13"
|
version = "0.7.13"
|
||||||
@ -5610,4 +5622,4 @@ ingest = ["selenium"]
|
|||||||
[metadata]
|
[metadata]
|
||||||
lock-version = "2.0"
|
lock-version = "2.0"
|
||||||
python-versions = "^3.11"
|
python-versions = "^3.11"
|
||||||
content-hash = "ca7006861e8580466abc2a7e0555ef97365754f6406e83a777ee0a87bc8c1dc0"
|
content-hash = "f8728eee18c7402c68901a65631c2f3c0bd1a04fb1fb952f3746a12f47a9b9a4"
|
||||||
|
@ -36,6 +36,7 @@ version = "0.1.0"
|
|||||||
|
|
||||||
[tool.poetry.dependencies]
|
[tool.poetry.dependencies]
|
||||||
SQLAlchemy = {version = "^1.4.46", extras = ["mypy"]}
|
SQLAlchemy = {version = "^1.4.46", extras = ["mypy"]}
|
||||||
|
aenum = "^3.1.15"
|
||||||
dash = "^2.11.1"
|
dash = "^2.11.1"
|
||||||
dash-bootstrap-components = "^1.4.2"
|
dash-bootstrap-components = "^1.4.2"
|
||||||
deutschland = {git = "https://github.com/TrisNol/deutschland.git", branch = "hotfix/python-3.11-support"}
|
deutschland = {git = "https://github.com/TrisNol/deutschland.git", branch = "hotfix/python-3.11-support"}
|
||||||
|
@ -2,16 +2,31 @@
|
|||||||
from dataclasses import asdict, dataclass
|
from dataclasses import asdict, dataclass
|
||||||
from enum import Enum
|
from enum import Enum
|
||||||
|
|
||||||
|
from aenum import MultiValueEnum
|
||||||
|
|
||||||
class RelationshipRoleEnum(Enum):
|
|
||||||
"""_summary_.
|
|
||||||
|
|
||||||
Args:
|
class RelationshipRoleEnum(str, MultiValueEnum):
|
||||||
Enum (_type_): _description_
|
"""Roles taken by entities in relationships to a Company."""
|
||||||
"""
|
|
||||||
|
|
||||||
STAKEHOLDER = ""
|
|
||||||
ORGANISATION = "ORGANISATION"
|
ORGANISATION = "ORGANISATION"
|
||||||
|
KOMMANDITIST = "Kommanditist(in)", "Kommanditist"
|
||||||
|
GESCHAEFTSFUEHRER = "Geschäftsführer(in)", "Geschäftsführer"
|
||||||
|
PROKURIST = "Prokurist(in)", "Prokurist"
|
||||||
|
VORSTAND = "Vorstand"
|
||||||
|
INHABER = "Inhaber(in)", "Inhaber"
|
||||||
|
HAFTENDER_GESELLSCHAFTER = (
|
||||||
|
"Persönlich haftende(r) Gesellschafter(in)",
|
||||||
|
"Persönlich haftender Gesellschafter",
|
||||||
|
)
|
||||||
|
LIQUIDATOR = "Liquidator(in)", "Liquidator"
|
||||||
|
PARTNER = "Partner(in)", "Partner"
|
||||||
|
DIREKTOR = "Geschäftsführende(r) Direktor(in)", "Geschäftsführender Direktor"
|
||||||
|
LEITUNG = "Mitglied des Leitungsorgans"
|
||||||
|
VORSTANDSVORSITZENDER = "Vorstandsvorsitzende(r)", "Vorstandsvorsitzender"
|
||||||
|
NACHFOLGER = "Rechtsnachfolger"
|
||||||
|
STAENDIGER_VERTRETER = "Ständige(r) Vertreter(in)"
|
||||||
|
SONSTIGER_VERTRETER = "Sonstige(r) Vertreter(in)", "Sonstiger Vertreter"
|
||||||
|
GESCHAEFTSLEITER = "Geschäftsleiter(in)", "Geschäftsleiter"
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
@ -32,7 +47,7 @@ class Location:
|
|||||||
zip_code: str | None = None
|
zip_code: str | None = None
|
||||||
|
|
||||||
|
|
||||||
class CompanyRelationshipEnum(Enum):
|
class CompanyRelationshipEnum(str, Enum):
|
||||||
"""Type of companyrelations."""
|
"""Type of companyrelations."""
|
||||||
|
|
||||||
PERSON = "Person"
|
PERSON = "Person"
|
||||||
@ -136,10 +151,10 @@ class Company:
|
|||||||
last_update: str
|
last_update: str
|
||||||
relationships: list[CompanyRelationship]
|
relationships: list[CompanyRelationship]
|
||||||
# yearly_results: Optional[list[FinancialResults]]
|
# yearly_results: Optional[list[FinancialResults]]
|
||||||
company_type: str # TODO define Enum
|
company_type: str | None = None # TODO define Enum
|
||||||
capital: Capital | None
|
capital: Capital | None | None = None
|
||||||
business_purpose: str
|
business_purpose: str | None = None
|
||||||
founding_date: str
|
founding_date: str | None = None
|
||||||
|
|
||||||
def to_dict(self) -> dict:
|
def to_dict(self) -> dict:
|
||||||
"""_summary_.
|
"""_summary_.
|
||||||
|
@ -0,0 +1 @@
|
|||||||
|
"""Everything regarding data extraction from the Unternehmensregister."""
|
@ -12,10 +12,14 @@ from tqdm import tqdm
|
|||||||
from aki_prj23_transparenzregister.models.company import (
|
from aki_prj23_transparenzregister.models.company import (
|
||||||
Capital,
|
Capital,
|
||||||
Company,
|
Company,
|
||||||
|
CompanyID,
|
||||||
CompanyRelationship,
|
CompanyRelationship,
|
||||||
|
CompanyRelationshipEnum,
|
||||||
CompanyToCompanyRelationship,
|
CompanyToCompanyRelationship,
|
||||||
Location,
|
Location,
|
||||||
|
PersonName,
|
||||||
PersonToCompanyRelationship,
|
PersonToCompanyRelationship,
|
||||||
|
RelationshipRoleEnum,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -57,7 +61,45 @@ def parse_stakeholder(data: dict) -> CompanyRelationship | None:
|
|||||||
"description": data["Beteiligter"]["Natuerliche_Person"][
|
"description": data["Beteiligter"]["Natuerliche_Person"][
|
||||||
"Voller_Name"
|
"Voller_Name"
|
||||||
]["Nachname"],
|
]["Nachname"],
|
||||||
"location": {
|
"location": Location(
|
||||||
|
**{
|
||||||
|
"city": data["Beteiligter"]["Natuerliche_Person"][
|
||||||
|
"Anschrift"
|
||||||
|
][-1]["Ort"]
|
||||||
|
if isinstance(
|
||||||
|
data["Beteiligter"]["Natuerliche_Person"]["Anschrift"],
|
||||||
|
list,
|
||||||
|
)
|
||||||
|
else data["Beteiligter"]["Natuerliche_Person"]["Anschrift"][
|
||||||
|
"Ort"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
),
|
||||||
|
"role": RelationshipRoleEnum(
|
||||||
|
data["Rolle"]["Rollenbezeichnung"]["content"]
|
||||||
|
),
|
||||||
|
"type": CompanyRelationshipEnum.COMPANY,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
return PersonToCompanyRelationship(
|
||||||
|
**{
|
||||||
|
"name": PersonName(
|
||||||
|
**{
|
||||||
|
"firstname": data["Beteiligter"]["Natuerliche_Person"][
|
||||||
|
"Voller_Name"
|
||||||
|
]["Vorname"],
|
||||||
|
"lastname": data["Beteiligter"]["Natuerliche_Person"][
|
||||||
|
"Voller_Name"
|
||||||
|
]["Nachname"],
|
||||||
|
}
|
||||||
|
),
|
||||||
|
"date_of_birth": data["Beteiligter"]["Natuerliche_Person"]["Geburt"][
|
||||||
|
"Geburtsdatum"
|
||||||
|
]
|
||||||
|
if "Geburt" in data["Beteiligter"]["Natuerliche_Person"]
|
||||||
|
else None,
|
||||||
|
"location": Location(
|
||||||
|
**{
|
||||||
"city": data["Beteiligter"]["Natuerliche_Person"]["Anschrift"][
|
"city": data["Beteiligter"]["Natuerliche_Person"]["Anschrift"][
|
||||||
-1
|
-1
|
||||||
]["Ort"]
|
]["Ort"]
|
||||||
@ -67,37 +109,12 @@ def parse_stakeholder(data: dict) -> CompanyRelationship | None:
|
|||||||
else data["Beteiligter"]["Natuerliche_Person"]["Anschrift"][
|
else data["Beteiligter"]["Natuerliche_Person"]["Anschrift"][
|
||||||
"Ort"
|
"Ort"
|
||||||
]
|
]
|
||||||
},
|
}
|
||||||
"role": data["Rolle"]["Rollenbezeichnung"]["content"],
|
),
|
||||||
"type": "Company",
|
"role": RelationshipRoleEnum(
|
||||||
}
|
data["Rolle"]["Rollenbezeichnung"]["content"]
|
||||||
)
|
),
|
||||||
return PersonToCompanyRelationship(
|
"type": CompanyRelationshipEnum.PERSON,
|
||||||
**{
|
|
||||||
"name": {
|
|
||||||
"firstname": data["Beteiligter"]["Natuerliche_Person"][
|
|
||||||
"Voller_Name"
|
|
||||||
]["Vorname"],
|
|
||||||
"lastname": data["Beteiligter"]["Natuerliche_Person"][
|
|
||||||
"Voller_Name"
|
|
||||||
]["Nachname"],
|
|
||||||
},
|
|
||||||
"date_of_birth": data["Beteiligter"]["Natuerliche_Person"]["Geburt"][
|
|
||||||
"Geburtsdatum"
|
|
||||||
]
|
|
||||||
if "Geburt" in data["Beteiligter"]["Natuerliche_Person"]
|
|
||||||
else None,
|
|
||||||
"location": {
|
|
||||||
"city": data["Beteiligter"]["Natuerliche_Person"]["Anschrift"][-1][
|
|
||||||
"Ort"
|
|
||||||
]
|
|
||||||
if isinstance(
|
|
||||||
data["Beteiligter"]["Natuerliche_Person"]["Anschrift"], list
|
|
||||||
)
|
|
||||||
else data["Beteiligter"]["Natuerliche_Person"]["Anschrift"]["Ort"]
|
|
||||||
},
|
|
||||||
"role": data["Rolle"]["Rollenbezeichnung"]["content"],
|
|
||||||
"type": "Person",
|
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
if "Organisation" in data["Beteiligter"]:
|
if "Organisation" in data["Beteiligter"]:
|
||||||
@ -107,23 +124,29 @@ def parse_stakeholder(data: dict) -> CompanyRelationship | None:
|
|||||||
"description": data["Beteiligter"]["Organisation"]["Bezeichnung"][
|
"description": data["Beteiligter"]["Organisation"]["Bezeichnung"][
|
||||||
"Bezeichnung_Aktuell"
|
"Bezeichnung_Aktuell"
|
||||||
],
|
],
|
||||||
"location": {
|
"location": Location(
|
||||||
"city": data["Beteiligter"]["Organisation"]["Anschrift"]["Ort"],
|
**{
|
||||||
"street": data["Beteiligter"]["Organisation"]["Anschrift"][
|
"city": data["Beteiligter"]["Organisation"]["Anschrift"]["Ort"],
|
||||||
"Strasse"
|
"street": data["Beteiligter"]["Organisation"]["Anschrift"][
|
||||||
]
|
"Strasse"
|
||||||
if "Strasse" in data["Beteiligter"]["Organisation"]["Anschrift"]
|
]
|
||||||
else None,
|
if "Strasse" in data["Beteiligter"]["Organisation"]["Anschrift"]
|
||||||
"house_number": data["Beteiligter"]["Organisation"]["Anschrift"][
|
else None,
|
||||||
"Hausnummer"
|
"house_number": data["Beteiligter"]["Organisation"][
|
||||||
]
|
"Anschrift"
|
||||||
if "Hausnummer" in data["Beteiligter"]["Organisation"]["Anschrift"]
|
]["Hausnummer"]
|
||||||
else None,
|
if "Hausnummer"
|
||||||
"zip_code": data["Beteiligter"]["Organisation"]["Anschrift"][
|
in data["Beteiligter"]["Organisation"]["Anschrift"]
|
||||||
"Postleitzahl"
|
else None,
|
||||||
],
|
"zip_code": data["Beteiligter"]["Organisation"]["Anschrift"][
|
||||||
},
|
"Postleitzahl"
|
||||||
"type": "Company",
|
]
|
||||||
|
if "Postleitzahl"
|
||||||
|
in data["Beteiligter"]["Organisation"]["Anschrift"]
|
||||||
|
else None,
|
||||||
|
}
|
||||||
|
),
|
||||||
|
"type": CompanyRelationshipEnum.COMPANY,
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
return None
|
return None
|
||||||
@ -166,20 +189,6 @@ def loc_from_beteiligung(data: dict) -> Location:
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def name_from_organisation(data: dict) -> str:
|
|
||||||
"""Extract the company from the description.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
data (dict): Data export
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
str: Company name
|
|
||||||
"""
|
|
||||||
return data["XJustiz_Daten"]["Fachdaten_Register"]["Basisdaten_Register"][
|
|
||||||
"Organisation"
|
|
||||||
]["Bezeichnung"]["Bezeichnung_Aktuell"]
|
|
||||||
|
|
||||||
|
|
||||||
def name_from_beteiligung(data: dict) -> str:
|
def name_from_beteiligung(data: dict) -> str:
|
||||||
"""Extract the Company name from an Unternehmensregister export by using the first relationship found.
|
"""Extract the Company name from an Unternehmensregister export by using the first relationship found.
|
||||||
|
|
||||||
@ -374,6 +383,48 @@ def map_founding_date(data: dict) -> str | None:
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def map_company_id(data: dict) -> CompanyID:
|
||||||
|
"""Retrieve Company ID from export.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
data (dict): Data export
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
CompanyID: ID of the company
|
||||||
|
"""
|
||||||
|
return CompanyID(
|
||||||
|
**{
|
||||||
|
"hr_number": data["XJustiz_Daten"]["Grunddaten"]["Verfahrensdaten"][
|
||||||
|
"Instanzdaten"
|
||||||
|
]["Aktenzeichen"],
|
||||||
|
"district_court": {
|
||||||
|
"name": data["XJustiz_Daten"]["Grunddaten"]["Verfahrensdaten"][
|
||||||
|
"Beteiligung"
|
||||||
|
][1]["Beteiligter"]["Organisation"]["Bezeichnung"][
|
||||||
|
"Bezeichnung_Aktuell"
|
||||||
|
]
|
||||||
|
if "Organisation"
|
||||||
|
in data["XJustiz_Daten"]["Grunddaten"]["Verfahrensdaten"][
|
||||||
|
"Beteiligung"
|
||||||
|
][1]["Beteiligter"]
|
||||||
|
else data["XJustiz_Daten"]["Grunddaten"]["Verfahrensdaten"][
|
||||||
|
"Beteiligung"
|
||||||
|
][1]["Beteiligter"]["Natuerliche_Person"]["Voller_Name"]["Nachname"],
|
||||||
|
"city": data["XJustiz_Daten"]["Grunddaten"]["Verfahrensdaten"][
|
||||||
|
"Beteiligung"
|
||||||
|
][1]["Beteiligter"]["Organisation"]["Sitz"]["Ort"]
|
||||||
|
if "Organisation"
|
||||||
|
in data["XJustiz_Daten"]["Grunddaten"]["Verfahrensdaten"][
|
||||||
|
"Beteiligung"
|
||||||
|
][1]["Beteiligter"]
|
||||||
|
else data["XJustiz_Daten"]["Grunddaten"]["Verfahrensdaten"][
|
||||||
|
"Beteiligung"
|
||||||
|
][1]["Beteiligter"]["Natuerliche_Person"]["Anschrift"]["Ort"],
|
||||||
|
},
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def map_unternehmensregister_json(data: dict) -> Company:
|
def map_unternehmensregister_json(data: dict) -> Company:
|
||||||
"""Processes the Unternehmensregister structured export to a Company by using several helper methods.
|
"""Processes the Unternehmensregister structured export to a Company by using several helper methods.
|
||||||
|
|
||||||
@ -386,33 +437,7 @@ def map_unternehmensregister_json(data: dict) -> Company:
|
|||||||
result: dict = {"relationships": []}
|
result: dict = {"relationships": []}
|
||||||
|
|
||||||
# TODO Refactor mapping - this is a nightmare...
|
# TODO Refactor mapping - this is a nightmare...
|
||||||
result["id"] = {
|
result["id"] = map_company_id(data)
|
||||||
"hr_number": data["XJustiz_Daten"]["Grunddaten"]["Verfahrensdaten"][
|
|
||||||
"Instanzdaten"
|
|
||||||
]["Aktenzeichen"],
|
|
||||||
"district_court": {
|
|
||||||
"name": data["XJustiz_Daten"]["Grunddaten"]["Verfahrensdaten"][
|
|
||||||
"Beteiligung"
|
|
||||||
][1]["Beteiligter"]["Organisation"]["Bezeichnung"]["Bezeichnung_Aktuell"]
|
|
||||||
if "Organisation"
|
|
||||||
in data["XJustiz_Daten"]["Grunddaten"]["Verfahrensdaten"]["Beteiligung"][1][
|
|
||||||
"Beteiligter"
|
|
||||||
]
|
|
||||||
else data["XJustiz_Daten"]["Grunddaten"]["Verfahrensdaten"]["Beteiligung"][
|
|
||||||
1
|
|
||||||
]["Beteiligter"]["Natuerliche_Person"]["Voller_Name"]["Nachname"],
|
|
||||||
"city": data["XJustiz_Daten"]["Grunddaten"]["Verfahrensdaten"][
|
|
||||||
"Beteiligung"
|
|
||||||
][1]["Beteiligter"]["Organisation"]["Sitz"]["Ort"]
|
|
||||||
if "Organisation"
|
|
||||||
in data["XJustiz_Daten"]["Grunddaten"]["Verfahrensdaten"]["Beteiligung"][1][
|
|
||||||
"Beteiligter"
|
|
||||||
]
|
|
||||||
else data["XJustiz_Daten"]["Grunddaten"]["Verfahrensdaten"]["Beteiligung"][
|
|
||||||
1
|
|
||||||
]["Beteiligter"]["Natuerliche_Person"]["Anschrift"]["Ort"],
|
|
||||||
},
|
|
||||||
}
|
|
||||||
result["name"] = name_from_beteiligung(data)
|
result["name"] = name_from_beteiligung(data)
|
||||||
|
|
||||||
result["location"] = loc_from_beteiligung(data)
|
result["location"] = loc_from_beteiligung(data)
|
||||||
@ -438,12 +463,12 @@ def map_unternehmensregister_json(data: dict) -> Company:
|
|||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
|
|
||||||
transform_xml_to_json(
|
# transform_xml_to_json(
|
||||||
"./data/Unternehmensregister/scraping/", "./data/Unternehmensregister/export/"
|
# "./data/Unternehmensregister/scraping/", "./data/Unternehmensregister/export/"
|
||||||
)
|
# )
|
||||||
|
base_path = "./Jupyter/API-tests/Unternehmensregister/data/Unternehmensregister"
|
||||||
for file in tqdm(glob.glob1("./data/Unternehmensregister/export", "*.json")):
|
for file in tqdm(glob.glob1(f"{base_path}/export", "*.json")):
|
||||||
path = os.path.join("./data/Unternehmensregister/export", file)
|
path = os.path.join(f"{base_path}/export", file)
|
||||||
with open(path, encoding="utf-8") as file_object:
|
with open(path, encoding="utf-8") as file_object:
|
||||||
try:
|
try:
|
||||||
data = json.loads(file_object.read())
|
data = json.loads(file_object.read())
|
||||||
@ -452,7 +477,7 @@ if __name__ == "__main__":
|
|||||||
name = "".join(e for e in company.name if e.isalnum())[:50]
|
name = "".join(e for e in company.name if e.isalnum())[:50]
|
||||||
|
|
||||||
with open(
|
with open(
|
||||||
f"./data/Unternehmensregister/transformed/{name}.json",
|
f"{base_path}/transformed/{name}.json",
|
||||||
"w+",
|
"w+",
|
||||||
encoding="utf-8",
|
encoding="utf-8",
|
||||||
) as export_file:
|
) as export_file:
|
||||||
|
@ -1,7 +1,12 @@
|
|||||||
"""Test Models.company."""
|
"""Test Models.company."""
|
||||||
|
|
||||||
|
|
||||||
from aki_prj23_transparenzregister.models.company import Company, CompanyID, Location
|
from aki_prj23_transparenzregister.models.company import (
|
||||||
|
Capital,
|
||||||
|
Company,
|
||||||
|
CompanyID,
|
||||||
|
Location,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def test_to_dict() -> None:
|
def test_to_dict() -> None:
|
||||||
@ -10,12 +15,17 @@ def test_to_dict() -> None:
|
|||||||
location = Location(
|
location = Location(
|
||||||
city="Insmouth", house_number="19", street="Harbor", zip_code="1890"
|
city="Insmouth", house_number="19", street="Harbor", zip_code="1890"
|
||||||
)
|
)
|
||||||
|
capital = Capital(currency="BTC", type="Virtual assets", value=42)
|
||||||
company = Company(
|
company = Company(
|
||||||
id=company_id,
|
id=company_id,
|
||||||
last_update="Tomorrow",
|
last_update="Tomorrow",
|
||||||
location=location,
|
location=location,
|
||||||
name="BLANK GmbH",
|
name="BLANK GmbH",
|
||||||
relationships=[],
|
relationships=[],
|
||||||
|
business_purpose="Blockchain and NFTs",
|
||||||
|
capital=capital,
|
||||||
|
company_type="Something",
|
||||||
|
founding_date="Yesterday",
|
||||||
)
|
)
|
||||||
|
|
||||||
assert company.to_dict() == {
|
assert company.to_dict() == {
|
||||||
@ -32,4 +42,12 @@ def test_to_dict() -> None:
|
|||||||
},
|
},
|
||||||
"name": "BLANK GmbH",
|
"name": "BLANK GmbH",
|
||||||
"relationships": [],
|
"relationships": [],
|
||||||
|
"business_purpose": "Blockchain and NFTs",
|
||||||
|
"capital": {
|
||||||
|
"value": capital.value,
|
||||||
|
"currency": capital.currency,
|
||||||
|
"type": capital.type,
|
||||||
|
},
|
||||||
|
"company_type": "Something",
|
||||||
|
"founding_date": "Yesterday",
|
||||||
}
|
}
|
||||||
|
@ -1 +0,0 @@
|
|||||||
"""Tests for data_extraction."""
|
|
@ -0,0 +1,81 @@
|
|||||||
|
import glob
|
||||||
|
import os
|
||||||
|
from tempfile import TemporaryDirectory
|
||||||
|
|
||||||
|
from aki_prj23_transparenzregister.utils.data_extraction.unternehmensregister import (
|
||||||
|
extract,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def prepare_temporary_dir(directory: str, formats: list[str]) -> None:
|
||||||
|
for index in range(len(formats)):
|
||||||
|
test_file = os.path.join(directory, f"file-{index}.{formats[index]}")
|
||||||
|
with open(test_file, "w") as file:
|
||||||
|
file.write(f"Hello There {index}")
|
||||||
|
|
||||||
|
|
||||||
|
def test_rename_latest_file() -> None:
|
||||||
|
with TemporaryDirectory() as temp_dir:
|
||||||
|
# Create some test files in the temporary directory
|
||||||
|
test_file1 = os.path.join(temp_dir, "file1.xml")
|
||||||
|
test_file2 = os.path.join(temp_dir, "file2.xml")
|
||||||
|
test_file3 = os.path.join(temp_dir, "file3.xml")
|
||||||
|
|
||||||
|
# Create files with different modification times
|
||||||
|
with open(test_file1, "w") as f:
|
||||||
|
f.write("Content 1")
|
||||||
|
with open(test_file2, "w") as f:
|
||||||
|
f.write("Content 2")
|
||||||
|
with open(test_file3, "w") as f:
|
||||||
|
f.write("Content 3")
|
||||||
|
|
||||||
|
# Rename the latest file to 'new_file.xml'
|
||||||
|
extract.rename_latest_file(temp_dir, "new_file.xml")
|
||||||
|
|
||||||
|
glob.glob1(temp_dir, "*.xml")
|
||||||
|
# Verify that 'file3.xml' is renamed to 'new_file.xml'
|
||||||
|
assert not os.path.exists(test_file3)
|
||||||
|
assert os.path.exists(os.path.join(temp_dir, "new_file.xml"))
|
||||||
|
|
||||||
|
# Verify that 'file1.xml' and 'file2.xml' are still present
|
||||||
|
assert os.path.exists(test_file1)
|
||||||
|
assert os.path.exists(test_file2)
|
||||||
|
|
||||||
|
# Verify that renaming with a different pattern works
|
||||||
|
with open(test_file1, "w") as f:
|
||||||
|
f.write("Content 4")
|
||||||
|
with open(os.path.join(temp_dir, "file4.txt"), "w") as f:
|
||||||
|
f.write("Content 5")
|
||||||
|
|
||||||
|
# Rename the latest .txt file to 'new_file.txt'
|
||||||
|
extract.rename_latest_file(temp_dir, "new_file.txt", pattern="*.txt")
|
||||||
|
|
||||||
|
# Verify that 'file4.txt' is renamed to 'new_file.txt'
|
||||||
|
assert not os.path.exists(os.path.join(temp_dir, "file4.txt"))
|
||||||
|
assert os.path.exists(os.path.join(temp_dir, "new_file.txt"))
|
||||||
|
|
||||||
|
# Verify that 'file1.xml' is still present and unchanged
|
||||||
|
with open(test_file1) as f:
|
||||||
|
assert f.read() == "Content 4"
|
||||||
|
|
||||||
|
|
||||||
|
def test_get_num_files_default_pattern() -> None:
|
||||||
|
with TemporaryDirectory() as temp_dir:
|
||||||
|
prepare_temporary_dir(temp_dir, ["xml", "xml", "xml"])
|
||||||
|
|
||||||
|
expected_result = 3
|
||||||
|
assert extract.get_num_files(temp_dir) == expected_result
|
||||||
|
|
||||||
|
|
||||||
|
def test_get_num_files_different_pattern() -> None:
|
||||||
|
with TemporaryDirectory() as temp_dir:
|
||||||
|
prepare_temporary_dir(temp_dir, ["xml", "txt", "json"])
|
||||||
|
|
||||||
|
num_files = extract.get_num_files(temp_dir, "*.txt")
|
||||||
|
assert num_files == 1
|
||||||
|
|
||||||
|
|
||||||
|
def test_wait_for_download_condition() -> None:
|
||||||
|
with TemporaryDirectory() as temp_dir:
|
||||||
|
prepare_temporary_dir(temp_dir, ["xml", "txt"])
|
||||||
|
assert extract.wait_for_download_condition(temp_dir, 2) is False
|
@ -0,0 +1,208 @@
|
|||||||
|
import json
|
||||||
|
import os
|
||||||
|
from tempfile import TemporaryDirectory
|
||||||
|
|
||||||
|
from aki_prj23_transparenzregister.models.company import (
|
||||||
|
CompanyRelationshipEnum,
|
||||||
|
CompanyToCompanyRelationship,
|
||||||
|
Location,
|
||||||
|
PersonName,
|
||||||
|
PersonToCompanyRelationship,
|
||||||
|
RelationshipRoleEnum,
|
||||||
|
)
|
||||||
|
from aki_prj23_transparenzregister.utils.data_extraction.unternehmensregister import (
|
||||||
|
transform,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def test_transform_xml_to_json() -> None:
|
||||||
|
with TemporaryDirectory() as temp_source_dir:
|
||||||
|
with open(os.path.join(temp_source_dir, "test.xml"), "w") as file:
|
||||||
|
xml_input = """<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<test>
|
||||||
|
<message>Hello World!</message>
|
||||||
|
</test>
|
||||||
|
"""
|
||||||
|
file.write(xml_input)
|
||||||
|
with TemporaryDirectory() as temp_target_dir:
|
||||||
|
transform.transform_xml_to_json(temp_source_dir, temp_target_dir)
|
||||||
|
with open(os.path.join(temp_target_dir, "test.json")) as file:
|
||||||
|
json_output = json.load(file)
|
||||||
|
assert json_output == {"test": {"message": "Hello World!"}}
|
||||||
|
|
||||||
|
|
||||||
|
def test_parse_stakeholder_org_hidden_in_person() -> None:
|
||||||
|
data = {
|
||||||
|
"Beteiligter": {
|
||||||
|
"Natuerliche_Person": {
|
||||||
|
"Voller_Name": {"Vorname": None, "Nachname": "Some Company KG"},
|
||||||
|
"Anschrift": {"Ort": "Area 51"},
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"Rolle": {"Rollenbezeichnung": {"content": "Kommanditist(in)"}},
|
||||||
|
}
|
||||||
|
expected_result = CompanyToCompanyRelationship(
|
||||||
|
role=RelationshipRoleEnum.KOMMANDITIST, # type: ignore
|
||||||
|
description="Some Company KG",
|
||||||
|
type=CompanyRelationshipEnum.COMPANY,
|
||||||
|
location=Location(**{"city": "Area 51"}),
|
||||||
|
)
|
||||||
|
assert transform.parse_stakeholder(data) == expected_result
|
||||||
|
|
||||||
|
|
||||||
|
def test_parse_stakeholder_person() -> None:
|
||||||
|
data = {
|
||||||
|
"Beteiligter": {
|
||||||
|
"Natuerliche_Person": {
|
||||||
|
"Voller_Name": {"Vorname": "Stephen", "Nachname": "King"},
|
||||||
|
"Anschrift": {"Ort": "Maine"},
|
||||||
|
"Geburt": {"Geburtsdatum": "1947-09-21"},
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"Rolle": {"Rollenbezeichnung": {"content": "Geschäftsleiter(in)"}},
|
||||||
|
}
|
||||||
|
expected_result = PersonToCompanyRelationship(
|
||||||
|
role=RelationshipRoleEnum.GESCHAEFTSLEITER, # type: ignore
|
||||||
|
date_of_birth="1947-09-21",
|
||||||
|
name=PersonName(**{"firstname": "Stephen", "lastname": "King"}),
|
||||||
|
type=CompanyRelationshipEnum.PERSON,
|
||||||
|
location=Location(**{"city": "Maine"}),
|
||||||
|
)
|
||||||
|
assert transform.parse_stakeholder(data) == expected_result
|
||||||
|
|
||||||
|
|
||||||
|
def test_parse_stakeholder_org() -> None:
|
||||||
|
data = {
|
||||||
|
"Beteiligter": {
|
||||||
|
"Organisation": {
|
||||||
|
"Bezeichnung": {"Bezeichnung_Aktuell": "Transparenzregister kG"},
|
||||||
|
"Anschrift": {
|
||||||
|
"Ort": "Iserlohn",
|
||||||
|
"Strasse": "Hauptstrasse",
|
||||||
|
"Hausnummer": "42",
|
||||||
|
"Postleitzahl": "58636",
|
||||||
|
},
|
||||||
|
"Geburt": {"Geburtsdatum": "1947-09-21"},
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"Rolle": {"Rollenbezeichnung": {"content": "Geschäftsführender Direktor"}},
|
||||||
|
}
|
||||||
|
expected_result = CompanyToCompanyRelationship(
|
||||||
|
description="Transparenzregister kG",
|
||||||
|
role=RelationshipRoleEnum.DIREKTOR, # type: ignore
|
||||||
|
type=CompanyRelationshipEnum.COMPANY,
|
||||||
|
location=Location(
|
||||||
|
**{
|
||||||
|
"city": "Iserlohn",
|
||||||
|
"zip_code": "58636",
|
||||||
|
"house_number": "42",
|
||||||
|
"street": "Hauptstrasse",
|
||||||
|
}
|
||||||
|
),
|
||||||
|
)
|
||||||
|
assert transform.parse_stakeholder(data) == expected_result
|
||||||
|
|
||||||
|
|
||||||
|
def test_parse_stakeholder_no_result() -> None:
|
||||||
|
data: dict = {"Beteiligter": {}}
|
||||||
|
assert transform.parse_stakeholder(data) is None
|
||||||
|
|
||||||
|
|
||||||
|
def test_loc_from_beteiligung() -> None:
|
||||||
|
data = {
|
||||||
|
"XJustiz_Daten": {
|
||||||
|
"Grunddaten": {
|
||||||
|
"Verfahrensdaten": {
|
||||||
|
"Beteiligung": [
|
||||||
|
{
|
||||||
|
"Beteiligter": {
|
||||||
|
"Beteiligtennummer": "1",
|
||||||
|
"Organisation": {
|
||||||
|
"Bezeichnung": {
|
||||||
|
"Bezeichnung_Aktuell": "1 A Autenrieth Kunststofftechnik GmbH & Co. KG"
|
||||||
|
},
|
||||||
|
"Sitz": {
|
||||||
|
"Ort": "Heroldstatt",
|
||||||
|
"Staat": {
|
||||||
|
"@xsi:type": "WL_Staaten",
|
||||||
|
"@wl_version": "1.5",
|
||||||
|
"@wl_fassung": "2",
|
||||||
|
"content": "DE",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
"Anschrift": {
|
||||||
|
"Strasse": "Gewerbestraße",
|
||||||
|
"Hausnummer": "8",
|
||||||
|
"Postleitzahl": "72535",
|
||||||
|
"Ort": "Heroldstatt",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
},
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
expected_result = Location(
|
||||||
|
city="Heroldstatt", house_number="8", street="Gewerbestraße", zip_code="72535"
|
||||||
|
)
|
||||||
|
assert transform.loc_from_beteiligung(data) == expected_result
|
||||||
|
|
||||||
|
|
||||||
|
def test_name_from_beteiligung() -> None:
|
||||||
|
data = {
|
||||||
|
"XJustiz_Daten": {
|
||||||
|
"Grunddaten": {
|
||||||
|
"Verfahrensdaten": {
|
||||||
|
"Beteiligung": [
|
||||||
|
{
|
||||||
|
"Beteiligter": {
|
||||||
|
"Beteiligtennummer": "1",
|
||||||
|
"Organisation": {
|
||||||
|
"Bezeichnung": {
|
||||||
|
"Bezeichnung_Aktuell": "1 A Autenrieth Kunststofftechnik GmbH & Co. KG"
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
},
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
expected_result = "1 A Autenrieth Kunststofftechnik GmbH & Co. KG"
|
||||||
|
assert transform.name_from_beteiligung(data) == expected_result
|
||||||
|
|
||||||
|
|
||||||
|
def test_map_rechtsform() -> None:
|
||||||
|
data = {
|
||||||
|
"XJustiz_Daten": {
|
||||||
|
"Fachdaten_Register": {
|
||||||
|
"Basisdaten_Register": {
|
||||||
|
"Aktuelles_Satzungsdatum": "1952-07-15",
|
||||||
|
"Rechtstraeger": {
|
||||||
|
"Rechtsform": {
|
||||||
|
"content": "Gesellschaft mit beschränkter Haftung"
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
expected_result = "Gesellschaft mit beschränkter Haftung"
|
||||||
|
assert transform.map_rechtsform("", data) == expected_result
|
||||||
|
|
||||||
|
|
||||||
|
def test_map_rechtsform_from_name() -> None:
|
||||||
|
data = [
|
||||||
|
("GEA Farm Technologies GmbH", "Gesellschaft mit beschränkter Haftung"),
|
||||||
|
("Atos SE", "Europäische Aktiengesellschaft (SE)"),
|
||||||
|
("Bilkenroth KG", "Kommanditgesellschaft"),
|
||||||
|
("jfoiahfo8sah 98548902 öhz ö", None),
|
||||||
|
]
|
||||||
|
|
||||||
|
for company_name, expected_result in data:
|
||||||
|
assert transform.map_rechtsform(company_name, {}) == expected_result
|
Loading…
x
Reference in New Issue
Block a user