Feature/additional stammdaten (#132)

Feature/additional stammdaten
This commit is contained in:
Tristan Nolde
2023-09-24 15:31:17 +02:00
committed by GitHub
20 changed files with 1800 additions and 149 deletions

View File

@ -1,26 +1,43 @@
"""Test Models.company."""
from aki_prj23_transparenzregister.models.company import Company, CompanyID, Location
from aki_prj23_transparenzregister.models.company import (
Capital,
CapitalTypeEnum,
Company,
CompanyID,
CompanyTypeEnum,
CurrencyEnum,
DistrictCourt,
Location,
)
def test_to_dict() -> None:
"""Tests if the version tag is entered."""
company_id = CompanyID("The Shire", "420")
district_court = DistrictCourt("abc", "abc")
company_id = CompanyID(district_court=district_court, hr_number="HRB 123")
location = Location(
city="Insmouth", house_number="19", street="Harbor", zip_code="1890"
)
capital = Capital(
currency=CurrencyEnum.DEUTSCHE_MARK, type=CapitalTypeEnum.GRUNDKAPITAL, value=42 # type: ignore
)
company = Company(
id=company_id,
last_update="Tomorrow",
location=location,
name="BLANK GmbH",
relationships=[],
business_purpose="Blockchain and NFTs",
capital=capital,
company_type=CompanyTypeEnum.AG, # type: ignore
founding_date="Yesterday",
)
assert company.to_dict() == {
"id": {
"district_court": company_id.district_court,
"district_court": district_court.to_dict(),
"hr_number": company_id.hr_number,
},
"last_update": company.last_update,
@ -32,4 +49,12 @@ def test_to_dict() -> None:
},
"name": "BLANK GmbH",
"relationships": [],
"business_purpose": "Blockchain and NFTs",
"capital": {
"value": capital.value,
"currency": capital.currency,
"type": capital.type,
},
"company_type": company.company_type,
"founding_date": "Yesterday",
}

View File

@ -0,0 +1,89 @@
"""Testing utils/data_extraction/unternehmensregister/extract.py."""
import os
from tempfile import TemporaryDirectory
from aki_prj23_transparenzregister.utils.data_extraction.unternehmensregister import (
extract,
)
def prepare_temporary_dir(directory: str, formats: list[str]) -> None:
    """Populate a directory with one small text file per requested extension.

    For every entry in ``formats`` a file named ``file-<index>.<format>`` is
    written into ``directory`` with the content ``Hello There <index>``.

    Args:
        directory (str): Path of an existing directory to write into.
        formats (list[str]): File extensions without the leading dot, one per file.
    """
    for index, file_format in enumerate(formats):
        test_file = os.path.join(directory, f"file-{index}.{file_format}")
        # Explicit encoding keeps the fixture independent of the platform locale.
        with open(test_file, "w", encoding="utf-8") as file:
            file.write(f"Hello There {index}")
def test_rename_latest_file() -> None:
    """Check that only the most recently written matching file is renamed."""
    import time

    with TemporaryDirectory(dir="./") as temp_dir:
        # Write three XML files one after another; the pauses keep their
        # timestamps distinguishable.
        xml_paths = []
        for file_name, content in (
            ("file1.xml", "Content 1"),
            ("file2.xml", "Content 2"),
            ("file3.xml", "Content 3"),
        ):
            path = os.path.join(temp_dir, file_name)
            with open(path, "w") as handle:
                handle.write(content)
            time.sleep(0.15)
            xml_paths.append(path)
        oldest, middle, newest = xml_paths

        # The newest XML file must be the one that ends up as 'new_file.xml'.
        extract.rename_latest_file(temp_dir, "new_file.xml")
        assert not os.path.exists(newest)
        assert os.path.exists(os.path.join(temp_dir, "new_file.xml"))
        # The two older files stay where they were.
        assert os.path.exists(oldest)
        assert os.path.exists(middle)

        # Second round: restrict the rename to a different glob pattern.
        with open(oldest, "w") as handle:
            handle.write("Content 4")
        text_source = os.path.join(temp_dir, "file4.txt")
        with open(text_source, "w") as handle:
            handle.write("Content 5")

        extract.rename_latest_file(temp_dir, "new_file.txt", pattern="*.txt")
        assert not os.path.exists(text_source)
        assert os.path.exists(os.path.join(temp_dir, "new_file.txt"))

        # The rewritten XML file must be left alone by the '*.txt' rename.
        with open(oldest) as handle:
            assert handle.read() == "Content 4"
def test_get_num_files_default_pattern() -> None:
    """Three xml files should be counted when no pattern is passed."""
    with TemporaryDirectory(dir="./") as temp_dir:
        prepare_temporary_dir(temp_dir, ["xml", "xml", "xml"])
        counted = extract.get_num_files(temp_dir)
        assert counted == 3
def test_get_num_files_different_pattern() -> None:
    """Only the single txt file should be counted for the ``*.txt`` pattern."""
    with TemporaryDirectory(dir="./") as temp_dir:
        prepare_temporary_dir(temp_dir, ["xml", "txt", "json"])
        assert extract.get_num_files(temp_dir, "*.txt") == 1
def test_wait_for_download_condition() -> None:
    """The condition should be False for a directory with one xml and one txt file."""
    with TemporaryDirectory(dir="./") as temp_dir:
        prepare_temporary_dir(temp_dir, ["xml", "txt"])
        condition = extract.wait_for_download_condition(temp_dir, 2)
        assert condition is False
def test_scrape() -> None:
    """Smoke-run ``extract.scrape`` for one company name against a temp directory."""
    # NOTE(review): no assertions and nothing is mocked here, so this only
    # checks that the call does not raise - confirm this is intended for CI.
    with TemporaryDirectory(dir="./") as temp_dir:
        download_dirs = [temp_dir]
        extract.scrape("GEA Farm Technologies GmbH", download_dirs)

View File

@ -0,0 +1,8 @@
"""Test load utils from Unternehmensregister."""
from aki_prj23_transparenzregister.utils.data_extraction.unternehmensregister import (
load,
)
def test_smoke() -> None:
    """Smoke test: the ``load`` module was imported and is truthy."""
    assert load

View File

@ -0,0 +1,592 @@
"""Testing utils/data_extraction/unternehmensregister/transform.py."""
import json
import os
from tempfile import TemporaryDirectory
from unittest.mock import Mock, patch
from aki_prj23_transparenzregister.models.company import (
Capital,
CapitalTypeEnum,
Company,
CompanyID,
CompanyRelationshipEnum,
CompanyToCompanyRelationship,
CompanyTypeEnum,
CurrencyEnum,
DistrictCourt,
Location,
PersonName,
PersonToCompanyRelationship,
RelationshipRoleEnum,
)
from aki_prj23_transparenzregister.utils.data_extraction.unternehmensregister import (
transform,
)
def test_transform_xml_to_json() -> None:
with TemporaryDirectory(dir="./") as temp_source_dir:
with open(os.path.join(temp_source_dir, "test.xml"), "w") as file:
xml_input = """<?xml version="1.0" encoding="UTF-8"?>
<test>
<message>Hello World!</message>
</test>
"""
file.write(xml_input)
with TemporaryDirectory(dir="./") as temp_target_dir:
transform.transform_xml_to_json(temp_source_dir, temp_target_dir)
with open(os.path.join(temp_target_dir, "test.json")) as file:
json_output = json.load(file)
assert json_output == {"test": {"message": "Hello World!"}}
def test_parse_stakeholder_org_hidden_in_person() -> None:
    """A person entry without a first name is expected to be parsed as a company."""
    person_entry = {
        "Voller_Name": {"Vorname": None, "Nachname": "Some Company KG"},
        "Anschrift": {"Ort": "Area 51"},
    }
    stakeholder = {
        "Beteiligter": {"Natuerliche_Person": person_entry},
        "Rolle": {"Rollenbezeichnung": {"content": "Kommanditist(in)"}},
    }

    parsed = transform.parse_stakeholder(stakeholder)

    assert parsed == CompanyToCompanyRelationship(
        role=RelationshipRoleEnum.KOMMANDITIST,  # type: ignore
        name="Some Company KG",
        type=CompanyRelationshipEnum.COMPANY,
        location=Location(city="Area 51"),
    )
def test_parse_stakeholder_person() -> None:
    """A natural person with full name, address and birth date maps to a person relationship."""
    person_entry = {
        "Voller_Name": {"Vorname": "Stephen", "Nachname": "King"},
        "Anschrift": {"Ort": "Maine"},
        "Geburt": {"Geburtsdatum": "1947-09-21"},
    }
    stakeholder = {
        "Beteiligter": {"Natuerliche_Person": person_entry},
        "Rolle": {"Rollenbezeichnung": {"content": "Geschäftsleiter(in)"}},
    }

    parsed = transform.parse_stakeholder(stakeholder)

    assert parsed == PersonToCompanyRelationship(
        role=RelationshipRoleEnum.GESCHAEFTSLEITER,  # type: ignore
        date_of_birth="1947-09-21",
        name=PersonName(firstname="Stephen", lastname="King"),
        type=CompanyRelationshipEnum.PERSON,
        location=Location(city="Maine"),
    )
def test_parse_stakeholder_org() -> None:
    """An organisation entry maps to a company relationship with its full address."""
    organisation = {
        "Bezeichnung": {"Bezeichnung_Aktuell": "Transparenzregister kG"},
        "Anschrift": {
            "Ort": "Iserlohn",
            "Strasse": "Hauptstrasse",
            "Hausnummer": "42",
            "Postleitzahl": "58636",
        },
        "Geburt": {"Geburtsdatum": "1947-09-21"},
    }
    stakeholder = {
        "Beteiligter": {"Organisation": organisation},
        "Rolle": {"Rollenbezeichnung": {"content": "Geschäftsführender Direktor"}},
    }

    parsed = transform.parse_stakeholder(stakeholder)

    expected_location = Location(
        city="Iserlohn",
        zip_code="58636",
        house_number="42",
        street="Hauptstrasse",
    )
    assert parsed == CompanyToCompanyRelationship(
        name="Transparenzregister kG",
        role=RelationshipRoleEnum.DIREKTOR,  # type: ignore
        type=CompanyRelationshipEnum.COMPANY,
        location=expected_location,
    )
def test_parse_stakeholder_no_result() -> None:
    """An empty "Beteiligter" entry should not produce a relationship."""
    empty_stakeholder: dict = {"Beteiligter": {}}
    parsed = transform.parse_stakeholder(empty_stakeholder)
    assert parsed is None
def test_loc_from_beteiligung() -> None:
    """The "Anschrift" of the listed organisation should be returned as a Location."""
    organisation = {
        "Bezeichnung": {
            "Bezeichnung_Aktuell": "1 A Autenrieth Kunststofftechnik GmbH & Co. KG"
        },
        "Sitz": {
            "Ort": "Heroldstatt",
            "Staat": {
                "@xsi:type": "WL_Staaten",
                "@wl_version": "1.5",
                "@wl_fassung": "2",
                "content": "DE",
            },
        },
        "Anschrift": {
            "Strasse": "Gewerbestraße",
            "Hausnummer": "8",
            "Postleitzahl": "72535",
            "Ort": "Heroldstatt",
        },
    }
    participant = {
        "Beteiligter": {"Beteiligtennummer": "1", "Organisation": organisation}
    }
    payload = {
        "XJustiz_Daten": {
            "Grunddaten": {"Verfahrensdaten": {"Beteiligung": [participant]}}
        }
    }

    location = transform.loc_from_beteiligung(payload)

    assert location == Location(
        city="Heroldstatt", house_number="8", street="Gewerbestraße", zip_code="72535"
    )
def test_name_from_beteiligung() -> None:
    """The current designation of the listed organisation should be returned as name."""
    company_name = "1 A Autenrieth Kunststofftechnik GmbH & Co. KG"
    participant = {
        "Beteiligter": {
            "Beteiligtennummer": "1",
            "Organisation": {
                "Bezeichnung": {
                    "Bezeichnung_Aktuell": "1 A Autenrieth Kunststofftechnik GmbH & Co. KG"
                },
            },
        }
    }
    payload = {
        "XJustiz_Daten": {
            "Grunddaten": {"Verfahrensdaten": {"Beteiligung": [participant]}}
        }
    }

    assert transform.name_from_beteiligung(payload) == company_name
def test_map_rechtsform() -> None:
    """The legal form found in the register data should be returned when the name is empty."""
    legal_form = "Gesellschaft mit beschränkter Haftung"
    base_data = {
        "Aktuelles_Satzungsdatum": "1952-07-15",
        "Rechtstraeger": {
            "Rechtsform": {"content": "Gesellschaft mit beschränkter Haftung"},
        },
    }
    payload = {
        "XJustiz_Daten": {"Fachdaten_Register": {"Basisdaten_Register": base_data}}
    }

    assert transform.map_rechtsform("", payload) == legal_form
def test_map_rechtsform_from_name() -> None:
    """Without register data the legal form should be derived from the company name."""
    expected_by_name = {
        "GEA Farm Technologies GmbH": "Gesellschaft mit beschränkter Haftung",
        "Atos SE": "Europäische Aktiengesellschaft (SE)",
        "Bilkenroth KG": "Kommanditgesellschaft",
        # A name without a recognisable suffix yields no legal form.
        "jfoiahfo8sah 98548902 öhz ö": None,
    }
    for company_name, expected_result in expected_by_name.items():
        assert transform.map_rechtsform(company_name, {}) == expected_result
def test_map_capital_kg_single() -> None:
    """A KG with a single Kommanditist entry should yield that Hafteinlage as capital."""
    expected = Capital(
        currency=CurrencyEnum.EURO, value=69000, type=CapitalTypeEnum.HAFTEINLAGE  # type: ignore
    )
    limited_partner = {
        "Hafteinlage": {
            "Zahl": str(expected.value),
            "Waehrung": expected.currency,
        },
    }
    payload = {
        "XJustiz_Daten": {
            "Fachdaten_Register": {
                "Zusatzangaben": {
                    "Personengesellschaft": {
                        "Zusatz_KG": {"Daten_Kommanditist": limited_partner}
                    }
                }
            }
        }
    }
    mapped = transform.map_capital(payload, CompanyTypeEnum.KG)  # type: ignore
    assert mapped == expected
def test_map_capital_kg_sum() -> None:
    """Two Kommanditist entries of 10000 each should be summed to a capital of 20000."""
    expected = Capital(
        currency=CurrencyEnum.EURO, value=20000, type=CapitalTypeEnum.HAFTEINLAGE  # type: ignore
    )
    limited_partners = [
        {
            "Hafteinlage": {
                "Zahl": str(10000),
                "Waehrung": expected.currency,
            }
        }
        for _ in range(2)
    ]
    payload = {
        "XJustiz_Daten": {
            "Fachdaten_Register": {
                "Zusatzangaben": {
                    "Personengesellschaft": {
                        "Zusatz_KG": {"Daten_Kommanditist": limited_partners}
                    }
                }
            }
        }
    }
    mapped = transform.map_capital(payload, CompanyTypeEnum.KG)  # type: ignore
    assert mapped == expected
def test_map_capital_no_fachdaten() -> None:
    """An empty "Fachdaten_Register" section should yield no capital."""
    payload: dict = {"XJustiz_Daten": {"Fachdaten_Register": {}}}
    mapped = transform.map_capital(payload, CompanyTypeEnum.KG)  # type: ignore
    assert mapped is None
def test_map_capital_gmbh() -> None:
    """The Stammkapital of a GmbH should be mapped to a Capital object."""
    expected = Capital(
        currency=CurrencyEnum.DEUTSCHE_MARK, value=42, type=CapitalTypeEnum.STAMMKAPITAL  # type: ignore
    )
    share_capital = {
        "Zahl": str(expected.value),
        "Waehrung": expected.currency,
    }
    payload = {
        "XJustiz_Daten": {
            "Fachdaten_Register": {
                "Zusatzangaben": {
                    "Kapitalgesellschaft": {
                        "Zusatz_GmbH": {"Stammkapital": share_capital}
                    }
                }
            }
        }
    }
    mapped = transform.map_capital(payload, CompanyTypeEnum.GMBH)  # type: ignore
    assert mapped == expected
def test_map_capital_ag() -> None:
    """Grundkapital under "Zusatz_Aktiengesellschaft" should be mapped for an SE."""
    expected = Capital(
        currency=CurrencyEnum.DEUTSCHE_MARK, value=42, type=CapitalTypeEnum.GRUNDKAPITAL  # type: ignore
    )
    amount = {
        "Zahl": str(expected.value),
        "Waehrung": expected.currency,
    }
    payload = {
        "XJustiz_Daten": {
            "Fachdaten_Register": {
                "Zusatzangaben": {
                    "Kapitalgesellschaft": {
                        "Zusatz_Aktiengesellschaft": {
                            "Grundkapital": {"Hoehe": amount},
                        }
                    }
                }
            }
        }
    }
    mapped = transform.map_capital(payload, CompanyTypeEnum.SE)  # type: ignore
    assert mapped == expected
def test_map_capital_personengesellschaft() -> None:
    """Stammkapital nested under "Personengesellschaft" should be mapped for an OHG."""
    expected = Capital(
        currency=CurrencyEnum.DEUTSCHE_MARK, value=42, type=CapitalTypeEnum.STAMMKAPITAL  # type: ignore
    )
    share_capital = {
        "Zahl": str(expected.value),
        "Waehrung": expected.currency,
    }
    payload = {
        "XJustiz_Daten": {
            "Fachdaten_Register": {
                "Zusatzangaben": {
                    "Personengesellschaft": {
                        "Zusatz_GmbH": {"Stammkapital": share_capital}
                    }
                }
            }
        }
    }
    mapped = transform.map_capital(payload, CompanyTypeEnum.OHG)  # type: ignore
    assert mapped == expected
def test_map_capital_einzelkaufmann() -> None:
    """No capital is expected for an Einzelkaufmann even if capital data is present."""
    unused_capital = Capital(
        currency=CurrencyEnum.DEUTSCHE_MARK, value=42, type=CapitalTypeEnum.STAMMKAPITAL  # type: ignore
    )
    share_capital = {
        "Zahl": str(unused_capital.value),
        "Waehrung": unused_capital.currency,
    }
    payload = {
        "XJustiz_Daten": {
            "Fachdaten_Register": {
                "Zusatzangaben": {
                    "Personengesellschaft": {
                        "Zusatz_GmbH": {"Stammkapital": share_capital}
                    }
                }
            }
        }
    }
    mapped = transform.map_capital(payload, CompanyTypeEnum.EINZELKAUFMANN)  # type: ignore
    assert mapped is None
def test_map_capital_partial_null_values() -> None:
    """A capital entry whose amount is None should yield no capital."""
    reference = Capital(
        currency=CurrencyEnum.DEUTSCHE_MARK, value=42, type=CapitalTypeEnum.STAMMKAPITAL  # type: ignore
    )
    # Only the currency is filled in; the amount is deliberately missing.
    share_capital = {
        "Zahl": None,
        "Waehrung": reference.currency,
    }
    payload = {
        "XJustiz_Daten": {
            "Fachdaten_Register": {
                "Zusatzangaben": {
                    "Personengesellschaft": {
                        "Zusatz_GmbH": {"Stammkapital": share_capital}
                    }
                }
            }
        }
    }
    mapped = transform.map_capital(payload, CompanyTypeEnum.OHG)  # type: ignore
    assert mapped is None
def test_map_business_purpose() -> None:
    """The "Gegenstand_oder_Geschaeftszweck" entry should be returned unchanged."""
    purpose = "Handel mit Betäubungsmitteln aller Art"
    base_data = {"Gegenstand_oder_Geschaeftszweck": purpose}
    payload = {
        "XJustiz_Daten": {"Fachdaten_Register": {"Basisdaten_Register": base_data}}
    }
    assert transform.map_business_purpose(payload) == purpose
def test_map_business_purpose_no_result() -> None:
    """Missing register data should yield no business purpose."""
    payload: dict = {"XJustiz_Daten": {}}
    assert transform.map_business_purpose(payload) is None
def test_map_founding_date_from_tag_der_ersten_eintragung() -> None:
    """A "Tag der ersten Eintragung" text should be converted to an ISO date."""
    payload = {
        "some entry": "Tag der ersten Eintragung: 01.05.2004",
        "some other entry": "hfjdoöiashföahöf iodsazo8 5z4o fdsha8oü gfdsö",
    }
    founding_date = transform.map_founding_date(payload)
    assert founding_date == "2004-05-01"
def test_map_founding_date_from_gesellschaftsvertrag() -> None:
    """A "Gesellschaftsvertrag vom <date>" phrase should be converted to an ISO date."""
    payload = {
        "some entry": "hfjdoöiashföahöf iodsazo8 5z4o fdsha8oü gfdsö",
        "some other entry": "Das Wesen der Rekursion ist der Selbstaufruf Gesellschaftsvertrag vom 22.12.1996 Hallo Welt",
    }
    founding_date = transform.map_founding_date(payload)
    assert founding_date == "1996-12-22"
def test_map_founding_date_from_gruendungsdatum() -> None:
    """An explicit "Gruendungsdatum" entry should be returned as the founding date."""
    base_data = {"Gruendungsmetadaten": {"Gruendungsdatum": "1998-01-01"}}
    payload = {
        "XJustiz_Daten": {"Fachdaten_Register": {"Basisdaten_Register": base_data}}
    }
    founding_date = transform.map_founding_date(payload)
    assert founding_date == "1998-01-01"
def test_map_founding_date_no_result() -> None:
    """An empty "Basisdaten_Register" section should yield no founding date."""
    payload: dict = {"XJustiz_Daten": {"Fachdaten_Register": {"Basisdaten_Register": {}}}}
    assert transform.map_founding_date(payload) is None
def test_map_company_id() -> None:
    """Court name, court city and file number should be combined into a CompanyID."""
    court = DistrictCourt("Amtsgericht Ulm", "Ulm")
    expected_id = CompanyID(court, "HRA 4711")
    # The court is the second "Beteiligung" entry; the first one is left empty.
    court_entry = {
        "Beteiligter": {
            "Organisation": {
                "Bezeichnung": {"Bezeichnung_Aktuell": court.name},
                "Sitz": {
                    "Ort": court.city,
                },
            }
        },
    }
    proceedings = {
        "Instanzdaten": {
            "Aktenzeichen": expected_id.hr_number,
        },
        "Beteiligung": [{}, court_entry],
    }
    payload = {
        "XJustiz_Daten": {
            "Grunddaten": {
                "@XJustizVersion": "1.20.0",
                "Verfahrensdaten": proceedings,
            },
        }
    }
    mapped_id = transform.map_company_id(payload)
    assert mapped_id == expected_id
def test_map_last_update() -> None:
    """The "letzte_Eintragung" value should be returned as the last update."""
    last_entry = "2024-01-01"
    excerpt = {"letzte_Eintragung": last_entry}
    payload = {"XJustiz_Daten": {"Fachdaten_Register": {"Auszug": excerpt}}}
    assert transform.map_last_update(payload) == last_entry
@patch(
    "aki_prj23_transparenzregister.utils.data_extraction.unternehmensregister.transform.map_company_id"
)
@patch(
    "aki_prj23_transparenzregister.utils.data_extraction.unternehmensregister.transform.name_from_beteiligung"
)
@patch(
    "aki_prj23_transparenzregister.utils.data_extraction.unternehmensregister.transform.loc_from_beteiligung"
)
@patch(
    "aki_prj23_transparenzregister.utils.data_extraction.unternehmensregister.transform.map_last_update"
)
@patch(
    "aki_prj23_transparenzregister.utils.data_extraction.unternehmensregister.transform.map_rechtsform"
)
@patch(
    "aki_prj23_transparenzregister.utils.data_extraction.unternehmensregister.transform.map_capital"
)
@patch(
    "aki_prj23_transparenzregister.utils.data_extraction.unternehmensregister.transform.map_business_purpose"
)
@patch(
    "aki_prj23_transparenzregister.utils.data_extraction.unternehmensregister.transform.map_founding_date"
)
@patch(
    "aki_prj23_transparenzregister.utils.data_extraction.unternehmensregister.transform.parse_stakeholder"
)
def test_map_unternehmensregister_json(  # noqa: PLR0913
    mock_map_parse_stakeholder: Mock,
    mock_map_founding_date: Mock,
    mock_map_business_purpose: Mock,
    mock_map_capital: Mock,
    mock_map_rechtsform: Mock,
    mock_map_last_update: Mock,
    mock_loc_from_beteiligung: Mock,
    mock_map_name_from_beteiligung: Mock,
    mock_map_company_id: Mock,
) -> None:
    """With all mapping helpers patched, their return values should be assembled into a Company.

    The patch decorators are applied bottom-up, so the mock for the lowest
    decorator is passed as the first argument.

    Args:
        mock_map_parse_stakeholder (Mock): Patched ``transform.parse_stakeholder``.
        mock_map_founding_date (Mock): Patched ``transform.map_founding_date``.
        mock_map_business_purpose (Mock): Patched ``transform.map_business_purpose``.
        mock_map_capital (Mock): Patched ``transform.map_capital``.
        mock_map_rechtsform (Mock): Patched ``transform.map_rechtsform``.
        mock_map_last_update (Mock): Patched ``transform.map_last_update``.
        mock_loc_from_beteiligung (Mock): Patched ``transform.loc_from_beteiligung``.
        mock_map_name_from_beteiligung (Mock): Patched ``transform.name_from_beteiligung``.
        mock_map_company_id (Mock): Patched ``transform.map_company_id``.
    """
    expected_company = Company(
        **{  # type: ignore
            "id": Mock(),
            "name": Mock(),
            "location": Mock(),
            "last_update": Mock(),
            "company_type": Mock(),
            "capital": Mock(),
            "business_purpose": Mock(),
            "founding_date": Mock(),
            "relationships": [Mock()],
        }
    )
    # Wire each patched helper to hand back the matching field of the expected company.
    wiring = (
        (mock_map_company_id, expected_company.id),
        (mock_map_name_from_beteiligung, expected_company.name),
        (mock_loc_from_beteiligung, expected_company.location),
        (mock_map_last_update, expected_company.last_update),
        (mock_map_rechtsform, expected_company.company_type),
        (mock_map_capital, expected_company.capital),
        (mock_map_business_purpose, expected_company.business_purpose),
        (mock_map_founding_date, expected_company.founding_date),
        (mock_map_parse_stakeholder, expected_company.relationships[0]),
    )
    for helper_mock, value in wiring:
        helper_mock.return_value = value

    payload: dict = {
        "XJustiz_Daten": {
            "Grunddaten": {"Verfahrensdaten": {"Beteiligung": [{}, {}, {}]}}
        }
    }
    mapped_company = transform.map_unternehmensregister_json(payload)
    assert mapped_company == expected_company

View File

@ -3,7 +3,12 @@ from unittest.mock import Mock
import pytest
from aki_prj23_transparenzregister.models.company import Company, CompanyID, Location
from aki_prj23_transparenzregister.models.company import (
Company,
CompanyID,
DistrictCourt,
Location,
)
from aki_prj23_transparenzregister.utils.mongo.company_mongo_service import (
CompanyMongoService,
)
@ -73,7 +78,8 @@ def test_by_id_no_result(mock_mongo_connector: Mock, mock_collection: Mock) -> N
mock_mongo_connector.database = {"companies": mock_collection}
service = CompanyMongoService(mock_mongo_connector)
mock_collection.find.return_value = []
assert service.get_by_id("Does not exist") is None
id = CompanyID(DistrictCourt("a", "b"), "c").to_dict()
assert service.get_by_id(id) is None
def test_by_id_result(mock_mongo_connector: Mock, mock_collection: Mock) -> None:
@ -81,13 +87,14 @@ def test_by_id_result(mock_mongo_connector: Mock, mock_collection: Mock) -> None
Args:
mock_mongo_connector (Mock): Mocked MongoConnector library
mock_collection (Mock): Mocked pymongo collection
mock_collection (Mock): Mocked pymongo collection.
"""
mock_mongo_connector.database = {"companies": mock_collection}
service = CompanyMongoService(mock_mongo_connector)
mock_entry = {"id": "Does exist", "vaue": 42}
mock_collection.find.return_value = [mock_entry]
assert service.get_by_id("Does exist") == mock_entry
id = CompanyID(DistrictCourt("a", "b"), "c").to_dict()
assert service.get_by_id(id) == mock_entry
def test_insert(mock_mongo_connector: Mock, mock_collection: Mock) -> None:
@ -103,7 +110,7 @@ def test_insert(mock_mongo_connector: Mock, mock_collection: Mock) -> None:
mock_collection.insert_one.return_value = mock_result
assert (
service.insert(
Company(CompanyID("", ""), Location("Hier und Dort"), "", "", [])
Company(CompanyID("", ""), Location("Hier und Dort"), "", "", []) # type: ignore
)
== mock_result
)

View File

@ -33,3 +33,15 @@ def test_simplify_string_type_error(value: Any) -> None:
"""Tests if the type error is thrown when the value is the wrong type."""
with pytest.raises(TypeError):
assert string_tools.simplify_string(value)
@pytest.mark.parametrize(
    ("value", "expected"),
    [
        ("10.10.1111", "1111-10-10"),
        ("10.10.98", "1998-10-10"),
    ],
)
def test_transform_date_to_iso(value: str, expected: str) -> None:
    """Tests if dotted dates with four- or two-digit years are converted to ISO format."""
    assert string_tools.transform_date_to_iso(value) == expected