mirror of
https://github.com/fhswf/aki_prj23_transparenzregister.git
synced 2025-04-22 08:12:54 +02:00
feat(data-extraction): Extract c/o relation from street (#222)
This commit is contained in:
commit
99b61e7c2e
@ -31,6 +31,7 @@ class RelationshipRoleEnum(str, MultiValueEnum):
|
||||
GESCHAEFTSLEITER = "Geschäftsleiter(in)", "Geschäftsleiter"
|
||||
ZWEIGNIEDERLASSUNG = "Zweigniederlassung"
|
||||
HAUPTNIEDERLASSUNG = "Hauptniederlassung"
|
||||
CARE_OF = "c/o"
|
||||
|
||||
|
||||
class CompanyTypeEnum(str, MultiValueEnum):
|
||||
|
@ -486,6 +486,48 @@ def map_last_update(data: dict) -> str:
|
||||
return data["XJustiz_Daten"]["Fachdaten_Register"]["Auszug"]["letzte_Eintragung"]
|
||||
|
||||
|
||||
def map_co_relation(data: dict) -> dict:
    """Search for and map the c/o relation from location.street if possible.

    The street is split on commas. A segment that, once trimmed, starts with
    ``c/o`` is removed from the street, and the text following ``c/o`` is used
    as the name of the company in a ``CARE_OF`` relationship that is appended
    to ``data["relationships"]``. A bare ``c/o`` without a name is removed from
    the street but produces no relationship.

    Args:
        data (dict): Company dict; reads ``data["location"]`` and, when a c/o
            company name is found, appends to ``data["relationships"]``.

    Returns:
        dict: Modified Company dict (the same object that was passed in)
    """
    location = data["location"]
    street = location.street
    if street is None:
        return data

    parts = street.split(",")
    co_company = None
    co_company_index = None
    for index, part in enumerate(parts):
        match = re.match(r"^c\/o(.*)$", part.strip())
        if match is not None:
            # If several segments start with "c/o", the last one wins and is
            # the only one removed from the street.
            co_company = match.group(1).strip()
            co_company_index = index

    if co_company_index is not None:
        del parts[co_company_index]

    # Re-join with the separator that was used for splitting so that commas
    # belonging to the remaining street segments are preserved; joining with
    # an empty string silently dropped them, even for streets without c/o.
    street = ",".join(parts).strip()
    location.street = street

    if co_company:
        relation = CompanyToCompanyRelationship(
            RelationshipRoleEnum.CARE_OF,  # type: ignore
            Location(
                location.city,
                street,
                location.house_number,
                location.zip_code,
            ),
            CompanyRelationshipEnum.COMPANY,  # type: ignore
            co_company,
        )
        data["relationships"].append(relation)
    return data
|
||||
|
||||
|
||||
def map_unternehmensregister_json(data: dict) -> Company:
|
||||
"""Processes the Unternehmensregister structured export to a Company by using several helper methods.
|
||||
|
||||
@ -516,15 +558,13 @@ def map_unternehmensregister_json(data: dict) -> Company:
|
||||
data["XJustiz_Daten"]["Grunddaten"]["Verfahrensdaten"]["Beteiligung"][i]
|
||||
)
|
||||
result["relationships"].append(people)
|
||||
result = map_co_relation(result)
|
||||
return Company(**result)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from loguru import logger
|
||||
|
||||
# transform_xml_to_json(
|
||||
# "./data/Unternehmensregister/scraping/", "./data/Unternehmensregister/export/"
|
||||
# )
|
||||
base_path = "./Jupyter/API-tests/Unternehmensregister/data/Unternehmensregister"
|
||||
for file in tqdm(glob.glob1(f"{base_path}/export", "*.json")):
|
||||
path = os.path.join(f"{base_path}/export", file)
|
||||
@ -544,6 +584,7 @@ if __name__ == "__main__":
|
||||
json.dump(
|
||||
dataclasses.asdict(company), export_file, ensure_ascii=False
|
||||
)
|
||||
except Exception:
|
||||
except Exception as e:
|
||||
logger.error(e)
|
||||
logger.error(f"Error in processing {path}")
|
||||
sys.exit(1)
|
||||
|
@ -709,6 +709,86 @@ def test_map_last_update() -> None:
|
||||
assert result == date
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    ("value", "expected_result"),
    [
        # c/o segment first, location without city / house number / zip code.
        (
            {
                "location": Location(
                    "", "c/o Youco24 Business Center, Abc ffda", None, None
                ),
                "relationships": [],
            },
            {
                "location": Location("", "Abc ffda", None, None),
                "relationships": [
                    CompanyToCompanyRelationship(
                        RelationshipRoleEnum.CARE_OF,  # type: ignore
                        Location("", "Abc ffda", None, None),
                        CompanyRelationshipEnum.COMPANY,
                        "Youco24 Business Center",
                    )
                ],
            },
        ),
        # c/o segment first, fully populated location.
        (
            {
                "location": Location(
                    "Iserlohn", "c/o Youco24 Business Center, Abc Str.", "42", "58644"
                ),
                "relationships": [],
            },
            {
                "location": Location("Iserlohn", "Abc Str.", "42", "58644"),
                "relationships": [
                    CompanyToCompanyRelationship(
                        RelationshipRoleEnum.CARE_OF,  # type: ignore
                        Location("Iserlohn", "Abc Str.", "42", "58644"),
                        CompanyRelationshipEnum.COMPANY,
                        "Youco24 Business Center",
                    )
                ],
            },
        ),
        # c/o segment after the street name.
        (
            {
                "location": Location(
                    "Iserlohn", "Abc Str., c/o Youco24 Business Center", "42", "58644"
                ),
                "relationships": [],
            },
            {
                "location": Location("Iserlohn", "Abc Str.", "42", "58644"),
                "relationships": [
                    CompanyToCompanyRelationship(
                        RelationshipRoleEnum.CARE_OF,  # type: ignore
                        Location("Iserlohn", "Abc Str.", "42", "58644"),
                        CompanyRelationshipEnum.COMPANY,
                        "Youco24 Business Center",
                    )
                ],
            },
        ),
        # Bare "c/o" without a company name: street is cleaned, no relation added.
        (
            {
                "location": Location("Iserlohn", "Abc Str., c/o", "42", "58644"),
                "relationships": [],
            },
            {
                "location": Location("Iserlohn", "Abc Str.", "42", "58644"),
                "relationships": [],
            },
        ),
    ],
)
def test_map_co_relation(value: dict, expected_result: dict) -> None:
    """Check that map_co_relation yields the expected street and relationships."""
    assert transform.map_co_relation(value) == expected_result
|
||||
|
||||
|
||||
@patch(
|
||||
"aki_prj23_transparenzregister.utils.data_extraction.unternehmensregister.transform.map_co_relation"
|
||||
)
|
||||
@patch(
|
||||
"aki_prj23_transparenzregister.utils.data_extraction.unternehmensregister.transform.map_company_id"
|
||||
)
|
||||
@ -746,6 +826,7 @@ def test_map_unternehmensregister_json( # noqa: PLR0913
|
||||
mock_loc_from_beteiligung: Mock,
|
||||
mock_map_name_from_beteiligung: Mock,
|
||||
mock_map_company_id: Mock,
|
||||
mock_map_co_relation: Mock,
|
||||
) -> None:
|
||||
expected_result = Company(
|
||||
**{ # type: ignore
|
||||
@ -770,6 +851,7 @@ def test_map_unternehmensregister_json( # noqa: PLR0913
|
||||
mock_map_business_purpose.return_value = expected_result.business_purpose
|
||||
mock_map_founding_date.return_value = expected_result.founding_date
|
||||
mock_map_parse_stakeholder.return_value = expected_result.relationships[0]
|
||||
mock_map_co_relation.side_effect = lambda x: x
|
||||
|
||||
data: dict = {
|
||||
"XJustiz_Daten": {
|
||||
|
Loading…
x
Reference in New Issue
Block a user