Fix/company names with quotes (#187)

This commit is contained in:
Tristan Nolde
2023-10-04 20:07:51 +02:00
committed by GitHub
4 changed files with 80 additions and 11 deletions

View File

@ -25,7 +25,10 @@ from aki_prj23_transparenzregister.models.company import (
PersonToCompanyRelationship, PersonToCompanyRelationship,
RelationshipRoleEnum, RelationshipRoleEnum,
) )
from aki_prj23_transparenzregister.utils.string_tools import transform_date_to_iso from aki_prj23_transparenzregister.utils.string_tools import (
remove_traling_and_leading_quotes,
transform_date_to_iso,
)
def transform_xml_to_json(source_dir: str, target_dir: str) -> None: def transform_xml_to_json(source_dir: str, target_dir: str) -> None:
@ -62,10 +65,12 @@ def parse_stakeholder(data: dict) -> CompanyRelationship | None:
# It's a Company serving as a "Kommanditist" or similar # It's a Company serving as a "Kommanditist" or similar
if data["Beteiligter"]["Natuerliche_Person"]["Voller_Name"]["Vorname"] is None: if data["Beteiligter"]["Natuerliche_Person"]["Voller_Name"]["Vorname"] is None:
return CompanyToCompanyRelationship( return CompanyToCompanyRelationship(
**{ **{ # type: ignore
"name": data["Beteiligter"]["Natuerliche_Person"]["Voller_Name"][ "name": remove_traling_and_leading_quotes(
"Nachname" data["Beteiligter"]["Natuerliche_Person"]["Voller_Name"][
], "Nachname"
]
),
"location": Location( "location": Location(
**{ **{
"city": data["Beteiligter"]["Natuerliche_Person"][ "city": data["Beteiligter"]["Natuerliche_Person"][
@ -124,13 +129,15 @@ def parse_stakeholder(data: dict) -> CompanyRelationship | None:
) )
if "Organisation" in data["Beteiligter"]: if "Organisation" in data["Beteiligter"]:
return CompanyToCompanyRelationship( return CompanyToCompanyRelationship(
**{ **{ # type: ignore
"role": RelationshipRoleEnum( "role": RelationshipRoleEnum(
data["Rolle"]["Rollenbezeichnung"]["content"] data["Rolle"]["Rollenbezeichnung"]["content"]
), ),
"name": data["Beteiligter"]["Organisation"]["Bezeichnung"][ "name": remove_traling_and_leading_quotes(
"Bezeichnung_Aktuell" data["Beteiligter"]["Organisation"]["Bezeichnung"][
], "Bezeichnung_Aktuell"
]
),
"location": Location( "location": Location(
**{ **{
"city": data["Beteiligter"]["Organisation"]["Anschrift"]["Ort"], "city": data["Beteiligter"]["Organisation"]["Anschrift"]["Ort"],
@ -223,9 +230,10 @@ def name_from_beteiligung(data: dict) -> str:
Returns: Returns:
str: Company name str: Company name
""" """
return data["XJustiz_Daten"]["Grunddaten"]["Verfahrensdaten"]["Beteiligung"][0][ name = data["XJustiz_Daten"]["Grunddaten"]["Verfahrensdaten"]["Beteiligung"][0][
"Beteiligter" "Beteiligter"
]["Organisation"]["Bezeichnung"]["Bezeichnung_Aktuell"] ]["Organisation"]["Bezeichnung"]["Bezeichnung_Aktuell"]
return remove_traling_and_leading_quotes(name)
def map_rechtsform(company_name: str, data: dict) -> CompanyTypeEnum | None: def map_rechtsform(company_name: str, data: dict) -> CompanyTypeEnum | None:

View File

@ -34,3 +34,24 @@ def transform_date_to_iso(date: str) -> str:
input_format = "%d.%m.%y" if re.match(regex_yy, date) else "%d.%m.%Y" input_format = "%d.%m.%y" if re.match(regex_yy, date) else "%d.%m.%Y"
date_temp = datetime.strptime(date, input_format) date_temp = datetime.strptime(date, input_format)
return date_temp.strftime("%Y-%m-%d") return date_temp.strftime("%Y-%m-%d")
def remove_traling_and_leading_quotes(value: str) -> str:
    """Strip stray or enclosing double quotes from the ends of a string.

    Two rules are applied, in this order:

    1. If the input contains an odd number of double quotes, a quote at
       the very start and/or at the very end is dropped.
    2. If the (possibly shortened) text both starts and ends with a
       double quote, that outer pair is dropped as well.

    Only the first and last characters are ever removed; anything between
    them is returned untouched.

    Args:
        value (str): Text to clean. ``None`` is tolerated and handed back
            unchanged.

    Returns:
        str: The cleaned text, or ``None`` when ``value`` was ``None``.

    Examples:
        >>> remove_traling_and_leading_quotes('"Siemes Verwaltungs-GmbH"')
        'Siemes Verwaltungs-GmbH'
        >>> remove_traling_and_leading_quotes('Hans-Wurst GmbH"')
        'Hans-Wurst GmbH'
    """
    if value is None:
        return value
    # The parity is computed once, from the untouched input: when it is odd,
    # both ends are checked even though dropping the leading quote has
    # already changed the number of quotes that remain.
    if value.count('"') % 2:
        value = value.removeprefix('"').removesuffix('"')
    # Whatever is left may still be wrapped in a matching outer pair.
    if value.startswith('"') and value.endswith('"'):
        value = value[1:-1]
    return value

View File

@ -46,7 +46,7 @@ def test_parse_stakeholder_org_hidden_in_person() -> None:
data = { data = {
"Beteiligter": { "Beteiligter": {
"Natuerliche_Person": { "Natuerliche_Person": {
"Voller_Name": {"Vorname": None, "Nachname": "Some Company KG"}, "Voller_Name": {"Vorname": None, "Nachname": '"Some Company KG'},
"Anschrift": {"Ort": "Area 51"}, "Anschrift": {"Ort": "Area 51"},
} }
}, },
@ -304,6 +304,32 @@ def test_name_from_beteiligung() -> None:
assert transform.name_from_beteiligung(data) == expected_result assert transform.name_from_beteiligung(data) == expected_result
def test_name_from_beteiligung_remove_quotes() -> None:
    """The company name comes back without its enclosing double quotes."""
    # Build the fixture inside-out so the quoted name is easy to spot.
    organisation = {
        "Bezeichnung": {"Bezeichnung_Aktuell": '"Siemes Verwaltungs-GmbH"'},
    }
    beteiligter = {
        "Beteiligtennummer": "1",
        "Organisation": organisation,
    }
    data = {
        "XJustiz_Daten": {
            "Grunddaten": {
                "Verfahrensdaten": {
                    "Beteiligung": [
                        {"Beteiligter": beteiligter},
                    ]
                }
            }
        }
    }

    assert transform.name_from_beteiligung(data) == "Siemes Verwaltungs-GmbH"
def test_map_rechtsform() -> None: def test_map_rechtsform() -> None:
data = { data = {
"XJustiz_Daten": { "XJustiz_Daten": {

View File

@ -45,3 +45,17 @@ def test_simplify_string_type_error(value: Any) -> None:
def test_transform_date_to_iso(value: str, expected: str) -> None: def test_transform_date_to_iso(value: str, expected: str) -> None:
result = string_tools.transform_date_to_iso(value) result = string_tools.transform_date_to_iso(value)
assert result == expected assert result == expected
@pytest.mark.parametrize(
    ("value", "expected_result"),
    [
        # None is passed straight through.
        (None, None),
        # An enclosing pair is removed.
        ('"Siemes Verwaltungs-GmbH"', "Siemes Verwaltungs-GmbH"),
        # A balanced pair that does not wrap the whole name is kept.
        ('"Hans"-Wurst GmbH', '"Hans"-Wurst GmbH'),
        # A lone trailing quote is removed.
        ('Hans-Wurst GmbH"', "Hans-Wurst GmbH"),
    ],
)
def test_remove_trailing_and_leading_quotes(value: str, expected_result: str) -> None:
    """Check quote stripping against the table of inputs above."""
    assert string_tools.remove_traling_and_leading_quotes(value) == expected_result