Rework the transfer of company data to fit the new data in the mongodb (#188)

This adds the additional company data to the SQL database as proposed.

- [x] @TrisNol Is everything included, or did I miss a feature? Relations
are handled in another issue.
- [x] @KM-R New DB features for the Dashboard, for your review.
This commit is contained in:
Philipp Horstenkamp 2023-10-05 19:47:46 +02:00 committed by GitHub
parent 2152704dfc
commit c6f2c7467c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 98 additions and 27 deletions

View File

@ -62,7 +62,7 @@ def parse_stakeholder(data: dict) -> CompanyRelationship | None:
CompanyRelationship | None: Relationship if it could be processed CompanyRelationship | None: Relationship if it could be processed
""" """
if "Natuerliche_Person" in data["Beteiligter"]: if "Natuerliche_Person" in data["Beteiligter"]:
# It's a Compnay serving as a "Kommanditist" or similar # It's a Company serving as a "Kommanditist" or similar
if data["Beteiligter"]["Natuerliche_Person"]["Voller_Name"]["Vorname"] is None: if data["Beteiligter"]["Natuerliche_Person"]["Voller_Name"]["Vorname"] is None:
return CompanyToCompanyRelationship( return CompanyToCompanyRelationship(
**{ # type: ignore **{ # type: ignore
@ -489,8 +489,9 @@ if __name__ == "__main__":
path = os.path.join(f"{base_path}/export", file) path = os.path.join(f"{base_path}/export", file)
with open(path, encoding="utf-8") as file_object: with open(path, encoding="utf-8") as file_object:
try: try:
data = json.loads(file_object.read()) company: Company = map_unternehmensregister_json(
company: Company = map_unternehmensregister_json(data) json.loads(file_object.read())
)
name = "".join(e for e in company.name if e.isalnum())[:50] name = "".join(e for e in company.name if e.isalnum())[:50]

View File

@ -215,7 +215,7 @@ CURRENCY_CONVERSION_DICT: Final[frozendict[CurrencyEnum, float]] = frozendict(
def norm_capital( def norm_capital(
capital: dict[Literal["value", "currency", "type"], Any] capital: dict[Literal["value", "currency", "type"], Any]
) -> dict[ ) -> dict[
Literal["capital_value", "capital_currency", "capital_type"], Literal["capital_value", "original_currency", "capital_type"],
CurrencyEnum | float | CapitalTypeEnum, CurrencyEnum | float | CapitalTypeEnum,
]: ]:
"""Changes the value of the currency from DM to EUR to be better comparable. """Changes the value of the currency from DM to EUR to be better comparable.
@ -226,6 +226,8 @@ def norm_capital(
Returns: Returns:
A normed version of the currency. A normed version of the currency.
""" """
if not capital:
return {}
if len(capital) != 3: # noqa: PLR2004 if len(capital) != 3: # noqa: PLR2004
logger.warning("The capital isn't formatted as expected. Look into that.") logger.warning("The capital isn't formatted as expected. Look into that.")
return {} return {}
@ -236,7 +238,7 @@ def norm_capital(
/ CURRENCY_CONVERSION_DICT[CurrencyEnum(capital["currency"])], / CURRENCY_CONVERSION_DICT[CurrencyEnum(capital["currency"])],
2, 2,
), ),
"capital_currency": CurrencyEnum(capital["currency"]), "original_currency": CurrencyEnum(capital["currency"]),
"capital_type": CapitalTypeEnum(capital["type"]), "capital_type": CapitalTypeEnum(capital["type"]),
} }
@ -279,16 +281,19 @@ def add_company(company: dict[str, Any], db: Session) -> None:
last_update: date | None = ( last_update: date | None = (
date.fromisoformat(company["last_update"]) if company["last_update"] else None date.fromisoformat(company["last_update"]) if company["last_update"] else None
) )
founding_date_raw = company.get("founding_date")
company_entry = entities.Company( company_entry = entities.Company(
court_id=court_id, court_id=court_id,
hr=company["id"]["hr_number"].strip().replace(" ", " ").replace(" ", " "), hr=company["id"]["hr_number"].strip().replace(" ", " ").replace(" ", " "),
name=name, name=name,
city=simplify_string(location.get("city")),
zip_code=simplify_string(location.get("zip_code")),
street=simplify_string(location.get("street")),
last_update=last_update, last_update=last_update,
# **norm_capital(company.get("capital", {})), **location,
**get_geocodes(location.get("zip_code")), # type: ignore business_purpose=company.get("business_purpose"),
company_type=company.get("company_type"),
founding_date=date.fromisoformat(founding_date_raw)
if founding_date_raw
else None,
**(norm_capital(company.get("capital", {})) | get_geocodes(location.get("zip_code"))), # type: ignore
) )
db.add(company_entry) db.add(company_entry)
db.commit() db.commit()

View File

@ -3,6 +3,11 @@ from datetime import datetime
import sqlalchemy as sa import sqlalchemy as sa
from aki_prj23_transparenzregister.models.company import (
CapitalTypeEnum,
CompanyTypeEnum,
CurrencyEnum,
)
from aki_prj23_transparenzregister.utils.enum_types import ( from aki_prj23_transparenzregister.utils.enum_types import (
RelationTypeEnum, RelationTypeEnum,
SentimentTypeEnum, SentimentTypeEnum,
@ -40,14 +45,25 @@ class Company(Base):
sa.ForeignKey("district_court.id"), sa.ForeignKey("district_court.id"),
nullable=False, nullable=False,
) )
name = sa.Column(sa.String(150), nullable=False) name = sa.Column(sa.String(150), nullable=False)
company_type = sa.Column(sa.Enum(CompanyTypeEnum), nullable=True)
founding_date = sa.Column(sa.Date, nullable=True)
business_purpose = sa.Column(sa.String(), nullable=True)
sector = sa.Column(sa.String(100), nullable=True)
street = sa.Column(sa.String(100), nullable=True) street = sa.Column(sa.String(100), nullable=True)
house_number = sa.Column(sa.String(30), nullable=True)
zip_code = sa.Column(sa.String(5), nullable=True) zip_code = sa.Column(sa.String(5), nullable=True)
city = sa.Column(sa.String(100), nullable=True) city = sa.Column(sa.String(100), nullable=True)
longitude = sa.Column(sa.Float, nullable=True) longitude = sa.Column(sa.Float, nullable=True)
latitude = sa.Column(sa.Float, nullable=True) latitude = sa.Column(sa.Float, nullable=True)
pos_accuracy = sa.Column(sa.Float, nullable=True) pos_accuracy = sa.Column(sa.Float, nullable=True)
capital_value = sa.Column(sa.Float(), nullable=True)
original_currency = sa.Column(sa.Enum(CurrencyEnum), nullable=True)
capital_type = sa.Column(sa.Enum(CapitalTypeEnum), nullable=True)
last_update = sa.Column(sa.Date, nullable=False) last_update = sa.Column(sa.Date, nullable=False)
sector = sa.Column(sa.String(100), nullable=True) sector = sa.Column(sa.String(100), nullable=True)

View File

@ -8,6 +8,7 @@ import pytest
from sqlalchemy.orm import Session, sessionmaker from sqlalchemy.orm import Session, sessionmaker
from aki_prj23_transparenzregister.config.config_template import SQLiteConnectionString from aki_prj23_transparenzregister.config.config_template import SQLiteConnectionString
from aki_prj23_transparenzregister.models.company import CapitalTypeEnum
from aki_prj23_transparenzregister.utils import data_transfer from aki_prj23_transparenzregister.utils import data_transfer
from aki_prj23_transparenzregister.utils.sql import entities from aki_prj23_transparenzregister.utils.sql import entities
from aki_prj23_transparenzregister.utils.sql.connector import ( from aki_prj23_transparenzregister.utils.sql.connector import (
@ -143,11 +144,17 @@ def full_db(empty_db: Session, finance_statements: list[dict[str, Any]]) -> Sess
name="Some Company GmbH", name="Some Company GmbH",
street="Sesamstr.", street="Sesamstr.",
zip_code="58644", zip_code="58644",
house_number="4",
city="TV City", city="TV City",
last_update=datetime.date.fromisoformat("2023-01-01"), last_update=datetime.date.fromisoformat("2023-01-01"),
latitude=51.3246, latitude=51.3246,
longitude=7.6968, longitude=7.6968,
pos_accuracy=4.0, pos_accuracy=4.0,
founding_date=datetime.date(2010, 8, 7),
capital_value=1000000,
original_currency="DM",
capital_type=CapitalTypeEnum.HAFTEINLAGE,
business_purpose='Say "Hello World"',
), ),
entities.Company( entities.Company(
hr="HRB 123", hr="HRB 123",
@ -155,17 +162,23 @@ def full_db(empty_db: Session, finance_statements: list[dict[str, Any]]) -> Sess
name="Other Company GmbH", name="Other Company GmbH",
street="Sesamstr.", street="Sesamstr.",
zip_code="58636", zip_code="58636",
house_number="8",
city="TV City", city="TV City",
last_update=datetime.date.fromisoformat("2023-01-01"), last_update=datetime.date.fromisoformat("2023-01-01"),
latitude=51.38, latitude=51.38,
longitude=7.7032, longitude=7.7032,
pos_accuracy=4.0, pos_accuracy=4.0,
business_purpose="Some purpose",
), ),
entities.Company( entities.Company(
hr="HRB 12", hr="HRB 12",
court_id=2, court_id=2,
name="Third Company GmbH", name="Third Company GmbH",
last_update=datetime.date.fromisoformat("2023-01-01"), last_update=datetime.date.fromisoformat("2023-01-01"),
sector="Electronic",
capital_value=10000,
original_currency="EUR",
capital_type=CapitalTypeEnum.GRUNDKAPITAL,
), ),
] ]
) )

View File

@ -14,7 +14,6 @@ def test_import() -> None:
def test_get_company_data(full_db: Session) -> None: def test_get_company_data(full_db: Session) -> None:
"""Checks if data from the company and district court tables can be accessed.""" """Checks if data from the company and district court tables can be accessed."""
company_df = data_elements.get_company_data(full_db) company_df = data_elements.get_company_data(full_db)
test_data = pd.DataFrame( test_data = pd.DataFrame(
{ {
"company_id": {0: 1, 1: 2, 2: 3}, "company_id": {0: 1, 1: 2, 2: 3},
@ -25,18 +24,28 @@ def test_get_company_data(full_db: Session) -> None:
1: "Other Company GmbH", 1: "Other Company GmbH",
2: "Third Company GmbH", 2: "Third Company GmbH",
}, },
"company_street": {0: "Sesamstr.", 1: "Sesamstr.", 2: None}, "company_company_type": {0: None, 1: None, 2: None},
"company_zip_code": {0: "58644", 1: "58636", 2: None}, "company_founding_date": {0: "2010-08-07"},
"company_city": {0: "TV City", 1: "TV City", 2: None}, "company_business_purpose": {
"company_longitude": {0: 7.6968, 1: 7.7032, 2: None}, 0: 'Say "Hello World"',
"company_latitude": {0: 51.3246, 1: 51.38, 2: None}, 1: "Some purpose",
"company_pos_accuracy": {0: 4.0, 1: 4.0, 2: None}, },
"company_street": {0: "Sesamstr.", 1: "Sesamstr."},
"company_house_number": {0: "4", 1: "8"},
"company_zip_code": {0: "58644", 1: "58636"},
"company_city": {0: "TV City", 1: "TV City"},
"company_longitude": {0: 7.6968, 1: 7.7032},
"company_latitude": {0: 51.3246, 1: 51.38},
"company_pos_accuracy": {0: 4.0, 1: 4.0},
"company_capital_value": {0: 1000000.0, 2: 10000.0},
"company_original_currency": {0: "DEUTSCHE_MARK", 2: "EURO"},
"company_capital_type": {0: "HAFTEINLAGE", 2: "GRUNDKAPITAL"},
"company_last_update": { "company_last_update": {
0: "2023-01-01", 0: "2023-01-01",
1: "2023-01-01", 1: "2023-01-01",
2: "2023-01-01", 2: "2023-01-01",
}, },
"company_sector": {0: None, 1: None, 2: None}, "company_sector": {2: "Electronic"},
"district_court_name": { "district_court_name": {
0: "Amtsgericht Dortmund", 0: "Amtsgericht Dortmund",
1: "Amtsgericht Bochum", 1: "Amtsgericht Bochum",

View File

@ -14,7 +14,11 @@ from pytest_mock import MockerFixture
from sqlalchemy.engine import Engine from sqlalchemy.engine import Engine
from sqlalchemy.orm import Session from sqlalchemy.orm import Session
from aki_prj23_transparenzregister.models.company import CapitalTypeEnum, CurrencyEnum from aki_prj23_transparenzregister.models.company import (
CapitalTypeEnum,
CompanyTypeEnum,
CurrencyEnum,
)
from aki_prj23_transparenzregister.utils import data_transfer from aki_prj23_transparenzregister.utils import data_transfer
from aki_prj23_transparenzregister.utils.sql import entities from aki_prj23_transparenzregister.utils.sql import entities
@ -266,7 +270,23 @@ def company_generator(seed: int) -> dict[str, Any]:
"zip_code": get_random_zip() if random.choice([True, False]) else None, "zip_code": get_random_zip() if random.choice([True, False]) else None,
"street": get_random_string(20) if random.choice([True, False]) else None, "street": get_random_string(20) if random.choice([True, False]) else None,
}, },
"capital": random.choice(
[
{},
None,
{
"value": random.randint(1000, 10000000),
"currency": random.choice(["DM", "EUR"]),
"type": random.choice(list(CapitalTypeEnum)),
},
]
),
"last_update": date(random.randint(2000, 2023), 1, 1).isoformat(), "last_update": date(random.randint(2000, 2023), 1, 1).isoformat(),
"company_type": random.choice(list(CompanyTypeEnum) + [None]), # type: ignore
"founding_date": date(
random.randint(2000, 2023), random.randint(1, 12), random.randint(1, 28)
).isoformat(),
"business_purpose": random.choice(["", "Some text", None]),
} }
@ -675,18 +695,25 @@ def test_relationships(documents: list[dict[str, Any]], full_db: Session) -> Non
1: "Other Company GmbH", 1: "Other Company GmbH",
2: "Third Company GmbH", 2: "Third Company GmbH",
}, },
"company_type": {0: None, 1: None, 2: None},
"founding_date": {0: pd.Timestamp(date.fromisoformat("2010-08-07"))},
"business_purpose": {0: 'Say "Hello World"', 1: "Some purpose"},
"street": {0: "Sesamstr.", 1: "Sesamstr.", 2: None}, "street": {0: "Sesamstr.", 1: "Sesamstr.", 2: None},
"zip_code": {0: "58644", 1: "58636", 2: None}, "house_number": {0: "4", 1: "8"},
"city": {0: "TV City", 1: "TV City", 2: None}, "zip_code": {0: "58644", 1: "58636"},
"longitude": {0: 7.6968, 1: 7.7032, 2: None}, "city": {0: "TV City", 1: "TV City"},
"latitude": {0: 51.3246, 1: 51.38, 2: None}, "longitude": {0: 7.6968, 1: 7.7032},
"pos_accuracy": {0: 4.0, 1: 4.0, 2: None}, "latitude": {0: 51.3246, 1: 51.38},
"pos_accuracy": {0: 4.0, 1: 4.0},
"capital_value": {0: 1000000.0, 2: 10000.0},
"original_currency": {0: "DEUTSCHE_MARK", 2: "EURO"},
"capital_type": {0: "HAFTEINLAGE", 2: "GRUNDKAPITAL"},
"last_update": { "last_update": {
0: pd.Timestamp("2023-01-01 00:00:00"), 0: pd.Timestamp("2023-01-01 00:00:00"),
1: pd.Timestamp("2023-01-01 00:00:00"), 1: pd.Timestamp("2023-01-01 00:00:00"),
2: pd.Timestamp("2023-01-01 00:00:00"), 2: pd.Timestamp("2023-01-01 00:00:00"),
}, },
"sector": {0: None, 1: None, 2: None}, "sector": {2: "Electronic"},
} }
), ),
) )
@ -1031,7 +1058,7 @@ def test_norm_capital_eur(currency: str, capital_type: str) -> None:
{"value": 5, "currency": currency, "type": capital_type} {"value": 5, "currency": currency, "type": capital_type}
) == { ) == {
"capital_value": 5.0, "capital_value": 5.0,
"capital_currency": CurrencyEnum("EUR"), "original_currency": CurrencyEnum("EUR"),
"capital_type": CapitalTypeEnum(capital_type), "capital_type": CapitalTypeEnum(capital_type),
} }
@ -1044,7 +1071,7 @@ def test_norm_capital_dm(currency: str, capital_type: CapitalTypeEnum) -> None:
capital={"value": 5, "currency": currency, "type": capital_type} capital={"value": 5, "currency": currency, "type": capital_type}
) == { ) == {
"capital_value": 2.56, "capital_value": 2.56,
"capital_currency": CurrencyEnum("DM"), "original_currency": CurrencyEnum("DM"),
"capital_type": CapitalTypeEnum(capital_type), "capital_type": CapitalTypeEnum(capital_type),
} }