diff --git a/src/aki_prj23_transparenzregister/utils/data_extraction/unternehmensregister/transform.py b/src/aki_prj23_transparenzregister/utils/data_extraction/unternehmensregister/transform.py index 30233dd..2a34b0d 100644 --- a/src/aki_prj23_transparenzregister/utils/data_extraction/unternehmensregister/transform.py +++ b/src/aki_prj23_transparenzregister/utils/data_extraction/unternehmensregister/transform.py @@ -62,7 +62,7 @@ def parse_stakeholder(data: dict) -> CompanyRelationship | None: CompanyRelationship | None: Relationship if it could be processed """ if "Natuerliche_Person" in data["Beteiligter"]: - # It's a Compnay serving as a "Kommanditist" or similar + # It's a Company serving as a "Kommanditist" or similar if data["Beteiligter"]["Natuerliche_Person"]["Voller_Name"]["Vorname"] is None: return CompanyToCompanyRelationship( **{ # type: ignore @@ -489,8 +489,9 @@ if __name__ == "__main__": path = os.path.join(f"{base_path}/export", file) with open(path, encoding="utf-8") as file_object: try: - data = json.loads(file_object.read()) - company: Company = map_unternehmensregister_json(data) + company: Company = map_unternehmensregister_json( + json.loads(file_object.read()) + ) name = "".join(e for e in company.name if e.isalnum())[:50] diff --git a/src/aki_prj23_transparenzregister/utils/data_transfer.py b/src/aki_prj23_transparenzregister/utils/data_transfer.py index 04f0f08..c4eeda9 100644 --- a/src/aki_prj23_transparenzregister/utils/data_transfer.py +++ b/src/aki_prj23_transparenzregister/utils/data_transfer.py @@ -215,7 +215,7 @@ CURRENCY_CONVERSION_DICT: Final[frozendict[CurrencyEnum, float]] = frozendict( def norm_capital( capital: dict[Literal["value", "currency", "type"], Any] ) -> dict[ - Literal["capital_value", "capital_currency", "capital_type"], + Literal["capital_value", "original_currency", "capital_type"], CurrencyEnum | float | CapitalTypeEnum, ]: """Changes the value of the currency from DM to EUR to be better comparable. @@ -226,6 +226,8 @@ def norm_capital( Returns: A normed version of the currency. """ + if not capital: + return {} if len(capital) != 3: # noqa: PLR2004 logger.warning("The capital isn't formatted as expected. Look into that.") return {} @@ -236,7 +238,7 @@ def norm_capital( / CURRENCY_CONVERSION_DICT[CurrencyEnum(capital["currency"])], 2, ), - "capital_currency": CurrencyEnum(capital["currency"]), + "original_currency": CurrencyEnum(capital["currency"]), "capital_type": CapitalTypeEnum(capital["type"]), } @@ -279,16 +281,19 @@ def add_company(company: dict[str, Any], db: Session) -> None: last_update: date | None = ( date.fromisoformat(company["last_update"]) if company["last_update"] else None ) + founding_date_raw = company.get("founding_date") company_entry = entities.Company( court_id=court_id, hr=company["id"]["hr_number"].strip().replace(" ", " ").replace(" ", " "), name=name, - city=simplify_string(location.get("city")), - zip_code=simplify_string(location.get("zip_code")), - street=simplify_string(location.get("street")), last_update=last_update, - # **norm_capital(company.get("capital", {})), - **get_geocodes(location.get("zip_code")), # type: ignore + **location, + business_purpose=company.get("business_purpose"), + company_type=company.get("company_type"), + founding_date=date.fromisoformat(founding_date_raw) + if founding_date_raw + else None, + **(norm_capital(company.get("capital", {})) | get_geocodes(location.get("zip_code"))), # type: ignore ) db.add(company_entry) db.commit() diff --git a/src/aki_prj23_transparenzregister/utils/sql/entities.py b/src/aki_prj23_transparenzregister/utils/sql/entities.py index 4e86492..d9af4c0 100644 --- a/src/aki_prj23_transparenzregister/utils/sql/entities.py +++ b/src/aki_prj23_transparenzregister/utils/sql/entities.py @@ -3,6 +3,11 @@ from datetime import datetime import sqlalchemy as sa +from aki_prj23_transparenzregister.models.company import ( + CapitalTypeEnum, + CompanyTypeEnum, + CurrencyEnum, +) from aki_prj23_transparenzregister.utils.enum_types import ( RelationTypeEnum, SentimentTypeEnum, @@ -40,14 +45,25 @@ class Company(Base): sa.ForeignKey("district_court.id"), nullable=False, ) + name = sa.Column(sa.String(150), nullable=False) + company_type = sa.Column(sa.Enum(CompanyTypeEnum), nullable=True) + founding_date = sa.Column(sa.Date, nullable=True) + business_purpose = sa.Column(sa.String(), nullable=True) + sector = sa.Column(sa.String(100), nullable=True) + street = sa.Column(sa.String(100), nullable=True) + house_number = sa.Column(sa.String(30), nullable=True) zip_code = sa.Column(sa.String(5), nullable=True) city = sa.Column(sa.String(100), nullable=True) longitude = sa.Column(sa.Float, nullable=True) latitude = sa.Column(sa.Float, nullable=True) pos_accuracy = sa.Column(sa.Float, nullable=True) + capital_value = sa.Column(sa.Float(), nullable=True) + original_currency = sa.Column(sa.Enum(CurrencyEnum), nullable=True) + capital_type = sa.Column(sa.Enum(CapitalTypeEnum), nullable=True) + last_update = sa.Column(sa.Date, nullable=False) sector = sa.Column(sa.String(100), nullable=True) diff --git a/tests/conftest.py b/tests/conftest.py index b223105..dca1761 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -8,6 +8,7 @@ import pytest from sqlalchemy.orm import Session, sessionmaker from aki_prj23_transparenzregister.config.config_template import SQLiteConnectionString +from aki_prj23_transparenzregister.models.company import CapitalTypeEnum from aki_prj23_transparenzregister.utils import data_transfer from aki_prj23_transparenzregister.utils.sql import entities from aki_prj23_transparenzregister.utils.sql.connector import ( @@ -143,11 +144,17 @@ def full_db(empty_db: Session, finance_statements: list[dict[str, Any]]) -> Sess name="Some Company GmbH", street="Sesamstr.", zip_code="58644", + house_number="4", city="TV City", last_update=datetime.date.fromisoformat("2023-01-01"), latitude=51.3246, longitude=7.6968, pos_accuracy=4.0, + founding_date=datetime.date(2010, 8, 7), + capital_value=1000000, + original_currency="DM", + capital_type=CapitalTypeEnum.HAFTEINLAGE, + business_purpose='Say "Hello World"', ), entities.Company( hr="HRB 123", @@ -155,17 +162,23 @@ def full_db(empty_db: Session, finance_statements: list[dict[str, Any]]) -> Sess name="Other Company GmbH", street="Sesamstr.", zip_code="58636", + house_number="8", city="TV City", last_update=datetime.date.fromisoformat("2023-01-01"), latitude=51.38, longitude=7.7032, pos_accuracy=4.0, + business_purpose="Some purpose", ), entities.Company( hr="HRB 12", court_id=2, name="Third Company GmbH", last_update=datetime.date.fromisoformat("2023-01-01"), + sector="Electronic", + capital_value=10000, + original_currency="EUR", + capital_type=CapitalTypeEnum.GRUNDKAPITAL, ), ] ) diff --git a/tests/ui/data_elements_test.py b/tests/ui/data_elements_test.py index 5ec89a0..0ac9062 100644 --- a/tests/ui/data_elements_test.py +++ b/tests/ui/data_elements_test.py @@ -14,7 +14,6 @@ def test_import() -> None: def test_get_company_data(full_db: Session) -> None: """Checks if data from the company and district court tables can be accessed.""" company_df = data_elements.get_company_data(full_db) - test_data = pd.DataFrame( { "company_id": {0: 1, 1: 2, 2: 3}, @@ -25,18 +24,28 @@ def test_get_company_data(full_db: Session) -> None: 1: "Other Company GmbH", 2: "Third Company GmbH", }, - "company_street": {0: "Sesamstr.", 1: "Sesamstr.", 2: None}, - "company_zip_code": {0: "58644", 1: "58636", 2: None}, - "company_city": {0: "TV City", 1: "TV City", 2: None}, - "company_longitude": {0: 7.6968, 1: 7.7032, 2: None}, - "company_latitude": {0: 51.3246, 1: 51.38, 2: None}, - "company_pos_accuracy": {0: 4.0, 1: 4.0, 2: None}, + "company_company_type": {0: None, 1: None, 2: None}, + "company_founding_date": {0: "2010-08-07"}, + "company_business_purpose": { + 0: 'Say "Hello World"', + 1: "Some purpose", + }, + "company_street": {0: "Sesamstr.", 1: "Sesamstr."}, + "company_house_number": {0: "4", 1: "8"}, + "company_zip_code": {0: "58644", 1: "58636"}, + "company_city": {0: "TV City", 1: "TV City"}, + "company_longitude": {0: 7.6968, 1: 7.7032}, + "company_latitude": {0: 51.3246, 1: 51.38}, + "company_pos_accuracy": {0: 4.0, 1: 4.0}, + "company_capital_value": {0: 1000000.0, 2: 10000.0}, + "company_original_currency": {0: "DEUTSCHE_MARK", 2: "EURO"}, + "company_capital_type": {0: "HAFTEINLAGE", 2: "GRUNDKAPITAL"}, "company_last_update": { 0: "2023-01-01", 1: "2023-01-01", 2: "2023-01-01", }, - "company_sector": {0: None, 1: None, 2: None}, + "company_sector": {2: "Electronic"}, "district_court_name": { 0: "Amtsgericht Dortmund", 1: "Amtsgericht Bochum", diff --git a/tests/utils/data_transfer_test.py b/tests/utils/data_transfer_test.py index bf01df4..e919d0e 100644 --- a/tests/utils/data_transfer_test.py +++ b/tests/utils/data_transfer_test.py @@ -14,7 +14,11 @@ from pytest_mock import MockerFixture from sqlalchemy.engine import Engine from sqlalchemy.orm import Session -from aki_prj23_transparenzregister.models.company import CapitalTypeEnum, CurrencyEnum +from aki_prj23_transparenzregister.models.company import ( + CapitalTypeEnum, + CompanyTypeEnum, + CurrencyEnum, +) from aki_prj23_transparenzregister.utils import data_transfer from aki_prj23_transparenzregister.utils.sql import entities @@ -266,7 +270,23 @@ def company_generator(seed: int) -> dict[str, Any]: "zip_code": get_random_zip() if random.choice([True, False]) else None, "street": get_random_string(20) if random.choice([True, False]) else None, }, + "capital": random.choice( + [ + {}, + None, + { + "value": random.randint(1000, 10000000), + "currency": random.choice(["DM", "EUR"]), + "type": random.choice(list(CapitalTypeEnum)), + }, + ] + ), "last_update": date(random.randint(2000, 2023), 1, 1).isoformat(), + "company_type": random.choice(list(CompanyTypeEnum) + [None]), # type: ignore + "founding_date": date( + random.randint(2000, 2023), random.randint(1, 12), random.randint(1, 28) + ).isoformat(), + "business_purpose": random.choice(["", "Some text", None]), } @@ -675,18 +695,25 @@ def test_relationships(documents: list[dict[str, Any]], full_db: Session) -> Non 1: "Other Company GmbH", 2: "Third Company GmbH", }, + "company_type": {0: None, 1: None, 2: None}, + "founding_date": {0: pd.Timestamp(date.fromisoformat("2010-08-07"))}, + "business_purpose": {0: 'Say "Hello World"', 1: "Some purpose"}, "street": {0: "Sesamstr.", 1: "Sesamstr.", 2: None}, - "zip_code": {0: "58644", 1: "58636", 2: None}, - "city": {0: "TV City", 1: "TV City", 2: None}, - "longitude": {0: 7.6968, 1: 7.7032, 2: None}, - "latitude": {0: 51.3246, 1: 51.38, 2: None}, - "pos_accuracy": {0: 4.0, 1: 4.0, 2: None}, + "house_number": {0: "4", 1: "8"}, + "zip_code": {0: "58644", 1: "58636"}, + "city": {0: "TV City", 1: "TV City"}, + "longitude": {0: 7.6968, 1: 7.7032}, + "latitude": {0: 51.3246, 1: 51.38}, + "pos_accuracy": {0: 4.0, 1: 4.0}, + "capital_value": {0: 1000000.0, 2: 10000.0}, + "original_currency": {0: "DEUTSCHE_MARK", 2: "EURO"}, + "capital_type": {0: "HAFTEINLAGE", 2: "GRUNDKAPITAL"}, "last_update": { 0: pd.Timestamp("2023-01-01 00:00:00"), 1: pd.Timestamp("2023-01-01 00:00:00"), 2: pd.Timestamp("2023-01-01 00:00:00"), }, - "sector": {0: None, 1: None, 2: None}, + "sector": {2: "Electronic"}, } ), ) @@ -1031,7 +1058,7 @@ def test_norm_capital_eur(currency: str, capital_type: str) -> None: {"value": 5, "currency": currency, "type": capital_type} ) == { "capital_value": 5.0, - "capital_currency": CurrencyEnum("EUR"), + "original_currency": CurrencyEnum("EUR"), "capital_type": CapitalTypeEnum(capital_type), } @@ -1044,7 +1071,7 @@ def test_norm_capital_dm(currency: str, capital_type: CapitalTypeEnum) -> None: capital={"value": 5, "currency": currency, "type": capital_type} ) == { "capital_value": 2.56, - "capital_currency": CurrencyEnum("DM"), + "original_currency": CurrencyEnum("DM"), "capital_type": CapitalTypeEnum(capital_type), }