From f8c111d7e201805dce7dc1eb4ce09e988567c858 Mon Sep 17 00:00:00 2001 From: Philipp Horstenkamp Date: Sat, 14 Oct 2023 17:16:14 +0200 Subject: [PATCH] Resolve mismatch between staging and prod db data for financials (#211) SQL Creation is now done dynamically by the definition of the enumeration type. --- .../models/company.py | 41 ++----------------- .../utils/data_extraction/bundesanzeiger.py | 2 +- .../utils/enum_types.py | 38 +++++++++++++++++ .../utils/sql/entities.py | 39 ++++++------------ tests/conftest.py | 25 ++++------- tests/ui/data_elements_test.py | 20 ++++----- .../data_extraction/bundesanzeiger_test.py | 2 +- tests/utils/data_transfer_test.py | 21 ++-------- 8 files changed, 76 insertions(+), 112 deletions(-) diff --git a/src/aki_prj23_transparenzregister/models/company.py b/src/aki_prj23_transparenzregister/models/company.py index 94129f4..64b5ce3 100644 --- a/src/aki_prj23_transparenzregister/models/company.py +++ b/src/aki_prj23_transparenzregister/models/company.py @@ -1,9 +1,11 @@ """Company model.""" from dataclasses import asdict, dataclass -from enum import Enum, StrEnum +from enum import StrEnum from aenum import MultiValueEnum +from aki_prj23_transparenzregister.utils.enum_types import FinancialKPIEnum + class RelationshipRoleEnum(str, MultiValueEnum): """Roles taken by entities in relationships to a Company.""" @@ -134,43 +136,6 @@ class CompanyToCompanyRelationship(CompanyRelationship): name: str -class FinancialKPIEnum(Enum): - """Financial KPI keys.""" - - # Umsatz || Erlöse - REVENUE = "revenue" - # Jahresüberschuss || Nettoeinkommen - NET_INCOME = "net_income" - # Ebit - EBIT = "ebit" - # Ebitda - EBITDA = "ebitda" - # Bruttogewinn - GROSS_PROFIT = "gross_profit" - # Betriebsgewinn - OPERATING_PROFIT = "operating_profit" - # Bilanzsumme - ASSETS = "assets" - # Gesamtverbindlichkeiten - LIABILITIES = "liabilities" - # Eigenkapital - EQUITY = "equity" - # Umlaufvermögen - CURRENT_ASSETS = "current_assets" - # Kurzfristige 
Verbindlichkeiten - CURRENT_LIABILITIES = "current_liabilities" - # Langfristige Verbindlichkeiten - LONG_TERM_DEBT = "long_term_debt" - # Kurzfristige Verbindlichkeiten - SHORT_TERM_DEBT = "short_term_debt" - # Barmittel - CASH_AND_CASH_EQUIVALENTS = "cash_and_cash_equivalents" - # Dividende - DIVIDENDS = "dividends" - # Cash Flow - CASH_FLOW = "cash_flow" - - @dataclass class YearlyResult: """Company yearly result.""" diff --git a/src/aki_prj23_transparenzregister/utils/data_extraction/bundesanzeiger.py b/src/aki_prj23_transparenzregister/utils/data_extraction/bundesanzeiger.py index bdb2e29..0a4a4e9 100644 --- a/src/aki_prj23_transparenzregister/utils/data_extraction/bundesanzeiger.py +++ b/src/aki_prj23_transparenzregister/utils/data_extraction/bundesanzeiger.py @@ -6,7 +6,7 @@ from bs4 import BeautifulSoup from deutschland.bundesanzeiger import Bundesanzeiger as Ba from aki_prj23_transparenzregister.models.auditor import Auditor -from aki_prj23_transparenzregister.models.company import FinancialKPIEnum +from aki_prj23_transparenzregister.utils.enum_types import FinancialKPIEnum pd.options.mode.chained_assignment = None # type: ignore diff --git a/src/aki_prj23_transparenzregister/utils/enum_types.py b/src/aki_prj23_transparenzregister/utils/enum_types.py index 659ecb4..9ec762c 100644 --- a/src/aki_prj23_transparenzregister/utils/enum_types.py +++ b/src/aki_prj23_transparenzregister/utils/enum_types.py @@ -1,5 +1,6 @@ """Collection of enumeration types for the whole project.""" import enum +from enum import Enum class SentimentTypeEnum(enum.Enum): @@ -9,3 +10,40 @@ class SentimentTypeEnum(enum.Enum): sustainability = "sustainability" environmental_aspects = "environmental_aspects" perception = "perception" + + +class FinancialKPIEnum(Enum): + """Financial KPI keys.""" + + # Umsatz || Erlöse + REVENUE = "revenue" + # Jahresüberschuss || Nettoeinkommen + NET_INCOME = "net_income" + # Ebit + EBIT = "ebit" + # Ebitda + EBITDA = "ebitda" + # Bruttogewinn + 
GROSS_PROFIT = "gross_profit" + # Betriebsgewinn + OPERATING_PROFIT = "operating_profit" + # Bilanzsumme + ASSETS = "assets" + # Gesamtverbindlichkeiten + LIABILITIES = "liabilities" + # Eigenkapital + EQUITY = "equity" + # Umlaufvermögen + CURRENT_ASSETS = "current_assets" + # Kurzfristige Verbindlichkeiten + CURRENT_LIABILITIES = "current_liabilities" + # Langfristige Verbindlichkeiten + LONG_TERM_DEBT = "long_term_debt" + # Kurzfristige Verbindlichkeiten + SHORT_TERM_DEBT = "short_term_debt" + # Barmittel + CASH_AND_CASH_EQUIVALENTS = "cash_and_cash_equivalents" + # Dividende + DIVIDENDS = "dividends" + # Cash Flow + CASH_FLOW = "cash_flow" diff --git a/src/aki_prj23_transparenzregister/utils/sql/entities.py b/src/aki_prj23_transparenzregister/utils/sql/entities.py index f7f00af..2c43c93 100644 --- a/src/aki_prj23_transparenzregister/utils/sql/entities.py +++ b/src/aki_prj23_transparenzregister/utils/sql/entities.py @@ -10,6 +10,7 @@ from aki_prj23_transparenzregister.models.company import ( RelationshipRoleEnum, ) from aki_prj23_transparenzregister.utils.enum_types import ( + FinancialKPIEnum, SentimentTypeEnum, ) from aki_prj23_transparenzregister.utils.sql.connector import Base @@ -105,33 +106,17 @@ class Person(Base): works_for = sa.Column(sa.String(100), nullable=True) -class AnnualFinanceStatement(Base): - """Finance.""" - - __tablename__ = "annual_finance_statement" - - id = sa.Column(sa.Integer, primary_key=True) - company_id = sa.Column(sa.Integer, sa.ForeignKey("company.id")) - date = sa.Column(sa.Date, nullable=False) - total_volume = sa.Column(sa.Float, default="NaN") - ebit = sa.Column(sa.Float, default="NaN") - ebitda = sa.Column(sa.Float, default="NaN") - ebit_margin = sa.Column(sa.Float, default="NaN") - total_balance = sa.Column(sa.Float, default="NaN") - equity = sa.Column(sa.Float, default="NaN") - debt = sa.Column(sa.Float, default="NaN") - return_on_equity = sa.Column(sa.Float, default="NaN") - capital_turnover_rate = sa.Column(sa.Float, 
default="NaN") - current_liabilities = sa.Column(sa.Float, default="NaN") - dividends = sa.Column(sa.Float, default="NaN") - net_income = sa.Column(sa.Float, default="NaN") - assets = sa.Column(sa.Float, default="NaN") - long_term_debt = sa.Column(sa.Float, default="NaN") - short_term_debt = sa.Column(sa.Float, default="NaN") - revenue = sa.Column(sa.Float, default="NaN") - cash_flow = sa.Column(sa.Float, default="NaN") - current_assets = sa.Column(sa.Float, default="NaN") # assets vs current assets - # company: Mapped[Company] = relationship(Company) +AnnualFinanceStatement = type( + "AnnualFinanceStatement", + (Base,), + { + "__tablename__": "annual_finance_statement", + "id": sa.Column(sa.Integer, primary_key=True), + "company_id": sa.Column(sa.Integer, sa.ForeignKey("company.id")), + "date": sa.Column(sa.Date, nullable=False), + } + | {_.value: sa.Column(sa.Float, default="NaN") for _ in FinancialKPIEnum}, +) class Sentiment(Base): diff --git a/tests/conftest.py b/tests/conftest.py index 70fbb09..8e98e6a 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -56,47 +56,40 @@ def finance_statements() -> list[dict[str, Any]]: "id": 1, "company_id": 1, "date": datetime.date.fromisoformat("2023-01-01"), - "total_volume": 1000.0, "ebit": 1000.0, "ebitda": 1000.0, - "ebit_margin": 1000.0, - "total_balance": 1000.0, + "gross_profit": 1000.0, "equity": 1000.0, - "debt": 1000.0, - "return_on_equity": 1000.0, - "capital_turnover_rate": 1000.0, "current_liabilities": 1000.0, - "dividends": float("NaN"), - "net_income": float("NaN"), "assets": 1000.0, + "net_income": 100.0, "long_term_debt": 1000.0, "short_term_debt": 1000.0, "revenue": 1000.0, "cash_flow": 1000.0, "current_assets": 1000.0, + "liabilities": 0.0, + "cash_and_cash_equivalents": 1.0, + "dividends": 0.0, }, { "id": 2, "company_id": 1, "date": datetime.date.fromisoformat("2022-01-01"), - "total_volume": 1100.0, + "revenue": 1100.0, + "net_income": float("NaN"), "ebit": 1100.0, "ebitda": 1100.0, - 
"ebit_margin": 1100.0, - "total_balance": 1100.0, + "gross_profit": 1100.0, "equity": 1100.0, - "debt": 1100.0, - "return_on_equity": 1100.0, - "capital_turnover_rate": 1100.0, "current_liabilities": 1100.0, "dividends": float("NaN"), - "net_income": float("NaN"), "assets": 1100.0, "long_term_debt": 1100.0, "short_term_debt": 1100.0, - "revenue": 1100.0, "cash_flow": 1100.0, "current_assets": 1100.0, + "operating_profit": 1.0, }, ] diff --git a/tests/ui/data_elements_test.py b/tests/ui/data_elements_test.py index 0ac9062..b3ebd87 100644 --- a/tests/ui/data_elements_test.py +++ b/tests/ui/data_elements_test.py @@ -65,24 +65,22 @@ def test_get_finance_data(full_db: Session) -> None: "annual_finance_statement_id": {0: 1, 1: 2}, "annual_finance_statement_company_id": {0: 1, 1: 1}, "annual_finance_statement_date": {0: "2023-01-01", 1: "2022-01-01"}, - "annual_finance_statement_total_volume": {0: 1000.0, 1: 1100.0}, + "annual_finance_statement_revenue": {0: 1000.0, 1: 1100.0}, + "annual_finance_statement_net_income": {0: 100.0}, "annual_finance_statement_ebit": {0: 1000.0, 1: 1100.0}, "annual_finance_statement_ebitda": {0: 1000.0, 1: 1100.0}, - "annual_finance_statement_ebit_margin": {0: 1000.0, 1: 1100.0}, - "annual_finance_statement_total_balance": {0: 1000.0, 1: 1100.0}, + "annual_finance_statement_gross_profit": {0: 1000.0, 1: 1100.0}, + "annual_finance_statement_operating_profit": {1: 1.0}, + "annual_finance_statement_assets": {0: 1000.0, 1: 1100}, + "annual_finance_statement_liabilities": {0: 0.0}, "annual_finance_statement_equity": {0: 1000.0, 1: 1100.0}, - "annual_finance_statement_debt": {0: 1000.0, 1: 1100.0}, - "annual_finance_statement_return_on_equity": {0: 1000.0, 1: 1100.0}, - "annual_finance_statement_capital_turnover_rate": {0: 1000.0, 1: 1100.0}, + "annual_finance_statement_current_assets": {0: 1000.0, 1: 1100.0}, "annual_finance_statement_current_liabilities": {0: 1000.0, 1: 1100.0}, - "annual_finance_statement_dividends": {0: None, 1: None}, - 
"annual_finance_statement_net_income": {0: None, 1: None}, - "annual_finance_statement_assets": {0: 1000.0, 1: 1100.0}, "annual_finance_statement_long_term_debt": {0: 1000.0, 1: 1100.0}, "annual_finance_statement_short_term_debt": {0: 1000.0, 1: 1100.0}, - "annual_finance_statement_revenue": {0: 1000.0, 1: 1100.0}, + "annual_finance_statement_cash_and_cash_equivalents": {0: 1.0}, + "annual_finance_statement_dividends": {0: 0.0}, "annual_finance_statement_cash_flow": {0: 1000.0, 1: 1100.0}, - "annual_finance_statement_current_assets": {0: 1000.0, 1: 1100.0}, "company_name": {0: "Some Company GmbH", 1: "Some Company GmbH"}, "company_id": {0: 1, 1: 1}, } diff --git a/tests/utils/data_extraction/bundesanzeiger_test.py b/tests/utils/data_extraction/bundesanzeiger_test.py index 7b26892..d764a52 100644 --- a/tests/utils/data_extraction/bundesanzeiger_test.py +++ b/tests/utils/data_extraction/bundesanzeiger_test.py @@ -3,10 +3,10 @@ from unittest.mock import Mock, patch import pandas as pd -from aki_prj23_transparenzregister.models.company import FinancialKPIEnum from aki_prj23_transparenzregister.utils.data_extraction.bundesanzeiger import ( Bundesanzeiger, ) +from aki_prj23_transparenzregister.utils.enum_types import FinancialKPIEnum def test_extract_auditor_company_no_hits() -> None: diff --git a/tests/utils/data_transfer_test.py b/tests/utils/data_transfer_test.py index 8a968b8..a3d366c 100644 --- a/tests/utils/data_transfer_test.py +++ b/tests/utils/data_transfer_test.py @@ -615,7 +615,6 @@ def test_add_relationships_none(empty_relations: list, full_db: Session) -> None # noinspection SpellCheckingInspection -@pytest.mark.working_on() @pytest.mark.parametrize( "documents", [ @@ -980,7 +979,7 @@ def test_add_annual_report( year, { "financials": { - "ebit": 123, + "revenue": 123, "ebitda": 235, "short_term_debt": short_term_debt, }, @@ -999,24 +998,9 @@ def test_add_annual_report( "id": 3, "company_id": company_id, "date": pd.to_datetime(date(year, 1, 1)), - 
"total_volume": float("NaN"), - "ebit": 123.0, + "revenue": 123.0, "ebitda": 235.0, - "ebit_margin": float("NaN"), - "total_balance": float("NaN"), - "equity": float("NaN"), - "debt": float("NaN"), - "return_on_equity": float("NaN"), - "capital_turnover_rate": float("NaN"), - "current_liabilities": float("NaN"), - "dividends": float("NaN"), - "net_income": float("NaN"), - "assets": float("NaN"), - "long_term_debt": float("NaN"), "short_term_debt": short_term_debt, - "revenue": float("NaN"), - "cash_flow": float("NaN"), - "current_assets": float("NaN"), } ] ) @@ -1025,6 +1009,7 @@ def test_add_annual_report( pd.testing.assert_frame_equal( expected_results, df_prior, + check_like=True, )