def filter_financials(reports: dict[str, float]) -> dict[str, float]:
    """Filter a financial report down to the KPIs named in ``FinancialKPIEnum``.

    Each key is normalised by lower-casing it and replacing spaces with underscores.
    A key is kept (under its normalised name) when that name equals the value of a
    ``FinancialKPIEnum`` member. Every other key is dropped and its lower-cased
    name is tallied in the module-level ``count_unknown_financial_reports`` counter.

    Args:
        reports: A financial report mapping raw KPI names to values. Currently unfiltered.

    Returns:
        The filtered financial data, keyed by the normalised KPI names.
    """
    # A set gives constant-time membership tests instead of scanning a list per key.
    # NOTE(review): assumes the enum values are hashable (strings) - confirm.
    kpi_names = {kpi.value for kpi in FinancialKPIEnum}
    filtered: dict[str, float] = {}
    for name, value in reports.items():
        name_processed = name.lower().replace(" ", "_")
        if name_processed in kpi_names:
            filtered[name_processed] = value
            continue
        # Unknown KPI: count it by its lower-cased (not underscore-normalised) name.
        count_unknown_financial_reports[name.lower()] += 1
        # TODO: add rapidfuzz (note carried over from the original; presumably fuzzy name matching)

    return filtered
@pytest.mark.parametrize(
    ("given", "expected"),
    [
        ({}, {}),
        ({"REVENUE": 2}, {"revenue": 2}),
        ({"GROSS PROFIT": 10}, {"gross_profit": 10}),
        ({"I dont know this one": 5, "GROSS PROFIT": 10}, {"gross_profit": 10}),
        ({"I dont know this one": 5}, {}),
    ],
)
def test_filter_financials(given: dict[str, float], expected: dict[str, int]) -> None:
    """Checks that filter_financials normalises known KPI names and drops unknown ones."""
    # Empty reports, known keys, unknown keys and a mix of both are all covered by the cases above.
    filtered = data_transfer.filter_financials(given)
    assert filtered == expected
Session, monkeypatch: MonkeyPatch) -> None: """Tests if a set of news articles / sentiments can be added to the sql db.""" monkeypatch.setattr(