Added a filter for financial reports. (#372)

Financial reports are now filtered before being added to the SQL
database so that only known keys are added.
Some matching is also done.
The most important missing reports are printed so that they can be implemented
later on.
Rapidfuzz could be used.
This commit is contained in:
Philipp Horstenkamp 2023-11-13 18:52:12 +01:00 committed by GitHub
parent da340d5ec5
commit d0677287b6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 47 additions and 16 deletions

View File

@ -1,6 +1,7 @@
"""This module contains the data transfer and refinement functionalities between staging and production DB."""
import argparse
import sys
from collections import Counter
from datetime import date
from functools import lru_cache
from typing import Any, Final, Literal
@ -24,6 +25,7 @@ from aki_prj23_transparenzregister.models.company import (
CurrencyEnum,
RelationshipRoleEnum,
)
from aki_prj23_transparenzregister.utils.enum_types import FinancialKPIEnum
from aki_prj23_transparenzregister.utils.logger_config import (
add_logger_options_to_argparse,
configer_logger,
@ -41,6 +43,8 @@ from aki_prj23_transparenzregister.utils.string_tools import simplify_string
# pgeocode Nominatim instance for the country code "de"
# (presumably used for postal-code lookups; its usage is outside this view).
nomi = pgeocode.Nominatim("de")
# Module-wide tally of financial report names that filter_financials could not
# match to a known KPI, keyed by the lower-cased report name.
count_unknown_financial_reports: Final[Counter] = Counter()
class DataInvalidError(ValueError):
"""This error is thrown if a db entry can't be parsed for the production db."""
@ -521,8 +525,30 @@ def add_relationships(companies: list[dict[str, dict]], db: Session) -> None:
# yearly_results
def filter_financials(reports: dict[str, float]) -> dict[str, float]:
    """Filters a financial report down to the known, normalized KPI names.

    Each report name is normalized by lower-casing it and replacing spaces with
    underscores. Entries whose normalized name equals a value of
    ``FinancialKPIEnum`` are kept under that normalized name. Every other entry
    is dropped and counted in ``count_unknown_financial_reports`` under its
    lower-cased original name.

    Args:
        reports: A financial report. Currently unfiltered.

    Returns:
        Filtered financial data keyed by the normalized KPI name.
    """
    # A set gives constant-time membership checks inside the loop below.
    kpi_names = {kpi.value for kpi in FinancialKPIEnum}
    filtered: dict[str, float] = {}
    for name, value in reports.items():
        name_processed = name.lower().replace(" ", "_")
        if name_processed in kpi_names:
            filtered[name_processed] = value
        else:
            # Remember what was dropped so the most common unknown names can
            # be reported and mapped later.
            count_unknown_financial_reports[name.lower()] += 1
            # TODO: add fuzzy matching (e.g. rapidfuzz) for near-miss names.
    return filtered
def add_annual_report(company_id: int, year: int, report: dict, db: Session) -> None:
"""Ads a annual financial report to the SQL database.
"""Ads an annual financial report to the SQL database.
The added report is linked with the company.
@ -544,7 +570,7 @@ def add_annual_report(company_id: int, year: int, report: dict, db: Session) ->
entities.AnnualFinanceStatement(
company_id=company_id,
date=date(year, 1, 1),
**report.get("financials", {}),
**filter_financials(report.get("financials", {})),
),
)
for auditor in report.get("auditors", ""):
@ -626,6 +652,10 @@ def add_annual_financial_reports(companies: list[dict], db: Session) -> None:
pbar.update()
db.commit()
logger.info("Company connections added.")
if count_unknown_financial_reports.total() > 10: # noqa: PLR2004
logger.warning(
f"The following financial reports couldn't be assigned but are most common: {count_unknown_financial_reports.most_common(10)} ."
)
def transfer_data(config_provider: ConfigProvider) -> None:

View File

@ -1143,19 +1143,6 @@ def test_add_annual_report_unknown_audit(
assert added.number_of_links == number_of_years
def test_add_annual_report_financial_key_error(full_db: Session) -> None:
    """Tests that adding financial data with an unknown financial record type raises a TypeError."""
    report_with_unknown_key = {
        "financials": {"something-strange": 123.12},
        "auditors": {},
    }
    expected_message = "is an invalid keyword argument for AnnualFinanceStatement"
    with pytest.raises(TypeError, match=expected_message):
        data_transfer.add_annual_report(
            2,
            2023,
            report_with_unknown_key,
            db=full_db,
        )
def test_company_relation_missing(empty_db: Session) -> None:
"""Check if adding missing company to a query list works."""
data_transfer.company_relation_missing("Some_company", None, None, empty_db)
@ -1311,3 +1298,18 @@ def test_transfer_data_cli_env(
data_transfer.transfer_data_cli()
spy.assert_called_once()
@pytest.mark.parametrize(
    ("given", "expected"),
    [
        # Empty report stays empty.
        pytest.param({}, {}),
        # Known names are normalized to lower case with underscores.
        pytest.param({"REVENUE": 2}, {"revenue": 2}),
        pytest.param({"GROSS PROFIT": 10}, {"gross_profit": 10}),
        # Unknown names are dropped, known ones survive.
        pytest.param(
            {"I dont know this one": 5, "GROSS PROFIT": 10},
            {"gross_profit": 10},
        ),
        pytest.param({"I dont know this one": 5}, {}),
    ],
)
def test_filter_financials(given: dict[str, float], expected: dict[str, int]) -> None:
    """Checks that filter_financials keeps only known KPIs under their normalized names."""
    filtered = data_transfer.filter_financials(given)
    assert filtered == expected

View File

@ -123,7 +123,6 @@ def test_add_article_to_sql(news: News, full_db: Session) -> None:
transfer_news.add_article_to_sql(news, full_db)
@pytest.mark.working_on()
def test_transfer_news_to_sql(full_db: Session, monkeypatch: MonkeyPatch) -> None:
"""Tests if a set of news articles / sentiments can be added to the sql db."""
monkeypatch.setattr(