checkpoint: Update Mongo Company entries with yearly_results

This commit is contained in:
TrisNol 2023-08-19 18:06:33 +02:00
parent 6514db2954
commit d1c09d51a2
4 changed files with 40 additions and 28 deletions

2
poetry.lock generated
View File

@ -1164,7 +1164,7 @@ weiterbildungssuche = ["de-weiterbildungssuche (>=0.1.0,<0.2.0)"]
type = "git" type = "git"
url = "https://github.com/TrisNol/deutschland.git" url = "https://github.com/TrisNol/deutschland.git"
reference = "hotfix/python-3.11-support" reference = "hotfix/python-3.11-support"
resolved_reference = "29dd9d833707c68ae1574539bb6e62f4d4f5b683" resolved_reference = "507901b95cffa8dede08b69adab23dd8eda92d87"
[[package]] [[package]]
name = "distlib" name = "distlib"

View File

@ -1,4 +1,6 @@
"""Add financial data to companies.""" """Add financial data to companies."""
import typing
from aki_prj23_transparenzregister.config.config_providers import JsonFileConfigProvider from aki_prj23_transparenzregister.config.config_providers import JsonFileConfigProvider
from aki_prj23_transparenzregister.utils.data_extraction.bundesanzeiger import ( from aki_prj23_transparenzregister.utils.data_extraction.bundesanzeiger import (
Bundesanzeiger, Bundesanzeiger,
@ -8,30 +10,33 @@ from aki_prj23_transparenzregister.utils.mongo.company_mongo_service import (
) )
from aki_prj23_transparenzregister.utils.mongo.connector import MongoConnector from aki_prj23_transparenzregister.utils.mongo.connector import MongoConnector
def work(company: typing.Any, company_service: CompanyMongoService) -> None:
    """Fetch the yearly results for one company and hand them to the service.

    The company's ``name`` is passed to ``Bundesanzeiger().get_information``;
    every row of the returned frame is turned into an entry keyed by the
    row's ``jahr`` value, and the resulting mapping is passed to
    ``company_service.add_yearly_results`` together with the company's ``_id``.

    Args:
        company (typing.Any): Company entry; accessed as ``company["name"]``
            and ``company["_id"]``.
        company_service (CompanyMongoService): Service whose
            ``add_yearly_results`` receives the collected data.
    """
    results_frame = Bundesanzeiger().get_information(company_name=company["name"])
    # NOTE(review): ``jahr`` is used as-is for the mapping key; if this mapping
    # ends up as a Mongo document, keys presumably need to be strings -- confirm
    # the type of ``jahr`` against the DataFrame produced by get_information.
    results_by_year = {
        row.jahr: {
            # Auditor objects are converted to plain dicts before being stored.
            "auditors": [auditor.to_dict() for auditor in row.auditors],
            "financials": row.financial_results,
        }
        for _index, row in results_frame.iterrows()
    }
    company_service.add_yearly_results(company["_id"], results_by_year)
if __name__ == "__main__": if __name__ == "__main__":
config_provider = JsonFileConfigProvider("./secrets.json") config_provider = JsonFileConfigProvider("./secrets.json")
mongo_connector = MongoConnector(config_provider.get_mongo_connection_string()) mongo_connector = MongoConnector(config_provider.get_mongo_connection_string())
company_service = CompanyMongoService(mongo_connector) company_service = CompanyMongoService(mongo_connector)
id = "649f16a2e198338c3b4429b0" companies = company_service.get_all()
company = company_service.get_by_object_id(id) for company in companies:
if company is None: work(company, company_service)
raise Exception("No entry found with given ID")
# print(company['name'])
# print("------------")
bundesanzeiger_service = Bundesanzeiger()
yearly_results = bundesanzeiger_service.get_information(
company_name=company["name"]
)
# print(yearly_results.head())
yearly_results_data = {}
for _index, row in yearly_results.iterrows():
yearly_results_data[row.jahr] = {
"auditors": row.auditors,
"financials": row.financial_results,
}
company_service.add_yearly_results(id, yearly_results_data)
# print("------------")
# print(company)

View File

@ -1,5 +1,5 @@
"""Auditor model.""" """Auditor model."""
from dataclasses import dataclass from dataclasses import asdict, dataclass
@dataclass @dataclass
@ -8,3 +8,11 @@ class Auditor:
name: str name: str
company: str | None company: str | None
def to_dict(self) -> dict:
    """Convert this instance into a plain dictionary.

    Returns:
        dict: Result of ``dataclasses.asdict`` applied to this instance,
            i.e. a mapping from each dataclass field name to its value.
    """
    as_mapping = asdict(self)
    return as_mapping

View File

@ -11,10 +11,6 @@ from aki_prj23_transparenzregister.models.auditor import Auditor
class Bundesanzeiger: class Bundesanzeiger:
"""Bundesanzeiger wrapper to export relevant information.""" """Bundesanzeiger wrapper to export relevant information."""
def __init__(self) -> None:
"""Init."""
self.__ba = Ba()
def get_information(self, company_name: str) -> pd.DataFrame: def get_information(self, company_name: str) -> pd.DataFrame:
"""Extract relevant information from all found yearly results for the given company. """Extract relevant information from all found yearly results for the given company.
@ -24,13 +20,16 @@ class Bundesanzeiger:
Returns: Returns:
pd.DataFrame: Result pd.DataFrame: Result
""" """
ba = Ba()
# Get Bundesanzeiger entries for company # Get Bundesanzeiger entries for company
reports = self.__ba.get_reports(company_name) reports = ba.get_reports(company_name)
# Transform to list of data # Transform to list of data
report_contents = [] report_contents = []
for key in reports: for key in reports:
report_contents.append(reports[key]) report_contents.append(reports[key])
if len(report_contents) == 0:
return pd.DataFrame()
# Transform to DataFrame and filter out irrelevant entries # Transform to DataFrame and filter out irrelevant entries
df_data = pd.DataFrame(report_contents) df_data = pd.DataFrame(report_contents)
df_data = self.filter_reports(df_data) df_data = self.filter_reports(df_data)