diff --git a/poetry.lock b/poetry.lock index 1f370fd..da63a0b 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1164,7 +1164,7 @@ weiterbildungssuche = ["de-weiterbildungssuche (>=0.1.0,<0.2.0)"] type = "git" url = "https://github.com/TrisNol/deutschland.git" reference = "hotfix/python-3.11-support" -resolved_reference = "29dd9d833707c68ae1574539bb6e62f4d4f5b683" +resolved_reference = "507901b95cffa8dede08b69adab23dd8eda92d87" [[package]] name = "distlib" diff --git a/src/aki_prj23_transparenzregister/apps/enrich_company_financials.py b/src/aki_prj23_transparenzregister/apps/enrich_company_financials.py index a942ad8..47bca4e 100644 --- a/src/aki_prj23_transparenzregister/apps/enrich_company_financials.py +++ b/src/aki_prj23_transparenzregister/apps/enrich_company_financials.py @@ -1,4 +1,6 @@ """Add financial data to companies.""" +import typing + from aki_prj23_transparenzregister.config.config_providers import JsonFileConfigProvider from aki_prj23_transparenzregister.utils.data_extraction.bundesanzeiger import ( Bundesanzeiger, @@ -8,30 +10,33 @@ from aki_prj23_transparenzregister.utils.mongo.company_mongo_service import ( ) from aki_prj23_transparenzregister.utils.mongo.connector import MongoConnector + +def work(company: typing.Any, company_service: CompanyMongoService) -> None: + """... + + Args: + company (dict): _description_ + company_service (CompanyMongoService): _description_ + """ + # print(company["name"]) + yearly_results = Bundesanzeiger().get_information(company_name=company["name"]) + # print(len(yearly_results)) + yearly_results_data = {} + for _index, row in yearly_results.iterrows(): + yearly_results_data[row.jahr] = { + "auditors": [auditor.to_dict() for auditor in row.auditors], + "financials": row.financial_results, + } + + company_service.add_yearly_results(company["_id"], yearly_results_data) + + if __name__ == "__main__": config_provider = JsonFileConfigProvider("./secrets.json") mongo_connector = MongoConnector(config_provider.get_mongo_connection_string()) company_service = CompanyMongoService(mongo_connector) - id = "649f16a2e198338c3b4429b0" - company = company_service.get_by_object_id(id) - if company is None: - raise Exception("No entry found with given ID") - # print(company['name']) - # print("------------") - bundesanzeiger_service = Bundesanzeiger() - yearly_results = bundesanzeiger_service.get_information( - company_name=company["name"] - ) - # print(yearly_results.head()) - yearly_results_data = {} - for _index, row in yearly_results.iterrows(): - yearly_results_data[row.jahr] = { - "auditors": row.auditors, - "financials": row.financial_results, - } - - company_service.add_yearly_results(id, yearly_results_data) - # print("------------") - # print(company) + companies = company_service.get_all() + for company in companies: + work(company, company_service) diff --git a/src/aki_prj23_transparenzregister/models/auditor.py b/src/aki_prj23_transparenzregister/models/auditor.py index 9997616..28856a8 100644 --- a/src/aki_prj23_transparenzregister/models/auditor.py +++ b/src/aki_prj23_transparenzregister/models/auditor.py @@ -1,5 +1,5 @@ """Auditor model.""" -from dataclasses import dataclass +from dataclasses import asdict, dataclass @dataclass @@ -8,3 +8,11 @@ class Auditor: name: str company: str | None + + def to_dict(self) -> dict: + """_summary_. + + Returns: + dict: _description_ + """ + return asdict(self) diff --git a/src/aki_prj23_transparenzregister/utils/data_extraction/bundesanzeiger.py b/src/aki_prj23_transparenzregister/utils/data_extraction/bundesanzeiger.py index 637c947..c00f8e4 100644 --- a/src/aki_prj23_transparenzregister/utils/data_extraction/bundesanzeiger.py +++ b/src/aki_prj23_transparenzregister/utils/data_extraction/bundesanzeiger.py @@ -11,10 +11,6 @@ from aki_prj23_transparenzregister.models.auditor import Auditor class Bundesanzeiger: """Bundesanzeiger wrapper to export relevant information.""" - def __init__(self) -> None: - """Init.""" - self.__ba = Ba() - def get_information(self, company_name: str) -> pd.DataFrame: """Extract relevant information from all found yearly results for the given company. @@ -24,13 +20,16 @@ class Bundesanzeiger: Returns: pd.DataFrame: Result """ + ba = Ba() # Get Bundesanzeiger entries for company - reports = self.__ba.get_reports(company_name) + reports = ba.get_reports(company_name) # Transform to list of data report_contents = [] for key in reports: report_contents.append(reports[key]) + if len(report_contents) == 0: + return pd.DataFrame() # Transform to DataFrame and filter out irrelevant entries df_data = pd.DataFrame(report_contents) df_data = self.filter_reports(df_data)