From 6514db2954a719510cc11180f6238e9569cb3084 Mon Sep 17 00:00:00 2001
From: TrisNol
Date: Sat, 19 Aug 2023 15:12:41 +0200
Subject: [PATCH] checkpoint: Enrich Company entity with yearly results

---
 .../apps/__init__.py                          |  1 +
 .../apps/enrich_company_financials.py         | 37 +++++++++++++++++++
 .../models/company.py                         |  9 +++++
 .../utils/data_extraction/bundesanzeiger.py   |  3 ++
 .../utils/mongo/company_mongo_service.py      | 31 +++++++++++++++-
 5 files changed, 80 insertions(+), 1 deletion(-)
 create mode 100644 src/aki_prj23_transparenzregister/apps/__init__.py
 create mode 100644 src/aki_prj23_transparenzregister/apps/enrich_company_financials.py

diff --git a/src/aki_prj23_transparenzregister/apps/__init__.py b/src/aki_prj23_transparenzregister/apps/__init__.py
new file mode 100644
index 0000000..69748f7
--- /dev/null
+++ b/src/aki_prj23_transparenzregister/apps/__init__.py
@@ -0,0 +1 @@
+"""Main applications."""
diff --git a/src/aki_prj23_transparenzregister/apps/enrich_company_financials.py b/src/aki_prj23_transparenzregister/apps/enrich_company_financials.py
new file mode 100644
index 0000000..a942ad8
--- /dev/null
+++ b/src/aki_prj23_transparenzregister/apps/enrich_company_financials.py
@@ -0,0 +1,37 @@
+"""Add financial data to companies."""
+from aki_prj23_transparenzregister.config.config_providers import JsonFileConfigProvider
+from aki_prj23_transparenzregister.utils.data_extraction.bundesanzeiger import (
+    Bundesanzeiger,
+)
+from aki_prj23_transparenzregister.utils.mongo.company_mongo_service import (
+    CompanyMongoService,
+)
+from aki_prj23_transparenzregister.utils.mongo.connector import MongoConnector
+
+if __name__ == "__main__":
+    config_provider = JsonFileConfigProvider("./secrets.json")
+
+    mongo_connector = MongoConnector(config_provider.get_mongo_connection_string())
+    company_service = CompanyMongoService(mongo_connector)
+
+    id = "649f16a2e198338c3b4429b0"
+    company = company_service.get_by_object_id(id)
+    if company is None:
+        raise Exception("No entry found with given ID")
+    # print(company['name'])
+    # print("------------")
+    bundesanzeiger_service = Bundesanzeiger()
+    yearly_results = bundesanzeiger_service.get_information(
+        company_name=company["name"]
+    )
+    # print(yearly_results.head())
+    yearly_results_data = {}
+    for _index, row in yearly_results.iterrows():
+        yearly_results_data[row.jahr] = {
+            "auditors": row.auditors,
+            "financials": row.financial_results,
+        }
+
+    company_service.add_yearly_results(id, yearly_results_data)
+    # print("------------")
+    # print(company)
diff --git a/src/aki_prj23_transparenzregister/models/company.py b/src/aki_prj23_transparenzregister/models/company.py
index dedd7cd..8905513 100644
--- a/src/aki_prj23_transparenzregister/models/company.py
+++ b/src/aki_prj23_transparenzregister/models/company.py
@@ -45,6 +45,14 @@ class CompanyRelationship(ABC):
     location: Location
 
 
+@dataclass
+class FinancialResults:
+    """_summary_."""
+
+    year: int
+    kpis: dict[str, float]
+
+
 @dataclass
 class Company:
     """_summary_.
@@ -58,6 +66,7 @@ class Company:
     name: str
     last_update: str
     relationships: list[CompanyRelationship]
+    # yearly_results: list[FinancialResults]
 
     def to_dict(self) -> dict:
         """_summary_.
diff --git a/src/aki_prj23_transparenzregister/utils/data_extraction/bundesanzeiger.py b/src/aki_prj23_transparenzregister/utils/data_extraction/bundesanzeiger.py
index bb756ab..637c947 100644
--- a/src/aki_prj23_transparenzregister/utils/data_extraction/bundesanzeiger.py
+++ b/src/aki_prj23_transparenzregister/utils/data_extraction/bundesanzeiger.py
@@ -35,6 +35,9 @@ class Bundesanzeiger:
         df_data = pd.DataFrame(report_contents)
         df_data = self.filter_reports(df_data)
 
+        # Filter out entries of different companies
+        df_data = df_data.loc[df_data.company == company_name]
+
         # Add Auditor information
         df_data["auditors"] = df_data.raw_report.apply(self.extract_auditors)
 
diff --git a/src/aki_prj23_transparenzregister/utils/mongo/company_mongo_service.py b/src/aki_prj23_transparenzregister/utils/mongo/company_mongo_service.py
index 40f4574..ae8bac2 100644
--- a/src/aki_prj23_transparenzregister/utils/mongo/company_mongo_service.py
+++ b/src/aki_prj23_transparenzregister/utils/mongo/company_mongo_service.py
@@ -1,5 +1,6 @@
 """CompanyMongoService."""
-from pymongo.results import InsertOneResult
+from bson.objectid import ObjectId
+from pymongo.results import InsertOneResult, UpdateResult
 
 from aki_prj23_transparenzregister.models.company import Company, CompanyID
 from aki_prj23_transparenzregister.utils.mongo.connector import MongoConnector
@@ -39,6 +40,20 @@ class CompanyMongoService:
             return result[0]
         return None
 
+    def get_by_object_id(self, _id: str) -> dict | None:
+        """Find an object by given _id.
+
+        Args:
+            _id (str): ID
+
+        Returns:
+            Company | None: Entry if found, otherwise None
+        """
+        result = list(self.collection.find({"_id": ObjectId(_id)}))
+        if len(result) == 1:
+            return result[0]
+        return None
+
     def insert(self, company: Company) -> InsertOneResult:
         """_summary_.
 
@@ -49,3 +64,17 @@ class CompanyMongoService:
             _type_: _description_
         """
         return self.collection.insert_one(company.to_dict())
+
+    def add_yearly_results(self, _id: str, yearly_results: dict) -> UpdateResult:
+        """Add the `yearly_results` field to a Company entry.
+
+        Args:
+            _id (str): ID of the object
+            yearly_results (dict): Yearly results dictionary
+
+        Returns:
+            UpdateResult: Result
+        """
+        return self.collection.update_one(
+            {"_id": ObjectId(_id)}, {"$set": {"yearly_results": yearly_results}}
+        )