checkpoint: Enrich Company entity with yearly results

This commit is contained in:
TrisNol 2023-08-19 15:12:41 +02:00
parent e8e354932c
commit 6514db2954
5 changed files with 80 additions and 1 deletions

View File

@ -0,0 +1 @@
"""Main applications."""

View File

@ -0,0 +1,37 @@
"""Add financial data to companies."""
from aki_prj23_transparenzregister.config.config_providers import JsonFileConfigProvider
from aki_prj23_transparenzregister.utils.data_extraction.bundesanzeiger import (
Bundesanzeiger,
)
from aki_prj23_transparenzregister.utils.mongo.company_mongo_service import (
CompanyMongoService,
)
from aki_prj23_transparenzregister.utils.mongo.connector import MongoConnector
def main(
    company_id: str = "649f16a2e198338c3b4429b0",
    secrets_path: str = "./secrets.json",
) -> None:
    """Fetch the yearly results of one company and store them on its entry.

    Args:
        company_id (str): String form of the company's MongoDB ``_id``.
        secrets_path (str): Path of the JSON file handed to the config provider.

    Raises:
        LookupError: If no company entry exists for ``company_id``.
    """
    config_provider = JsonFileConfigProvider(secrets_path)
    mongo_connector = MongoConnector(config_provider.get_mongo_connection_string())
    company_service = CompanyMongoService(mongo_connector)

    company = company_service.get_by_object_id(company_id)
    if company is None:
        raise LookupError("No entry found with given ID")

    yearly_results = Bundesanzeiger().get_information(company_name=company["name"])

    # One entry per report row, keyed by year; a later row for the same year
    # replaces an earlier one. pymongo rejects non-string document keys, so
    # the year is converted explicitly (a no-op when it already is a str).
    yearly_results_data = {
        str(row.jahr): {
            "auditors": row.auditors,
            "financials": row.financial_results,
        }
        for _index, row in yearly_results.iterrows()
    }
    company_service.add_yearly_results(company_id, yearly_results_data)


if __name__ == "__main__":
    main()

View File

@ -45,6 +45,14 @@ class CompanyRelationship(ABC):
location: Location location: Location
@dataclass
class FinancialResults:
    """Numeric figures recorded for a single year."""

    # Year the figures belong to.
    year: int
    # Figures for that year, keyed by name.
    # NOTE(review): presumably key performance indicators — confirm the key set.
    kpis: dict[str, float]
@dataclass @dataclass
class Company: class Company:
"""_summary_. """_summary_.
@ -58,6 +66,7 @@ class Company:
name: str name: str
last_update: str last_update: str
relationships: list[CompanyRelationship] relationships: list[CompanyRelationship]
# yearly_results: list[FinancialResults]
def to_dict(self) -> dict: def to_dict(self) -> dict:
"""_summary_. """_summary_.

View File

@ -35,6 +35,9 @@ class Bundesanzeiger:
df_data = pd.DataFrame(report_contents) df_data = pd.DataFrame(report_contents)
df_data = self.filter_reports(df_data) df_data = self.filter_reports(df_data)
# Filter out entries of different companies
df_data = df_data.loc[df_data.company == company_name]
# Add Auditor information # Add Auditor information
df_data["auditors"] = df_data.raw_report.apply(self.extract_auditors) df_data["auditors"] = df_data.raw_report.apply(self.extract_auditors)

View File

@ -1,5 +1,6 @@
"""CompanyMongoService.""" """CompanyMongoService."""
from pymongo.results import InsertOneResult from bson.objectid import ObjectId
from pymongo.results import InsertOneResult, UpdateResult
from aki_prj23_transparenzregister.models.company import Company, CompanyID from aki_prj23_transparenzregister.models.company import Company, CompanyID
from aki_prj23_transparenzregister.utils.mongo.connector import MongoConnector from aki_prj23_transparenzregister.utils.mongo.connector import MongoConnector
@ -39,6 +40,20 @@ class CompanyMongoService:
return result[0] return result[0]
return None return None
def get_by_object_id(self, _id: str) -> dict | None:
    """Find a stored entry by its MongoDB ``_id``.

    Args:
        _id (str): String form of the entry's ObjectId.

    Returns:
        dict | None: The stored document if found, otherwise None.

    Raises:
        bson.errors.InvalidId: If ``_id`` is not a valid ObjectId string
            (raised by the ``ObjectId`` constructor).
    """
    # `_id` is unique within a collection, so a single-document lookup is
    # sufficient; find_one yields None when nothing matches.
    return self.collection.find_one({"_id": ObjectId(_id)})
def insert(self, company: Company) -> InsertOneResult: def insert(self, company: Company) -> InsertOneResult:
"""_summary_. """_summary_.
@ -49,3 +64,17 @@ class CompanyMongoService:
_type_: _description_ _type_: _description_
""" """
return self.collection.insert_one(company.to_dict()) return self.collection.insert_one(company.to_dict())
def add_yearly_results(self, _id: str, yearly_results: dict) -> UpdateResult:
    """Set the `yearly_results` field on the entry with the given ID.

    Args:
        _id (str): String form of the entry's ObjectId
        yearly_results (dict): Value to store under `yearly_results`

    Returns:
        UpdateResult: Result reported by the update operation
    """
    selector = {"_id": ObjectId(_id)}
    # `$set` replaces any existing `yearly_results` value wholesale.
    change = {"$set": {"yearly_results": yearly_results}}
    return self.collection.update_one(selector, change)