From 5a7472cd3cf0061ff0d61737cb40b5fb244a15b9 Mon Sep 17 00:00:00 2001 From: TrisNol Date: Sat, 23 Sep 2023 12:07:07 +0200 Subject: [PATCH] checkpoint(data-extraction): Adapt load to update existing entries in order to keep yearly_results --- .../models/company.py | 8 ++++ .../unternehmensregister/load.py | 13 ++---- .../utils/mongo/company_mongo_service.py | 42 ++++++++++++++++--- .../utils/mongo/company_mongo_service_test.py | 17 +++++--- 4 files changed, 61 insertions(+), 19 deletions(-) diff --git a/src/aki_prj23_transparenzregister/models/company.py b/src/aki_prj23_transparenzregister/models/company.py index 9c2e446..e52afd5 100644 --- a/src/aki_prj23_transparenzregister/models/company.py +++ b/src/aki_prj23_transparenzregister/models/company.py @@ -76,6 +76,14 @@ class CompanyID: district_court: DistrictCourt hr_number: str + def to_dict(self) -> dict: + """Transform to dict. + + Returns: + dict: Dictionary + """ + return asdict(self) + @dataclass class Location: diff --git a/src/aki_prj23_transparenzregister/utils/data_extraction/unternehmensregister/load.py b/src/aki_prj23_transparenzregister/utils/data_extraction/unternehmensregister/load.py index 52bf304..621b723 100644 --- a/src/aki_prj23_transparenzregister/utils/data_extraction/unternehmensregister/load.py +++ b/src/aki_prj23_transparenzregister/utils/data_extraction/unternehmensregister/load.py @@ -5,23 +5,18 @@ import os from tqdm import tqdm +from aki_prj23_transparenzregister.config.config_providers import JsonFileConfigProvider from aki_prj23_transparenzregister.models.company import Company from aki_prj23_transparenzregister.utils.mongo.company_mongo_service import ( CompanyMongoService, ) from aki_prj23_transparenzregister.utils.mongo.connector import ( - MongoConnection, MongoConnector, ) if __name__ == "__main__": - conn_string = MongoConnection( - hostname="localhost", - database="transparenzregister", - username="username", - password="", - port=27017, - ) + provider = 
JsonFileConfigProvider("secrets.json") + conn_string = provider.get_mongo_connection_string() connector = MongoConnector(conn_string) service = CompanyMongoService(connector) @@ -32,4 +27,4 @@ if __name__ == "__main__": data = json.loads(file_object.read()) company: Company = Company(**data) - service.insert(company) + service.migrations_of_base_data(company) diff --git a/src/aki_prj23_transparenzregister/utils/mongo/company_mongo_service.py b/src/aki_prj23_transparenzregister/utils/mongo/company_mongo_service.py index c2641bb..a2f0012 100644 --- a/src/aki_prj23_transparenzregister/utils/mongo/company_mongo_service.py +++ b/src/aki_prj23_transparenzregister/utils/mongo/company_mongo_service.py @@ -24,23 +24,37 @@ class CompanyMongoService: """_summary_. Returns: - list[Company]: _description_ + list[Company]: List of retrieved companies """ with self.lock: result = self.collection.find() return list(result) - def get_by_id(self, id: str) -> Company | None: + def get_by_id(self, id: dict) -> dict | None: """_summary_. Args: - id (str): _description_ + id (CompanyID): CompanyID Returns: - Company | None: _description_ + dict | None: Company if found """ with self.lock: - result = list(self.collection.find({"id": id})) + result = list( + self.collection.find( + { + "id": { + "$eq": { + "hr_number": id["hr_number"], + "district_court": { + "name": id["district_court"]["name"], + "city": id["district_court"]["city"], + }, + } + } + } + ) + ) if len(result) == 1: return result[0] return None @@ -106,3 +120,21 @@ class CompanyMongoService: return self.collection.update_one( {"_id": ObjectId(_id)}, {"$set": {"yearly_results": yearly_results}} ) + + def migrations_of_base_data(self, data: Company) -> InsertOneResult | UpdateResult: + """Updates or inserts a document of type company depending on whether an entry with the same id (CompanyID) can be found. 
+ + Args: + data (Company): Company related data to persist + + Returns: + InsertOneResult | UpdateResult: Result depending on action + """ + entry = self.get_by_id(data.id.to_dict()) + if entry is None: + return self.insert(data) + statement = {"$set": dict(data.to_dict().items())} + with self.lock: + return self.collection.update_one( + {"_id": ObjectId(entry["_id"])}, statement + ) diff --git a/tests/utils/mongo/company_mongo_service_test.py b/tests/utils/mongo/company_mongo_service_test.py index 1a1c98d..ed45849 100644 --- a/tests/utils/mongo/company_mongo_service_test.py +++ b/tests/utils/mongo/company_mongo_service_test.py @@ -3,7 +3,12 @@ from unittest.mock import Mock import pytest -from aki_prj23_transparenzregister.models.company import Company, CompanyID, Location +from aki_prj23_transparenzregister.models.company import ( + Company, + CompanyID, + DistrictCourt, + Location, +) from aki_prj23_transparenzregister.utils.mongo.company_mongo_service import ( CompanyMongoService, ) @@ -73,21 +78,23 @@ def test_by_id_no_result(mock_mongo_connector: Mock, mock_collection: Mock) -> N mock_mongo_connector.database = {"companies": mock_collection} service = CompanyMongoService(mock_mongo_connector) mock_collection.find.return_value = [] - assert service.get_by_id("Does not exist") is None + id = CompanyID(DistrictCourt("a", "b"), "c").to_dict() + assert service.get_by_id(id) is None def test_by_id_result(mock_mongo_connector: Mock, mock_collection: Mock) -> None: """Test CompanyMongoService get_by_id with result. Args: - mock_mongo_connector (Mock): Mocked MongoConnector library - mock_collection (Mock): Mocked pymongo collection + mock_mongo_connector (Mock): Mocked MongoConnector library + mock_collection (Mock): Mocked pymongo collection. 
""" mock_mongo_connector.database = {"companies": mock_collection} service = CompanyMongoService(mock_mongo_connector) mock_entry = {"id": "Does exist", "vaue": 42} mock_collection.find.return_value = [mock_entry] - assert service.get_by_id("Does exist") == mock_entry + id = CompanyID(DistrictCourt("a", "b"), "c").to_dict() + assert service.get_by_id(id) == mock_entry def test_insert(mock_mongo_connector: Mock, mock_collection: Mock) -> None: