checkpoint(data-extraction): Adapt load to update existing entries in order to keep yearly_results

This commit is contained in:
TrisNol 2023-09-23 12:07:07 +02:00
parent 1e23a8d5a3
commit 5a7472cd3c
4 changed files with 61 additions and 19 deletions

View File

@ -76,6 +76,14 @@ class CompanyID:
district_court: DistrictCourt district_court: DistrictCourt
hr_number: str hr_number: str
def to_dict(self) -> dict:
    """Convert this dataclass instance into a plain dictionary.

    The conversion is delegated to ``dataclasses.asdict``, which also
    turns nested dataclass fields into dictionaries.

    Returns:
        dict: Field names mapped to their (recursively converted) values
    """
    as_mapping = asdict(self)
    return as_mapping
@dataclass @dataclass
class Location: class Location:

View File

@ -5,23 +5,18 @@ import os
from tqdm import tqdm from tqdm import tqdm
from aki_prj23_transparenzregister.config.config_providers import JsonFileConfigProvider
from aki_prj23_transparenzregister.models.company import Company from aki_prj23_transparenzregister.models.company import Company
from aki_prj23_transparenzregister.utils.mongo.company_mongo_service import ( from aki_prj23_transparenzregister.utils.mongo.company_mongo_service import (
CompanyMongoService, CompanyMongoService,
) )
from aki_prj23_transparenzregister.utils.mongo.connector import ( from aki_prj23_transparenzregister.utils.mongo.connector import (
MongoConnection,
MongoConnector, MongoConnector,
) )
if __name__ == "__main__": if __name__ == "__main__":
conn_string = MongoConnection( provider = JsonFileConfigProvider("secrets.json")
hostname="localhost", conn_string = provider.get_mongo_connection_string()
database="transparenzregister",
username="username",
password="",
port=27017,
)
connector = MongoConnector(conn_string) connector = MongoConnector(conn_string)
service = CompanyMongoService(connector) service = CompanyMongoService(connector)
@ -32,4 +27,4 @@ if __name__ == "__main__":
data = json.loads(file_object.read()) data = json.loads(file_object.read())
company: Company = Company(**data) company: Company = Company(**data)
service.insert(company) service.migrations_of_base_data(company)

View File

@ -24,23 +24,37 @@ class CompanyMongoService:
"""_summary_. """_summary_.
Returns: Returns:
list[Company]: _description_ list[Company]: List of retrieved companies
""" """
with self.lock: with self.lock:
result = self.collection.find() result = self.collection.find()
return list(result) return list(result)
def get_by_id(self, id: str) -> Company | None: def get_by_id(self, id: dict) -> dict | None:
"""_summary_. """_summary_.
Args: Args:
id (str): _description_ id (CompanyID): CompanyID
Returns: Returns:
Company | None: _description_ dict | None: Company if found
""" """
with self.lock: with self.lock:
result = list(self.collection.find({"id": id})) result = list(
self.collection.find(
{
"id": {
"$eq": {
"hr_number": id["hr_number"],
"district_court": {
"name": id["district_court"]["name"],
"city": id["district_court"]["city"],
},
}
}
}
)
)
if len(result) == 1: if len(result) == 1:
return result[0] return result[0]
return None return None
@ -106,3 +120,21 @@ class CompanyMongoService:
return self.collection.update_one( return self.collection.update_one(
{"_id": ObjectId(_id)}, {"$set": {"yearly_results": yearly_results}} {"_id": ObjectId(_id)}, {"$set": {"yearly_results": yearly_results}}
) )
def migrations_of_base_data(self, data: Company) -> InsertOneResult | UpdateResult:
    """Insert a company, or update the stored document sharing its CompanyID.

    The existing entry is looked up through ``get_by_id`` using the
    dictionary form of ``data.id``. When nothing is found the company is
    inserted; otherwise the fields from ``data.to_dict()`` are written onto
    the stored document with ``$set``.

    Args:
        data (Company): Company related data to persist

    Returns:
        InsertOneResult | UpdateResult: Result of the insert or of the update
    """
    existing = self.get_by_id(data.id.to_dict())
    if existing is not None:
        # NOTE(review): $set only writes the keys in the payload; presumably
        # this is what preserves yearly_results - confirm that
        # Company.to_dict() does not emit that key.
        payload = dict(data.to_dict().items())
        document_filter = {"_id": ObjectId(existing["_id"])}
        with self.lock:
            return self.collection.update_one(document_filter, {"$set": payload})
    return self.insert(data)

View File

@ -3,7 +3,12 @@ from unittest.mock import Mock
import pytest import pytest
from aki_prj23_transparenzregister.models.company import Company, CompanyID, Location from aki_prj23_transparenzregister.models.company import (
Company,
CompanyID,
DistrictCourt,
Location,
)
from aki_prj23_transparenzregister.utils.mongo.company_mongo_service import ( from aki_prj23_transparenzregister.utils.mongo.company_mongo_service import (
CompanyMongoService, CompanyMongoService,
) )
@ -73,21 +78,23 @@ def test_by_id_no_result(mock_mongo_connector: Mock, mock_collection: Mock) -> N
mock_mongo_connector.database = {"companies": mock_collection} mock_mongo_connector.database = {"companies": mock_collection}
service = CompanyMongoService(mock_mongo_connector) service = CompanyMongoService(mock_mongo_connector)
mock_collection.find.return_value = [] mock_collection.find.return_value = []
assert service.get_by_id("Does not exist") is None id = CompanyID(DistrictCourt("a", "b"), "c").to_dict()
assert service.get_by_id(id) is None
def test_by_id_result(mock_mongo_connector: Mock, mock_collection: Mock) -> None: def test_by_id_result(mock_mongo_connector: Mock, mock_collection: Mock) -> None:
"""Test CompanyMongoService get_by_id with result. """Test CompanyMongoService get_by_id with result.
Args: Args:
mock_mongo_connector (Mock): Mocked MongoConnector library mock_mongo_connector (Mock): Mocked MongoConnector library
mock_collection (Mock): Mocked pymongo collection mock_collection (Mock): Mocked pymongo collection.
""" """
mock_mongo_connector.database = {"companies": mock_collection} mock_mongo_connector.database = {"companies": mock_collection}
service = CompanyMongoService(mock_mongo_connector) service = CompanyMongoService(mock_mongo_connector)
mock_entry = {"id": "Does exist", "vaue": 42} mock_entry = {"id": "Does exist", "vaue": 42}
mock_collection.find.return_value = [mock_entry] mock_collection.find.return_value = [mock_entry]
assert service.get_by_id("Does exist") == mock_entry id = CompanyID(DistrictCourt("a", "b"), "c").to_dict()
assert service.get_by_id(id) == mock_entry
def test_insert(mock_mongo_connector: Mock, mock_collection: Mock) -> None: def test_insert(mock_mongo_connector: Mock, mock_collection: Mock) -> None: