checkpoint(data-extraction): Adapt load to update existing entries in order to keep yearly_results

This commit is contained in:
TrisNol 2023-09-23 12:07:07 +02:00
parent 1e23a8d5a3
commit 5a7472cd3c
4 changed files with 61 additions and 19 deletions

View File

@ -76,6 +76,14 @@ class CompanyID:
district_court: DistrictCourt
hr_number: str
def to_dict(self) -> dict:
    """Serialize this instance into a plain dictionary.

    Returns:
        dict: Field names mapped to their values, as produced by
            ``dataclasses.asdict`` (nested dataclasses become nested dicts).
    """
    as_mapping = asdict(self)
    return as_mapping
@dataclass
class Location:

View File

@ -5,23 +5,18 @@ import os
from tqdm import tqdm
from aki_prj23_transparenzregister.config.config_providers import JsonFileConfigProvider
from aki_prj23_transparenzregister.models.company import Company
from aki_prj23_transparenzregister.utils.mongo.company_mongo_service import (
CompanyMongoService,
)
from aki_prj23_transparenzregister.utils.mongo.connector import (
MongoConnection,
MongoConnector,
)
if __name__ == "__main__":
conn_string = MongoConnection(
hostname="localhost",
database="transparenzregister",
username="username",
password="",
port=27017,
)
provider = JsonFileConfigProvider("secrets.json")
conn_string = provider.get_mongo_connection_string()
connector = MongoConnector(conn_string)
service = CompanyMongoService(connector)
@ -32,4 +27,4 @@ if __name__ == "__main__":
data = json.loads(file_object.read())
company: Company = Company(**data)
service.insert(company)
service.migrations_of_base_data(company)

View File

@ -24,23 +24,37 @@ class CompanyMongoService:
"""_summary_.
Returns:
list[Company]: _description_
list[Company]: List of retrieved companies
"""
with self.lock:
result = self.collection.find()
return list(result)
def get_by_id(self, id: str) -> Company | None:
def get_by_id(self, id: dict) -> dict | None:
"""_summary_.
Args:
id (str): _description_
id (CompanyID): CompanyID
Returns:
Company | None: _description_
dict | None: Company if found
"""
with self.lock:
result = list(self.collection.find({"id": id}))
result = list(
self.collection.find(
{
"id": {
"$eq": {
"hr_number": id["hr_number"],
"district_court": {
"name": id["district_court"]["name"],
"city": id["district_court"]["city"],
},
}
}
}
)
)
if len(result) == 1:
return result[0]
return None
@ -106,3 +120,21 @@ class CompanyMongoService:
return self.collection.update_one(
{"_id": ObjectId(_id)}, {"$set": {"yearly_results": yearly_results}}
)
def migrations_of_base_data(self, data: Company) -> InsertOneResult | UpdateResult:
    """Persist a company by updating its stored document or inserting a new one.

    The company's id (CompanyID) is looked up through ``get_by_id``. When that
    lookup yields an entry, the entry is updated via ``$set`` with the
    serialized company; otherwise the company is inserted.

    Args:
        data (Company): Company related data to persist

    Returns:
        InsertOneResult | UpdateResult: Result depending on action
    """
    existing = self.get_by_id(data.id.to_dict())
    if existing is not None:
        # Only the top-level fields present in the serialized company are
        # overwritten; fields it does not contain stay untouched in the
        # stored document.
        update = {"$set": dict(data.to_dict())}
        selector = {"_id": ObjectId(existing["_id"])}
        # The lock is held for the write only; the lookup above is a
        # separate call.
        with self.lock:
            return self.collection.update_one(selector, update)
    return self.insert(data)

View File

@ -3,7 +3,12 @@ from unittest.mock import Mock
import pytest
from aki_prj23_transparenzregister.models.company import Company, CompanyID, Location
from aki_prj23_transparenzregister.models.company import (
Company,
CompanyID,
DistrictCourt,
Location,
)
from aki_prj23_transparenzregister.utils.mongo.company_mongo_service import (
CompanyMongoService,
)
@ -73,21 +78,23 @@ def test_by_id_no_result(mock_mongo_connector: Mock, mock_collection: Mock) -> N
mock_mongo_connector.database = {"companies": mock_collection}
service = CompanyMongoService(mock_mongo_connector)
mock_collection.find.return_value = []
assert service.get_by_id("Does not exist") is None
id = CompanyID(DistrictCourt("a", "b"), "c").to_dict()
assert service.get_by_id(id) is None
def test_by_id_result(mock_mongo_connector: Mock, mock_collection: Mock) -> None:
"""Test CompanyMongoService get_by_id with result.
Args:
mock_mongo_connector (Mock): Mocked MongoConnector library
mock_collection (Mock): Mocked pymongo collection
mock_mongo_connector (Mock): Mocked MongoConnector library
mock_collection (Mock): Mocked pymongo collection.
"""
mock_mongo_connector.database = {"companies": mock_collection}
service = CompanyMongoService(mock_mongo_connector)
mock_entry = {"id": "Does exist", "vaue": 42}
mock_collection.find.return_value = [mock_entry]
assert service.get_by_id("Does exist") == mock_entry
id = CompanyID(DistrictCourt("a", "b"), "c").to_dict()
assert service.get_by_id(id) == mock_entry
def test_insert(mock_mongo_connector: Mock, mock_collection: Mock) -> None: