feat(data-extraction): MongoWrapper, DataClasses and services for News and Company data

This commit is contained in:
TrisNol
2023-07-10 18:58:31 +02:00
parent 4c65d37816
commit 4c95550dbf
17 changed files with 384 additions and 135 deletions

View File

@ -1,13 +0,0 @@
from dataclasses import asdict, dataclass
@dataclass
class News:
    """Plain data record describing one news article.

    Every field is stored as a string; no parsing or validation happens here.
    """

    id: str  # identifier of the article
    title: str
    date: str  # kept as text, not converted to a date object
    text: str
    source_url: str

    def dict(self):
        """Return the article's fields as a plain dictionary keyed by field name."""
        as_mapping = asdict(self)
        return as_mapping

View File

@ -1,17 +0,0 @@
from abc import ABC
from News.models.News import News
class NewsServiceInterface(ABC):
    """Contract for services that store and retrieve ``News`` entries.

    No method is marked abstract; each one raises ``NotImplementedError``
    until a subclass overrides it.
    """

    def get_all(self) -> list[News]:
        """Return every stored news entry."""
        raise NotImplementedError()

    def get_by_id(self, id: str) -> News | None:
        """Return the entry with the given id, or ``None`` when there is none."""
        raise NotImplementedError()

    def insert(self, news: News):
        """Store a single news entry."""
        raise NotImplementedError()

    def insert_many(self, news: list[News]):
        """Store several news entries at once."""
        raise NotImplementedError()

View File

@ -1,67 +0,0 @@
import pymongo
from News.models.News import News
from News.utils.NewsServiceInterface import NewsServiceInterface
class MongoConnector:
def __init__(
self,
hostname,
database: str,
port: int | None,
username: str | None = None,
password: str | None = None,
):
self.client = self.connect(hostname, port, username, password)
databases = self.client.list_database_names()
if database not in databases:
print(f"Database {database} will be created")
self.database = self.client[database]
def connect(self, hostname, port, username, password) -> pymongo.MongoClient:
if username is not None and password is not None:
connection_string = f"mongodb+srv://{username}:{password}@{hostname}"
else:
connection_string = f"mongodb+srv://{hostname}"
if port is not None:
connection_string += f":{port}"
connection_string = connection_string.replace("mongodb+srv", "mongodb")
print(connection_string)
return pymongo.MongoClient(connection_string)
class MongoNewsService(NewsServiceInterface):
    """News service backed by the ``news`` collection of a MongoDB database."""

    def __init__(self, connector: MongoConnector):
        """Bind the service to the ``news`` collection of ``connector.database``."""
        self.collection = connector.database["news"]

    def get_all(self) -> list[News]:
        """Return every document of the collection converted to ``News``."""
        return [
            MongoEntryTransformer.transform_outgoing(document)
            for document in self.collection.find()
        ]

    def get_by_id(self, id: str) -> News | None:
        """Return the news entry whose ``_id`` equals ``id``, or ``None``."""
        # ``_id`` is unique per collection, so a single-document lookup is
        # enough and avoids materializing a cursor.
        document = self.collection.find_one({"_id": id})
        if document is None:
            return None
        return MongoEntryTransformer.transform_outgoing(document)

    def insert(self, news: News):
        """Insert one news entry and return pymongo's insert result."""
        return self.collection.insert_one(MongoEntryTransformer.transform_ingoing(news))

    def insert_many(self, news: list[News]):
        """Insert several news entries in one call.

        Returns pymongo's insert result, or ``None`` when ``news`` is empty
        (pymongo rejects an empty document list).
        """
        if not news:
            return None
        documents = [MongoEntryTransformer.transform_ingoing(entry) for entry in news]
        return self.collection.insert_many(documents)
class MongoEntryTransformer:
    """Convert between ``News`` objects and the documents stored in MongoDB."""

    @staticmethod
    def transform_ingoing(news: News) -> dict:
        """Return the document to store for ``news``.

        The result is ``news.dict()`` with the ``id`` entry removed and its
        value placed under ``_id`` instead.
        """
        document = news.dict()
        identifier = news.id
        document.pop("id")
        document["_id"] = identifier
        return document

    @staticmethod
    def transform_outgoing(data: dict) -> News:
        """Build a ``News`` object from a stored document.

        Raises ``KeyError`` when ``data`` lacks ``_id`` or any news field.
        """
        copied_fields = ("title", "date", "text", "source_url")
        return News(
            id=data["_id"],
            **{name: data[name] for name in copied_fields},
        )

View File

@ -1,40 +0,0 @@
from abc import ABC
from dataclasses import asdict, dataclass
from enum import Enum
class RelationshipRoleEnum(Enum):
    """Role values used by ``CompanyRelationship.role``."""

    # NOTE(review): the empty value looks like a placeholder — confirm it is intended.
    STAKEHOLDER = ""
    ORGANISATION = "ORGANISATION"
@dataclass
class CompayID:
    """Two-part company key made of a district court and an HR number.

    The spelling of the class name is part of the public interface and is
    therefore left unchanged.
    """

    district_court: str
    hr_number: str
@dataclass
class Location:
city: str
street: str | None = None
house_number: str | None = None
zip_code: str | None = None
@dataclass
class CompanyRelationship(ABC):
    """Base record for a relationship attached to a company.

    Derives from ``ABC`` but declares no abstract methods.
    """

    role: RelationshipRoleEnum  # role of the related party
    location: Location  # location of the related party
@dataclass
class Company:
    """Company record: key, location, name, update marker and relationships."""

    id: CompayID
    location: Location
    name: str
    last_update: str  # kept as text, not converted to a date object
    relationships: list[CompanyRelationship]

    def dict(self):
        """Return the company as a plain dictionary.

        ``dataclasses.asdict`` converts nested dataclass fields recursively.
        """
        snapshot = asdict(self)
        return snapshot

View File

@ -1,21 +0,0 @@
from News.utils.mongodb.mongo import MongoConnector
from Unternehmensregister.models.Company import Company
from Unternehmensregister.utils.CompanyServiceInterface import CompanyServiceInterface
class CompanyMongoService(CompanyServiceInterface):
    """Company service backed by the ``companies`` collection of a MongoDB database."""

    def __init__(self, connector: MongoConnector):
        """Bind the service to the ``companies`` collection of ``connector.database``."""
        self.collection = connector.database["companies"]

    def get_all(self) -> list[Company]:
        """Return all documents of the collection.

        NOTE(review): the documents are returned as read from the collection
        and are not rebuilt into ``Company`` objects — confirm callers expect that.
        """
        return list(self.collection.find())

    def get_by_id(self, id: str) -> Company | None:
        """Return the single document whose ``id`` field equals ``id``.

        ``id`` may also be a dataclass instance (such as the company key). It
        is converted to its dictionary form first, because a dataclass cannot
        be encoded in a query and documents store the key as a nested dict.

        Returns ``None`` unless exactly one document matches.
        """
        from dataclasses import asdict, is_dataclass

        key = id
        if is_dataclass(id) and not isinstance(id, type):
            key = asdict(id)
        matches = list(self.collection.find({"id": key}))
        if len(matches) == 1:
            return matches[0]
        return None

    def insert(self, company: Company):
        """Insert ``company`` and return pymongo's insert result.

        Enum members inside ``company.dict()`` are replaced by their values,
        since BSON cannot encode plain ``Enum`` instances.
        """
        from enum import Enum

        def to_storable(value):
            # Walk the nested dict/list structure and unwrap Enum members.
            if isinstance(value, Enum):
                return value.value
            if isinstance(value, dict):
                return {key: to_storable(item) for key, item in value.items()}
            if isinstance(value, (list, tuple)):
                return [to_storable(item) for item in value]
            return value

        return self.collection.insert_one(to_storable(company.dict()))

View File

@ -1,14 +0,0 @@
from abc import ABC
from models import Company
class CompanyServiceInterface(ABC):
    """Contract for services that store and retrieve companies.

    No method is marked abstract; each one raises ``NotImplementedError``
    until a subclass overrides it.
    """

    def get_all(self) -> list[Company.Company]:
        """Return every stored company."""
        raise NotImplementedError

    def get_by_id(self, id: Company.CompayID) -> Company.Company | None:
        """Return the company with the given key, or ``None`` when there is none."""
        raise NotImplementedError

    def insert(self, company: Company.Company):
        """Store a single company."""
        raise NotImplementedError