checkpoint(db): Refactor mongo utils, extract postgres entities from Jupyter

This commit is contained in:
TrisNol 2023-08-11 15:12:18 +02:00
parent d493fd0978
commit d565770b99
19 changed files with 866 additions and 441 deletions

View File

@ -8,7 +8,7 @@
"outputs": [], "outputs": [],
"source": [ "source": [
"import configuration as conf\n", "import configuration as conf\n",
"import aki_prj23_transparenzregister.utils.mongo as mongo" "import aki_prj23_transparenzregister.utils.mongo.connector as mongo"
] ]
}, },
{ {
@ -39,7 +39,9 @@
"outputs": [], "outputs": [],
"source": [ "source": [
"# Create Connection-Object\n", "# Create Connection-Object\n",
"MongoObject=mongo.MongoConnection(conf.HOSTNAME,conf.DATABASE,conf.PORT,conf.USERNAME,conf.PASSWORD)" "MongoObject = mongo.MongoConnection(\n",
" conf.HOSTNAME, conf.DATABASE, conf.PORT, conf.USERNAME, conf.PASSWORD\n",
")"
] ]
}, },
{ {
@ -104,7 +106,7 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"cursor=CollNews.find({'date':'2023-06-16T13:29:18.901+02:00'})\n", "cursor = CollNews.find({\"date\": \"2023-06-16T13:29:18.901+02:00\"})\n",
"NewsList = list(cursor)" "NewsList = list(cursor)"
] ]
}, },

View File

@ -8,7 +8,7 @@
"outputs": [], "outputs": [],
"source": [ "source": [
"import configurationFH as conf # hier wird die Configuration für das K8 cluster verwendet\n", "import configurationFH as conf # hier wird die Configuration für das K8 cluster verwendet\n",
"import aki_prj23_transparenzregister.utils.mongo as mongo" "import aki_prj23_transparenzregister.utils.mongo.connector as mongo"
] ]
}, },
{ {
@ -39,7 +39,9 @@
"outputs": [], "outputs": [],
"source": [ "source": [
"# Create Connection-Object\n", "# Create Connection-Object\n",
"MongoObject=mongo.MongoConnection(conf.HOSTNAME,conf.DATABASE,conf.PORT,conf.USERNAME,conf.PASSWORD)" "MongoObject = mongo.MongoConnection(\n",
" conf.HOSTNAME, conf.DATABASE, conf.PORT, conf.USERNAME, conf.PASSWORD\n",
")"
] ]
}, },
{ {
@ -191,10 +193,10 @@
"\n", "\n",
"# select the news article\n", "# select the news article\n",
"data = NewsList[212]\n", "data = NewsList[212]\n",
"jsonID=data['_id']\n", "jsonID = data[\"_id\"]\n",
"\n", "\n",
"# add the new attribute\n", "# add the new attribute\n",
"data['company'] = '[Deutsche Bahn, GDL]'\n", "data[\"company\"] = \"[Deutsche Bahn, GDL]\"\n",
"\n", "\n",
"print(data)" "print(data)"
] ]
@ -241,8 +243,8 @@
"CollNews.update_one(\n", "CollNews.update_one(\n",
" {\"_id\": data[\"_id\"]},\n", " {\"_id\": data[\"_id\"]},\n",
" {\"$set\": data},\n", " {\"$set\": data},\n",
" upsert=True # Setze upsert=True, um das Dokument einzufügen, falls es noch nicht existiert\n", " upsert=True, # Setze upsert=True, um das Dokument einzufügen, falls es noch nicht existiert\n",
")\n" ")"
] ]
}, },
{ {

View File

@ -1,10 +0,0 @@
1. Orga:
- Markdown Templates:
- Meeting Notes (Tristan)
- Requirements Liste, Richtung Pflichtenheft (Tim)
- Zeitplan und Meilensteine (Sebastian)
- Zugang GitHub Repo (Philipp)
2. Recherche:
- Rechtliche Rahmenbedingungen (Sascha)
- Datenanforderungen - Welche Informationen/Entitäten benötigen wir? (Kim)
- APIs & Datenquellen (Philipp, Tristan)

9
docker-compose.yml Normal file
View File

@ -0,0 +1,9 @@
# Compose file defining a single PostgreSQL service.
version: '3.8'
services:
  postgres:
    image: postgres
    restart: always
    environment:
      # Password handed to the postgres image for its superuser.
      POSTGRES_PASSWORD: postgres
    ports:
      # HOST:CONTAINER mapping, quoted so it is always read as a string.
      - "5432:5432"

857
poetry.lock generated

File diff suppressed because it is too large Load Diff

View File

@ -27,9 +27,11 @@ readme = "README.md"
version = "0.1.0" version = "0.1.0"
[tool.poetry.dependencies] [tool.poetry.dependencies]
SQLAlchemy = "^2.0.19"
loguru = "^0.7.0" loguru = "^0.7.0"
matplotlib = "^3.7.1" matplotlib = "^3.7.1"
plotly = "^5.14.1" plotly = "^5.14.1"
psycopg2 = "^2.9.7"
pymongo = "^4.4.1" pymongo = "^4.4.1"
python = "^3.11" python = "^3.11"
seaborn = "^0.12.2" seaborn = "^0.12.2"

View File

@ -0,0 +1 @@
"""App configuration."""

View File

@ -0,0 +1,13 @@
"""Config interface templates."""
from dataclasses import dataclass
@dataclass
class PostgreConnectionString:
"""PostgreSQL Connection String args wrapper."""
username: str
password: str | None
host: str
database: str
port: int | None

View File

@ -0,0 +1 @@
"""MongoDB related modules."""

View File

@ -1,6 +1,6 @@
"""CompanyMongoService.""" """CompanyMongoService."""
from aki_prj23_transparenzregister.models.company import Company, CompanyID from aki_prj23_transparenzregister.models.company import Company, CompanyID
from aki_prj23_transparenzregister.utils.mongo import MongoConnector from aki_prj23_transparenzregister.utils.mongo.connector import MongoConnector
class CompanyMongoService: class CompanyMongoService:

View File

@ -1,6 +1,6 @@
"""MongoNewsService.""" """MongoNewsService."""
from aki_prj23_transparenzregister.models.news import News from aki_prj23_transparenzregister.models.news import News
from aki_prj23_transparenzregister.utils.mongo import MongoConnector from aki_prj23_transparenzregister.utils.mongo.connector import MongoConnector
class MongoNewsService: class MongoNewsService:

View File

@ -0,0 +1 @@
"""PostgreSQL related modules."""

View File

@ -0,0 +1,36 @@
"""Module containing connection utils for PostgreSQL DB."""
from sqlalchemy import create_engine
from sqlalchemy.engine import URL
from sqlalchemy.orm import declarative_base
from aki_prj23_transparenzregister.config.config_template import PostgreConnectionString
def get_engine(conn_args: PostgreConnectionString):
    """Build an engine for the PostgreSQL instance described by ``conn_args``.

    Args:
        conn_args: Connection settings (username, password, host, database, port).

    Returns:
        sqlalchemy.engine: Engine created from the assembled connection URL.
    """
    # Collect the URL components first so the mapping from settings to URL
    # fields is visible in one place.
    url_parts = {
        "drivername": "postgresql",
        "username": conn_args.username,
        "password": conn_args.password,
        "host": conn_args.host,
        "database": conn_args.database,
        "port": conn_args.port,
    }
    return create_engine(URL.create(**url_parts))
if __name__ == "__main__":
    # Script entry point: connect to a local PostgreSQL instance and run
    # ``create_all`` for a declarative base.
    conn_args = PostgreConnectionString(
        username="postgres",
        password="postgres",
        host="localhost",
        database="postgres",
        port=5432,
    )
    engine = get_engine(conn_args)

    with engine.connect():
        # NOTE(review): ``Base`` is created right here, so no entity classes
        # have been declared on it at this point and ``create_all`` presumably
        # has no tables to emit. Confirm whether the base shared by the ORM
        # entities was meant to be used instead.
        Base = declarative_base()
        Base.metadata.create_all(engine)

View File

@ -0,0 +1,189 @@
"""ORM entities for Prod. DB."""
import enum
from datetime import datetime
from sqlalchemy import (
Column,
DateTime,
Enum,
Float,
ForeignKey,
ForeignKeyConstraint,
Integer,
PrimaryKeyConstraint,
String,
)
from sqlalchemy.orm import (
declarative_base,
mapped_column,
relationship,
)
# Declarative base class that the ORM entity classes in this module inherit from.
Base = declarative_base()
class DistrictCourt(Base):  # type: ignore
    """ORM mapping for the ``district_court`` table."""

    __tablename__ = "district_court"

    # Integer primary key.
    id = Column(Integer, primary_key=True)

    # Both text columns are mandatory and limited to 100 characters.
    city = Column(String(100), nullable=False)
    name = Column(String(100), nullable=False)
class Company(Base):  # type: ignore
    """ORM mapping for the ``company`` table.

    A row is identified by the pair (``hr``, ``court_id``); the same pair is
    declared as the named primary-key constraint ``pk_company_hr_court``.
    """

    __tablename__ = "company"
    __table_args__ = (
        PrimaryKeyConstraint("hr", "court_id", name="pk_company_hr_court"),
    )

    # Composite primary key; ``court_id`` additionally references
    # ``district_court.id``.
    hr = Column(Integer, nullable=False, primary_key=True)
    court_id = Column(
        Integer,
        ForeignKey("district_court.id"),
        nullable=False,
        primary_key=True,
    )

    # Mandatory descriptive columns.
    name = Column(String(100), nullable=False)
    street = Column(String(100), nullable=False)
    zip_code = Column(String(5), nullable=False)
    city = Column(String(100), nullable=False)
    sector = Column(String(100), nullable=False)
class Finance(Base):  # type: ignore
    """ORM mapping for the ``finance`` table holding financial figures of a company."""

    __tablename__ = "finance"

    id = Column(Integer, primary_key=True)

    # Together these two columns reference ``Company`` (``hr``, ``court_id``);
    # the composite foreign key itself is declared in ``__table_args__``.
    company_hr = Column(Integer)
    company_court = Column(Integer)

    # ``datetime.now`` is passed as the column default.
    date = Column(DateTime, default=datetime.now)

    # Financial figures, all stored as floats.
    total_volume = Column(Float)
    ebit = Column(Float)
    ebitda = Column(Float)
    ebit_margin = Column(Float)
    total_balance = Column(Float)
    equity = Column(Float)
    debt = Column(Float)
    return_on_equity = Column(Float)
    capital_turnover_rate = Column(Float)

    company = relationship("Company")

    __table_args__ = (
        ForeignKeyConstraint(
            columns=[company_hr, company_court],
            refcolumns=[Company.hr, Company.court_id],
        ),
    )
class SentimentTypeEnum(enum.Enum):
    """Closed set of categories a sentiment value can be recorded for.

    Each member's value is identical to its name.
    """

    employee_voting = "employee_voting"
    sustainability = "sustainability"
    environmental_aspects = "environmental_aspects"
    perception = "perception"
class Sentiment(Base):  # type: ignore
    """ORM mapping for the ``sentiment`` table: a typed sentiment value for a company."""

    __tablename__ = "sentiment"

    id = Column(Integer, primary_key=True)

    # Together these two columns reference ``Company`` (``hr``, ``court_id``);
    # the composite foreign key itself is declared in ``__table_args__``.
    company_hr = Column(Integer)
    company_court = Column(Integer)

    # ``datetime.now`` is passed as the column default.
    date = Column(DateTime, default=datetime.now)

    # Category and value are mandatory; the source is optional free text.
    sentiment_type = Column(Enum(SentimentTypeEnum), nullable=False)
    value = Column(Float, nullable=False)
    source = Column(String(100))

    # NOTE(review): despite its name, this relationship targets ``Company``.
    sentiment = relationship("Company")

    __table_args__ = (
        ForeignKeyConstraint(
            columns=[company_hr, company_court],
            refcolumns=[Company.hr, Company.court_id],
        ),
    )
class Person(Base):  # type: ignore
    """ORM mapping for the ``person`` table."""

    __tablename__ = "person"

    # Integer primary key.
    id = Column(Integer, primary_key=True)

    # Given name and surname are mandatory.
    name = Column(String(100), nullable=False)
    surname = Column(String(100), nullable=False)

    # Optional free-text column of up to 100 characters.
    works_for = Column(String(100))
class RelationTypeEnum(enum.Enum):
    """Closed set of roles a person can hold in relation to a company.

    Member names are lower snake case; each value is the same role spelled
    with capitalised words joined by underscores.
    """

    executive = "Executive"
    auditor = "Auditor"
    supervisory_board = "Supervisory_Board"
    managing_director = "Managing_Director"
    authorized_representative = "Authorized_Representative"
    final_auditor = "Final_Auditor"
class PersonRelation(Base):  # type: ignore
    """ORM mapping for the ``person_relation`` table linking a person to a company."""

    __tablename__ = "person_relation"

    id = Column(Integer, primary_key=True)

    # Together these two columns reference ``Company`` (``hr``, ``court_id``);
    # the composite foreign key itself is declared in ``__table_args__``.
    company_hr = Column(Integer)
    company_court = Column(Integer)

    # References ``person.id``.
    person_id = mapped_column(ForeignKey("person.id"))

    # Both date columns receive ``datetime.now`` as their default.
    date_from = Column(DateTime, default=datetime.now)
    date_to = Column(DateTime, default=datetime.now)

    # Mandatory kind of relation.
    relation = Column(Enum(RelationTypeEnum), nullable=False)

    __table_args__ = (
        ForeignKeyConstraint(
            columns=[company_hr, company_court],
            refcolumns=[Company.hr, Company.court_id],
        ),
    )
class RelationTypeCompanyEnum(enum.Enum):
    """Closed set of relation kinds between two companies.

    Each member's value is identical to its name.
    """

    participates_with = "participates_with"
    has_shares_of = "has_shares_of"
    is_supplied_by = "is_supplied_by"
    works_with = "works_with"
class CompanyRelation(Base):  # type: ignore
    """ORM mapping for the ``company_relation`` table relating two companies."""

    __tablename__ = "company_relation"
    __table_args__ = {"extend_existing": True}

    id = Column(Integer, primary_key=True)

    # Plain integer columns; no foreign key is declared on them here.
    company1_id = Column(Integer, nullable=False)
    company2_id = Column(Integer, nullable=False)

    # Both date columns receive ``datetime.now`` as their default.
    date_from = Column(DateTime, default=datetime.now)
    date_to = Column(DateTime, default=datetime.now)

    # Mandatory kind of relation.
    relation = Column(Enum(RelationTypeCompanyEnum), nullable=False)

View File

@ -0,0 +1 @@
"""Mongo utils module."""

View File

@ -4,7 +4,7 @@ from unittest.mock import Mock
import pytest import pytest
from aki_prj23_transparenzregister.models.company import Company from aki_prj23_transparenzregister.models.company import Company
from aki_prj23_transparenzregister.utils.company_mongo_service import ( from aki_prj23_transparenzregister.utils.mongo.company_mongo_service import (
CompanyMongoService, CompanyMongoService,
) )
@ -21,7 +21,8 @@ def mock_mongo_connector(mocker) -> Mock:
""" """
mock = Mock() mock = Mock()
mocker.patch( mocker.patch(
"aki_prj23_transparenzregister.utils.mongo.MongoConnector", return_value=mock "aki_prj23_transparenzregister.utils.mongo.connector.MongoConnector",
return_value=mock,
) )
return mock return mock

View File

@ -1,6 +1,9 @@
from unittest.mock import patch from unittest.mock import patch
from aki_prj23_transparenzregister.utils.mongo import MongoConnection, MongoConnector from aki_prj23_transparenzregister.utils.mongo.connector import (
MongoConnection,
MongoConnector,
)
def test_get_conn_string_no_credentials(): def test_get_conn_string_no_credentials():

View File

@ -3,7 +3,7 @@ from unittest.mock import Mock, patch
import pytest import pytest
from aki_prj23_transparenzregister.models.news import News from aki_prj23_transparenzregister.models.news import News
from aki_prj23_transparenzregister.utils.news_mongo_service import ( from aki_prj23_transparenzregister.utils.mongo.news_mongo_service import (
MongoEntryTransformer, MongoEntryTransformer,
MongoNewsService, MongoNewsService,
) )
@ -21,7 +21,8 @@ def mock_mongo_connector(mocker) -> Mock:
""" """
mock = Mock() mock = Mock()
mocker.patch( mocker.patch(
"aki_prj23_transparenzregister.utils.mongo.MongoConnector", return_value=mock "aki_prj23_transparenzregister.utils.mongo.connector.MongoConnector",
return_value=mock,
) )
return mock return mock
@ -61,7 +62,7 @@ def test_get_by_id_with_result(mock_mongo_connector, mock_collection):
service = MongoNewsService(mock_mongo_connector) service = MongoNewsService(mock_mongo_connector)
with patch( with patch(
"aki_prj23_transparenzregister.utils.news_mongo_service.MongoEntryTransformer.transform_outgoing" "aki_prj23_transparenzregister.utils.mongo.news_mongo_service.MongoEntryTransformer.transform_outgoing"
) as mock_out: ) as mock_out:
mock_collection.find.return_value = [{}] mock_collection.find.return_value = [{}]
mock_out.return_value = {} mock_out.return_value = {}
@ -81,7 +82,7 @@ def test_insert(mock_mongo_connector, mock_collection):
service = MongoNewsService(mock_mongo_connector) service = MongoNewsService(mock_mongo_connector)
with patch( with patch(
"aki_prj23_transparenzregister.utils.news_mongo_service.MongoEntryTransformer.transform_ingoing" "aki_prj23_transparenzregister.utils.mongo.news_mongo_service.MongoEntryTransformer.transform_ingoing"
) as mock_in: ) as mock_in:
mock_collection.insert_one.return_value = {} mock_collection.insert_one.return_value = {}
mock_in.return_value = {} mock_in.return_value = {}