SQL fixes after new mongo ingest (#199)

This commit is contained in:
Philipp Horstenkamp 2023-10-06 18:22:19 +02:00 committed by GitHub
parent 8bb27f5195
commit b1ca268a62
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 181 additions and 202 deletions

1
.gitignore vendored
View File

@ -234,3 +234,4 @@ replay_pid*
secrets*.json
*.db-journal
*.db
remote.json

View File

@ -1,5 +1,6 @@
FROM python:3.11-slim as base
LABEL AUTHOR="AKI Projektseminar 23"
ENV SQLALCHEMY_SILENCE_UBER_WARNING="1"
ARG APP_HOME="transparenzregister"
WORKDIR /${APP_HOME}/

View File

@ -175,7 +175,6 @@ def kennzahlen_layout(selected_finance_df: pd.DataFrame) -> html:
"""Create metrics tab.
Args:
selected_company_id: Id of the chosen company in the dropdown.
selected_finance_df: A dataframe containing all available finance information of the companies.
Returns:

View File

@ -28,8 +28,8 @@ def layout(value: str = "1") -> html:
# get all necessary data of the selected person
selected_person_stats = data_elements.get_person_data(session).loc[person_id]
selected_person_name = (
selected_person_stats["person_name"]
selected_person_stats["person_firstname"]
+ " "
+ selected_person_stats["person_surname"]
+ selected_person_stats["person_lastname"]
)
return header_elements.create_selection_header(selected_person_name)

View File

@ -18,8 +18,11 @@ from aki_prj23_transparenzregister.config.config_providers import (
ConfigProvider,
get_config_provider,
)
from aki_prj23_transparenzregister.models.company import CapitalTypeEnum, CurrencyEnum
from aki_prj23_transparenzregister.utils.enum_types import RelationTypeEnum
from aki_prj23_transparenzregister.models.company import (
CapitalTypeEnum,
CompanyRelationshipEnum,
CurrencyEnum,
)
from aki_prj23_transparenzregister.utils.logger_config import (
add_logger_options_to_argparse,
configer_logger,
@ -46,6 +49,10 @@ class DataInvalidError(ValueError):
super().__init__(message)
class CompanyNotFoundError(ValueError):
    """A company partner is missing.

    Raised when a company lookup finds no matching entry, so that callers can
    tell a missing relation partner apart from other value errors.
    """
def _refine_district_court_entry(name: str, city: str | None) -> tuple[str, str]:
"""Refines the district court entry and tests for consistency.
@ -105,8 +112,8 @@ def _read_person_id(
"""
return (
db.query(entities.Person.id)
.filter(entities.Person.name == name)
.filter(entities.Person.surname == surname)
.filter(entities.Person.firstname == name)
.filter(entities.Person.lastname == surname)
.filter(entities.Person.date_of_birth == date_of_birth)
.scalar()
)
@ -136,9 +143,14 @@ def get_district_court_id(name: str, city: str | None, db: Session) -> int:
return court.id # type: ignore
@cached(cache=LRUCache(maxsize=2000), key=lambda name, surname, date_of_birth, db: hash((name, surname, date_of_birth))) # type: ignore
@cached(
cache=LRUCache(maxsize=2000),
key=lambda firstname, lastname, date_of_birth, db: hash(
(firstname, lastname, date_of_birth)
),
) # type: ignore
def get_person_id(
name: str, surname: str, date_of_birth: date | str | None, db: Session
firstname: str, lastname: str, date_of_birth: date | str | None, db: Session
) -> int:
"""Identifies the id of and court.
@ -146,8 +158,8 @@ def get_person_id(
A lru_cache is used to increase the speed of this application.
Args:
name: The first name of the person.
surname: The last name of the person.
firstname: The first name of the person.
lastname: The last name of the person.
date_of_birth: The date the person was born.
db: A session to connect to an SQL db via SQLAlchemy.
@ -156,14 +168,19 @@ def get_person_id(
"""
if isinstance(date_of_birth, str) and date_of_birth:
date_of_birth = date.fromisoformat(date_of_birth)
if not name or not surname or not date_of_birth:
if not firstname or not lastname or not date_of_birth:
raise DataInvalidError(
f'At least one of the three values name: "{name}", surname: "{surname}" or date_of_birth: "{date_of_birth}" is empty.'
f'At least one of the three values name: "{firstname}", '
f'surname: "{lastname}" or date_of_birth: "{date_of_birth}" is empty.'
)
assert isinstance(date_of_birth, date) # noqa: S101
if (person_id := _read_person_id(name, surname, date_of_birth, db)) is not None:
if (
person_id := _read_person_id(firstname, lastname, date_of_birth, db)
) is not None:
return person_id
person = entities.Person(name=name, surname=surname, date_of_birth=date_of_birth)
person = entities.Person(
firstname=firstname, lastname=lastname, date_of_birth=date_of_birth
)
db.add(person)
db.commit()
return person.id # type: ignore
@ -207,7 +224,9 @@ def get_company_id(
.scalar() # todo ensure uniqueness
)
if company_id is None:
raise KeyError(f"No corresponding company could be found to {name}.")
raise CompanyNotFoundError(
f"No corresponding company could be found to {name}."
)
return company_id
@ -360,6 +379,81 @@ def company_relation_missing(
db.add(entities.MissingCompany(name=name, city=city, zip_code=zip_code))
def add_person_relation(person: dict[str, Any], company_id: int, db: Session) -> None:
    """Links a person to a company.

    The person is resolved to an id via ``get_person_id``; afterwards a
    ``PersonRelation`` is added to the session.

    Args:
        person: The relationship and the relationship partner. The keys
            ``"name"`` (unpacked as keyword arguments for ``get_person_id``)
            and ``"date_of_birth"`` are required, ``"role"`` is optional.
        company_id: The id of the company the relation is rooted in.
        db: A session to connect to an SQL db via SQLAlchemy.

    Raises:
        DataInvalidError: If the person data is rejected although a date of
            birth was supplied.
    """
    birth_date: str = person["date_of_birth"]
    try:
        person_id = get_person_id(**person["name"], date_of_birth=birth_date, db=db)
    except DataInvalidError:
        if birth_date:
            # A date of birth was supplied, so the rejection has another cause.
            raise
        # TODO: log a warning that names the person without a date of birth.
        db.rollback()
        return
    except TypeError as error:
        # TODO: remove this catcher for unhashable (dict valued) name parts.
        if "unhashable type: 'dict'" not in str(error):
            raise
        return
    except Exception:
        logger.exception("Test except")
        raise

    db.add(
        entities.PersonRelation(
            person_id=person_id,
            company_id=company_id,
            relation=person.get("role"),
        )
    )
def add_company_relation(
    company_relation: dict[str, Any], company_id: int, db: Session
) -> None:
    """Links a company to another company.

    If the partner company cannot be found, it is recorded through
    ``company_relation_missing`` and no relation is added.

    Args:
        company_relation: The relationship and the relationship partner. The
            keys ``"name"`` and ``"location"`` are required, ``"role"`` is
            optional.
        company_id: The id of the company the relation is rooted in.
        db: A session to connect to an SQL db via SQLAlchemy.

    Raises:
        DataInvalidError: If the partner resolves to ``company_id`` itself.
    """
    partner_name = company_relation["name"]
    partner_location = company_relation["location"]
    try:
        relation_to: int = get_company_id(partner_name, **partner_location, db=db)
    except CompanyNotFoundError as err:
        # An unknown partner is not an error: note it down and stop here.
        logger.debug(err)
        company_relation_missing(partner_name, **partner_location, db=db)
        return

    if company_id == relation_to:
        raise DataInvalidError(
            "For a valid relation both parties can't be the same entity."
        )

    db.add(
        entities.CompanyRelation(
            company_id=company_id,
            relation=company_relation.get("role"),
            company2_id=relation_to,
        )
    )
@logger.catch(level="WARNING", reraise=True)
def add_relationship(
relationship: dict[str, Any], company_id: int, db: Session
@ -371,46 +465,18 @@ def add_relationship(
company_id: The id of the company the relation is rooted in.
db: A session to connect to an SQL db via SQLAlchemy.
"""
relation_type = RelationTypeEnum.get_enum_from_name(relationship.get("role"))
relation: entities.CompanyRelation | entities.PersonRelation
if "date_of_birth" in relationship:
name = relationship["name"]
date_of_brith: str = relationship["date_of_birth"]
person_id = get_person_id(
name["firstname"],
name["lastname"],
date_of_brith,
db,
)
relation = entities.PersonRelation(
person_id=person_id,
company_id=company_id,
relation=relation_type,
)
else:
partner_type = CompanyRelationshipEnum(relationship["type"])
if partner_type == CompanyRelationshipEnum.PERSON:
add_person_relation(relationship, company_id, db)
elif partner_type == CompanyRelationshipEnum.COMPANY:
add_company_relation(relationship, company_id, db)
try:
relation_to: int = get_company_id(
relationship["description"],
**relationship["location"],
db=db,
)
except KeyError as err:
company_relation_missing(
relationship["description"], **relationship["location"], db=db
)
logger.warning(err)
return
if company_id == relation_to:
raise DataInvalidError(
"For a valid relation both parties can't be the same entity."
)
relation = entities.CompanyRelation(
company_id=company_id,
relation=relation_type,
company2_id=relation_to,
)
db.add(relation)
db.commit()
except Exception:
logger.exception("Debug")
# print("break")
add_company_relation(relationship, company_id, db)
raise
def add_relationships(companies: list[dict[str, dict]], db: Session) -> None:
@ -561,3 +627,8 @@ def transfer_data_cli() -> None:
parsed = parser.parse_args(sys.argv[1:])
configer_logger(namespace=parsed)
transfer_data(get_config_provider(parsed.config))
# Direct-execution entry point: configure the logger, then run the transfer
# with the configuration loaded from "secrets.json".
if __name__ == "__main__":
    configer_logger(level="info", path="")
    transfer_data(get_config_provider("secrets.json"))

View File

@ -2,64 +2,6 @@
import enum
class RelationTypeEnum(enum.IntEnum):
"""RelationTypeEnum."""
GESCHAEFTSFUEHRER = enum.auto()
KOMMANDITIST = enum.auto()
VORSTAND = enum.auto()
PROKURIST = enum.auto()
LIQUIDATOR = enum.auto()
INHABER = enum.auto()
PERSOENLICH_HAFTENDER_GESELLSCHAFTER = enum.auto()
PARTNER = enum.auto()
DIREKTOR = enum.auto()
RECHTSNACHFOLGER = enum.auto()
ORGANISATION = enum.auto()
@staticmethod
def get_enum_from_name(relation_name: str | None) -> "RelationTypeEnum":
"""Translates relation name into a RelationTypeEnum.
If no translation can be found a warning is given.
Args:
relation_name: The name of the relation to be translated.
Returns:
The identified translation or None if no translation can be found.
"""
if relation_name is None:
raise ValueError("A relation type needs to be given.")
relation_name = (
relation_name.strip()
.replace("(in)", "")
.replace("(r)", "r")
.strip()
.lower()
)
name = {
"geschäftsführer": RelationTypeEnum.GESCHAEFTSFUEHRER,
"kommanditist": RelationTypeEnum.KOMMANDITIST,
"vorstand": RelationTypeEnum.VORSTAND,
"vorstandsvorsitzender": RelationTypeEnum.VORSTAND,
"prokurist": RelationTypeEnum.PROKURIST,
"liquidator": RelationTypeEnum.LIQUIDATOR,
"inhaber": RelationTypeEnum.INHABER,
"persönlich haftender gesellschafter": RelationTypeEnum.PERSOENLICH_HAFTENDER_GESELLSCHAFTER,
"organisation": RelationTypeEnum.ORGANISATION,
"partner": RelationTypeEnum.PARTNER,
"direktor": RelationTypeEnum.DIREKTOR,
"geschäftsführender direktor": RelationTypeEnum.DIREKTOR,
"mitglied des leitungsorgans": RelationTypeEnum.VORSTAND,
"rechtsnachfolger": RelationTypeEnum.RECHTSNACHFOLGER,
}.get(relation_name)
if name is not None:
return name
raise ValueError(f'Relation type "{relation_name}" is not yet implemented!')
class SentimentTypeEnum(enum.Enum):
"""SentimentTypeEnum."""

View File

@ -7,9 +7,9 @@ from aki_prj23_transparenzregister.models.company import (
CapitalTypeEnum,
CompanyTypeEnum,
CurrencyEnum,
RelationshipRoleEnum,
)
from aki_prj23_transparenzregister.utils.enum_types import (
RelationTypeEnum,
SentimentTypeEnum,
)
from aki_prj23_transparenzregister.utils.sql.connector import Base
@ -83,12 +83,12 @@ class Person(Base):
"""Person."""
__tablename__ = "person"
__table_args__ = (sa.UniqueConstraint("name", "surname", "date_of_birth"),)
__table_args__ = (sa.UniqueConstraint("firstname", "lastname", "date_of_birth"),)
# TODO add a constraint that asks for a minlength of 2 for firstname and lastname
id = sa.Column(sa.Integer, primary_key=True)
name = sa.Column(sa.String(100), nullable=False)
surname = sa.Column(sa.String(100), nullable=False)
firstname = sa.Column(sa.String(100), nullable=False)
lastname = sa.Column(sa.String(100), nullable=False)
date_of_birth = sa.Column(sa.Date, nullable=False)
works_for = sa.Column(sa.String(100), nullable=True)
@ -153,7 +153,7 @@ class Relation(Base):
date_from = sa.Column(sa.DateTime(timezone=True), nullable=True)
date_to = sa.Column(sa.DateTime(timezone=True), nullable=True)
relation = sa.Column(sa.Enum(RelationTypeEnum), nullable=False)
relation = sa.Column(sa.Enum(RelationshipRoleEnum), nullable=False)
# create own relation type and person_relation object

View File

@ -109,28 +109,28 @@ def full_db(empty_db: Session, finance_statements: list[dict[str, Any]]) -> Sess
entities.DistrictCourt(name="Amtsgericht Bochum", city="Bochum"),
entities.DistrictCourt(name="Amtsgericht Dortmund", city="Dortmund"),
entities.Person(
name="Max",
surname="Mustermann",
firstname="Max",
lastname="Mustermann",
date_of_birth=datetime.date(2023, 1, 1),
),
entities.Person(
name="Sabine",
surname="Mustermann",
firstname="Sabine",
lastname="Mustermann",
date_of_birth=datetime.date(2023, 1, 1),
),
entities.Person(
name="Some Firstname",
surname="Some Surname",
firstname="Some Firstname",
lastname="Some Surname",
date_of_birth=datetime.date(2023, 1, 1),
),
entities.Person(
name="Some Firstname",
surname="Some Surname",
firstname="Some Firstname",
lastname="Some Surname",
date_of_birth=datetime.date(2023, 1, 2),
),
entities.Person(
name="Other Firstname",
surname="Other Surname",
firstname="Other Firstname",
lastname="Other Surname",
date_of_birth=datetime.date(2023, 1, 2),
),
]

View File

@ -56,6 +56,14 @@ def test_main_of_app_env(
MagicMock()
monkeypatch.setattr(sys, "argv", [sys.argv[0], "ENV" if upper else "env"])
mocked = mocker.patch("aki_prj23_transparenzregister.ui.app.Dash.run")
mocked_config = mocker.patch(
"aki_prj23_transparenzregister.ui.app.get_config_provider"
)
mocked_connector = mocker.patch(
"aki_prj23_transparenzregister.ui.app.connector.get_session"
)
mocked.return_value = None
mocked_config.return_value = None
mocked_connector.return_value = None
app.main()
mocked.assert_called_once()

View File

@ -16,10 +16,12 @@ from sqlalchemy.orm import Session
from aki_prj23_transparenzregister.models.company import (
CapitalTypeEnum,
CompanyRelationshipEnum,
CompanyTypeEnum,
CurrencyEnum,
)
from aki_prj23_transparenzregister.utils import data_transfer
from aki_prj23_transparenzregister.utils.data_transfer import CompanyNotFoundError
from aki_prj23_transparenzregister.utils.sql import entities
@ -219,7 +221,7 @@ def test_get_company_id_not_found(
full_db: Session,
) -> None:
"""Test the accessing of missing companies."""
with pytest.raises(KeyError):
with pytest.raises(CompanyNotFoundError):
data_transfer.get_company_id(name, zip_code, city, full_db)
@ -431,35 +433,21 @@ def test_add_companies_corrupted_data(
assert spy_debug.call_count == len(companies) - 1
@pytest.mark.parametrize("company_id", [0, 1, 2, 3, 4])
def test_add_relationship_no_relation(company_id: int, full_db: Session) -> None:
    """Checks that a relationship without any role is rejected with a ValueError."""
    relationship_without_role: dict = {}
    with pytest.raises(ValueError, match="A relation type needs to be given."):
        data_transfer.add_relationship(relationship_without_role, company_id, full_db)
@pytest.mark.parametrize("company_id", [0, 1, 2, 3, 4])
def test_add_relationship_unknown_relation(company_id: int, full_db: Session) -> None:
    """Checks that a relationship with an unknown role is rejected with a ValueError."""
    relationship_with_unknown_role = {"role": "something strange"}
    with pytest.raises(ValueError, match="Relation type .* is not yet implemented!"):
        data_transfer.add_relationship(
            relationship_with_unknown_role, company_id, full_db
        )
@pytest.mark.parametrize("company_id", [1, 2, 3])
@pytest.mark.parametrize(
("firstname", "surname", "date_of_birth"),
("firstname", "lastname", "date_of_birth"),
[
("Max", "Mustermann", "2023-01-01"),
("Some Firstname", "Some Surname", "2023-01-01"),
("Other Firstname", "Other Surname", "1900-01-02"),
],
)
@pytest.mark.parametrize("role", ["Partner", "direktor", "liquidator"])
@pytest.mark.parametrize(
"role", ["Prokurist(in)", "Geschäftsführer(in)", "Geschäftsführer"]
)
def test_add_relationship_person( # noqa: PLR0913
firstname: str,
surname: str,
lastname: str,
date_of_birth: str,
full_db: Session,
company_id: int,
@ -469,8 +457,9 @@ def test_add_relationship_person( # noqa: PLR0913
relation = {
"name": {
"firstname": firstname,
"lastname": surname,
"lastname": lastname,
},
"type": CompanyRelationshipEnum.PERSON.value,
"date_of_birth": date.fromisoformat(date_of_birth),
"role": role,
}
@ -483,7 +472,6 @@ def test_add_relationship_person( # noqa: PLR0913
[
("Max", None, "2023-01-01"),
(None, "Some Surname", "2023-01-01"),
("Other Firstname", "Other Surname", None),
],
)
@pytest.mark.parametrize("role", ["Partner"])
@ -505,6 +493,7 @@ def test_add_relationship_person_missing_data( # noqa: PLR0913
},
"date_of_birth": date_of_birth if date_of_birth else None,
"role": role,
"type": CompanyRelationshipEnum.PERSON.value,
}
with pytest.raises(
data_transfer.DataInvalidError, match="At least one of the three values name:"
@ -534,12 +523,13 @@ def test_add_relationship_company(
"""Tests if a relationship to another company can be added."""
data_transfer.add_relationship(
{
"description": company_name,
"name": company_name,
"location": {
"zip_code": zip_code,
"city": city,
},
"role": "organisation",
"type": CompanyRelationshipEnum.COMPANY.value,
},
company_id,
full_db,
@ -569,12 +559,13 @@ def test_add_relationship_company_self_reference(
):
data_transfer.add_relationship(
{
"description": company_name,
"name": company_name,
"location": {
"zip_code": zip_code,
"city": city,
},
"role": "organisation",
"type": CompanyRelationshipEnum.COMPANY.value,
},
company_id,
full_db,
@ -597,7 +588,7 @@ def test_add_relationship_company_unknown(
mocker: MockerFixture,
) -> None:
"""Tests if a relationship to another company can be added."""
spy_warning = mocker.spy(data_transfer.logger, "warning")
spy_debug = mocker.spy(data_transfer.logger, "debug")
spy_info = mocker.spy(data_transfer.logger, "info")
data_transfer.add_relationship(
{
@ -607,11 +598,13 @@ def test_add_relationship_company_unknown(
"city": city,
},
"role": "organisation",
"type": CompanyRelationshipEnum.COMPANY.value,
"name": "company name",
},
company_id,
full_db,
)
spy_warning.assert_called_once()
spy_debug.assert_called_once()
spy_info.assert_not_called()
@ -622,6 +615,7 @@ def test_add_relationships_none(empty_relations: list, full_db: Session) -> None
# noinspection SpellCheckingInspection
@pytest.mark.working_on()
@pytest.mark.parametrize(
"documents",
[
@ -649,28 +643,33 @@ def test_add_relationships_none(empty_relations: list, full_db: Session) -> None
"date_of_birth": "1961-02-09",
"location": {"city": "Stuttgart"},
"role": "Geschäftsführer",
"type": CompanyRelationshipEnum.PERSON.value,
},
{
"name": {"firstname": "First Person", "lastname": "Jifpa"},
"date_of_birth": "1976-04-20",
"location": {"city": "Stuttgart"},
"role": "Geschäftsführer",
"type": CompanyRelationshipEnum.PERSON.value,
},
{
"name": {"firstname": "", "lastname": "Jiapa"},
"date_of_birth": "1976-04-20",
"location": {"city": "Stuttgart"},
"role": "Geschäftsführer",
"type": CompanyRelationshipEnum.PERSON.value,
},
{
"name": {"firstname": "Something", "lastname": ""},
"date_of_birth": "12i3u",
"location": {"city": "Stuttgart"},
"role": "Geschäftsführer",
"type": CompanyRelationshipEnum.PERSON.value,
},
{
"name": {"firstname": "First Person", "lastname": "Jipha"},
"name": {"lastname": "Jipha"},
"date_of_birth": "1976-04-20",
"type": CompanyRelationshipEnum.PERSON.value,
},
],
"yearly_results": {},
@ -730,7 +729,7 @@ def test_relationships(documents: list[dict[str, Any]], full_db: Session) -> Non
"company_id": {0: 1, 1: 1},
"date_from": {0: pd.NaT, 1: pd.NaT},
"date_to": {0: pd.NaT, 1: pd.NaT},
"relation": {0: "GESCHAEFTSFUEHRER", 1: "GESCHAEFTSFUEHRER"},
"relation": {0: "Geschäftsführer", 1: "Geschäftsführer"},
}
),
)
@ -739,7 +738,7 @@ def test_relationships(documents: list[dict[str, Any]], full_db: Session) -> Non
pd.DataFrame(
{
"id": {0: 1, 1: 2, 2: 3, 3: 4, 4: 5, 5: 6, 6: 7},
"name": {
"firstname": {
0: "Max",
1: "Sabine",
2: "Some Firstname",
@ -748,7 +747,7 @@ def test_relationships(documents: list[dict[str, Any]], full_db: Session) -> Non
5: "Second person",
6: "First Person",
},
"surname": {
"lastname": {
0: "Mustermann",
1: "Mustermann",
2: "Some Surname",
@ -766,15 +765,7 @@ def test_relationships(documents: list[dict[str, Any]], full_db: Session) -> Non
5: pd.Timestamp("1961-02-09 00:00:00"),
6: pd.Timestamp("1976-04-20 00:00:00"),
},
"works_for": {
0: None,
1: None,
2: None,
3: None,
4: None,
5: None,
6: None,
},
"works_for": {_: None for _ in range(7)},
}
),
)
@ -1203,6 +1194,5 @@ def test_transfer_data_cli_env(
)
spy = mocker.spy(data_transfer, "transfer_data")
# with pytest.raises(KeyError):
data_transfer.transfer_data_cli()
spy.assert_called_once()

View File

@ -1,40 +1,7 @@
"""Tests for the enumeration types."""
import pytest
from aki_prj23_transparenzregister.utils import enum_types
def test_import() -> None:
    """Tests if enum_types can be imported.

    This is a smoke test: it only asserts that the imported module object is truthy.
    """
    assert enum_types
@pytest.mark.parametrize("relation_name", ["Vorstand", "Prokurist", "Direktor"])
@pytest.mark.parametrize("changes", ["lower", "upper", None])
def test_relation_type_enum_from_string(
    relation_name: str, changes: str | None
) -> None:
    """Checks that known relation names translate to an enum member in any casing."""
    # Pick the requested case conversion; anything else leaves the name as it is.
    recase = {"lower": str.lower, "upper": str.upper}.get(changes, str)
    translated = enum_types.RelationTypeEnum.get_enum_from_name(recase(relation_name))
    assert isinstance(translated, enum_types.RelationTypeEnum)
@pytest.mark.parametrize("relation_name", ["does Not Exists", "Also not"])
@pytest.mark.parametrize("changes", ["lower", "upper", None])
def test_relation_type_enum_from_string_wrong(
    relation_name: str, changes: str | None
) -> None:
    """Checks that unknown relation names are rejected with a ValueError in any casing."""
    # Pick the requested case conversion; anything else leaves the name as it is.
    recase = {"lower": str.lower, "upper": str.upper}.get(changes, str)
    unknown_name = recase(relation_name)
    with pytest.raises(ValueError, match='Relation type ".*" is not yet implemented!'):
        enum_types.RelationTypeEnum.get_enum_from_name(unknown_name)