mirror of
https://github.com/fhswf/aki_prj23_transparenzregister.git
synced 2025-06-22 04:43:54 +02:00
Added longitude/latitude and positional accuracy to the company data (#180)
This commit is contained in:
794
poetry.lock
generated
794
poetry.lock
generated
File diff suppressed because it is too large
Load Diff
@ -33,7 +33,6 @@ description = "Data Integration, Extraction, and Visualization using Text Mining
|
||||
documentation = "https://cuddly-waffle-r416zgy.pages.github.io/"
|
||||
homepage = "https://cuddly-waffle-r416zgy.pages.github.io/"
|
||||
keywords = ["deutschland", "economy", "transparenzregister", "dataintegration", "handelsregister"]
|
||||
|
||||
maintainers = [
|
||||
"Philipp Horstenkamp <philipp@horstenkamp.de>",
|
||||
"Tristan Nolde <contact@trisnol.dev>",
|
||||
@ -49,8 +48,8 @@ repository = "https://github.com/fhswf/aki_prj23_transparenzregister"
|
||||
version = "0.1.0"
|
||||
|
||||
[tool.poetry.dependencies]
|
||||
aenum = "^3.1.15"
|
||||
SQLAlchemy = "^1.4.49"
|
||||
aenum = "^3.1.15"
|
||||
cachetools = "^5.3.1"
|
||||
dash = "^2.13.0"
|
||||
dash-auth = "^2.0.0"
|
||||
@ -58,6 +57,7 @@ dash-bootstrap-components = "^1.5.0"
|
||||
deutschland = {git = "https://github.com/TrisNol/deutschland.git", branch = "hotfix/python-3.11-support"}
|
||||
loguru = "^0.7.0"
|
||||
matplotlib = "^3.7.2"
|
||||
pgeocode = "^0.4.1"
|
||||
psycopg2-binary = "^2.9.7"
|
||||
pymongo = "^4.5.0"
|
||||
python = "^3.11"
|
||||
|
@ -1,7 +1,9 @@
|
||||
"""This module contains the data transfer and refinement functionalities between staging and production DB."""
|
||||
from datetime import date
|
||||
from functools import lru_cache
|
||||
from typing import Any
|
||||
|
||||
import pgeocode
|
||||
import sqlalchemy as sa
|
||||
from cachetools import LRUCache, cached
|
||||
from loguru import logger
|
||||
@ -22,6 +24,8 @@ from aki_prj23_transparenzregister.utils.sql.connector import (
|
||||
)
|
||||
from aki_prj23_transparenzregister.utils.string_tools import simplify_string
|
||||
|
||||
# Module-level pgeocode postal-code lookup for country code "de";
# created once at import and queried by get_geocodes.
nomi = pgeocode.Nominatim("de")
|
||||
|
||||
|
||||
class DataInvalidError(ValueError):
|
||||
"""This error is thrown if a db entry can't be parsed for the production db."""
|
||||
@ -192,6 +196,26 @@ def get_company_id(
|
||||
return company_id
|
||||
|
||||
|
||||
@lru_cache(1000)
|
||||
def get_geocodes(
|
||||
zip_code: str,
|
||||
) -> dict[str, float]:
|
||||
"""Adds additional geo positioning data to locations.
|
||||
|
||||
Args:
|
||||
zip_code: The zipcode where the company is located.
|
||||
|
||||
Returns:
|
||||
latitude, longitude and positional accuracy.
|
||||
"""
|
||||
if not zip_code:
|
||||
return {}
|
||||
zip_query = nomi.query_postal_code(zip_code)[["latitude", "longitude", "accuracy"]]
|
||||
if zip_query.isna().any():
|
||||
return {}
|
||||
return dict(zip_query[["latitude", "longitude"]], pos_accuracy=zip_query.accuracy)
|
||||
|
||||
|
||||
@logger.catch(level="WARNING", reraise=True)
|
||||
def add_company(company: dict[str, Any], db: Session) -> None:
|
||||
"""Add a company with all its data found in the mongodb company entry.
|
||||
@ -218,6 +242,7 @@ def add_company(company: dict[str, Any], db: Session) -> None:
|
||||
zip_code=simplify_string(location.get("zip_code")),
|
||||
street=simplify_string(location.get("street")),
|
||||
last_update=last_update,
|
||||
**get_geocodes(location.get("zip_code")), # type: ignore
|
||||
)
|
||||
db.add(company_entry)
|
||||
db.commit()
|
||||
|
@ -44,6 +44,10 @@ class Company(Base):
|
||||
street = sa.Column(sa.String(100), nullable=True)
|
||||
zip_code = sa.Column(sa.String(5), nullable=True)
|
||||
city = sa.Column(sa.String(100), nullable=True)
|
||||
longitude = sa.Column(sa.Float, nullable=True)
|
||||
latitude = sa.Column(sa.Float, nullable=True)
|
||||
pos_accuracy = sa.Column(sa.Float, nullable=True)
|
||||
|
||||
last_update = sa.Column(sa.Date, nullable=False)
|
||||
sector = sa.Column(sa.String(100), nullable=True)
|
||||
|
||||
|
@ -144,18 +144,24 @@ def full_db(empty_db: Session, finance_statements: list[dict[str, Any]]) -> Sess
|
||||
court_id=2,
|
||||
name="Some Company GmbH",
|
||||
street="Sesamstr.",
|
||||
zip_code="12345",
|
||||
zip_code="58644",
|
||||
city="TV City",
|
||||
last_update=datetime.date.fromisoformat("2023-01-01"),
|
||||
latitude=51.3246,
|
||||
longitude=7.6968,
|
||||
pos_accuracy=4.0,
|
||||
),
|
||||
entities.Company(
|
||||
hr="HRB 123",
|
||||
court_id=1,
|
||||
name="Other Company GmbH",
|
||||
street="Sesamstr.",
|
||||
zip_code="12345",
|
||||
zip_code="58636",
|
||||
city="TV City",
|
||||
last_update=datetime.date.fromisoformat("2023-01-01"),
|
||||
latitude=51.38,
|
||||
longitude=7.7032,
|
||||
pos_accuracy=4.0,
|
||||
),
|
||||
entities.Company(
|
||||
hr="HRB 12",
|
||||
|
@ -26,8 +26,11 @@ def test_get_company_data(full_db: Session) -> None:
|
||||
2: "Third Company GmbH",
|
||||
},
|
||||
"company_street": {0: "Sesamstr.", 1: "Sesamstr.", 2: None},
|
||||
"company_zip_code": {0: "12345", 1: "12345", 2: None},
|
||||
"company_zip_code": {0: "58644", 1: "58636", 2: None},
|
||||
"company_city": {0: "TV City", 1: "TV City", 2: None},
|
||||
"company_longitude": {0: 7.6968, 1: 7.7032, 2: None},
|
||||
"company_latitude": {0: 51.3246, 1: 51.38, 2: None},
|
||||
"company_pos_accuracy": {0: 4.0, 1: 4.0, 2: None},
|
||||
"company_last_update": {
|
||||
0: "2023-01-01",
|
||||
1: "2023-01-01",
|
||||
|
@ -177,12 +177,12 @@ def test_get_person_id_value_check(
|
||||
("name", "zip_code", "city", "id"),
|
||||
[
|
||||
("Some Company GmbH", "", "", 1),
|
||||
("Some Company GmbH", "12345", "", 1),
|
||||
("Some Company GmbH", "12345", "TV City", 1),
|
||||
("Some Company GmbH", "58644", "", 1),
|
||||
("Some Company GmbH", "58644", "TV City", 1),
|
||||
("Some Company GmbH", "", "TV City", 1),
|
||||
("Other Company GmbH", "", "", 2),
|
||||
("Other Company GmbH", "12345", "", 2),
|
||||
("Other Company GmbH", "12345", "TV City", 2),
|
||||
("Other Company GmbH", "58636", "", 2),
|
||||
("Other Company GmbH", "58636", "TV City", 2),
|
||||
("Other Company GmbH", "", "TV City", 2),
|
||||
("Third Company GmbH", "", "", 3),
|
||||
],
|
||||
@ -672,8 +672,11 @@ def test_relationships(documents: list[dict[str, Any]], full_db: Session) -> Non
|
||||
2: "Third Company GmbH",
|
||||
},
|
||||
"street": {0: "Sesamstr.", 1: "Sesamstr.", 2: None},
|
||||
"zip_code": {0: "12345", 1: "12345", 2: None},
|
||||
"zip_code": {0: "58644", 1: "58636", 2: None},
|
||||
"city": {0: "TV City", 1: "TV City", 2: None},
|
||||
"longitude": {0: 7.6968, 1: 7.7032, 2: None},
|
||||
"latitude": {0: 51.3246, 1: 51.38, 2: None},
|
||||
"pos_accuracy": {0: 4.0, 1: 4.0, 2: None},
|
||||
"last_update": {
|
||||
0: pd.Timestamp("2023-01-01 00:00:00"),
|
||||
1: pd.Timestamp("2023-01-01 00:00:00"),
|
||||
@ -1014,3 +1017,18 @@ def test_add_annual_report_financial_key_error(full_db: Session) -> None:
|
||||
{"financials": {"something-strange": 123.12}, "auditors": {}},
|
||||
db=full_db,
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.working_on()
@pytest.mark.parametrize(
    ("zip_code", "results"),
    [
        ("44809", {"latitude": 51.4997, "longitude": 7.1944, "pos_accuracy": 4.0}),
        (None, {}),
        ("", {}),
        ("60547", {}),
        ("58590", {}),
    ],
)
def test_get_geocodes(zip_code: str | None, results: dict) -> None:
    """Check that get_geocodes returns the expected geo data for each zip code case."""
    geocodes = data_transfer.get_geocodes(zip_code)
    assert geocodes == results
|
||||
|
Reference in New Issue
Block a user