Stop table reset of better persistent tables. (#373)

This commit is contained in:
Philipp Horstenkamp 2023-11-12 14:27:44 +01:00 committed by GitHub
parent d66e4e2b67
commit af8a907cf9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 108 additions and 41 deletions

View File

@ -61,6 +61,7 @@ dashvis = "^0.1.3"
datetime = "^5.2" datetime = "^5.2"
deutschland = {git = "https://github.com/TrisNol/deutschland.git", branch = "hotfix/python-3.11-support"} deutschland = {git = "https://github.com/TrisNol/deutschland.git", branch = "hotfix/python-3.11-support"}
frozendict = "^2.3.8" frozendict = "^2.3.8"
html5lib = "^1.1"
loguru = "^0.7.0" loguru = "^0.7.0"
matplotlib = "^3.8.1" matplotlib = "^3.8.1"
networkx = "^3.2.1" networkx = "^3.2.1"
@ -82,7 +83,6 @@ torchvision = {version = "*", source = "torch-cpu"}
tqdm = "^4.66.1" tqdm = "^4.66.1"
transformers = {version = "*", extras = ["torch"]} transformers = {version = "*", extras = ["torch"]}
xmltodict = "^0.13.0" xmltodict = "^0.13.0"
html5lib = "^1.1"
[tool.poetry.extras] [tool.poetry.extras]
ingest = ["selenium", "deutschland", "xmltodict", "html5lib"] ingest = ["selenium", "deutschland", "xmltodict", "html5lib"]
@ -143,7 +143,7 @@ copy-sql = "aki_prj23_transparenzregister.utils.sql.copy_sql:copy_db_cli"
data-processing = "aki_prj23_transparenzregister.utils.data_processing:cli" data-processing = "aki_prj23_transparenzregister.utils.data_processing:cli"
data-transformation = "aki_prj23_transparenzregister.utils.data_transfer:transfer_data_cli" data-transformation = "aki_prj23_transparenzregister.utils.data_transfer:transfer_data_cli"
fetch-news-schedule = "aki_prj23_transparenzregister.apps.fetch_news:fetch_news_cli" fetch-news-schedule = "aki_prj23_transparenzregister.apps.fetch_news:fetch_news_cli"
reset-sql = "aki_prj23_transparenzregister.utils.sql.connector:reset_all_tables_cli" reset-sql = "aki_prj23_transparenzregister.utils.sql.reset_sql:cli"
webserver = "aki_prj23_transparenzregister.ui.app:main" webserver = "aki_prj23_transparenzregister.ui.app:main"
[[tool.poetry.source]] [[tool.poetry.source]]

View File

@ -35,8 +35,8 @@ from aki_prj23_transparenzregister.utils.mongo.connector import MongoConnector
from aki_prj23_transparenzregister.utils.sql import entities from aki_prj23_transparenzregister.utils.sql import entities
from aki_prj23_transparenzregister.utils.sql.connector import ( from aki_prj23_transparenzregister.utils.sql.connector import (
get_session, get_session,
reset_all_tables,
) )
from aki_prj23_transparenzregister.utils.sql.reset_sql import reset_tables
from aki_prj23_transparenzregister.utils.string_tools import simplify_string from aki_prj23_transparenzregister.utils.string_tools import simplify_string
nomi = pgeocode.Nominatim("de") nomi = pgeocode.Nominatim("de")
@ -639,7 +639,7 @@ def transfer_data(config_provider: ConfigProvider) -> None:
companies: list[dict[str, Any]] = mongo_company.get_all() # type: ignore companies: list[dict[str, Any]] = mongo_company.get_all() # type: ignore
del mongo_company del mongo_company
db = get_session(config_provider) db = get_session(config_provider)
reset_all_tables(db) reset_tables(db, all_tables=False)
add_companies(companies, db) add_companies(companies, db)
reset_relation_counter(db) reset_relation_counter(db)

View File

@ -1,28 +1,19 @@
"""Module containing connection utils for PostgreSQL DB.""" """Module containing connection utils for PostgreSQL DB."""
import argparse
import sys
import sqlalchemy as sa import sqlalchemy as sa
from loguru import logger
from sqlalchemy.engine import URL, Engine from sqlalchemy.engine import URL, Engine
from sqlalchemy.orm import Session, declarative_base, sessionmaker from sqlalchemy.orm import Session, declarative_base, sessionmaker
from sqlalchemy.pool import SingletonThreadPool from sqlalchemy.pool import SingletonThreadPool
from aki_prj23_transparenzregister.config.config_providers import ( from aki_prj23_transparenzregister.config.config_providers import (
HELP_TEXT_CONFIG,
ConfigProvider, ConfigProvider,
JsonFileConfigProvider, JsonFileConfigProvider,
get_config_provider,
) )
from aki_prj23_transparenzregister.config.config_template import ( from aki_prj23_transparenzregister.config.config_template import (
PostgreConnectionString, PostgreConnectionString,
SQLConnectionString, SQLConnectionString,
SQLiteConnectionString, SQLiteConnectionString,
) )
from aki_prj23_transparenzregister.utils.logger_config import (
add_logger_options_to_argparse,
configer_logger,
)
def get_engine(conn_args: SQLConnectionString) -> Engine: def get_engine(conn_args: SQLConnectionString) -> Engine:
@ -79,32 +70,6 @@ def init_db(db: Session) -> None:
Base.metadata.create_all(db.bind) Base.metadata.create_all(db.bind)
def reset_all_tables(db: Session) -> None:
    """Drop every table registered on ``Base.metadata`` and create them again.

    Args:
        db: Session whose bind is used to drop and recreate the tables.
    """
    logger.info("Resetting all SQL tables.")
    bind = db.bind
    Base.metadata.drop_all(bind)
    # init_db recreates the tables via Base.metadata.create_all.
    init_db(db)
def reset_all_tables_cli() -> None:
    """Reset all SQL tables from the command line.

    Reads the config source and the logger options from ``sys.argv``, passes
    the parsed namespace to ``configer_logger`` and then calls
    ``reset_all_tables`` on a session created from the given config.
    """
    parser = argparse.ArgumentParser(
        prog="Reset SQL",
        # The previous description was copied from the copy-sql tool.
        description="Drops all SQL tables of a database and recreates them.",
        epilog="Example: 'reset-sql secrets.json' or 'reset-sql ENV_VARS_'",
    )
    parser.add_argument(
        "config",
        metavar="config",
        # NOTE: argparse only applies the default of a positional argument
        # when nargs is "?" or "*", so this argument is still required.
        default="ENV",
        help=HELP_TEXT_CONFIG,
    )
    add_logger_options_to_argparse(parser)
    parsed = parser.parse_args(sys.argv[1:])
    configer_logger(namespace=parsed)
    reset_all_tables(get_session(get_config_provider(parsed.config)))
if __name__ == "__main__": if __name__ == "__main__":
"""Main flow creating tables""" """Main flow creating tables"""
init_db(get_session(JsonFileConfigProvider("./secrets.json"))) init_db(get_session(JsonFileConfigProvider("./secrets.json")))

View File

@ -15,8 +15,8 @@ from aki_prj23_transparenzregister.utils.logger_config import (
from aki_prj23_transparenzregister.utils.sql.connector import ( from aki_prj23_transparenzregister.utils.sql.connector import (
Base, Base,
get_session, get_session,
reset_all_tables,
) )
from aki_prj23_transparenzregister.utils.sql.reset_sql import reset_tables
@logger.catch(reraise=True) @logger.catch(reraise=True)
@ -27,7 +27,7 @@ def transfer_db_function(*, source: Session, destination: Session) -> None:
source: A session to a source db data should be copied from. source: A session to a source db data should be copied from.
destination: A session to a db where the data should be copied to. destination: A session to a db where the data should be copied to.
""" """
reset_all_tables(destination) reset_tables(destination, all_tables=True)
# init_db(destination) # init_db(destination)
sbind = source.bind sbind = source.bind
dbind = destination.bind dbind = destination.bind

View File

@ -0,0 +1,61 @@
"""Functions to reset the SQL db partially or completely."""
import argparse
import sys
from loguru import logger
from sqlalchemy.orm import Session
from aki_prj23_transparenzregister.config.config_providers import (
HELP_TEXT_CONFIG,
get_config_provider,
)
from aki_prj23_transparenzregister.utils.logger_config import (
add_logger_options_to_argparse,
configer_logger,
)
from aki_prj23_transparenzregister.utils.sql import entities
from aki_prj23_transparenzregister.utils.sql.connector import get_session, init_db
from aki_prj23_transparenzregister.utils.sql.entities import Base
def reset_tables(db: Session, all_tables: bool = False) -> None:
    """Drop SQL tables and recreate them.

    Args:
        db: Session whose bind is used to drop and recreate the tables.
        all_tables: If True, every table registered on ``Base.metadata`` is
            dropped. If False (default), the table of
            ``entities.MissingCompany`` is kept and every other table is
            dropped.
    """
    if all_tables:
        logger.warning(f"Resetting all SQL tables in {db.bind}.")
        Base.metadata.drop_all(db.bind)
    else:
        logger.info(f"Resetting the main SQL tables in {db.bind}.")
        # Compare the plain table name; str(table) would include a schema.
        tables_to_drop = [
            table
            for table in Base.metadata.sorted_tables
            if table.name != entities.MissingCompany.__tablename__
        ]
        for table in tables_to_drop:
            logger.debug(f"Dropping {table}")
        # sorted_tables is in creation order (referenced tables first), so
        # dropping table by table in that order can violate foreign keys.
        # drop_all resolves the drop order itself and skips tables that do
        # not exist yet, so a fresh database no longer raises.
        Base.metadata.drop_all(db.bind, tables=tables_to_drop)
    db.commit()
    # Recreate whatever was dropped; both branches need this.
    init_db(db)
def cli() -> None:
    """Reset the SQL tables from the command line.

    Reads the config source, the ``-a/--all`` flag and the logger options
    from ``sys.argv``, passes the parsed namespace to ``configer_logger`` and
    then calls ``reset_tables`` on a session created from the given config.
    """
    parser = argparse.ArgumentParser(
        prog="Reset SQL",
        # The previous description was copied from the copy-sql tool.
        description="Drops SQL tables of a database and recreates them.",
        epilog="Example: 'reset-sql secrets.json' or 'reset-sql ENV_VARS_'",
    )
    parser.add_argument(
        "-a",
        "--all",
        default=False,
        action="store_true",
        help="If set, resets all tables. Default is False.",
    )
    parser.add_argument(
        "config",
        metavar="config",
        # NOTE: argparse only applies the default of a positional argument
        # when nargs is "?" or "*", so this argument is still required.
        default="ENV",
        help=HELP_TEXT_CONFIG,
    )
    add_logger_options_to_argparse(parser)
    parsed = parser.parse_args(sys.argv[1:])
    configer_logger(namespace=parsed)
    session = get_session(get_config_provider(parsed.config))
    reset_tables(session, all_tables=parsed.all)

View File

@ -183,6 +183,9 @@ def full_db(empty_db: Session, finance_statements: list[dict[str, Any]]) -> Sess
for finance_statement in finance_statements for finance_statement in finance_statements
] ]
) )
empty_db.add(
entities.MissingCompany(name="Some company missing", zip_code="", city="")
)
empty_db.commit() empty_db.commit()
# print(pd.read_sql_table("company", empty_db.bind).to_string()) # print(pd.read_sql_table("company", empty_db.bind).to_string())
return empty_db return empty_db

View File

@ -0,0 +1,38 @@
"""Tests for sql rests."""
import sys
import pandas as pd
import pytest
from _pytest.monkeypatch import MonkeyPatch
from sqlalchemy.orm import Session
from aki_prj23_transparenzregister.utils.sql import entities, reset_sql
def test_reset_sql_all(full_db: Session) -> None:
    """Checks that a full reset leaves both inspected tables empty."""
    reset_sql.reset_tables(db=full_db, all_tables=True)
    # Same order as before: missing companies first, then companies.
    for entity in (entities.MissingCompany, entities.Company):
        content = pd.read_sql_table(
            entity.__tablename__, con=full_db.bind  # type:ignore
        )
        assert content.empty
def test_reset_sql(full_db: Session) -> None:
    """Checks that a partial reset empties companies but keeps missing companies."""
    reset_sql.reset_tables(db=full_db, all_tables=False)

    companies = pd.read_sql_table(
        entities.Company.__tablename__, con=full_db.bind  # type:ignore
    )
    assert companies.empty

    missing_companies = pd.read_sql_table(
        entities.MissingCompany.__tablename__, con=full_db.bind  # type:ignore
    )
    assert not missing_companies.empty
def test_reset_help(monkeypatch: MonkeyPatch) -> None:
    """Tests if the cli exits cleanly when called with the help flag."""
    monkeypatch.setattr(sys, "argv", [sys.argv[0], "-h"])
    with pytest.raises(SystemExit) as exit_info:
        reset_sql.cli()
    # argparse exits with status 0 after printing the help text; any other
    # code would mean the parser failed before reaching the help action.
    assert exit_info.value.code == 0