mirror of
https://github.com/fhswf/aki_prj23_transparenzregister.git
synced 2025-06-21 23:43:55 +02:00
Data transfer script (#114)
Transfers data betwenn two sql instances. Limited in data volume. Should be good enough for now. --------- Co-authored-by: Tim <tim.ronneburg@outlook.de>
This commit is contained in:
@ -14,7 +14,7 @@ See the [CONTRIBUTING.md](CONTRIBUTING.md) about how code should be formatted an
|
|||||||
|
|
||||||
## DB Connection settings
|
## DB Connection settings
|
||||||
|
|
||||||
To connect to the SQL db see [sql/connector.py](./src/aki_prj23_transparenzregister/utils/postgres/connector.py)
|
To connect to the SQL db see [sql/connector.py](./src/aki_prj23_transparenzregister/utils/sql/connector.py)
|
||||||
To connect to the Mongo db see [connect]
|
To connect to the Mongo db see [connect]
|
||||||
|
|
||||||
Create a `secrets.json` in the root of this repo with the following structure (values to be replaces by desired config):
|
Create a `secrets.json` in the root of this repo with the following structure (values to be replaces by desired config):
|
||||||
@ -38,8 +38,10 @@ Create a `secrets.json` in the root of this repo with the following structure (v
|
|||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
Alternatively, the secrets can be provided as environment variables. One option to do so is to add a `.env` file with the following layout:
|
Alternatively, the secrets can be provided as environment variables. One option to do so is to add a `.env` file with
|
||||||
```ini
|
the following layout:
|
||||||
|
|
||||||
|
```
|
||||||
PYTHON_POSTGRES_USERNAME=postgres
|
PYTHON_POSTGRES_USERNAME=postgres
|
||||||
PYTHON_POSTGRES_PASSWORD=postgres
|
PYTHON_POSTGRES_PASSWORD=postgres
|
||||||
PYTHON_POSTGRES_HOST=localhost
|
PYTHON_POSTGRES_HOST=localhost
|
||||||
|
@ -1,11 +1,13 @@
|
|||||||
"""Module containing connection utils for PostgreSQL DB."""
|
"""Module containing connection utils for PostgreSQL DB."""
|
||||||
import re
|
import re
|
||||||
|
|
||||||
|
import pandas as pd
|
||||||
import sqlalchemy as sa
|
import sqlalchemy as sa
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
from sqlalchemy.engine import URL, Engine
|
from sqlalchemy.engine import URL, Engine
|
||||||
from sqlalchemy.orm import Session, declarative_base, sessionmaker
|
from sqlalchemy.orm import Session, declarative_base, sessionmaker
|
||||||
from sqlalchemy.pool import SingletonThreadPool
|
from sqlalchemy.pool import SingletonThreadPool
|
||||||
|
from tqdm import tqdm
|
||||||
|
|
||||||
from aki_prj23_transparenzregister.config.config_providers import (
|
from aki_prj23_transparenzregister.config.config_providers import (
|
||||||
ConfigProvider,
|
ConfigProvider,
|
||||||
@ -83,11 +85,31 @@ def init_db(db: Session) -> None:
|
|||||||
|
|
||||||
def reset_all_tables(db: Session) -> None:
|
def reset_all_tables(db: Session) -> None:
|
||||||
"""Drops all SQL tables and recreates them."""
|
"""Drops all SQL tables and recreates them."""
|
||||||
logger.info("Resetting all PostgreSQL tables.")
|
logger.info("Resetting all SQL tables.")
|
||||||
Base.metadata.drop_all(db.bind)
|
Base.metadata.drop_all(db.bind)
|
||||||
init_db(db)
|
init_db(db)
|
||||||
|
|
||||||
|
|
||||||
|
@logger.catch(reraise=True)
|
||||||
|
def transfer_db(*, source: Session, destination: Session) -> None:
|
||||||
|
"""Transfers the data from on db to another db.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
source: A session to a source db data should be copied from.
|
||||||
|
destination: A session to a db where the data should be copied to.
|
||||||
|
"""
|
||||||
|
reset_all_tables(destination)
|
||||||
|
init_db(destination)
|
||||||
|
sbind = source.bind
|
||||||
|
dbind = destination.bind
|
||||||
|
assert isinstance(sbind, Engine) # noqa: S101
|
||||||
|
assert isinstance(dbind, Engine) # noqa: S101
|
||||||
|
for table in tqdm(Base.metadata.sorted_tables):
|
||||||
|
pd.read_sql_table(str(table), sbind).to_sql(
|
||||||
|
str(table), dbind, if_exists="append", index=False
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
"""Main flow creating tables"""
|
"""Main flow creating tables"""
|
||||||
init_db(get_session(JsonFileConfigProvider("./secrets.json")))
|
init_db(get_session(JsonFileConfigProvider("./secrets.json")))
|
||||||
|
@ -1 +0,0 @@
|
|||||||
"""Tests for data_extraction."""
|
|
@ -4,15 +4,19 @@ from collections.abc import Generator
|
|||||||
from typing import Any
|
from typing import Any
|
||||||
from unittest.mock import Mock, patch
|
from unittest.mock import Mock, patch
|
||||||
|
|
||||||
|
import pandas as pd
|
||||||
import pytest
|
import pytest
|
||||||
from sqlalchemy.engine import Engine
|
from sqlalchemy.engine import Engine
|
||||||
|
from sqlalchemy.orm import Session
|
||||||
|
|
||||||
from aki_prj23_transparenzregister.config.config_providers import JsonFileConfigProvider
|
from aki_prj23_transparenzregister.config.config_providers import JsonFileConfigProvider
|
||||||
from aki_prj23_transparenzregister.config.config_template import PostgreConnectionString
|
from aki_prj23_transparenzregister.config.config_template import PostgreConnectionString
|
||||||
from aki_prj23_transparenzregister.utils.sql.connector import (
|
from aki_prj23_transparenzregister.utils.sql.connector import (
|
||||||
|
Base,
|
||||||
get_pg_engine,
|
get_pg_engine,
|
||||||
get_session,
|
get_session,
|
||||||
init_db,
|
init_db,
|
||||||
|
transfer_db,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -27,6 +31,36 @@ def test_get_engine_pg() -> None:
|
|||||||
assert get_pg_engine(conn_args) == result
|
assert get_pg_engine(conn_args) == result
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture()
|
||||||
|
def destination_db() -> Generator[Session, None, None]:
|
||||||
|
"""Generates a db Session to a sqlite db to copy data to."""
|
||||||
|
if os.path.exists("secondary.db"):
|
||||||
|
os.remove("secondary.db")
|
||||||
|
db = get_session("sqlite:///secondary.db")
|
||||||
|
init_db(db)
|
||||||
|
yield db
|
||||||
|
db.close()
|
||||||
|
bind = db.bind
|
||||||
|
assert isinstance(bind, Engine)
|
||||||
|
bind.dispose()
|
||||||
|
os.remove("secondary.db")
|
||||||
|
|
||||||
|
|
||||||
|
def test_transfer_db(full_db: Session, destination_db: Session) -> None:
|
||||||
|
"""Tests if the data transfer between two sql tables works."""
|
||||||
|
transfer_db(source=full_db, destination=destination_db)
|
||||||
|
sbind = full_db.bind
|
||||||
|
dbind = destination_db.bind
|
||||||
|
assert isinstance(sbind, Engine)
|
||||||
|
assert isinstance(dbind, Engine)
|
||||||
|
|
||||||
|
for table in Base.metadata.sorted_tables:
|
||||||
|
pd.testing.assert_frame_equal(
|
||||||
|
pd.read_sql_table(str(table), dbind),
|
||||||
|
pd.read_sql_table(str(table), sbind),
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture()
|
@pytest.fixture()
|
||||||
def delete_sqlite_table() -> Generator[str, None, None]:
|
def delete_sqlite_table() -> Generator[str, None, None]:
|
||||||
"""Cleans a path before and deletes the table after a test.
|
"""Cleans a path before and deletes the table after a test.
|
||||||
|
Reference in New Issue
Block a user