Added test

This commit is contained in:
Tim
2023-11-07 21:18:58 +01:00
parent 41af7e2d18
commit 410b690873
18 changed files with 619 additions and 384 deletions

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,919 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [],
"source": [
"import os.path\n",
"\n",
"import pandas as pd\n",
"\n",
"# if not os.path.exists(\"src\"):\n",
"# %cd \"../\"\n",
"# os.path.abspath(\".\")"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [],
"source": [
"from aki_prj23_transparenzregister.utils.sql import entities\n",
"from sqlalchemy.orm import aliased\n",
"from sqlalchemy import func, text\n",
"\n",
"# Alias for Company table for the base company\n",
"base_company = aliased(entities.Company, name=\"base_company\")\n",
"\n",
"# Alias for Company table for the head company\n",
"head_company = aliased(entities.Company, name=\"head_company\")"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [],
"source": [
"from aki_prj23_transparenzregister.config.config_providers import JsonFileConfigProvider\n",
"from aki_prj23_transparenzregister.utils.sql.connector import get_session\n",
"\n",
"session = get_session(JsonFileConfigProvider(\"../secrets.json\"))"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'SELECT base_company.name AS name_company_base, relation.relation AS relation_type, head_company.name AS name_company_head \\nFROM company AS base_company JOIN (relation JOIN company_relation ON relation.id = company_relation.id) ON relation.company_id = base_company.id JOIN company AS head_company ON company_relation.company2_id = head_company.id'"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Query to fetch relations between companies\n",
"relations_query = (\n",
" session.query(\n",
" base_company.name.label(\"name_company_base\"),\n",
" entities.CompanyRelation.relation.label(\"relation_type\"),\n",
" head_company.name.label(\"name_company_head\"),\n",
" )\n",
" .join(\n",
" entities.CompanyRelation,\n",
" entities.CompanyRelation.company_id == base_company.id,\n",
" )\n",
" .join(\n",
" head_company,\n",
" entities.CompanyRelation.company2_id == head_company.id,\n",
" )\n",
")\n",
"str(relations_query)"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"121 ms ± 9.27 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
]
}
],
"source": [
"%timeit pd.read_sql_query(str(relations_query), session.bind)"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>name_company_base</th>\n",
" <th>relation_type</th>\n",
" <th>name_company_head</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2. Schaper Objekt GmbH &amp; Co. Kiel KG</td>\n",
" <td>KOMMANDITIST</td>\n",
" <td>Multi-Center Warenvertriebs GmbH</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Alb-Windkraft GmbH &amp; Co. KG</td>\n",
" <td>KOMMANDITIST</td>\n",
" <td>EnBW Windkraftprojekte GmbH</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Anneliese Köster GmbH &amp; Co. KG</td>\n",
" <td>KOMMANDITIST</td>\n",
" <td>INDUS Holding Aktiengesellschaft</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>AURELIUS Equity Opportunities SE &amp; Co. KGaA</td>\n",
" <td>HAFTENDER_GESELLSCHAFTER</td>\n",
" <td>AURELIUS Management SE</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Aurelius KG</td>\n",
" <td>HAFTENDER_GESELLSCHAFTER</td>\n",
" <td>Aurelius Verwaltungs GmbH</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>573</th>\n",
" <td>Zalando BTD 011 SE &amp; Co. KG</td>\n",
" <td>HAFTENDER_GESELLSCHAFTER</td>\n",
" <td>Zalando SE</td>\n",
" </tr>\n",
" <tr>\n",
" <th>574</th>\n",
" <td>Zalando BTD 011 SE &amp; Co. KG</td>\n",
" <td>KOMMANDITIST</td>\n",
" <td>Zalando Operations GmbH</td>\n",
" </tr>\n",
" <tr>\n",
" <th>575</th>\n",
" <td>zLabels Creation &amp; Sales GmbH &amp; Co. KG</td>\n",
" <td>HAFTENDER_GESELLSCHAFTER</td>\n",
" <td>zLabels GmbH</td>\n",
" </tr>\n",
" <tr>\n",
" <th>576</th>\n",
" <td>Zalando Customer Care International SE &amp; Co. KG</td>\n",
" <td>HAFTENDER_GESELLSCHAFTER</td>\n",
" <td>Zalando SE</td>\n",
" </tr>\n",
" <tr>\n",
" <th>577</th>\n",
" <td>Zalando Customer Care International SE &amp; Co. KG</td>\n",
" <td>KOMMANDITIST</td>\n",
" <td>Zalando Operations GmbH</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>578 rows × 3 columns</p>\n",
"</div>"
],
"text/plain": [
" name_company_base \\\n",
"0 2. Schaper Objekt GmbH & Co. Kiel KG \n",
"1 Alb-Windkraft GmbH & Co. KG \n",
"2 Anneliese Köster GmbH & Co. KG \n",
"3 AURELIUS Equity Opportunities SE & Co. KGaA \n",
"4 Aurelius KG \n",
".. ... \n",
"573 Zalando BTD 011 SE & Co. KG \n",
"574 Zalando BTD 011 SE & Co. KG \n",
"575 zLabels Creation & Sales GmbH & Co. KG \n",
"576 Zalando Customer Care International SE & Co. KG \n",
"577 Zalando Customer Care International SE & Co. KG \n",
"\n",
" relation_type name_company_head \n",
"0 KOMMANDITIST Multi-Center Warenvertriebs GmbH \n",
"1 KOMMANDITIST EnBW Windkraftprojekte GmbH \n",
"2 KOMMANDITIST INDUS Holding Aktiengesellschaft \n",
"3 HAFTENDER_GESELLSCHAFTER AURELIUS Management SE \n",
"4 HAFTENDER_GESELLSCHAFTER Aurelius Verwaltungs GmbH \n",
".. ... ... \n",
"573 HAFTENDER_GESELLSCHAFTER Zalando SE \n",
"574 KOMMANDITIST Zalando Operations GmbH \n",
"575 HAFTENDER_GESELLSCHAFTER zLabels GmbH \n",
"576 HAFTENDER_GESELLSCHAFTER Zalando SE \n",
"577 KOMMANDITIST Zalando Operations GmbH \n",
"\n",
"[578 rows x 3 columns]"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"company_relations = pd.read_sql_query(str(relations_query), session.bind)\n",
"company_relations"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [],
"source": [
"relations_query = (\n",
" session.query(\n",
" entities.Company.name.label(\"name_company\"),\n",
" entities.PersonRelation.relation.label(\"relation_type\"),\n",
" entities.Person.lastname.label(\"lastname\"),\n",
" entities.Person.firstname.label(\"firstname\"),\n",
" entities.Person.date_of_birth.label(\"date_of_birth\"),\n",
" )\n",
" .join(\n",
" entities.PersonRelation,\n",
" entities.PersonRelation.company_id == entities.Company.id,\n",
" )\n",
" .join(\n",
" entities.Person,\n",
" entities.PersonRelation.person_id == entities.Person.id,\n",
" )\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"373 ms ± 25.6 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
]
}
],
"source": [
"%timeit pd.read_sql_query(str(relations_query), session.bind)"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>name_company</th>\n",
" <th>relation_type</th>\n",
" <th>lastname</th>\n",
" <th>firstname</th>\n",
" <th>date_of_birth</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0 10 24 Telefondienste GmbH</td>\n",
" <td>GESCHAEFTSFUEHRER</td>\n",
" <td>Tetau</td>\n",
" <td>Nicolas</td>\n",
" <td>1971-01-02</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0 10 24 Telefondienste GmbH</td>\n",
" <td>PROKURIST</td>\n",
" <td>Dammast</td>\n",
" <td>Lutz</td>\n",
" <td>1966-12-06</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1. Staiger Grundstücksverwaltung GmbH &amp; Co. KG</td>\n",
" <td>KOMMANDITIST</td>\n",
" <td>Tutsch</td>\n",
" <td>Rosemarie</td>\n",
" <td>1941-10-09</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1. Staiger Grundstücksverwaltung GmbH &amp; Co. KG</td>\n",
" <td>KOMMANDITIST</td>\n",
" <td>Staiger</td>\n",
" <td>Marc</td>\n",
" <td>1969-10-22</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>1. Staiger Grundstücksverwaltung GmbH &amp; Co. KG</td>\n",
" <td>KOMMANDITIST</td>\n",
" <td>Staiger</td>\n",
" <td>Michaela</td>\n",
" <td>1971-03-03</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14891</th>\n",
" <td>Wohnungsbaugesellschaft mit beschränkter Haftu...</td>\n",
" <td>GESCHAEFTSFUEHRER</td>\n",
" <td>Weirich</td>\n",
" <td>Torsten</td>\n",
" <td>1975-07-21</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14892</th>\n",
" <td>Wohnungsbaugesellschaft mit beschränkter Haftu...</td>\n",
" <td>GESCHAEFTSFUEHRER</td>\n",
" <td>Brusinski</td>\n",
" <td>Bastian</td>\n",
" <td>1980-10-29</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14893</th>\n",
" <td>Zalando Customer Care International SE &amp; Co. KG</td>\n",
" <td>PROKURIST</td>\n",
" <td>Pape</td>\n",
" <td>Ute</td>\n",
" <td>1978-12-13</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14894</th>\n",
" <td>zebotec GmbH</td>\n",
" <td>GESCHAEFTSFUEHRER</td>\n",
" <td>Neff</td>\n",
" <td>Werner</td>\n",
" <td>1981-11-24</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14895</th>\n",
" <td>zebotec GmbH</td>\n",
" <td>GESCHAEFTSFUEHRER</td>\n",
" <td>Morris</td>\n",
" <td>Richard</td>\n",
" <td>1971-01-02</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>14896 rows × 5 columns</p>\n",
"</div>"
],
"text/plain": [
" name_company relation_type \\\n",
"0 0 10 24 Telefondienste GmbH GESCHAEFTSFUEHRER \n",
"1 0 10 24 Telefondienste GmbH PROKURIST \n",
"2 1. Staiger Grundstücksverwaltung GmbH & Co. KG KOMMANDITIST \n",
"3 1. Staiger Grundstücksverwaltung GmbH & Co. KG KOMMANDITIST \n",
"4 1. Staiger Grundstücksverwaltung GmbH & Co. KG KOMMANDITIST \n",
"... ... ... \n",
"14891 Wohnungsbaugesellschaft mit beschränkter Haftu... GESCHAEFTSFUEHRER \n",
"14892 Wohnungsbaugesellschaft mit beschränkter Haftu... GESCHAEFTSFUEHRER \n",
"14893 Zalando Customer Care International SE & Co. KG PROKURIST \n",
"14894 zebotec GmbH GESCHAEFTSFUEHRER \n",
"14895 zebotec GmbH GESCHAEFTSFUEHRER \n",
"\n",
" lastname firstname date_of_birth \n",
"0 Tetau Nicolas 1971-01-02 \n",
"1 Dammast Lutz 1966-12-06 \n",
"2 Tutsch Rosemarie 1941-10-09 \n",
"3 Staiger Marc 1969-10-22 \n",
"4 Staiger Michaela 1971-03-03 \n",
"... ... ... ... \n",
"14891 Weirich Torsten 1975-07-21 \n",
"14892 Brusinski Bastian 1980-10-29 \n",
"14893 Pape Ute 1978-12-13 \n",
"14894 Neff Werner 1981-11-24 \n",
"14895 Morris Richard 1971-01-02 \n",
"\n",
"[14896 rows x 5 columns]"
]
},
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df = pd.read_sql_query(str(relations_query), session.bind)\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>person_id</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2520</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>4993</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3202</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4611</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>4095</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1804</th>\n",
" <td>3565</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1805</th>\n",
" <td>3510</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1806</th>\n",
" <td>530</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1807</th>\n",
" <td>536</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1808</th>\n",
" <td>4617</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>1809 rows × 1 columns</p>\n",
"</div>"
],
"text/plain": [
" person_id\n",
"0 2520\n",
"1 4993\n",
"2 3202\n",
"3 4611\n",
"4 4095\n",
"... ...\n",
"1804 3565\n",
"1805 3510\n",
"1806 530\n",
"1807 536\n",
"1808 4617\n",
"\n",
"[1809 rows x 1 columns]"
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from sqlalchemy import func, text\n",
"\n",
"# Subquery to group and count the relations without joins\n",
"grouped_relations_subquery = (\n",
" session.query(\n",
" entities.PersonRelation.person_id,\n",
" )\n",
" .group_by(entities.PersonRelation.person_id)\n",
" .having(func.count() > 1)\n",
")\n",
"pd.DataFrame(grouped_relations_subquery.all())"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [],
"source": [
"relations_query = (\n",
" session.query(\n",
" entities.Company.name.label(\"name_company\"),\n",
" entities.PersonRelation.relation.label(\"relation_type\"),\n",
" entities.Person.lastname.label(\"lastname\"),\n",
" entities.Person.firstname.label(\"firstname\"),\n",
" entities.Person.date_of_birth.label(\"date_of_birth\"),\n",
" )\n",
" .join(\n",
" entities.PersonRelation,\n",
" entities.PersonRelation.company_id == entities.Company.id,\n",
" )\n",
" .join(\n",
" entities.Person,\n",
" entities.PersonRelation.person_id == entities.Person.id,\n",
" )\n",
" .filter(entities.PersonRelation.person_id.in_(grouped_relations_subquery))\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>name_company</th>\n",
" <th>relation_type</th>\n",
" <th>lastname</th>\n",
" <th>firstname</th>\n",
" <th>date_of_birth</th>\n",
" <th>person_name</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0 10 24 Telefondienste GmbH</td>\n",
" <td>RelationshipRoleEnum.GESCHAEFTSFUEHRER</td>\n",
" <td>Tetau</td>\n",
" <td>Nicolas</td>\n",
" <td>1971-01-02</td>\n",
" <td>TetauNicolas</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0 10 24 Telefondienste GmbH</td>\n",
" <td>RelationshipRoleEnum.PROKURIST</td>\n",
" <td>Dammast</td>\n",
" <td>Lutz</td>\n",
" <td>1966-12-06</td>\n",
" <td>DammastLutz</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>01050.com GmbH</td>\n",
" <td>RelationshipRoleEnum.GESCHAEFTSFUEHRER</td>\n",
" <td>Tetau</td>\n",
" <td>Nicolas</td>\n",
" <td>1971-01-02</td>\n",
" <td>TetauNicolas</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>01050.com GmbH</td>\n",
" <td>RelationshipRoleEnum.PROKURIST</td>\n",
" <td>Dammast</td>\n",
" <td>Lutz</td>\n",
" <td>1966-12-06</td>\n",
" <td>DammastLutz</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>AASP Filmproduktionsgesellschaft mbH &amp; Co. Leo...</td>\n",
" <td>RelationshipRoleEnum.KOMMANDITIST</td>\n",
" <td>Dellhofen</td>\n",
" <td>Jens</td>\n",
" <td>1977-04-19</td>\n",
" <td>DellhofenJens</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7071</th>\n",
" <td>Wohnungsbaugesellschaft mit beschränkter Haftu...</td>\n",
" <td>RelationshipRoleEnum.GESCHAEFTSFUEHRER</td>\n",
" <td>Karounos</td>\n",
" <td>Marita</td>\n",
" <td>1971-03-30</td>\n",
" <td>KarounosMarita</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7072</th>\n",
" <td>Wohnungsbaugesellschaft mit beschränkter Haftu...</td>\n",
" <td>RelationshipRoleEnum.PROKURIST</td>\n",
" <td>Groll</td>\n",
" <td>Michael</td>\n",
" <td>1967-12-24</td>\n",
" <td>GrollMichael</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7073</th>\n",
" <td>Wohnungsbaugesellschaft mit beschränkter Haftu...</td>\n",
" <td>RelationshipRoleEnum.GESCHAEFTSFUEHRER</td>\n",
" <td>Weirich</td>\n",
" <td>Torsten</td>\n",
" <td>1975-07-21</td>\n",
" <td>WeirichTorsten</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7074</th>\n",
" <td>Wohnungsbaugesellschaft mit beschränkter Haftu...</td>\n",
" <td>RelationshipRoleEnum.GESCHAEFTSFUEHRER</td>\n",
" <td>Brusinski</td>\n",
" <td>Bastian</td>\n",
" <td>1980-10-29</td>\n",
" <td>BrusinskiBastian</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7075</th>\n",
" <td>Zalando Customer Care International SE &amp; Co. KG</td>\n",
" <td>RelationshipRoleEnum.PROKURIST</td>\n",
" <td>Pape</td>\n",
" <td>Ute</td>\n",
" <td>1978-12-13</td>\n",
" <td>PapeUte</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>7076 rows × 6 columns</p>\n",
"</div>"
],
"text/plain": [
" name_company \\\n",
"0 0 10 24 Telefondienste GmbH \n",
"1 0 10 24 Telefondienste GmbH \n",
"2 01050.com GmbH \n",
"3 01050.com GmbH \n",
"4 AASP Filmproduktionsgesellschaft mbH & Co. Leo... \n",
"... ... \n",
"7071 Wohnungsbaugesellschaft mit beschränkter Haftu... \n",
"7072 Wohnungsbaugesellschaft mit beschränkter Haftu... \n",
"7073 Wohnungsbaugesellschaft mit beschränkter Haftu... \n",
"7074 Wohnungsbaugesellschaft mit beschränkter Haftu... \n",
"7075 Zalando Customer Care International SE & Co. KG \n",
"\n",
" relation_type lastname firstname \\\n",
"0 RelationshipRoleEnum.GESCHAEFTSFUEHRER Tetau Nicolas \n",
"1 RelationshipRoleEnum.PROKURIST Dammast Lutz \n",
"2 RelationshipRoleEnum.GESCHAEFTSFUEHRER Tetau Nicolas \n",
"3 RelationshipRoleEnum.PROKURIST Dammast Lutz \n",
"4 RelationshipRoleEnum.KOMMANDITIST Dellhofen Jens \n",
"... ... ... ... \n",
"7071 RelationshipRoleEnum.GESCHAEFTSFUEHRER Karounos Marita \n",
"7072 RelationshipRoleEnum.PROKURIST Groll Michael \n",
"7073 RelationshipRoleEnum.GESCHAEFTSFUEHRER Weirich Torsten \n",
"7074 RelationshipRoleEnum.GESCHAEFTSFUEHRER Brusinski Bastian \n",
"7075 RelationshipRoleEnum.PROKURIST Pape Ute \n",
"\n",
" date_of_birth person_name \n",
"0 1971-01-02 TetauNicolas \n",
"1 1966-12-06 DammastLutz \n",
"2 1971-01-02 TetauNicolas \n",
"3 1966-12-06 DammastLutz \n",
"4 1977-04-19 DellhofenJens \n",
"... ... ... \n",
"7071 1971-03-30 KarounosMarita \n",
"7072 1967-12-24 GrollMichael \n",
"7073 1975-07-21 WeirichTorsten \n",
"7074 1980-10-29 BrusinskiBastian \n",
"7075 1978-12-13 PapeUte \n",
"\n",
"[7076 rows x 6 columns]"
]
},
"execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"relations_df = pd.DataFrame(relations_query.all())\n",
"relations_df[\"person_name\"] = relations_df[\"lastname\"] + relations_df[\"firstname\"]\n",
"relations_df.rename(\n",
" columns={\"oldName1\": \"newName1\", \"oldName2\": \"newName2\"}, inplace=True\n",
")\n",
"relations_df"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"node_template = {\n",
" \"id\": \"(company|person)_\\d\",\n",
" \"label\": \"Name from entries\",\n",
" \"type\": \"Company|Person\",\n",
" \"shape\": \"dot\",\n",
" \"color\": \"#729b79ff\",\n",
" # TODO add title for hover effect in graph \"title\": \"\"\n",
"}\n",
"nodes = relations_df\n",
"for index in relations_df.index:\n",
" nodes[\"index\"] = {\n",
" \"label\": company_2.name,\n",
" \"type\": \"Company\",\n",
" \"shape\": \"dot\",\n",
" \"color\": \"#729b79ff\",\n",
" }"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import networkx as nx\n",
"import matplotlib.pyplot as plt\n",
"# relations_df[\"person_name\"] = relations_df[\"lastname\"] + relations_df[\"firstname\"]\n",
"\n",
"nodes = \n",
"# create edges from dataframe\n",
"graph = nx.from_pandas_edgelist(relations_df, source=\"name_company\", target=\"person_name\", edge_attr=\"relation_type\")\n",
"\n",
"# update node attributes from dataframe\n",
"nodes_attr = nodes.set_index(\"index\").to_dict(orient=\"index\")\n",
"nx.set_node_attributes(graph, nodes_attr)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from pyvis.network import Network\n",
"\n",
"net = Network(\n",
" directed=False, neighborhood_highlight=True, bgcolor=\"white\", font_color=\"black\"\n",
")\n",
"\n",
"# pass networkx graph to pyvis\n",
"net.from_nx(graph)\n",
"\n",
"net.inherit_edge_colors(False)\n",
"net.set_edge_smooth(\"dynamic\")\n",
"adj_list = net.get_adj_list()\n",
"\n",
"measure_type = \"degree\"\n",
"measure_vector = {}\n",
"\n",
"if measure_type == \"eigenvector\":\n",
" measure_vector = nx.eigenvector_centrality(graph)\n",
" df[\"eigenvector\"] = measure_vector.values()\n",
"if measure_type == \"degree\":\n",
" measure_vector = nx.degree_centrality(graph)\n",
" df[\"degree\"] = measure_vector.values()\n",
"if measure_type == \"betweeness\":\n",
" measure_vector = nx.betweenness_centrality(graph)\n",
" df[\"betweeness\"] = measure_vector.values()\n",
"if measure_type == \"closeness\":\n",
" measure_vector = nx.closeness_centrality(graph)\n",
" df[\"closeness\"] = measure_vector.values()\n",
"if measure_type == \"pagerank\":\n",
" measure_vector = nx.pagerank(graph)\n",
" df[\"pagerank\"] = measure_vector.values()\n",
"if measure_type == \"average_degree\":\n",
" measure_vector = nx.average_degree_connectivity(graph)\n",
" # df[\"average_degree\"] = measure_vector.values()\n",
" print(measure_vector.values())\n",
"\n",
"# calculate and update size of the nodes depending on their number of edges\n",
"for node_id, neighbors in adj_list.items():\n",
" # df[\"edges\"] = measure_vector.values()\n",
"\n",
" if measure_type == \"edges\":\n",
" size = 10 # len(neighbors)*5\n",
" else:\n",
" size = measure_vector[node_id] * 50\n",
" next(\n",
" (node.update({\"size\": size}) for node in net.nodes if node[\"id\"] == node_id),\n",
" None,\n",
" )\n",
"\n",
"\n",
"net.repulsion()\n",
"net.show_buttons(filter_=[\"physics\"])\n",
"\n",
"# net.show_buttons()\n",
"\n",
"# save graph as HTML\n",
"net.save_graph(\"./tmp.html\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "aki-prj23-transparenzregister-IY2hcXvW-py3.11",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large Load Diff

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,151 @@
"""Old NetworkX Graph which needs to be discarded in the next commits."""
import networkx as nx
import pandas as pd
import plotly.graph_objects as go
from dash import dcc, html
from aki_prj23_transparenzregister.config.config_providers import JsonFileConfigProvider
from aki_prj23_transparenzregister.utils.sql import connector, entities
# Hard-coded company id; not referenced by the functions visible in this module.
# Presumably a leftover for manual testing (2213 and 13 were the ids tried) — TODO confirm.
test_company = 13 # 2213 # 13
def find_company_relations(company_id: int) -> pd.DataFrame:
    """Load the relations stored for one id and attach the company names.

    Reads the complete company and company-relation tables from the database
    configured in ``./secrets.json``, keeps the relation rows whose
    ``relation_id`` equals ``company_id`` and resolves both ends of every
    remaining relation to a company name.

    Args:
        company_id (int): Value the ``relation_id`` column is filtered by.

    Returns:
        pd.DataFrame: One row per matching relation with the columns
            ``relation_id``, ``company_relation_company2_id``,
            ``company_name`` and ``connected_company_name``.

    Raises:
        IndexError: If an id used by a relation has no row in the company table.
    """
    session = connector.get_session(JsonFileConfigProvider("./secrets.json"))
    try:
        query_companies = session.query(entities.Company)
        query_relations = session.query(entities.CompanyRelation)
        companies_df: pd.DataFrame = pd.read_sql(str(query_companies), session.bind)  # type: ignore
        all_relations_df: pd.DataFrame = pd.read_sql(str(query_relations), session.bind)  # type: ignore
    finally:
        # Both tables are fully materialised as DataFrames by now, so the
        # session can be released instead of leaking one per call.
        session.close()

    # NOTE(review): the filter and the first name lookup both use `relation_id`.
    # The column name suggests this may be the relation's own key rather than a
    # company id — confirm against the schema before relying on the result.
    companies_relations_df = all_relations_df.loc[
        all_relations_df["relation_id"] == company_id,
        ["relation_id", "company_relation_company2_id"],
    ].copy()  # explicit copy: new columns are added below

    def _company_name(wanted_id: int) -> str:
        """Return the name of the first company row whose id equals ``wanted_id``."""
        matches = companies_df.loc[
            companies_df["company_id"] == wanted_id, "company_name"
        ]
        return matches.iloc[0]

    companies_relations_df["company_name"] = [
        _company_name(base_id) for base_id in companies_relations_df["relation_id"]
    ]
    companies_relations_df["connected_company_name"] = [
        _company_name(other_id)
        for other_id in companies_relations_df["company_relation_company2_id"]
    ]
    return companies_relations_df
# Plotly figure
def network_graph(company_id: int) -> go.Figure:
    """Build a Plotly figure showing the relation network of one company.

    The relations returned by ``find_company_relations`` become the edges of
    an undirected NetworkX graph, which is positioned with a spring layout and
    drawn as one line trace (edges) plus one marker/text trace (nodes).

    Args:
        company_id (int): Id forwarded to ``find_company_relations``.

    Returns:
        go.Figure: Figure containing the edge trace and the node trace.
    """
    relations = find_company_relations(company_id)
    graph = nx.Graph()
    graph.add_edges_from(
        zip(relations["company_name"], relations["connected_company_name"])
    )
    positions = nx.spring_layout(graph)

    # Edge trace: every edge contributes start, end and a ``None`` separator so
    # that Plotly draws disconnected line segments within a single trace.
    edge_x = []
    edge_y = []
    for start, end in graph.edges():
        start_x, start_y = positions[start]
        end_x, end_y = positions[end]
        edge_x.extend((start_x, end_x, None))
        edge_y.extend((start_y, end_y, None))
    edge_trace = go.Scatter(
        x=edge_x,
        y=edge_y,
        mode="lines",
        line={"color": "black", "width": 1},
        hoverinfo="none",
        showlegend=False,
    )

    # Node trace: one labelled marker per node, in graph iteration order.
    labels = list(graph.nodes())
    node_trace = go.Scatter(
        x=[positions[label][0] for label in labels],
        y=[positions[label][1] for label in labels],
        text=labels,
        mode="markers+text",
        marker={"color": "pink", "size": 50, "line": {"color": "black", "width": 1}},
        hoverinfo="none",
        showlegend=False,
    )

    # Both axes share the same plain styling.
    axis_style = {
        "linecolor": "black",
        "showgrid": False,
        "showticklabels": False,
        "mirror": True,
    }
    layout = {
        "plot_bgcolor": "white",
        "paper_bgcolor": "white",
        "margin": {"t": 10, "b": 10, "l": 10, "r": 10, "pad": 0},
        "xaxis": dict(axis_style),
        "yaxis": dict(axis_style),
    }
    return go.Figure(data=[edge_trace, node_trace], layout=layout)
def networkx_component(company_id: int) -> html.Div:
    """Return a layout container holding the network graph of a company.

    Args:
        company_id (int): Id forwarded to ``network_graph``.

    Returns:
        html.Div: Div whose only child is a ``dcc.Graph`` with the id
            ``my-graph`` showing the figure built by ``network_graph``.
    """
    figure = network_graph(company_id)
    graph_element = dcc.Graph(id="my-graph", figure=figure)
    return html.Div([graph_element])

View File

@ -0,0 +1,186 @@
"""Old Module for NetworkX Graphs."""
import networkx as nx
import pandas as pd
import plotly.graph_objects as go
from dash import Dash, Input, Output, dcc, html
from aki_prj23_transparenzregister.config.config_providers import JsonFileConfigProvider
from aki_prj23_transparenzregister.utils.sql import connector, entities
# Hard-coded company id; not referenced by the code visible in this module.
# Presumably a leftover for manual testing (2213 and 13 were the ids tried) — TODO confirm.
test_company = 13 # 2213 # 13
def find_all_company_relations() -> pd.DataFrame:
    """Load company relations from the DB and attach the company names.

    Reads the complete company and company-relation tables from the database
    configured in ``./secrets.json`` and resolves both ends of each relation
    to a company name. Only the rows returned by ``DataFrame.head()`` (the
    first five by pandas' default) are kept.

    Returns:
        pd.DataFrame: One row per kept relation with the columns
            ``relation_id``, ``company_relation_company2_id``,
            ``company_name`` and ``connected_company_name``.

    Raises:
        IndexError: If an id used by a relation has no row in the company table.
    """
    session = connector.get_session(JsonFileConfigProvider("./secrets.json"))
    try:
        query_companies = session.query(entities.Company)  # .all()
        query_relations = session.query(entities.CompanyRelation)  # .all()
        companies_df: pd.DataFrame = pd.read_sql(str(query_companies), session.bind)  # type: ignore
        all_relations_df: pd.DataFrame = pd.read_sql(str(query_relations), session.bind)  # type: ignore
    finally:
        # Both tables are fully materialised as DataFrames by now, so the
        # session can be released instead of leaking one per call.
        session.close()

    # NOTE(review): `.head()` truncates the result although the function name
    # promises *all* relations — looks like a debugging limit; confirm before
    # removing it. The explicit copy avoids assigning onto a slice below.
    companies_relations_df = (
        all_relations_df[["relation_id", "company_relation_company2_id"]]
        .head()
        .copy()
    )

    def _company_name(wanted_id: int) -> str:
        """Return the name of the first company row whose id equals ``wanted_id``."""
        matches = companies_df.loc[
            companies_df["company_id"] == wanted_id, "company_name"
        ]
        return matches.iloc[0]

    # NOTE(review): the first lookup uses `relation_id` as a company id; the
    # column name suggests it may be the relation's own key — confirm.
    companies_relations_df["company_name"] = [
        _company_name(base_id) for base_id in companies_relations_df["relation_id"]
    ]
    companies_relations_df["connected_company_name"] = [
        _company_name(other_id)
        for other_id in companies_relations_df["company_relation_company2_id"]
    ]
    return companies_relations_df
# Plotly figure
def create_network_graph() -> go.Figure:
    """Create a Plotly figure of the company relation network.

    The relations returned by ``find_all_company_relations`` become the edges
    of an undirected NetworkX graph, which is positioned with a spring layout
    and drawn as one line trace (edges) plus one marker/text trace (nodes).

    The centrality measures that used to be computed here were written to a
    local DataFrame that was never read or returned. They were removed because
    they were pure overhead and could abort the function (for example
    ``nx.eigenvector_centrality`` raises on an empty graph or when the power
    iteration does not converge).

    Returns:
        go.Figure: Figure containing the edge trace and the node trace.
    """
    relations = find_all_company_relations()
    network_graph = nx.Graph()
    network_graph.add_edges_from(
        zip(relations["company_name"], relations["connected_company_name"])
    )
    pos = nx.spring_layout(network_graph)

    # Edge trace: every edge contributes start, end and a ``None`` separator so
    # that Plotly draws disconnected line segments within a single trace.
    edge_x = []
    edge_y = []
    for start, end in network_graph.edges():
        x0, y0 = pos[start]
        x1, y1 = pos[end]
        edge_x.extend((x0, x1, None))
        edge_y.extend((y0, y1, None))
    edge_trace = go.Scatter(
        x=edge_x,
        y=edge_y,
        line={"color": "black", "width": 1},
        hoverinfo="none",
        showlegend=False,
        mode="lines",
    )

    # Node trace: one labelled marker per node, in graph iteration order.
    node_x = []
    node_y = []
    text = []
    for node in network_graph.nodes():
        x, y = pos[node]
        node_x.append(x)
        node_y.append(y)
        text.append(node)
    node_trace = go.Scatter(
        x=node_x,
        y=node_y,
        text=text,
        mode="markers+text",
        showlegend=False,
        hoverinfo="none",
        marker={"color": "pink", "size": 50, "line": {"color": "black", "width": 1}},
    )

    layout = {
        "plot_bgcolor": "white",
        "paper_bgcolor": "white",
        "margin": {"t": 10, "b": 10, "l": 10, "r": 10, "pad": 0},
        "xaxis": {
            "linecolor": "black",
            "showgrid": False,
            "showticklabels": False,
            "mirror": True,
        },
        "yaxis": {
            "linecolor": "black",
            "showgrid": False,
            "showticklabels": False,
            "mirror": True,
        },
    }
    return go.Figure(data=[edge_trace, node_trace], layout=layout)
# Dash App
app = Dash(__name__)
app.title = "Dash Networkx"

# Page layout: a caption, a text input and the graph placeholder.
# The input id "EGDE_VAR" (sic) must stay in sync with the Input registered
# on the callback further down in this module.
# className="networkx_style"
app.layout = html.Div(
    children=[
        html.I("Write your EDGE_VAR"),
        html.Br(),
        # dcc.Dropdown(['eigenvector', 'degree', 'betweeness', 'closeness'], 'eigenvector', id='metric-dropdown'),
        dcc.Input(id="EGDE_VAR", value="K", type="text", debounce=True),
        dcc.Graph(id="my-graph", style={"width": "49%"}),
    ],
    style={"width": "49%"},
)
@app.callback(
    Output("my-graph", "figure"),
    # Input('metric-dropdown', 'value'),
    [Input("EGDE_VAR", "value")],
)
def update_output(_edge_var: str | None = None) -> go.Figure:
    """Return the network figure whenever the text input changes.

    Dash invokes a callback with one positional argument per registered
    ``Input``. The previous zero-parameter signature therefore raised a
    ``TypeError`` as soon as the callback fired. The value is accepted but not
    used yet; the default keeps direct calls without arguments working.

    Args:
        _edge_var: Current value of the ``EGDE_VAR`` input (ignored).

    Returns:
        go.Figure: The figure produced by ``create_network_graph``.
    """
    return create_network_graph()
if __name__ == "__main__":
    # Main Method to test this page: start the app with debug enabled.
    app.run(debug=True)

View File

@ -0,0 +1,366 @@
"""Dash elements."""
import pandas as pd
import plotly.graph_objs as go
from cachetools import TTLCache, cached
from dash import dash_table, dcc, html
from sqlalchemy.engine import Engine
from sqlalchemy.orm import Session
from aki_prj23_transparenzregister.ui.archive.networkx_dash import networkx_component
from aki_prj23_transparenzregister.utils.sql import entities
# Named colours as "#rrggbb" hex strings, keyed by a descriptive colour name.
COLORS = {
    "light": "#edefef",
    "lavender-blush": "#f3e8ee",
    "ash-gray": "#bacdb0",
    "cambridge-blue": "#729b79",
    "paynes-gray": "#475b63",
    "raisin-black": "#2e2c2f",
}
def get_company_data(session: Session) -> pd.DataFrame:
    """Load all available company data including the district court name.

    Args:
        session: A session connecting to the database.

    Returns:
        A dataframe indexed by ``company_id`` containing all available company
        data including the name of the corresponding district court.

    Raises:
        TypeError: If the session is not bound to an ``Engine``.
    """
    query_company = session.query(entities.Company, entities.DistrictCourt.name).join(
        entities.DistrictCourt
    )
    engine = session.bind
    if not isinstance(engine, Engine):
        # Say what was found instead of raising an anonymous TypeError.
        msg = f"Session must be bound to an Engine, got {type(engine).__name__}."
        raise TypeError(msg)
    # pandas executes the SQL text rendered from the query on the engine.
    return pd.read_sql(str(query_company), engine, index_col="company_id")
def get_finance_data(session: Session) -> pd.DataFrame:
    """Load all annual finance statements with the owning company's name and id.

    Args:
        session: A session connecting to the database.

    Returns:
        A dataframe containing all financial data of all companies.

    Raises:
        TypeError: If the session is not bound to an ``Engine``.
    """
    query_finance = session.query(
        entities.AnnualFinanceStatement, entities.Company.name, entities.Company.id
    ).join(entities.Company)
    engine = session.bind
    if not isinstance(engine, Engine):
        # Say what was found instead of raising an anonymous TypeError.
        msg = f"Session must be bound to an Engine, got {type(engine).__name__}."
        raise TypeError(msg)
    # pandas executes the SQL text rendered from the query on the engine.
    return pd.read_sql(str(query_finance), engine)
@cached(  # type: ignore
    cache=TTLCache(maxsize=1, ttl=300),
    key=lambda session: str(session.bind) if session is not None else 0,
)
def get_options(session: Session | None) -> dict[int, str]:
    """Build the company search options, cached for a limited time.

    The cache key is derived from the session's bind (or ``0`` without a session).

    Args:
        session: A session connecting to the database, or None.

    Returns:
        A dict mapping each company id to the company's name; empty when no
        session is given.
    """
    if session:
        company_names = get_company_data(session)["company_name"]
        return company_names.to_dict()
    return {}
def create_header(options: dict) -> html.Div:
    """Create the header of the dashboard.

    Args:
        options: A dictionary with company ids as keys and company names as
            values for the dropdown.

    Returns:
        The html div for the page's header, including the name of the page and
        the search dropdown for companies.
    """
    # Each dropdown entry shows the company name and carries its id as value.
    dropdown_options = [
        {"label": name, "value": company_id} for company_id, name in options.items()
    ]
    return html.Div(
        className="header-wrapper",
        children=[
            html.Div(
                className="header-title",
                children=[
                    html.I(
                        id="home-button",
                        n_clicks=0,
                        className="bi-house-door-fill",
                    ),
                    html.H1(
                        className="header-title-text",
                        children="Transparenzregister für Kapitalgesellschaften",
                    ),
                ],
            ),
            html.Div(
                className="header-search",
                children=[
                    html.Div(
                        className="header-search-dropdown",
                        children=[
                            dcc.Dropdown(
                                id="select_company",
                                options=dropdown_options,
                                placeholder="Suche nach Unternehmen oder Person",
                            ),
                        ],
                    ),
                ],
            ),
        ],
    )
def create_company_header(selected_company_name: str) -> html.Div:
    """Create the company header based on the selected company.

    Args:
        selected_company_name: The company name that has been chosen in the dropdown.

    Returns:
        The html div containing the company name as a heading.
    """
    return html.Div(
        className="company-header",
        children=[
            html.H1(
                className="company-header-title",
                id="id-company-header-title",
                children=selected_company_name,
            ),
        ],
    )
def create_company_stats(selected_company_data: pd.Series) -> html.Div:
    """Create the company stats.

    Args:
        selected_company_data: A series containing all company information of
            the selected company.

    Returns:
        The html div containing the company stats table and the three small widgets.
    """
    # Build "<zip> <city>" from the values that are actually present. Joining the
    # stringified parts avoids a TypeError when a value is missing or not a string.
    location = " ".join(
        str(part)
        for part in (
            selected_company_data["company_zip_code"],
            selected_company_data["company_city"],
        )
        if pd.notna(part)
    )
    # Two label/value column pairs; the table's header row is hidden further down.
    company_data = {
        "col1": ["Unternehmen", "Straße", "Stadt"],
        "col2": [
            selected_company_data["company_name"],
            selected_company_data["company_street"],
            location,
        ],
        "col3": ["Branche", "Amtsgericht", "Gründungsjahr"],
        "col4": [
            selected_company_data["company_sector"],
            selected_company_data["district_court_name"],
            # Hard-coded placeholder value.
            "xxx",
        ],
    }
    df_company_data = pd.DataFrame(data=company_data)
    return html.Div(
        className="stats-wrapper",
        children=[
            html.Div(
                className="widget-large",
                children=[
                    html.H3(
                        className="widget-title",
                        children="Stammdaten",
                    ),
                    dash_table.DataTable(
                        df_company_data.to_dict("records"),
                        [{"name": i, "id": i} for i in df_company_data.columns],
                        style_table={
                            "width": "90%",
                            "marginLeft": "auto",
                            "marginRight": "auto",
                            "paddingBottom": "20px",
                            "color": COLORS["raisin-black"],
                        },
                        # hide header of table
                        css=[
                            {
                                "selector": "tr:first-child",
                                "rule": "display: none",
                            },
                        ],
                        style_cell={"textAlign": "center"},
                        # The label columns are rendered in bold.
                        style_cell_conditional=[
                            {"if": {"column_id": c}, "fontWeight": "bold"}
                            for c in ["col1", "col3"]
                        ],
                        style_data={
                            "whiteSpace": "normal",
                            "height": "auto",
                        },
                    ),
                ],
            ),
            html.Div(
                className="widget-small",
                children=[
                    html.H3(
                        className="widget-title",
                        children="Stimmung",
                    ),
                ],
            ),
            html.Div(
                className="widget-small",
                children=[
                    html.H3(
                        className="widget-title",
                        children="Aktienkurs",
                    ),
                    # Hard-coded placeholder content.
                    html.H1(
                        className="widget-content",
                        children="123",
                    ),
                ],
            ),
            html.Div(
                className="widget-small",
                children=[
                    html.H3(
                        className="widget-title",
                        children="Umsatz",
                    ),
                    # Hard-coded placeholder content.
                    html.H1(
                        className="widget-content",
                        children="1234",
                    ),
                ],
            ),
        ],
    )
def create_tabs(
    selected_company_id: int, selected_finance_df: pd.DataFrame
) -> html.Div:
    """Create the tabs with more company information.

    Args:
        selected_company_id: Id of the chosen company in the dropdown.
        selected_finance_df: A dataframe containing the finance information that
            is passed on to the metrics tab.

    Returns:
        The html div containing the tabs of the company page.
    """
    return html.Div(
        className="tabs",
        children=[
            dcc.Tabs(
                id="tabs",
                # The metrics tab is selected initially.
                value="tab-1",
                children=[
                    dcc.Tab(
                        label="Kennzahlen",
                        value="tab-1",
                        className="tab-style",
                        selected_className="selected-tab-style",
                        children=[kennzahlen_layout(selected_finance_df)],
                    ),
                    # This tab has no content yet.
                    dcc.Tab(
                        label="Beteiligte Personen",
                        value="tab-2",
                        className="tab-style",
                        selected_className="selected-tab-style",
                    ),
                    # This tab has no content yet.
                    dcc.Tab(
                        label="Stimmung",
                        value="tab-3",
                        className="tab-style",
                        selected_className="selected-tab-style",
                    ),
                    dcc.Tab(
                        label="Verflechtungen",
                        value="tab-4",
                        className="tab-style",
                        selected_className="selected-tab-style",
                        children=[network_layout(selected_company_id)],
                    ),
                ],
            ),
            html.Div(id="tabs-example-content-1"),
        ],
    )
def kennzahlen_layout(selected_finance_df: pd.DataFrame) -> html.Div:
    """Create the metrics tab.

    Args:
        selected_finance_df: A dataframe containing the finance information to plot.

    Returns:
        The html div containing the metrics tab of the company page, currently a
        single line chart of the EBIT column.
    """
    ebit_figure = financials_figure(
        selected_finance_df, "annual_finance_statement_ebit"
    )
    return html.Div([dcc.Graph(figure=ebit_figure)])
def financials_figure(selected_finance_df: pd.DataFrame, metric: str) -> go.Figure:
    """Build a plotly line chart of one metric over the statement dates.

    Args:
        selected_finance_df: A dataframe containing all finance information of
            the selected company.
        metric: Name of the column that should be visualized; also used as the
            chart title.

    Returns:
        A plotly figure showing the available metric data of the company.
    """
    # Line and markers share the same dark colour.
    trace_color = COLORS["raisin-black"]
    metric_trace = go.Scatter(
        x=selected_finance_df["annual_finance_statement_date"],
        y=selected_finance_df[metric],
        line_color=trace_color,
        marker_color=trace_color,
    )

    figure = go.Figure()
    figure.add_trace(metric_trace)

    # Title, axis labels and background.
    layout_options = {
        "title": metric,
        "xaxis_title": "Jahr",
        "yaxis_title": "in Mio.€",
        "plot_bgcolor": COLORS["light"],
    }
    figure.update_layout(**layout_options)
    return figure
def network_layout(selected_company_id: int) -> html:
    """Build the content of the network tab.

    Args:
        selected_company_id: Id of the chosen company in the dropdown.

    Returns:
        Whatever ``networkx_component`` produces for the given company id; it is
        used as the content of the network tab of the company page.
    """
    network_component = networkx_component(selected_company_id)
    return network_component