Added Relation_count Method

This commit is contained in:
Tim 2023-11-05 15:50:28 +01:00
parent 76af89ff32
commit f2ac0eda91
13 changed files with 175 additions and 108 deletions

View File

@ -34,16 +34,15 @@ def find_company_relations(company_id: int) -> pd.DataFrame:
connected_company_name = [] connected_company_name = []
for _, row in companies_relations_df.iterrows(): for _, row in companies_relations_df.iterrows():
# print(companies_df.loc[companies_df["company_id"] == row["relation_id"]]["company_name"].values[0])
company_name.append( company_name.append(
companies_df.loc[companies_df["company_id"] == row["relation_id"]][ companies_df.loc[companies_df["company_id"] == row["relation_id"]][
"company_name" "company_name"
].values[0] ].iloc[0]
) )
connected_company_name.append( connected_company_name.append(
companies_df.loc[ companies_df.loc[
companies_df["company_id"] == row["company_relation_company2_id"] companies_df["company_id"] == row["company_relation_company2_id"]
]["company_name"].values[0] ]["company_name"].iloc[0]
) )
# print(company_name) # print(company_name)
@ -54,7 +53,7 @@ def find_company_relations(company_id: int) -> pd.DataFrame:
# Plotly figure # Plotly figure
def networkGraph(company_id: int) -> go.Figure: def network_graph(company_id: int) -> go.Figure:
"""_summary_. """_summary_.
Args: Args:
@ -63,14 +62,10 @@ def networkGraph(company_id: int) -> go.Figure:
Returns: Returns:
go.Figure: _description_ go.Figure: _description_
""" """
# df = find_company_relations(test_company)
edges = [] edges = []
for index, row in find_company_relations(company_id).iterrows(): for _, row in find_company_relations(company_id).iterrows():
edges.append([row["company_name"], row["connected_company_name"]]) edges.append([row["company_name"], row["connected_company_name"]])
# print(row["company_name"], row["connected_company_name"])
# print(edges)
# edges = df[["relation_id","company_relation_company2_id"]]
# edges = [[EGDE_VAR, "B"], ["B", "C"], ["B", "D"]]
network_graph = nx.Graph() network_graph = nx.Graph()
network_graph.add_edges_from(edges) network_graph.add_edges_from(edges)
pos = nx.spring_layout(network_graph) pos = nx.spring_layout(network_graph)
@ -151,6 +146,6 @@ def networkx_component(company_id: int) -> html.Div:
""" """
return html.Div( return html.Div(
[ [
dcc.Graph(id="my-graph", figure=networkGraph(company_id)), dcc.Graph(id="my-graph", figure=network_graph(company_id)),
] ]
) )

View File

@ -34,18 +34,16 @@ def find_all_company_relations() -> pd.DataFrame:
# print(companies_relations_df) # print(companies_relations_df)
for _, row in companies_relations_df.iterrows(): for _, row in companies_relations_df.iterrows():
# print(companies_df.loc[companies_df["company_id"] == row["relation_id"]]["company_name"].values[0])
# print("TEst")
company_name.append( company_name.append(
companies_df.loc[companies_df["company_id"] == row["relation_id"]][ companies_df.loc[companies_df["company_id"] == row["relation_id"]][
"company_name" "company_name"
].values[0] ].iloc[0]
) )
connected_company_name.append( connected_company_name.append(
companies_df.loc[ companies_df.loc[
companies_df["company_id"] == row["company_relation_company2_id"] companies_df["company_id"] == row["company_relation_company2_id"]
]["company_name"].values[0] ]["company_name"].iloc[0]
) )
# print(connected_company_name) # print(connected_company_name)
@ -58,7 +56,7 @@ def find_all_company_relations() -> pd.DataFrame:
# Plotly figure # Plotly figure
def networkGraph(EGDE_VAR: None) -> go.Figure: def network_graph() -> go.Figure:
"""Create a NetworkX Graph. """Create a NetworkX Graph.
Args: Args:
@ -67,15 +65,10 @@ def networkGraph(EGDE_VAR: None) -> go.Figure:
Returns: Returns:
go.Figure: _description_ go.Figure: _description_
""" """
# find_all_company_relations()
edges = [] edges = []
for index, row in find_all_company_relations().iterrows(): for _, row in find_all_company_relations().iterrows():
edges.append([row["company_name"], row["connected_company_name"]]) edges.append([row["company_name"], row["connected_company_name"]])
# print(row["company_name"], row["connected_company_name"])
# print(edges)
# edges = df[["relation_id","company_relation_company2_id"]]
# edges = [[EGDE_VAR, "B"], ["B", "C"], ["B", "D"]]
network_graph = nx.Graph() network_graph = nx.Graph()
network_graph.add_edges_from(edges) network_graph.add_edges_from(edges)
pos = nx.spring_layout(network_graph) pos = nx.spring_layout(network_graph)
@ -141,7 +134,6 @@ def networkGraph(EGDE_VAR: None) -> go.Figure:
}, },
} }
print(nx.eigenvector_centrality(network_graph))
measure_vector = {} measure_vector = {}
network_metrics_df = pd.DataFrame() network_metrics_df = pd.DataFrame()
@ -157,20 +149,11 @@ def networkGraph(EGDE_VAR: None) -> go.Figure:
measure_vector = nx.closeness_centrality(network_graph) measure_vector = nx.closeness_centrality(network_graph)
network_metrics_df["closeness"] = measure_vector.values() network_metrics_df["closeness"] = measure_vector.values()
# measure_vector = nx.pagerank(network_graph)
# network_metrics_df["pagerank"] = measure_vector.values()
# measure_vector = nx.average_degree_connectivity(network_graph)
# network_metrics_df["average_degree"] = measure_vector.values()
print(network_metrics_df)
# figure # figure
return go.Figure(data=[edge_trace, node_trace], layout=layout) return go.Figure(data=[edge_trace, node_trace], layout=layout)
# Dash App # Dash App
app = Dash(__name__) app = Dash(__name__)
app.title = "Dash Networkx" app.title = "Dash Networkx"
@ -192,7 +175,7 @@ app.layout = html.Div(
# Input('metric-dropdown', 'value'), # Input('metric-dropdown', 'value'),
[Input("EGDE_VAR", "value")], [Input("EGDE_VAR", "value")],
) )
def update_output(EGDE_VAR: None) -> go.Figure: def update_output(edge_var: None) -> go.Figure:
"""Just Returns the go Figure of Plotly. """Just Returns the go Figure of Plotly.
Args: Args:
@ -201,7 +184,7 @@ def update_output(EGDE_VAR: None) -> go.Figure:
Returns: Returns:
go.Figure: _description_ go.Figure: _description_
""" """
return networkGraph(EGDE_VAR) return network_graph(edge_var)
if __name__ == "__main__": if __name__ == "__main__":

View File

@ -7,8 +7,8 @@ from dash import dash_table, dcc, html
from sqlalchemy.engine import Engine from sqlalchemy.engine import Engine
from sqlalchemy.orm import Session from sqlalchemy.orm import Session
from aki_prj23_transparenzregister.utils.sql import entities
from aki_prj23_transparenzregister.ui.archive.networkx_dash import networkx_component from aki_prj23_transparenzregister.ui.archive.networkx_dash import networkx_component
from aki_prj23_transparenzregister.utils.sql import entities
COLORS = { COLORS = {
"light": "#edefef", "light": "#edefef",
@ -363,6 +363,4 @@ def network_layout(selected_company_id: int) -> html:
Returns: Returns:
The html div to create the network tab of the company page. The html div to create the network tab of the company page.
""" """
selected_company_id
return networkx_component(selected_company_id) return networkx_component(selected_company_id)
# return html.Div([f"Netzwerk von Unternehmen mit ID: {selected_company_id}"])

View File

@ -15,6 +15,7 @@ from aki_prj23_transparenzregister.utils.networkx.network_base import initialize
from aki_prj23_transparenzregister.utils.networkx.networkx_data import ( from aki_prj23_transparenzregister.utils.networkx.networkx_data import (
create_edge_and_node_list_for_company, create_edge_and_node_list_for_company,
find_company_relations, find_company_relations,
get_relations_number_from_id,
) )
COLORS = { COLORS = {
@ -379,13 +380,37 @@ def network_layout(selected_company_id: int) -> html.Div:
The html div to create the network tab of the company page. The html div to create the network tab of the company page.
""" """
person_relations, company_relations = find_company_relations(selected_company_id) person_relations, company_relations = find_company_relations(selected_company_id)
# get_all_metrics_from_id(selected_company_id)
get_relations_number_from_id(f"c_{selected_company_id}")
# Create Edge and Node List from data # Create Edge and Node List from data
nodes, edges = create_edge_and_node_list_for_company(company_relations) nodes, edges = create_edge_and_node_list_for_company(company_relations)
# Initialize the Network and receive the Graph and a DataFrame with Metrics # Initialize the Network and receive the Graph and a DataFrame with Metrics
if nodes != {}: if nodes != {}:
graph, metrics = initialize_network(nodes=nodes, edges=edges) graph, metrics = initialize_network(nodes=nodes, edges=edges)
metric = "None" metric = "None"
figure = create_2d_graph(graph, nodes, edges, metrics, metric, layout="Spring", edge_annotation=True, node_annotation=False, edge_thickness=1) figure = create_2d_graph(
graph,
return html.Div( children=[dcc.Graph(figure=figure, id="company-graph", className="graph-style")]) nodes,
return html.Div([html.H3(f"Leider gibt es keine Verbindungen vom Unternehmen mit ID: {selected_company_id}")]) edges,
metrics,
metric,
layout="Spring",
edge_annotation=True,
node_annotation=False,
edge_thickness=1,
)
return html.Div(
children=[
dcc.Graph(figure=figure, id="company-graph", className="graph-style")
]
)
return html.Div(
[
html.H3(
f"Leider gibt es keine Verbindungen vom Unternehmen mit ID: {selected_company_id}"
)
]
)

View File

@ -369,7 +369,7 @@ def update_graph_data(
allow_duplicate=True, allow_duplicate=True,
) )
# @lru_cache(20) # @lru_cache(20)
def update_figure( def update_figure( # noqa: PLR0913
selected_metric: str, selected_metric: str,
switch_value: bool, switch_value: bool,
# switch_node_annotaion_value: bool, # switch_node_annotaion_value: bool,

View File

@ -11,38 +11,6 @@ class SentimentLabel(MultiValueEnum):
NEGATIVE = -1, "negative" NEGATIVE = -1, "negative"
NEUTRAL = 0, "neutral" NEUTRAL = 0, "neutral"
@staticmethod
def get_string_from_enum(value: int | None) -> str:
"""Translates relation name into a RelationTypeEnum.
If no translation can be found a warning is given.
Args:
relation_name: The name of the relation to be translated.
Returns:
The identified translation or None if no translation can be found.
"""
tmp = RelationTypeEnum(value)
if value is None:
raise ValueError("A relation type needs to be given.")
name = {
RelationTypeEnum.GESCHAEFTSFUEHRER: "Geschäftsführer",
RelationTypeEnum.KOMMANDITIST: "Kommanditist",
RelationTypeEnum.VORSTAND: "Vorstand",
RelationTypeEnum.PROKURIST: "Prokurist",
RelationTypeEnum.LIQUIDATOR: "Liquidator",
RelationTypeEnum.INHABER: "Inhaber",
RelationTypeEnum.PERSOENLICH_HAFTENDER_GESELLSCHAFTER: "Persönlich haftender Gesellschafter",
RelationTypeEnum.ORGANISATION: "Organisation",
RelationTypeEnum.PARTNER: "Partner",
RelationTypeEnum.DIREKTOR: "Direktor",
RelationTypeEnum.RECHTSNACHFOLGER: "Rechtsnachfolger",
}.get(tmp)
if name is not None:
return name
raise ValueError(f'Relation type "{value}" is not yet implemented!')
class FinancialKPIEnum(Enum): class FinancialKPIEnum(Enum):
"""Financial KPI keys.""" """Financial KPI keys."""

View File

@ -5,7 +5,7 @@ import pandas as pd
import plotly.graph_objects as go import plotly.graph_objects as go
def create_3d_graph( def create_3d_graph( # noqa : PLR0913
graph: nx.Graph, graph: nx.Graph,
nodes: dict, nodes: dict,
edges: list, edges: list,

View File

@ -34,5 +34,66 @@ def initialize_network(edges: list, nodes: dict) -> tuple[nx.Graph, pd.DataFrame
metrics["pagerank"] = nx.pagerank(graph).values() metrics["pagerank"] = nx.pagerank(graph).values()
metrics["category"] = nx.get_node_attributes(graph, "type").values() metrics["category"] = nx.get_node_attributes(graph, "type").values()
metrics["designation"] = nx.get_node_attributes(graph, "name").values() metrics["designation"] = nx.get_node_attributes(graph, "name").values()
metrics["id"] = nx.get_node_attributes(graph, "id").values()
return graph, metrics return graph, metrics
def initialize_network_with_reduced_metrics(
    edges: list, nodes: dict
) -> tuple[nx.Graph, pd.DataFrame]:
    """Build a NetworkX graph and compute a reduced set of centrality metrics.

    The edge list is loaded into a DataFrame with the columns ``from``, ``to``
    and ``type`` and converted into a graph; the entries of ``nodes`` are then
    attached to the graph as node attributes. Only degree, betweenness and
    closeness centrality are computed. The ``eigenvector`` and ``pagerank``
    columns are still declared in the result, but this reduced variant does
    not fill them.

    Args:
        edges (list): Connections between nodes; each entry provides the
            fields ``from``, ``to`` and ``type``.
        nodes (dict): Mapping of node key to a dict of node attributes. The
            attributes ``type``, ``name`` and ``id`` are read back from the
            graph to fill the ``category``, ``designation`` and ``id`` columns.

    Returns:
        tuple[nx.Graph, pd.DataFrame]: The NetworkX graph and a DataFrame with
        the computed metrics and node attributes.
    """
    # Build the graph from an edge table; "type" is kept as an edge attribute.
    df_edges = pd.DataFrame(edges, columns=["from", "to", "type"])
    graph = nx.from_pandas_edgelist(
        df_edges, source="from", target="to", edge_attr="type"
    )

    # Attach the per-node attribute dicts to the graph's nodes.
    nx.set_node_attributes(graph, nodes)

    # Keep the same column layout as the full metrics table, even though
    # "eigenvector" and "pagerank" are not computed here.
    metrics = pd.DataFrame(
        columns=["degree", "eigenvector", "betweenness", "closeness", "pagerank"]
    )
    metrics["degree"] = nx.degree_centrality(graph).values()
    metrics["betweenness"] = nx.betweenness_centrality(graph).values()
    metrics["closeness"] = nx.closeness_centrality(graph).values()

    # NOTE(review): the attribute columns are aligned with the metric columns
    # purely by position; this presumably relies on every node in the graph
    # carrying "type", "name" and "id" - confirm against the callers.
    metrics["category"] = nx.get_node_attributes(graph, "type").values()
    metrics["designation"] = nx.get_node_attributes(graph, "name").values()
    metrics["id"] = nx.get_node_attributes(graph, "id").values()

    return graph, metrics
def initialize_network_without_metrics(edges: list, nodes: dict) -> nx.Graph:
    """Build a NetworkX graph from an edge and node list without any metrics.

    The edge list is loaded into a DataFrame with the columns ``from``, ``to``
    and ``type`` and converted into a graph; the entries of ``nodes`` are then
    attached to the graph as node attributes. No centrality metrics are
    computed and no metrics DataFrame is created.

    Args:
        edges (list): Connections between nodes; each entry provides the
            fields ``from``, ``to`` and ``type``.
        nodes (dict): Mapping of node key to a dict of node attributes.

    Returns:
        nx.Graph: The NetworkX graph with node attributes set.
    """
    # Build the graph from an edge table; "type" is kept as an edge attribute.
    df_edges = pd.DataFrame(edges, columns=["from", "to", "type"])
    graph = nx.from_pandas_edgelist(
        df_edges, source="from", target="to", edge_attr="type"
    )

    # Attach the per-node attribute dicts to the graph's nodes.
    nx.set_node_attributes(graph, nodes)

    return graph

View File

@ -1,8 +1,15 @@
"""Module to receive and filter Data for working with NetworkX.""" """Module to receive and filter Data for working with NetworkX."""
from functools import lru_cache
import networkx as nx
import pandas as pd import pandas as pd
from sqlalchemy.orm import aliased from sqlalchemy.orm import aliased
from aki_prj23_transparenzregister.config.config_providers import JsonFileConfigProvider from aki_prj23_transparenzregister.config.config_providers import JsonFileConfigProvider
from aki_prj23_transparenzregister.utils.networkx.network_base import (
initialize_network_with_reduced_metrics,
initialize_network_without_metrics,
)
from aki_prj23_transparenzregister.utils.sql import connector, entities from aki_prj23_transparenzregister.utils.sql import connector, entities
from aki_prj23_transparenzregister.utils.sql.connector import get_session from aki_prj23_transparenzregister.utils.sql.connector import get_session
@ -15,6 +22,9 @@ to_company = aliased(entities.Company, name="to_company")
# Alias for Company table for the head company # Alias for Company table for the head company
from_company = aliased(entities.Company, name="from_company") from_company = aliased(entities.Company, name="from_company")
COLOR_COMPANY = "blue"
COLOR_PERSON = "red"
def find_all_company_relations() -> pd.DataFrame: def find_all_company_relations() -> pd.DataFrame:
"""_summary_. """_summary_.
@ -40,12 +50,12 @@ def find_all_company_relations() -> pd.DataFrame:
company_name.append( company_name.append(
companies_df.loc[companies_df["company_id"] == row["relation_id"]][ companies_df.loc[companies_df["company_id"] == row["relation_id"]][
"company_name" "company_name"
].values[0] ].iloc[0]
) )
connected_company_name.append( connected_company_name.append(
companies_df.loc[ companies_df.loc[
companies_df["company_id"] == row["company_relation_company2_id"] companies_df["company_id"] == row["company_relation_company2_id"]
]["company_name"].values[0] ]["company_name"].iloc[0]
) )
companies_relations_df["company_name"] = company_name companies_relations_df["company_name"] = company_name
@ -213,19 +223,16 @@ def create_edge_and_node_list(
nodes: dict = {} nodes: dict = {}
edges: list = [] edges: list = []
COLOR_COMPANY = "blue"
COLOR_PERSON = "red"
# Iterate over person relations # Iterate over person relations
for _index, row in person_relations.iterrows(): for _, row in person_relations.iterrows():
if node := nodes.get(row["id_company"]) is None: if nodes.get(row["id_company"]) is None:
nodes[row["id_company"]] = { nodes[row["id_company"]] = {
"id": row["id_company"], "id": row["id_company"],
"name": row["name_company"], "name": row["name_company"],
"color": COLOR_COMPANY, "color": COLOR_COMPANY,
"type": "company", "type": "company",
} }
if node := nodes.get(row["id_person"]) is None: if nodes.get(row["id_person"]) is None:
nodes[row["id_person"]] = { nodes[row["id_person"]] = {
"id": row["id_person"], "id": row["id_person"],
"name": str(row["firstname"]) + " " + str(row["lastname"]), "name": str(row["firstname"]) + " " + str(row["lastname"]),
@ -241,15 +248,15 @@ def create_edge_and_node_list(
} }
) )
for _index, row in company_relations.iterrows(): for _, row in company_relations.iterrows():
if node := nodes.get(row["id_company_from"]) is None: if nodes.get(row["id_company_from"]) is None: # noqa
nodes[row["id_company_from"]] = { nodes[row["id_company_from"]] = {
"id": row["id_company_from"], "id": row["id_company_from"],
"name": row["name_company_from"], "name": row["name_company_from"],
"color": COLOR_COMPANY, "color": COLOR_COMPANY,
"type": "company", "type": "company",
} }
if node := nodes.get(row["id_company_to"]) is None: if nodes.get(row["id_company_to"]) is None:
nodes[row["id_company_to"]] = { nodes[row["id_company_to"]] = {
"id": row["id_company_to"], "id": row["id_company_to"],
"name": row["name_company_to"], "name": row["name_company_to"],
@ -361,7 +368,7 @@ def create_edge_and_node_list_for_company(
return nodes, edges return nodes, edges
def get_all_metrics_from_id(company_id: int) -> pd.DataFrame: def get_all_metrics_from_id(company_id: int) -> pd.Series:
"""_summary_ """_summary_
Args: Args:
@ -370,10 +377,20 @@ def get_all_metrics_from_id(company_id: int) -> pd.DataFrame:
Returns: Returns:
pd.DataFrame: _description_ pd.DataFrame: _description_
""" """
return pd.DataFrame() # Get Data
person_df = get_all_person_relations()
company_df = get_all_company_relations()
# Create Edge and Node List from data
nodes_tmp, edges_tmp = create_edge_and_node_list(person_df, company_df)
graph, metrics = initialize_network_with_reduced_metrics(
nodes=nodes_tmp, edges=edges_tmp
)
return metrics.loc[metrics["id"] == company_id].iloc[0]
def get_relations_number_from_id(company_id: int) -> tuple[int, int, int]: @lru_cache
def get_relations_number_from_id(id: str) -> tuple[int, int, int]:
"""_summary_ """_summary_
Args: Args:
@ -382,4 +399,29 @@ def get_relations_number_from_id(company_id: int) -> tuple[int, int, int]:
Returns: Returns:
tuple[int,int,int]: _description_ tuple[int,int,int]: _description_
""" """
return (1, 2, 3) # Get Data
person_df = get_all_person_relations()
company_df = get_all_company_relations()
# Create Edge and Node List from data
nodes_tmp, edges_tmp = create_edge_and_node_list(person_df, company_df)
graph = initialize_network_without_metrics(nodes=nodes_tmp, edges=edges_tmp)
neighbors = nx.all_neighbors(graph, id)
relations_lv1 = set(neighbors)
relations_lv2 = set()
relations_lv3 = set()
for node in relations_lv1:
relations_lv2 |= set(nx.all_neighbors(graph, node))
relations_lv2.discard(id)
for sub_node in relations_lv2:
relations_lv3 |= set(nx.all_neighbors(graph, sub_node))
relations_lv2.difference(relations_lv3)
return (len(relations_lv1), len(relations_lv2), len(relations_lv3))

View File

@ -45,7 +45,7 @@ def get_engine(conn_args: SQLConnectionString) -> Engine:
return sa.create_engine( return sa.create_engine(
str(conn_args), str(conn_args),
connect_args={"check_same_thread": True}, connect_args={"check_same_thread": True},
poolclass=SingletonThreadPool poolclass=SingletonThreadPool,
) )
raise TypeError("The type of the configuration is invalid.") raise TypeError("The type of the configuration is invalid.")

View File

@ -2,6 +2,6 @@
from aki_prj23_transparenzregister.ui import networkx_dash from aki_prj23_transparenzregister.ui import networkx_dash
def networkGraph(Edges: None) -> None: def network_graph(Edges: None) -> None:
"""Checks if an import co company_stats_dash can be made.""" """Checks if an import co company_stats_dash can be made."""
assert networkx_dash is not None assert networkx_dash is not None

View File

@ -1,14 +1,11 @@
"""Test the initialize Network function.""" """Test the initialize Network function."""
import datetime import datetime
from unittest import TestCase
import networkx as nx import networkx as nx
import pandas as pd import pandas as pd
from aki_prj23_transparenzregister.utils.networkx.network_base import initialize_network from aki_prj23_transparenzregister.utils.networkx.network_base import initialize_network
tc = TestCase()
def test_initialize_network() -> None: def test_initialize_network() -> None:
edges: list = [ edges: list = [
@ -32,7 +29,7 @@ def test_initialize_network() -> None:
graph, metrics = initialize_network(edges=edges, nodes=nodes) graph, metrics = initialize_network(edges=edges, nodes=nodes)
assert type(graph) is nx.Graph assert type(graph) is nx.Graph
assert type(metrics) is pd.DataFrame assert type(metrics) is pd.DataFrame
tc.assertListEqual( assert (
list(metrics.columns), list(metrics.columns)
["degree", "eigenvector", "betweeness", "closeness", "pagerank"], == ["degree", "eigenvector", "betweeness", "closeness", "pagerank"],
) )

View File

@ -1,16 +1,14 @@
"""Test the initialize Network function.""" """Test the initialize Network function."""
import datetime import datetime
from unittest import TestCase
import networkx as nx import networkx as nx
import pandas as pd import pandas as pd
import pytest
from aki_prj23_transparenzregister.utils.networkx.network_base import initialize_network from aki_prj23_transparenzregister.utils.networkx.network_base import initialize_network
tc = TestCase()
import pytest
@pytest.mark.tim @pytest.mark.tim()
def test_initialize_network() -> None: def test_initialize_network() -> None:
edges: list = [ edges: list = [
{"from": "p_545", "to": "c_53", "type": "HAFTENDER_GESELLSCHAFTER"}, {"from": "p_545", "to": "c_53", "type": "HAFTENDER_GESELLSCHAFTER"},
@ -33,7 +31,7 @@ def test_initialize_network() -> None:
graph, metrics = initialize_network(edges=edges, nodes=nodes) graph, metrics = initialize_network(edges=edges, nodes=nodes)
assert isinstance(graph, nx.Graph) assert isinstance(graph, nx.Graph)
assert isinstance(metrics, pd.DataFrame) assert isinstance(metrics, pd.DataFrame)
tc.assertListEqual( assert (
list(metrics.columns), list(metrics.columns)
["degree", "eigenvector", "betweeness", "closeness", "pagerank"], == ["degree", "eigenvector", "betweeness", "closeness", "pagerank"],
) )