Added Relation_count Method

This commit is contained in:
Tim 2023-11-05 15:50:28 +01:00
parent 76af89ff32
commit f2ac0eda91
13 changed files with 175 additions and 108 deletions

View File

@ -34,16 +34,15 @@ def find_company_relations(company_id: int) -> pd.DataFrame:
connected_company_name = []
for _, row in companies_relations_df.iterrows():
# print(companies_df.loc[companies_df["company_id"] == row["relation_id"]]["company_name"].values[0])
company_name.append(
companies_df.loc[companies_df["company_id"] == row["relation_id"]][
"company_name"
].values[0]
].iloc[0]
)
connected_company_name.append(
companies_df.loc[
companies_df["company_id"] == row["company_relation_company2_id"]
]["company_name"].values[0]
]["company_name"].iloc[0]
)
# print(company_name)
@ -54,7 +53,7 @@ def find_company_relations(company_id: int) -> pd.DataFrame:
# Plotly figure
def networkGraph(company_id: int) -> go.Figure:
def network_graph(company_id: int) -> go.Figure:
"""_summary_.
Args:
@ -63,14 +62,10 @@ def networkGraph(company_id: int) -> go.Figure:
Returns:
go.Figure: _description_
"""
# df = find_company_relations(test_company)
edges = []
for index, row in find_company_relations(company_id).iterrows():
for _, row in find_company_relations(company_id).iterrows():
edges.append([row["company_name"], row["connected_company_name"]])
# print(row["company_name"], row["connected_company_name"])
# print(edges)
# edges = df[["relation_id","company_relation_company2_id"]]
# edges = [[EGDE_VAR, "B"], ["B", "C"], ["B", "D"]]
network_graph = nx.Graph()
network_graph.add_edges_from(edges)
pos = nx.spring_layout(network_graph)
@ -151,6 +146,6 @@ def networkx_component(company_id: int) -> html.Div:
"""
return html.Div(
[
dcc.Graph(id="my-graph", figure=networkGraph(company_id)),
dcc.Graph(id="my-graph", figure=network_graph(company_id)),
]
)

View File

@ -34,18 +34,16 @@ def find_all_company_relations() -> pd.DataFrame:
# print(companies_relations_df)
for _, row in companies_relations_df.iterrows():
# print(companies_df.loc[companies_df["company_id"] == row["relation_id"]]["company_name"].values[0])
# print("TEst")
company_name.append(
companies_df.loc[companies_df["company_id"] == row["relation_id"]][
"company_name"
].values[0]
].iloc[0]
)
connected_company_name.append(
companies_df.loc[
companies_df["company_id"] == row["company_relation_company2_id"]
]["company_name"].values[0]
]["company_name"].iloc[0]
)
# print(connected_company_name)
@ -58,7 +56,7 @@ def find_all_company_relations() -> pd.DataFrame:
# Plotly figure
def networkGraph(EGDE_VAR: None) -> go.Figure:
def network_graph() -> go.Figure:
"""Create a NetworkX Graph.
Args:
@ -67,15 +65,10 @@ def networkGraph(EGDE_VAR: None) -> go.Figure:
Returns:
go.Figure: _description_
"""
# find_all_company_relations()
edges = []
for index, row in find_all_company_relations().iterrows():
for _, row in find_all_company_relations().iterrows():
edges.append([row["company_name"], row["connected_company_name"]])
# print(row["company_name"], row["connected_company_name"])
# print(edges)
# edges = df[["relation_id","company_relation_company2_id"]]
# edges = [[EGDE_VAR, "B"], ["B", "C"], ["B", "D"]]
network_graph = nx.Graph()
network_graph.add_edges_from(edges)
pos = nx.spring_layout(network_graph)
@ -141,7 +134,6 @@ def networkGraph(EGDE_VAR: None) -> go.Figure:
},
}
print(nx.eigenvector_centrality(network_graph))
measure_vector = {}
network_metrics_df = pd.DataFrame()
@ -157,20 +149,11 @@ def networkGraph(EGDE_VAR: None) -> go.Figure:
measure_vector = nx.closeness_centrality(network_graph)
network_metrics_df["closeness"] = measure_vector.values()
# measure_vector = nx.pagerank(network_graph)
# network_metrics_df["pagerank"] = measure_vector.values()
# measure_vector = nx.average_degree_connectivity(network_graph)
# network_metrics_df["average_degree"] = measure_vector.values()
print(network_metrics_df)
# figure
return go.Figure(data=[edge_trace, node_trace], layout=layout)
# Dash App
app = Dash(__name__)
app.title = "Dash Networkx"
@ -192,7 +175,7 @@ app.layout = html.Div(
# Input('metric-dropdown', 'value'),
[Input("EGDE_VAR", "value")],
)
def update_output(EGDE_VAR: None) -> go.Figure:
def update_output(edge_var: None) -> go.Figure:
"""Just Returns the go Figure of Plotly.
Args:
@ -201,7 +184,7 @@ def update_output(EGDE_VAR: None) -> go.Figure:
Returns:
go.Figure: _description_
"""
return networkGraph(EGDE_VAR)
return network_graph(edge_var)
if __name__ == "__main__":

View File

@ -7,8 +7,8 @@ from dash import dash_table, dcc, html
from sqlalchemy.engine import Engine
from sqlalchemy.orm import Session
from aki_prj23_transparenzregister.utils.sql import entities
from aki_prj23_transparenzregister.ui.archive.networkx_dash import networkx_component
from aki_prj23_transparenzregister.utils.sql import entities
COLORS = {
"light": "#edefef",
@ -363,6 +363,4 @@ def network_layout(selected_company_id: int) -> html:
Returns:
The html div to create the network tab of the company page.
"""
selected_company_id
return networkx_component(selected_company_id)
# return html.Div([f"Netzwerk von Unternehmen mit ID: {selected_company_id}"])

View File

@ -15,6 +15,7 @@ from aki_prj23_transparenzregister.utils.networkx.network_base import initialize
from aki_prj23_transparenzregister.utils.networkx.networkx_data import (
create_edge_and_node_list_for_company,
find_company_relations,
get_relations_number_from_id,
)
COLORS = {
@ -379,13 +380,37 @@ def network_layout(selected_company_id: int) -> html.Div:
The html div to create the network tab of the company page.
"""
person_relations, company_relations = find_company_relations(selected_company_id)
# get_all_metrics_from_id(selected_company_id)
get_relations_number_from_id(f"c_{selected_company_id}")
# Create Edge and Node List from data
nodes, edges = create_edge_and_node_list_for_company(company_relations)
# Initialize the Network and receive the Graph and a DataFrame with Metrics
if nodes != {}:
graph, metrics = initialize_network(nodes=nodes, edges=edges)
metric = "None"
figure = create_2d_graph(graph, nodes, edges, metrics, metric, layout="Spring", edge_annotation=True, node_annotation=False, edge_thickness=1)
figure = create_2d_graph(
graph,
nodes,
edges,
metrics,
metric,
layout="Spring",
edge_annotation=True,
node_annotation=False,
edge_thickness=1,
)
return html.Div(
children=[
dcc.Graph(figure=figure, id="company-graph", className="graph-style")
]
)
return html.Div( children=[dcc.Graph(figure=figure, id="company-graph", className="graph-style")])
return html.Div([html.H3(f"Leider gibt es keine Verbindungen vom Unternehmen mit ID: {selected_company_id}")])
return html.Div(
[
html.H3(
f"Leider gibt es keine Verbindungen vom Unternehmen mit ID: {selected_company_id}"
)
]
)

View File

@ -369,7 +369,7 @@ def update_graph_data(
allow_duplicate=True,
)
# @lru_cache(20)
def update_figure(
def update_figure( # noqa: PLR0913
selected_metric: str,
switch_value: bool,
# switch_node_annotaion_value: bool,

View File

@ -11,38 +11,6 @@ class SentimentLabel(MultiValueEnum):
NEGATIVE = -1, "negative"
NEUTRAL = 0, "neutral"
@staticmethod
def get_string_from_enum(value: int | None) -> str:
"""Translates relation name into a RelationTypeEnum.
If no translation can be found a warning is given.
Args:
relation_name: The name of the relation to be translated.
Returns:
The identified translation or None if no translation can be found.
"""
tmp = RelationTypeEnum(value)
if value is None:
raise ValueError("A relation type needs to be given.")
name = {
RelationTypeEnum.GESCHAEFTSFUEHRER: "Geschäftsführer",
RelationTypeEnum.KOMMANDITIST: "Kommanditist",
RelationTypeEnum.VORSTAND: "Vorstand",
RelationTypeEnum.PROKURIST: "Prokurist",
RelationTypeEnum.LIQUIDATOR: "Liquidator",
RelationTypeEnum.INHABER: "Inhaber",
RelationTypeEnum.PERSOENLICH_HAFTENDER_GESELLSCHAFTER: "Persönlich haftender Gesellschafter",
RelationTypeEnum.ORGANISATION: "Organisation",
RelationTypeEnum.PARTNER: "Partner",
RelationTypeEnum.DIREKTOR: "Direktor",
RelationTypeEnum.RECHTSNACHFOLGER: "Rechtsnachfolger",
}.get(tmp)
if name is not None:
return name
raise ValueError(f'Relation type "{value}" is not yet implemented!')
class FinancialKPIEnum(Enum):
"""Financial KPI keys."""

View File

@ -5,7 +5,7 @@ import pandas as pd
import plotly.graph_objects as go
def create_3d_graph(
def create_3d_graph( # noqa : PLR0913
graph: nx.Graph,
nodes: dict,
edges: list,

View File

@ -34,5 +34,66 @@ def initialize_network(edges: list, nodes: dict) -> tuple[nx.Graph, pd.DataFrame
metrics["pagerank"] = nx.pagerank(graph).values()
metrics["category"] = nx.get_node_attributes(graph, "type").values()
metrics["designation"] = nx.get_node_attributes(graph, "name").values()
metrics["id"] = nx.get_node_attributes(graph, "id").values()
return graph, metrics
def initialize_network_with_reduced_metrics(
    edges: list, nodes: dict
) -> tuple[nx.Graph, pd.DataFrame]:
    """Create a NetworkX graph and a DataFrame with a reduced set of metrics.

    Only degree, betweenness and closeness centrality are computed. The
    "eigenvector" and "pagerank" columns are kept in the result but are not
    filled by this function.

    Args:
        edges (list): Connections between nodes; each entry is read through
            the fields "from", "to" and "type".
        nodes (dict): Node attributes, handed to ``nx.set_node_attributes``.
            The attributes "type", "name" and "id" are copied into the
            metrics table.

    Returns:
        tuple[nx.Graph, pd.DataFrame]: The graph and a DataFrame with one row
        per graph node (in graph node order) holding the metrics and the
        node attributes "category", "designation" and "id".
    """
    # Create the edge dataframe and build the graph from it.
    df_edges = pd.DataFrame(edges, columns=["from", "to", "type"])
    graph = nx.from_pandas_edgelist(
        df_edges, source="from", target="to", edge_attr="type"
    )

    # Update node attributes from the node dict.
    nx.set_node_attributes(graph, nodes)

    # Every column is built by walking the nodes in one fixed order, so all
    # rows stay aligned even if a node has no "type"/"name"/"id" attribute
    # (such a node gets None instead of causing a length mismatch).
    node_order = list(graph.nodes)
    degree = nx.degree_centrality(graph)
    betweenness = nx.betweenness_centrality(graph)
    closeness = nx.closeness_centrality(graph)

    # "eigenvector" and "pagerank" are not computed here; their columns stay
    # empty so the column layout of the result is unchanged.
    metrics = pd.DataFrame(
        columns=["degree", "eigenvector", "betweenness", "closeness", "pagerank"]
    )
    metrics["degree"] = [degree[node] for node in node_order]
    metrics["betweenness"] = [betweenness[node] for node in node_order]
    metrics["closeness"] = [closeness[node] for node in node_order]
    metrics["category"] = [graph.nodes[node].get("type") for node in node_order]
    metrics["designation"] = [graph.nodes[node].get("name") for node in node_order]
    metrics["id"] = [graph.nodes[node].get("id") for node in node_order]
    return graph, metrics
def initialize_network_without_metrics(edges: list, nodes: dict) -> nx.Graph:
    """Build a NetworkX graph from an edge list and a node dict.

    No metrics are calculated by this function.

    Args:
        edges (list): Connections between nodes, read through the fields
            "from", "to" and "type".
        nodes (dict): Node attributes, handed to ``nx.set_node_attributes``.

    Returns:
        nx.Graph: The graph built from the edges with the node attributes set.
    """
    source_column, target_column, attribute_column = "from", "to", "type"

    # Tabulate the edges so NetworkX can build the graph from the frame.
    edge_table = pd.DataFrame(
        edges, columns=[source_column, target_column, attribute_column]
    )
    network = nx.from_pandas_edgelist(
        edge_table,
        source=source_column,
        target=target_column,
        edge_attr=attribute_column,
    )

    # Attach the attributes from the node dict to the graph's nodes.
    nx.set_node_attributes(network, nodes)
    return network

View File

@ -1,8 +1,15 @@
"""Module to receive and filter Data for working with NetworkX."""
from functools import lru_cache
import networkx as nx
import pandas as pd
from sqlalchemy.orm import aliased
from aki_prj23_transparenzregister.config.config_providers import JsonFileConfigProvider
from aki_prj23_transparenzregister.utils.networkx.network_base import (
initialize_network_with_reduced_metrics,
initialize_network_without_metrics,
)
from aki_prj23_transparenzregister.utils.sql import connector, entities
from aki_prj23_transparenzregister.utils.sql.connector import get_session
@ -15,6 +22,9 @@ to_company = aliased(entities.Company, name="to_company")
# Alias for Company table for the head company
from_company = aliased(entities.Company, name="from_company")
COLOR_COMPANY = "blue"
COLOR_PERSON = "red"
def find_all_company_relations() -> pd.DataFrame:
"""_summary_.
@ -40,12 +50,12 @@ def find_all_company_relations() -> pd.DataFrame:
company_name.append(
companies_df.loc[companies_df["company_id"] == row["relation_id"]][
"company_name"
].values[0]
].iloc[0]
)
connected_company_name.append(
companies_df.loc[
companies_df["company_id"] == row["company_relation_company2_id"]
]["company_name"].values[0]
]["company_name"].iloc[0]
)
companies_relations_df["company_name"] = company_name
@ -213,19 +223,16 @@ def create_edge_and_node_list(
nodes: dict = {}
edges: list = []
COLOR_COMPANY = "blue"
COLOR_PERSON = "red"
# Iterate over person relations
for _index, row in person_relations.iterrows():
if node := nodes.get(row["id_company"]) is None:
for _, row in person_relations.iterrows():
if nodes.get(row["id_company"]) is None:
nodes[row["id_company"]] = {
"id": row["id_company"],
"name": row["name_company"],
"color": COLOR_COMPANY,
"type": "company",
}
if node := nodes.get(row["id_person"]) is None:
if nodes.get(row["id_person"]) is None:
nodes[row["id_person"]] = {
"id": row["id_person"],
"name": str(row["firstname"]) + " " + str(row["lastname"]),
@ -241,15 +248,15 @@ def create_edge_and_node_list(
}
)
for _index, row in company_relations.iterrows():
if node := nodes.get(row["id_company_from"]) is None:
for _, row in company_relations.iterrows():
if nodes.get(row["id_company_from"]) is None: # noqa
nodes[row["id_company_from"]] = {
"id": row["id_company_from"],
"name": row["name_company_from"],
"color": COLOR_COMPANY,
"type": "company",
}
if node := nodes.get(row["id_company_to"]) is None:
if nodes.get(row["id_company_to"]) is None:
nodes[row["id_company_to"]] = {
"id": row["id_company_to"],
"name": row["name_company_to"],
@ -361,7 +368,7 @@ def create_edge_and_node_list_for_company(
return nodes, edges
def get_all_metrics_from_id(company_id: int) -> pd.DataFrame:
def get_all_metrics_from_id(company_id: int) -> pd.Series:
"""_summary_
Args:
@ -370,10 +377,20 @@ def get_all_metrics_from_id(company_id: int) -> pd.DataFrame:
Returns:
pd.DataFrame: _description_
"""
return pd.DataFrame()
# Get Data
person_df = get_all_person_relations()
company_df = get_all_company_relations()
# Create Edge and Node List from data
nodes_tmp, edges_tmp = create_edge_and_node_list(person_df, company_df)
graph, metrics = initialize_network_with_reduced_metrics(
nodes=nodes_tmp, edges=edges_tmp
)
return metrics.loc[metrics["id"] == company_id].iloc[0]
def get_relations_number_from_id(company_id: int) -> tuple[int, int, int]:
@lru_cache
def get_relations_number_from_id(id: str) -> tuple[int, int, int]:
"""_summary_
Args:
@ -382,4 +399,29 @@ def get_relations_number_from_id(company_id: int) -> tuple[int, int, int]:
Returns:
tuple[int,int,int]: _description_
"""
return (1, 2, 3)
# Get Data
person_df = get_all_person_relations()
company_df = get_all_company_relations()
# Create Edge and Node List from data
nodes_tmp, edges_tmp = create_edge_and_node_list(person_df, company_df)
graph = initialize_network_without_metrics(nodes=nodes_tmp, edges=edges_tmp)
neighbors = nx.all_neighbors(graph, id)
relations_lv1 = set(neighbors)
relations_lv2 = set()
relations_lv3 = set()
for node in relations_lv1:
relations_lv2 |= set(nx.all_neighbors(graph, node))
relations_lv2.discard(id)
for sub_node in relations_lv2:
relations_lv3 |= set(nx.all_neighbors(graph, sub_node))
relations_lv2.difference(relations_lv3)
return (len(relations_lv1), len(relations_lv2), len(relations_lv3))

View File

@ -45,7 +45,7 @@ def get_engine(conn_args: SQLConnectionString) -> Engine:
return sa.create_engine(
str(conn_args),
connect_args={"check_same_thread": True},
poolclass=SingletonThreadPool
poolclass=SingletonThreadPool,
)
raise TypeError("The type of the configuration is invalid.")

View File

@ -2,6 +2,6 @@
from aki_prj23_transparenzregister.ui import networkx_dash
def networkGraph(Edges: None) -> None:
def network_graph(Edges: None) -> None:
"""Checks if an import co company_stats_dash can be made."""
assert networkx_dash is not None

View File

@ -1,14 +1,11 @@
"""Test the initialize Network function."""
import datetime
from unittest import TestCase
import networkx as nx
import pandas as pd
from aki_prj23_transparenzregister.utils.networkx.network_base import initialize_network
tc = TestCase()
def test_initialize_network() -> None:
edges: list = [
@ -32,7 +29,7 @@ def test_initialize_network() -> None:
graph, metrics = initialize_network(edges=edges, nodes=nodes)
assert type(graph) is nx.Graph
assert type(metrics) is pd.DataFrame
tc.assertListEqual(
list(metrics.columns),
["degree", "eigenvector", "betweeness", "closeness", "pagerank"],
assert (
list(metrics.columns)
== ["degree", "eigenvector", "betweeness", "closeness", "pagerank"],
)

View File

@ -1,16 +1,14 @@
"""Test the initialize Network function."""
import datetime
from unittest import TestCase
import networkx as nx
import pandas as pd
import pytest
from aki_prj23_transparenzregister.utils.networkx.network_base import initialize_network
tc = TestCase()
import pytest
@pytest.mark.tim
@pytest.mark.tim()
def test_initialize_network() -> None:
edges: list = [
{"from": "p_545", "to": "c_53", "type": "HAFTENDER_GESELLSCHAFTER"},
@ -33,7 +31,7 @@ def test_initialize_network() -> None:
graph, metrics = initialize_network(edges=edges, nodes=nodes)
assert isinstance(graph, nx.Graph)
assert isinstance(metrics, pd.DataFrame)
tc.assertListEqual(
list(metrics.columns),
["degree", "eigenvector", "betweeness", "closeness", "pagerank"],
assert (
list(metrics.columns)
== ["degree", "eigenvector", "betweeness", "closeness", "pagerank"],
)