From f2ac0eda91079fdf62940bc96485e55dc12f235c Mon Sep 17 00:00:00 2001 From: Tim Date: Sun, 5 Nov 2023 15:50:28 +0100 Subject: [PATCH] Added Realtion_count MEthod --- .../ui/archive/networkx_dash.py | 17 ++--- .../ui/archive/networkx_dash_overall.py | 31 ++------ .../ui/archive/ui_elements.py | 4 +- .../ui/company_elements.py | 37 ++++++++-- .../ui/pages/home.py | 2 +- .../utils/enum_types.py | 32 --------- .../utils/networkx/network_3d.py | 2 +- .../utils/networkx/network_base.py | 61 ++++++++++++++++ .../utils/networkx/networkx_data.py | 72 +++++++++++++++---- .../utils/sql/connector.py | 2 +- tests/ui/networkx_dash_test.py | 2 +- tests/utils/networkx/network_2d_test.py | 9 +-- tests/utils/networkx/network_base_test.py | 12 ++-- 13 files changed, 175 insertions(+), 108 deletions(-) diff --git a/src/aki_prj23_transparenzregister/ui/archive/networkx_dash.py b/src/aki_prj23_transparenzregister/ui/archive/networkx_dash.py index 9b8634a..5700b19 100644 --- a/src/aki_prj23_transparenzregister/ui/archive/networkx_dash.py +++ b/src/aki_prj23_transparenzregister/ui/archive/networkx_dash.py @@ -34,16 +34,15 @@ def find_company_relations(company_id: int) -> pd.DataFrame: connected_company_name = [] for _, row in companies_relations_df.iterrows(): - # print(companies_df.loc[companies_df["company_id"] == row["relation_id"]]["company_name"].values[0]) company_name.append( companies_df.loc[companies_df["company_id"] == row["relation_id"]][ "company_name" - ].values[0] + ].iloc[0] ) connected_company_name.append( companies_df.loc[ companies_df["company_id"] == row["company_relation_company2_id"] - ]["company_name"].values[0] + ]["company_name"].iloc[0] ) # print(company_name) @@ -54,7 +53,7 @@ def find_company_relations(company_id: int) -> pd.DataFrame: # Plotly figure -def networkGraph(company_id: int) -> go.Figure: +def network_graph(company_id: int) -> go.Figure: """_summary_. Args: @@ -63,14 +62,10 @@ def networkGraph(company_id: int) -> go.Figure: Returns: go.Figure: _description_ """ - # df = find_company_relations(test_company) edges = [] - for index, row in find_company_relations(company_id).iterrows(): + for _, row in find_company_relations(company_id).iterrows(): edges.append([row["company_name"], row["connected_company_name"]]) - # print(row["company_name"], row["connected_company_name"]) - # print(edges) - # edges = df[["relation_id","company_relation_company2_id"]] - # edges = [[EGDE_VAR, "B"], ["B", "C"], ["B", "D"]] + network_graph = nx.Graph() network_graph.add_edges_from(edges) pos = nx.spring_layout(network_graph) @@ -151,6 +146,6 @@ def networkx_component(company_id: int) -> html.Div: """ return html.Div( [ - dcc.Graph(id="my-graph", figure=networkGraph(company_id)), + dcc.Graph(id="my-graph", figure=network_graph(company_id)), ] ) diff --git a/src/aki_prj23_transparenzregister/ui/archive/networkx_dash_overall.py b/src/aki_prj23_transparenzregister/ui/archive/networkx_dash_overall.py index f196489..bbcc34d 100644 --- a/src/aki_prj23_transparenzregister/ui/archive/networkx_dash_overall.py +++ b/src/aki_prj23_transparenzregister/ui/archive/networkx_dash_overall.py @@ -34,18 +34,16 @@ def find_all_company_relations() -> pd.DataFrame: # print(companies_relations_df) for _, row in companies_relations_df.iterrows(): - # print(companies_df.loc[companies_df["company_id"] == row["relation_id"]]["company_name"].values[0]) - # print("TEst") company_name.append( companies_df.loc[companies_df["company_id"] == row["relation_id"]][ "company_name" - ].values[0] + ].iloc[0] ) connected_company_name.append( companies_df.loc[ companies_df["company_id"] == row["company_relation_company2_id"] - ]["company_name"].values[0] + ]["company_name"].iloc[0] ) # print(connected_company_name) @@ -58,7 +56,7 @@ def find_all_company_relations() -> pd.DataFrame: # Plotly figure -def networkGraph(EGDE_VAR: None) -> go.Figure: +def network_graph() -> go.Figure: """Create a NetworkX Graph. Args: @@ -67,15 +65,10 @@ def networkGraph(EGDE_VAR: None) -> go.Figure: Returns: go.Figure: _description_ """ - # find_all_company_relations() - edges = [] - for index, row in find_all_company_relations().iterrows(): + for _, row in find_all_company_relations().iterrows(): edges.append([row["company_name"], row["connected_company_name"]]) - # print(row["company_name"], row["connected_company_name"]) - # print(edges) - # edges = df[["relation_id","company_relation_company2_id"]] - # edges = [[EGDE_VAR, "B"], ["B", "C"], ["B", "D"]] + network_graph = nx.Graph() network_graph.add_edges_from(edges) pos = nx.spring_layout(network_graph) @@ -141,7 +134,6 @@ def networkGraph(EGDE_VAR: None) -> go.Figure: }, } - print(nx.eigenvector_centrality(network_graph)) measure_vector = {} network_metrics_df = pd.DataFrame() @@ -157,20 +149,11 @@ def networkGraph(EGDE_VAR: None) -> go.Figure: measure_vector = nx.closeness_centrality(network_graph) network_metrics_df["closeness"] = measure_vector.values() - # measure_vector = nx.pagerank(network_graph) - # network_metrics_df["pagerank"] = measure_vector.values() - - # measure_vector = nx.average_degree_connectivity(network_graph) - # network_metrics_df["average_degree"] = measure_vector.values() - print(network_metrics_df) - # figure return go.Figure(data=[edge_trace, node_trace], layout=layout) # Dash App - - app = Dash(__name__) app.title = "Dash Networkx" @@ -192,7 +175,7 @@ app.layout = html.Div( # Input('metric-dropdown', 'value'), [Input("EGDE_VAR", "value")], ) -def update_output(EGDE_VAR: None) -> go.Figure: +def update_output(edge_var: None) -> go.Figure: """Just Returns the go Figure of Plotly. Args: @@ -201,7 +184,7 @@ def update_output(EGDE_VAR: None) -> go.Figure: Returns: go.Figure: _description_ """ - return networkGraph(EGDE_VAR) + return network_graph(edge_var) if __name__ == "__main__": diff --git a/src/aki_prj23_transparenzregister/ui/archive/ui_elements.py b/src/aki_prj23_transparenzregister/ui/archive/ui_elements.py index c432e8a..512b75d 100644 --- a/src/aki_prj23_transparenzregister/ui/archive/ui_elements.py +++ b/src/aki_prj23_transparenzregister/ui/archive/ui_elements.py @@ -7,8 +7,8 @@ from dash import dash_table, dcc, html from sqlalchemy.engine import Engine from sqlalchemy.orm import Session -from aki_prj23_transparenzregister.utils.sql import entities from aki_prj23_transparenzregister.ui.archive.networkx_dash import networkx_component +from aki_prj23_transparenzregister.utils.sql import entities COLORS = { "light": "#edefef", @@ -363,6 +363,4 @@ def network_layout(selected_company_id: int) -> html: Returns: The html div to create the network tab of the company page. """ - selected_company_id return networkx_component(selected_company_id) - # return html.Div([f"Netzwerk von Unternehmen mit ID: {selected_company_id}"]) diff --git a/src/aki_prj23_transparenzregister/ui/company_elements.py b/src/aki_prj23_transparenzregister/ui/company_elements.py index 647805c..4563e76 100644 --- a/src/aki_prj23_transparenzregister/ui/company_elements.py +++ b/src/aki_prj23_transparenzregister/ui/company_elements.py @@ -15,6 +15,7 @@ from aki_prj23_transparenzregister.utils.networkx.network_base import initialize from aki_prj23_transparenzregister.utils.networkx.networkx_data import ( create_edge_and_node_list_for_company, find_company_relations, + get_relations_number_from_id, ) COLORS = { @@ -379,13 +380,37 @@ def network_layout(selected_company_id: int) -> html.Div: The html div to create the network tab of the company page. """ person_relations, company_relations = find_company_relations(selected_company_id) + + # get_all_metrics_from_id(selected_company_id) + get_relations_number_from_id(f"c_{selected_company_id}") + # Create Edge and Node List from data nodes, edges = create_edge_and_node_list_for_company(company_relations) - # Initialize the Network and receive the Graph and a DataFrame with Metrics - if nodes != {}: + # Initialize the Network and receive the Graph and a DataFrame with Metrics + if nodes != {}: graph, metrics = initialize_network(nodes=nodes, edges=edges) metric = "None" - figure = create_2d_graph(graph, nodes, edges, metrics, metric, layout="Spring", edge_annotation=True, node_annotation=False, edge_thickness=1) - - return html.Div( children=[dcc.Graph(figure=figure, id="company-graph", className="graph-style")]) - return html.Div([html.H3(f"Leider gibt es keine Verbindungen vom Unternehmen mit ID: {selected_company_id}")]) + figure = create_2d_graph( + graph, + nodes, + edges, + metrics, + metric, + layout="Spring", + edge_annotation=True, + node_annotation=False, + edge_thickness=1, + ) + return html.Div( + children=[ + dcc.Graph(figure=figure, id="company-graph", className="graph-style") + ] + ) + + return html.Div( + [ + html.H3( + f"Leider gibt es keine Verbindungen vom Unternehmen mit ID: {selected_company_id}" + ) + ] + ) diff --git a/src/aki_prj23_transparenzregister/ui/pages/home.py b/src/aki_prj23_transparenzregister/ui/pages/home.py index 7b45335..b445506 100644 --- a/src/aki_prj23_transparenzregister/ui/pages/home.py +++ b/src/aki_prj23_transparenzregister/ui/pages/home.py @@ -369,7 +369,7 @@ def update_graph_data( allow_duplicate=True, ) # @lru_cache(20) -def update_figure( +def update_figure( # noqa: PLR0913 selected_metric: str, switch_value: bool, # switch_node_annotaion_value: bool, diff --git a/src/aki_prj23_transparenzregister/utils/enum_types.py b/src/aki_prj23_transparenzregister/utils/enum_types.py index 59d6316..e3bc9e7 100644 --- a/src/aki_prj23_transparenzregister/utils/enum_types.py +++ b/src/aki_prj23_transparenzregister/utils/enum_types.py @@ -11,38 +11,6 @@ class SentimentLabel(MultiValueEnum): NEGATIVE = -1, "negative" NEUTRAL = 0, "neutral" - @staticmethod - def get_string_from_enum(value: int | None) -> str: - """Translates relation name into a RelationTypeEnum. - - If no translation can be found a warning is given. - - Args: - relation_name: The name of the relation to be translated. - - Returns: - The identified translation or None if no translation can be found. - """ - tmp = RelationTypeEnum(value) - if value is None: - raise ValueError("A relation type needs to be given.") - name = { - RelationTypeEnum.GESCHAEFTSFUEHRER: "Geschäftsführer", - RelationTypeEnum.KOMMANDITIST: "Kommanditist", - RelationTypeEnum.VORSTAND: "Vorstand", - RelationTypeEnum.PROKURIST: "Prokurist", - RelationTypeEnum.LIQUIDATOR: "Liquidator", - RelationTypeEnum.INHABER: "Inhaber", - RelationTypeEnum.PERSOENLICH_HAFTENDER_GESELLSCHAFTER: "Persönlich haftender Gesellschafter", - RelationTypeEnum.ORGANISATION: "Organisation", - RelationTypeEnum.PARTNER: "Partner", - RelationTypeEnum.DIREKTOR: "Direktor", - RelationTypeEnum.RECHTSNACHFOLGER: "Rechtsnachfolger", - }.get(tmp) - if name is not None: - return name - raise ValueError(f'Relation type "{value}" is not yet implemented!') - class FinancialKPIEnum(Enum): """Financial KPI keys.""" diff --git a/src/aki_prj23_transparenzregister/utils/networkx/network_3d.py b/src/aki_prj23_transparenzregister/utils/networkx/network_3d.py index 329056e..b2262be 100644 --- a/src/aki_prj23_transparenzregister/utils/networkx/network_3d.py +++ b/src/aki_prj23_transparenzregister/utils/networkx/network_3d.py @@ -5,7 +5,7 @@ import pandas as pd import plotly.graph_objects as go -def create_3d_graph( +def create_3d_graph( # noqa : PLR0913 graph: nx.Graph, nodes: dict, edges: list, diff --git a/src/aki_prj23_transparenzregister/utils/networkx/network_base.py b/src/aki_prj23_transparenzregister/utils/networkx/network_base.py index 4e86eb6..60bbe6b 100644 --- a/src/aki_prj23_transparenzregister/utils/networkx/network_base.py +++ b/src/aki_prj23_transparenzregister/utils/networkx/network_base.py @@ -34,5 +34,66 @@ def initialize_network(edges: list, nodes: dict) -> tuple[nx.Graph, pd.DataFrame metrics["pagerank"] = nx.pagerank(graph).values() metrics["category"] = nx.get_node_attributes(graph, "type").values() metrics["designation"] = nx.get_node_attributes(graph, "name").values() + metrics["id"] = nx.get_node_attributes(graph, "id").values() return graph, metrics + + +def initialize_network_with_reduced_metrics( + edges: list, nodes: dict +) -> tuple[nx.Graph, pd.DataFrame]: + """This Method creates a Network from the Framework NetworkX with the help of a Node and Edge List. Furthemore it creates a DataFrame with the most important Metrics. + + Args: + edges (list): List with the connections between Nodes. + nodes (dict): Dict with all Nodes. + + Returns: + Graph: Plotly Figure + Metrices: DataFrame with Metrics + """ + # create edge dataframe + df_edges = pd.DataFrame(edges, columns=["from", "to", "type"]) + graph = nx.from_pandas_edgelist( + df_edges, source="from", target="to", edge_attr="type" + ) + + # update node attributes from dataframe + nx.set_node_attributes(graph, nodes) + + # Create a DataFrame with all Metrics + metrics = pd.DataFrame( + columns=["degree", "eigenvector", "betweenness", "closeness", "pagerank"] + ) + # metrics["eigenvector"] = nx.eigenvector_centrality(graph).values() + metrics["degree"] = nx.degree_centrality(graph).values() + metrics["betweenness"] = nx.betweenness_centrality(graph).values() + metrics["closeness"] = nx.closeness_centrality(graph).values() + # metrics["pagerank"] = nx.pagerank(graph).values() + metrics["category"] = nx.get_node_attributes(graph, "type").values() + metrics["designation"] = nx.get_node_attributes(graph, "name").values() + metrics["id"] = nx.get_node_attributes(graph, "id").values() + + return graph, metrics + + +def initialize_network_without_metrics(edges: list, nodes: dict) -> nx.Graph: + """This Method creates a Network from the Framework NetworkX with the help of a Node and Edge List. Furthemore it creates a DataFrame with the most important Metrics. + + Args: + edges (list): List with the connections between Nodes. + nodes (dict): Dict with all Nodes. + + Returns: + Graph: Plotly Figure + """ + # create edge dataframe + df_edges = pd.DataFrame(edges, columns=["from", "to", "type"]) + graph = nx.from_pandas_edgelist( + df_edges, source="from", target="to", edge_attr="type" + ) + + # update node attributes from dataframe + nx.set_node_attributes(graph, nodes) + + return graph diff --git a/src/aki_prj23_transparenzregister/utils/networkx/networkx_data.py b/src/aki_prj23_transparenzregister/utils/networkx/networkx_data.py index 9322992..e2f3239 100644 --- a/src/aki_prj23_transparenzregister/utils/networkx/networkx_data.py +++ b/src/aki_prj23_transparenzregister/utils/networkx/networkx_data.py @@ -1,8 +1,15 @@ """Module to receive and filter Data for working with NetworkX.""" +from functools import lru_cache + +import networkx as nx import pandas as pd from sqlalchemy.orm import aliased from aki_prj23_transparenzregister.config.config_providers import JsonFileConfigProvider +from aki_prj23_transparenzregister.utils.networkx.network_base import ( + initialize_network_with_reduced_metrics, + initialize_network_without_metrics, +) from aki_prj23_transparenzregister.utils.sql import connector, entities from aki_prj23_transparenzregister.utils.sql.connector import get_session @@ -15,6 +22,9 @@ to_company = aliased(entities.Company, name="to_company") # Alias for Company table for the head company from_company = aliased(entities.Company, name="from_company") +COLOR_COMPANY = "blue" +COLOR_PERSON = "red" + def find_all_company_relations() -> pd.DataFrame: """_summary_. @@ -40,12 +50,12 @@ def find_all_company_relations() -> pd.DataFrame: company_name.append( companies_df.loc[companies_df["company_id"] == row["relation_id"]][ "company_name" - ].values[0] + ].iloc[0] ) connected_company_name.append( companies_df.loc[ companies_df["company_id"] == row["company_relation_company2_id"] - ]["company_name"].values[0] + ]["company_name"].iloc[0] ) companies_relations_df["company_name"] = company_name @@ -213,19 +223,16 @@ def create_edge_and_node_list( nodes: dict = {} edges: list = [] - COLOR_COMPANY = "blue" - COLOR_PERSON = "red" - # Iterate over person relations - for _index, row in person_relations.iterrows(): - if node := nodes.get(row["id_company"]) is None: + for _, row in person_relations.iterrows(): + if nodes.get(row["id_company"]) is None: nodes[row["id_company"]] = { "id": row["id_company"], "name": row["name_company"], "color": COLOR_COMPANY, "type": "company", } - if node := nodes.get(row["id_person"]) is None: + if nodes.get(row["id_person"]) is None: nodes[row["id_person"]] = { "id": row["id_person"], "name": str(row["firstname"]) + " " + str(row["lastname"]), @@ -241,15 +248,15 @@ def create_edge_and_node_list( } ) - for _index, row in company_relations.iterrows(): - if node := nodes.get(row["id_company_from"]) is None: + for _, row in company_relations.iterrows(): + if nodes.get(row["id_company_from"]) is None: # noqa nodes[row["id_company_from"]] = { "id": row["id_company_from"], "name": row["name_company_from"], "color": COLOR_COMPANY, "type": "company", } - if node := nodes.get(row["id_company_to"]) is None: + if nodes.get(row["id_company_to"]) is None: nodes[row["id_company_to"]] = { "id": row["id_company_to"], "name": row["name_company_to"], @@ -361,7 +368,7 @@ def create_edge_and_node_list_for_company( return nodes, edges -def get_all_metrics_from_id(company_id: int) -> pd.DataFrame: +def get_all_metrics_from_id(company_id: int) -> pd.Series: """_summary_ Args: @@ -370,10 +377,20 @@ def get_all_metrics_from_id(company_id: int) -> pd.DataFrame: Returns: pd.DataFrame: _description_ """ - return pd.DataFrame() + # Get Data + person_df = get_all_person_relations() + company_df = get_all_company_relations() + + # Create Edge and Node List from data + nodes_tmp, edges_tmp = create_edge_and_node_list(person_df, company_df) + graph, metrics = initialize_network_with_reduced_metrics( + nodes=nodes_tmp, edges=edges_tmp + ) + return metrics.loc[metrics["id"] == company_id].iloc[0] -def get_relations_number_from_id(company_id: int) -> tuple[int, int, int]: +@lru_cache +def get_relations_number_from_id(id: str) -> tuple[int, int, int]: """_summary_ Args: @@ -382,4 +399,29 @@ def get_relations_number_from_id(company_id: int) -> tuple[int, int, int]: Returns: tuple[int,int,int]: _description_ """ - return (1, 2, 3) + # Get Data + person_df = get_all_person_relations() + company_df = get_all_company_relations() + + # Create Edge and Node List from data + nodes_tmp, edges_tmp = create_edge_and_node_list(person_df, company_df) + + graph = initialize_network_without_metrics(nodes=nodes_tmp, edges=edges_tmp) + + neighbors = nx.all_neighbors(graph, id) + + relations_lv1 = set(neighbors) + relations_lv2 = set() + relations_lv3 = set() + + for node in relations_lv1: + relations_lv2 |= set(nx.all_neighbors(graph, node)) + + relations_lv2.discard(id) + + for sub_node in relations_lv2: + relations_lv3 |= set(nx.all_neighbors(graph, sub_node)) + + relations_lv2.difference(relations_lv3) + + return (len(relations_lv1), len(relations_lv2), len(relations_lv3)) diff --git a/src/aki_prj23_transparenzregister/utils/sql/connector.py b/src/aki_prj23_transparenzregister/utils/sql/connector.py index 7208e0f..7b95985 100644 --- a/src/aki_prj23_transparenzregister/utils/sql/connector.py +++ b/src/aki_prj23_transparenzregister/utils/sql/connector.py @@ -45,7 +45,7 @@ def get_engine(conn_args: SQLConnectionString) -> Engine: return sa.create_engine( str(conn_args), connect_args={"check_same_thread": True}, - poolclass=SingletonThreadPool + poolclass=SingletonThreadPool, ) raise TypeError("The type of the configuration is invalid.") diff --git a/tests/ui/networkx_dash_test.py b/tests/ui/networkx_dash_test.py index baa6f71..c9b48c2 100644 --- a/tests/ui/networkx_dash_test.py +++ b/tests/ui/networkx_dash_test.py @@ -2,6 +2,6 @@ from aki_prj23_transparenzregister.ui import networkx_dash -def networkGraph(Edges: None) -> None: +def network_graph(Edges: None) -> None: """Checks if an import co company_stats_dash can be made.""" assert networkx_dash is not None diff --git a/tests/utils/networkx/network_2d_test.py b/tests/utils/networkx/network_2d_test.py index ed2fd00..88199ed 100644 --- a/tests/utils/networkx/network_2d_test.py +++ b/tests/utils/networkx/network_2d_test.py @@ -1,14 +1,11 @@ """Test the initialize Network function.""" import datetime -from unittest import TestCase import networkx as nx import pandas as pd from aki_prj23_transparenzregister.utils.networkx.network_base import initialize_network -tc = TestCase() - def test_initialize_network() -> None: edges: list = [ @@ -32,7 +29,7 @@ def test_initialize_network() -> None: graph, metrics = initialize_network(edges=edges, nodes=nodes) assert type(graph) is nx.Graph assert type(metrics) is pd.DataFrame - tc.assertListEqual( - list(metrics.columns), - ["degree", "eigenvector", "betweeness", "closeness", "pagerank"], + assert ( + list(metrics.columns) + == ["degree", "eigenvector", "betweeness", "closeness", "pagerank"], ) diff --git a/tests/utils/networkx/network_base_test.py b/tests/utils/networkx/network_base_test.py index d040182..2641cef 100644 --- a/tests/utils/networkx/network_base_test.py +++ b/tests/utils/networkx/network_base_test.py @@ -1,16 +1,14 @@ """Test the initialize Network function.""" import datetime -from unittest import TestCase import networkx as nx import pandas as pd +import pytest from aki_prj23_transparenzregister.utils.networkx.network_base import initialize_network -tc = TestCase() -import pytest -@pytest.mark.tim +@pytest.mark.tim() def test_initialize_network() -> None: edges: list = [ {"from": "p_545", "to": "c_53", "type": "HAFTENDER_GESELLSCHAFTER"}, @@ -33,7 +31,7 @@ def test_initialize_network() -> None: graph, metrics = initialize_network(edges=edges, nodes=nodes) assert isinstance(graph, nx.Graph) assert isinstance(metrics, pd.DataFrame) - tc.assertListEqual( - list(metrics.columns), - ["degree", "eigenvector", "betweeness", "closeness", "pagerank"], + assert ( + list(metrics.columns) + == ["degree", "eigenvector", "betweeness", "closeness", "pagerank"], )