Added an experimental filter for minimally networked persons

This commit is contained in:
2024-01-11 00:24:37 +01:00
parent adc439d8f6
commit 1b15be514d
5 changed files with 24 additions and 11 deletions

View File

@ -1,4 +1,5 @@
"""Dash elements.""" """Dash elements."""
import os
import pandas as pd import pandas as pd
import plotly.graph_objs as go import plotly.graph_objs as go
@ -60,7 +61,7 @@ def get_finance_data(session: Session) -> pd.DataFrame:
@cached( # type: ignore @cached( # type: ignore
cache=TTLCache(maxsize=1, ttl=300), cache=TTLCache(maxsize=1, ttl=int(os.getenv("PYTHON_CACHE_TTL", "3600"))),
key=lambda session: 0 if session is None else str(session.bind), key=lambda session: 0 if session is None else str(session.bind),
) )
def get_options(session: Session | None) -> dict[int, str]: def get_options(session: Session | None) -> dict[int, str]:

View File

@ -106,7 +106,7 @@ def get_finance_data_of_one_company(session: Session, company_id: int) -> pd.Dat
@cached( # type: ignore @cached( # type: ignore
cache=TTLCache(maxsize=1, ttl=300), cache=TTLCache(maxsize=1, ttl=int(os.getenv("PYTHON_CACHE_TTL", "3600"))),
key=lambda session: 0 if session is None else str(session.bind), key=lambda session: 0 if session is None else str(session.bind),
) )
def get_options(session: Session | None) -> dict[int, str]: def get_options(session: Session | None) -> dict[int, str]:

View File

@ -56,7 +56,7 @@ dash.register_page(
def person_relation_type_filter() -> list[str]: def person_relation_type_filter() -> list[str]:
"""Returns a list of strings with the person relation types.""" """Returns a list of strings with the person relation types."""
return get_all_person_relations()["relation_type"].unique().tolist() return get_all_person_relations(False)["relation_type"].unique().tolist()
def company_relation_type_filter() -> list[str]: def company_relation_type_filter() -> list[str]:
@ -408,9 +408,10 @@ def layout() -> html:
def update_graph_data( def update_graph_data(
person_relation_type: frozenset[str] | None = None, person_relation_type: frozenset[str] | None = None,
company_relation_type: frozenset[str] | None = None, company_relation_type: frozenset[str] | None = None,
drop_min_person_links: bool = False,
) -> tuple[nx.Graph, pd.DataFrame, dict, list]: ) -> tuple[nx.Graph, pd.DataFrame, dict, list]:
"""_summary_.""" """_summary_."""
person_df = get_all_person_relations() person_df = get_all_person_relations(drop_min_person_links)
company_df = get_all_company_relations() company_df = get_all_company_relations()
person_relation = filter_relation_type(person_df, person_relation_type) person_relation = filter_relation_type(person_df, person_relation_type)

View File

@ -1,8 +1,10 @@
"""Module to receive and filter Data for working with NetworkX.""" """Module to receive and filter Data for working with NetworkX."""
import os
from functools import lru_cache from functools import lru_cache
import networkx as nx import networkx as nx
import pandas as pd import pandas as pd
from cachetools import TTLCache, cached
from sqlalchemy.orm import aliased from sqlalchemy.orm import aliased
from aki_prj23_transparenzregister.ui.session_handler import SessionHandler from aki_prj23_transparenzregister.ui.session_handler import SessionHandler
@ -24,6 +26,10 @@ COLOR_COMPANY = "#006250"
COLOR_PERSON = "#ff5200" COLOR_PERSON = "#ff5200"
@cached( # type: ignore
cache=TTLCache(maxsize=100, ttl=int(os.getenv("PYTHON_CACHE_TTL", "3600"))),
key=lambda session, company_id: hash((company_id, str(session.bind))),
)
def find_all_company_relations() -> pd.DataFrame: def find_all_company_relations() -> pd.DataFrame:
"""_summary_. """_summary_.
@ -64,7 +70,7 @@ def find_all_company_relations() -> pd.DataFrame:
def get_all_company_relations() -> pd.DataFrame: def get_all_company_relations() -> pd.DataFrame:
"""This Methods makes a Database Request for all Companies and their relations, modifies the ID Column and returns the Result as an DataFrame. """Makes a Database Request for all Companies and their relations, modifies the ID Column and returns the Result as an DataFrame.
Returns: Returns:
DataFrame: DataFrame with all Relations between Companies. DataFrame: DataFrame with all Relations between Companies.
@ -102,7 +108,11 @@ def get_all_company_relations() -> pd.DataFrame:
return company_relations return company_relations
def get_all_person_relations() -> pd.DataFrame: @cached( # type: ignore
cache=TTLCache(maxsize=100, ttl=int(os.getenv("PYTHON_CACHE_TTL", "3600"))),
key=lambda session, company_id: hash((company_id, str(session.bind))),
)
def get_all_person_relations(drop_min_person_links: bool) -> pd.DataFrame:
"""This method makes a Database Request for all Persons and their relations, modifies the ID Column and returns the Result as a DataFrame. """This method makes a Database Request for all Persons and their relations, modifies the ID Column and returns the Result as a DataFrame.
Returns: Returns:
@ -137,7 +147,8 @@ def get_all_person_relations() -> pd.DataFrame:
person_relations["id_person"] = person_relations["id_person"].apply( person_relations["id_person"] = person_relations["id_person"].apply(
lambda x: f"p_{x}" lambda x: f"p_{x}"
) )
if drop_min_person_links:
return person_relations.groupby("id_person").filter(lambda x: len(x) > 1)
return person_relations return person_relations
@ -379,7 +390,7 @@ def get_all_metrics_from_id(company_id: int) -> pd.Series:
pd.DataFrame: _description_ pd.DataFrame: _description_
""" """
# Get Data # Get Data
person_df = get_all_person_relations() person_df = get_all_person_relations(False)
company_df = get_all_company_relations() company_df = get_all_company_relations()
# Create Edge and Node List from data # Create Edge and Node List from data
@ -416,7 +427,7 @@ def get_relations_number_from_id(id: str) -> tuple[int, int, int]:
tuple[int,int,int]: _description_ tuple[int,int,int]: _description_
""" """
# Get Data # Get Data
person_df = get_all_person_relations() person_df = get_all_person_relations(False)
company_df = get_all_company_relations() company_df = get_all_company_relations()
# Create Edge and Node List from data # Create Edge and Node List from data
@ -457,7 +468,7 @@ def get_relations_until_level_3(id: str) -> tuple[dict, list]:
tuple[dict, list]: nodes, edges tuple[dict, list]: nodes, edges
""" """
# Get Data # Get Data
person_df = get_all_person_relations() person_df = get_all_person_relations(False)
company_df = get_all_company_relations() company_df = get_all_company_relations()
# Create Edge and Node List from data # Create Edge and Node List from data

View File

@ -58,7 +58,7 @@ def extract_domain(url: str) -> str:
return urlparse(url).netloc return urlparse(url).netloc
@cached(cache=TTLCache(5000, ttl=300), key=lambda ner_name, _: hash(ner_name)) # type: ignore @cached(cache=TTLCache(5000, ttl=int(os.getenv("PYTHON_CACHE_TTL", "3600"))), key=lambda ner_name, _: hash(ner_name)) # type: ignore
def get_company_id(ner_name: str, db: Session) -> int | None: def get_company_id(ner_name: str, db: Session) -> int | None:
"""Finds the company id to an entity found via NER. """Finds the company id to an entity found via NER.