Added an experimental filter for minimally networked persons

This commit is contained in:
2024-01-11 00:24:37 +01:00
parent adc439d8f6
commit 1b15be514d
5 changed files with 24 additions and 11 deletions

View File

@ -1,4 +1,5 @@
"""Dash elements."""
import os
import pandas as pd
import plotly.graph_objs as go
@ -60,7 +61,7 @@ def get_finance_data(session: Session) -> pd.DataFrame:
@cached( # type: ignore
cache=TTLCache(maxsize=1, ttl=300),
cache=TTLCache(maxsize=1, ttl=int(os.getenv("PYTHON_CACHE_TTL", "3600"))),
key=lambda session: 0 if session is None else str(session.bind),
)
def get_options(session: Session | None) -> dict[int, str]:

View File

@ -106,7 +106,7 @@ def get_finance_data_of_one_company(session: Session, company_id: int) -> pd.Dat
@cached( # type: ignore
cache=TTLCache(maxsize=1, ttl=300),
cache=TTLCache(maxsize=1, ttl=int(os.getenv("PYTHON_CACHE_TTL", "3600"))),
key=lambda session: 0 if session is None else str(session.bind),
)
def get_options(session: Session | None) -> dict[int, str]:

View File

@ -56,7 +56,7 @@ dash.register_page(
def person_relation_type_filter() -> list[str]:
"""Returns a list of strings with the person relation types."""
return get_all_person_relations()["relation_type"].unique().tolist()
return get_all_person_relations(False)["relation_type"].unique().tolist()
def company_relation_type_filter() -> list[str]:
@ -408,9 +408,10 @@ def layout() -> html:
def update_graph_data(
person_relation_type: frozenset[str] | None = None,
company_relation_type: frozenset[str] | None = None,
drop_min_person_links: bool = False,
) -> tuple[nx.Graph, pd.DataFrame, dict, list]:
"""_summary_."""
person_df = get_all_person_relations()
person_df = get_all_person_relations(drop_min_person_links)
company_df = get_all_company_relations()
person_relation = filter_relation_type(person_df, person_relation_type)

View File

@ -1,8 +1,10 @@
"""Module to receive and filter Data for working with NetworkX."""
import os
from functools import lru_cache
import networkx as nx
import pandas as pd
from cachetools import TTLCache, cached
from sqlalchemy.orm import aliased
from aki_prj23_transparenzregister.ui.session_handler import SessionHandler
@ -24,6 +26,10 @@ COLOR_COMPANY = "#006250"
COLOR_PERSON = "#ff5200"
@cached( # type: ignore
cache=TTLCache(maxsize=100, ttl=int(os.getenv("PYTHON_CACHE_TTL", "3600"))),
key=lambda session, company_id: hash((company_id, str(session.bind))),
)
def find_all_company_relations() -> pd.DataFrame:
"""_summary_.
@ -64,7 +70,7 @@ def find_all_company_relations() -> pd.DataFrame:
def get_all_company_relations() -> pd.DataFrame:
"""This Methods makes a Database Request for all Companies and their relations, modifies the ID Column and returns the Result as an DataFrame.
"""Makes a database request for all companies and their relations, modifies the ID column and returns the result as a DataFrame.
Returns:
DataFrame: DataFrame with all Relations between Companies.
@ -102,7 +108,11 @@ def get_all_company_relations() -> pd.DataFrame:
return company_relations
def get_all_person_relations() -> pd.DataFrame:
@cached( # type: ignore
cache=TTLCache(maxsize=100, ttl=int(os.getenv("PYTHON_CACHE_TTL", "3600"))),
key=lambda session, company_id: hash((company_id, str(session.bind))),
)
def get_all_person_relations(drop_min_person_links: bool) -> pd.DataFrame:
"""Makes a database request for all persons and their relations, modifies the ID column and returns the result as a DataFrame.
Returns:
@ -137,7 +147,8 @@ def get_all_person_relations() -> pd.DataFrame:
person_relations["id_person"] = person_relations["id_person"].apply(
lambda x: f"p_{x}"
)
if drop_min_person_links:
return person_relations.groupby("id_person").filter(lambda x: len(x) > 1)
return person_relations
@ -379,7 +390,7 @@ def get_all_metrics_from_id(company_id: int) -> pd.Series:
pd.DataFrame: _description_
"""
# Get Data
person_df = get_all_person_relations()
person_df = get_all_person_relations(False)
company_df = get_all_company_relations()
# Create Edge and Node List from data
@ -416,7 +427,7 @@ def get_relations_number_from_id(id: str) -> tuple[int, int, int]:
tuple[int,int,int]: _description_
"""
# Get Data
person_df = get_all_person_relations()
person_df = get_all_person_relations(False)
company_df = get_all_company_relations()
# Create Edge and Node List from data
@ -457,7 +468,7 @@ def get_relations_until_level_3(id: str) -> tuple[dict, list]:
tuple[dict, list]: nodes, edges
"""
# Get Data
person_df = get_all_person_relations()
person_df = get_all_person_relations(False)
company_df = get_all_company_relations()
# Create Edge and Node List from data

View File

@ -58,7 +58,7 @@ def extract_domain(url: str) -> str:
return urlparse(url).netloc
@cached(cache=TTLCache(5000, ttl=300), key=lambda ner_name, _: hash(ner_name)) # type: ignore
@cached(cache=TTLCache(5000, ttl=int(os.getenv("PYTHON_CACHE_TTL", "3600"))), key=lambda ner_name, _: hash(ner_name)) # type: ignore
def get_company_id(ner_name: str, db: Session) -> int | None:
"""Finds the company id to an entity found via NER.