mirror of
https://github.com/fhswf/aki_prj23_transparenzregister.git
synced 2025-06-21 23:43:55 +02:00
A lot of spelling (#512)
This commit is contained in:
@ -48,21 +48,21 @@ class EntityPipeline:
|
|||||||
# spaCy
|
# spaCy
|
||||||
if ner_method == "spacy":
|
if ner_method == "spacy":
|
||||||
ner_service_instance = ner_service.NerAnalysisService(
|
ner_service_instance = ner_service.NerAnalysisService(
|
||||||
use_spacy=True, use_transformer=False, use_companylist=False
|
use_spacy=True, use_transformer=False, use_company_list=False
|
||||||
)
|
)
|
||||||
ner_service_func = ner_service_instance.ner_spacy
|
ner_service_func = ner_service_instance.ner_spacy
|
||||||
|
|
||||||
# company list
|
# company list
|
||||||
elif ner_method == "company_list":
|
elif ner_method == "company_list":
|
||||||
ner_service_instance = ner_service.NerAnalysisService(
|
ner_service_instance = ner_service.NerAnalysisService(
|
||||||
use_spacy=False, use_transformer=False, use_companylist=True
|
use_spacy=False, use_transformer=False, use_company_list=True
|
||||||
)
|
)
|
||||||
ner_service_func = ner_service_instance.ner_company_list
|
ner_service_func = ner_service_instance.ner_company_list
|
||||||
|
|
||||||
# transformer
|
# transformer
|
||||||
elif ner_method == "transformer":
|
elif ner_method == "transformer":
|
||||||
ner_service_instance = ner_service.NerAnalysisService(
|
ner_service_instance = ner_service.NerAnalysisService(
|
||||||
use_spacy=False, use_transformer=True, use_companylist=False
|
use_spacy=False, use_transformer=True, use_company_list=False
|
||||||
)
|
)
|
||||||
ner_service_func = ner_service_instance.ner_transformer
|
ner_service_func = ner_service_instance.ner_transformer
|
||||||
else:
|
else:
|
||||||
|
@ -15,14 +15,14 @@ class NerAnalysisService:
|
|||||||
self,
|
self,
|
||||||
use_spacy: bool = False,
|
use_spacy: bool = False,
|
||||||
use_transformer: bool = False,
|
use_transformer: bool = False,
|
||||||
use_companylist: bool = False,
|
use_company_list: bool = False,
|
||||||
) -> None:
|
) -> None:
|
||||||
"""Method to check which sentiment model is chosen."""
|
"""Method to check which sentiment model is chosen."""
|
||||||
if use_spacy:
|
if use_spacy:
|
||||||
self.init_spacy()
|
self.init_spacy()
|
||||||
if use_transformer:
|
if use_transformer:
|
||||||
self.init_transformer()
|
self.init_transformer()
|
||||||
if use_companylist:
|
if use_company_list:
|
||||||
self.init_companylist()
|
self.init_companylist()
|
||||||
|
|
||||||
def init_spacy(self) -> None:
|
def init_spacy(self) -> None:
|
||||||
|
@ -78,15 +78,14 @@ class SentimentAnalysisService:
|
|||||||
"""Sentiment Analytics with Spacy.
|
"""Sentiment Analytics with Spacy.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
doc: a document which is processed with spacy
|
doc: a document which is processed with spacy
|
||||||
docAttrib: which attribute of the document has to be processed: text or title
|
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
label: positive, negative, neutral.
|
label: positive, negative, neutral.
|
||||||
"""
|
"""
|
||||||
# set limits for sentiments
|
# set limits for sentiments
|
||||||
_upperlimit = 0.1
|
_upper_limit = 0.1
|
||||||
_lowerlimit = -0.1
|
_lower_limit = -0.1
|
||||||
|
|
||||||
_doc = self.nlp(doc)
|
_doc = self.nlp(doc)
|
||||||
_score = None
|
_score = None
|
||||||
@ -108,9 +107,9 @@ class SentimentAnalysisService:
|
|||||||
# Normalize the score to the range 0..1
|
# Normalize the score to the range 0..1
|
||||||
_normalized_score = (_pos - abs(_neg)) / _max_score if _max_score > 0 else 0
|
_normalized_score = (_pos - abs(_neg)) / _max_score if _max_score > 0 else 0
|
||||||
|
|
||||||
if _normalized_score > _upperlimit:
|
if _normalized_score > _upper_limit:
|
||||||
_sent = "positive"
|
_sent = "positive"
|
||||||
elif _normalized_score < _lowerlimit:
|
elif _normalized_score < _lower_limit:
|
||||||
_sent = "negative"
|
_sent = "negative"
|
||||||
else:
|
else:
|
||||||
_sent = "neutral"
|
_sent = "neutral"
|
||||||
|
@ -26,7 +26,7 @@ from aki_prj23_transparenzregister.utils.mongo.connector import MongoConnector
|
|||||||
def cli() -> None: # pragma: no cover
|
def cli() -> None: # pragma: no cover
|
||||||
"""CLI entry point."""
|
"""CLI entry point."""
|
||||||
parser = argparse.ArgumentParser(
|
parser = argparse.ArgumentParser(
|
||||||
prog="Transparenzregister enriching companies with mising financial data",
|
prog="Transparenzregister enriching companies with missing financial data",
|
||||||
description="Filters all raw companies with missing financial info from the MongoDB and enriches them with yearly result data from the Bundesanzeiger.",
|
description="Filters all raw companies with missing financial info from the MongoDB and enriches them with yearly result data from the Bundesanzeiger.",
|
||||||
epilog="Example: enrich-company-financials --log-level ERROR --log-path print.log",
|
epilog="Example: enrich-company-financials --log-level ERROR --log-path print.log",
|
||||||
)
|
)
|
||||||
|
@ -77,15 +77,15 @@ def main(config_provider: ConfigProvider) -> int:
|
|||||||
logger.error("Error while fetching news from Handelsblatt")
|
logger.error("Error while fetching news from Handelsblatt")
|
||||||
news_handelsblatt = []
|
news_handelsblatt = []
|
||||||
|
|
||||||
news_tageschau = tagesschau.get_news_for_category()
|
news_tagesschau = tagesschau.get_news_for_category()
|
||||||
if news_tageschau is None:
|
if news_tagesschau is None:
|
||||||
logger.error("Error while fetching news from Tagesschau")
|
logger.error("Error while fetching news from Tagesschau")
|
||||||
news_tageschau = []
|
news_tagesschau = []
|
||||||
|
|
||||||
logger.info(f"Found {len(news_handelsblatt)} news articles from Handelsblatt")
|
logger.info(f"Found {len(news_handelsblatt)} news articles from Handelsblatt")
|
||||||
logger.info(f"Found {len(news_tageschau)} news articles from Tagesschau")
|
logger.info(f"Found {len(news_tagesschau)} news articles from Tagesschau")
|
||||||
|
|
||||||
news_joined = news_handelsblatt + news_tageschau
|
news_joined = news_handelsblatt + news_tagesschau
|
||||||
|
|
||||||
count_new_documents = 0
|
count_new_documents = 0
|
||||||
count_duplicate_documents = 0
|
count_duplicate_documents = 0
|
||||||
|
@ -25,7 +25,7 @@ from aki_prj23_transparenzregister.utils.logger_config import (
|
|||||||
|
|
||||||
|
|
||||||
def load_schedule(schedule_file: str) -> dict:
|
def load_schedule(schedule_file: str) -> dict:
|
||||||
"""Load scheudle data from file.
|
"""Load schedule data from file.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
dict: Schedule data
|
dict: Schedule data
|
||||||
@ -52,7 +52,7 @@ def cli() -> None: # pragma: no cover
|
|||||||
"""CLI entry point."""
|
"""CLI entry point."""
|
||||||
parser = argparse.ArgumentParser(
|
parser = argparse.ArgumentParser(
|
||||||
prog="Transparenzregister Company ingestion",
|
prog="Transparenzregister Company ingestion",
|
||||||
description="Ingests all missing companies and enriches them with finandcial data - runs on scheulde.",
|
description="Ingests all missing companies and enriches them with financial data - runs on schedule.",
|
||||||
epilog="Example: ingest --log-level ERROR --log-path print.log",
|
epilog="Example: ingest --log-level ERROR --log-path print.log",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
@ -73,7 +73,7 @@ def cli() -> None: # pragma: no cover
|
|||||||
|
|
||||||
# Schedule tasks or resume scheduling based on last execution times
|
# Schedule tasks or resume scheduling based on last execution times
|
||||||
every(6).hours.do(fetch_news.main, config_provider).tag("fetch_news")
|
every(6).hours.do(fetch_news.main, config_provider).tag("fetch_news")
|
||||||
every(3).hours.do(main, config_provider).tag("missing_compnies_and_financials")
|
every(3).hours.do(main, config_provider).tag("missing_companies_and_financials")
|
||||||
|
|
||||||
# Run the scheduler in a persistent loops
|
# Run the scheduler in a persistent loop
|
||||||
while True:
|
while True:
|
||||||
|
@ -20,7 +20,7 @@ from aki_prj23_transparenzregister.config.config_template import (
|
|||||||
HELP_TEXT_CONFIG: Final[str] = (
|
HELP_TEXT_CONFIG: Final[str] = (
|
||||||
"Database configuration. "
|
"Database configuration. "
|
||||||
"Either give the paths to a *.json containing the secrets. "
|
"Either give the paths to a *.json containing the secrets. "
|
||||||
"Alternativly specify the use of enviromental vairables by entering the ENV or the einviromental prefix ending with a '_'."
|
"Alternatively specify the use of environmental vairables by entering the ENV or the environmental prefix ending with a '_'."
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@ -20,5 +20,5 @@ def add_auth(app: Dash) -> None:
|
|||||||
return
|
return
|
||||||
logger.info("The password protection is not or only partially configured!")
|
logger.info("The password protection is not or only partially configured!")
|
||||||
logger.debug(
|
logger.debug(
|
||||||
"The enviromental variables PYTHON_DASH_LOGIN_USERNAME and PYTHON_DASH_LOGIN_PW should be used to activate this feature."
|
"The environmental variables PYTHON_DASH_LOGIN_USERNAME and PYTHON_DASH_LOGIN_PW should be used to activate this feature."
|
||||||
)
|
)
|
||||||
|
@ -25,7 +25,7 @@ class BaseNewsExtractor(metaclass=abc.ABCMeta):
|
|||||||
category (str): News category to retrieve.
|
category (str): News category to retrieve.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
list[News] | None: List of news or None if an error occured.
|
list[News] | None: List of news or None if an error occurred.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
@abc.abstractmethod
|
@abc.abstractmethod
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
"""Tageschau API news extractor."""
|
"""Tagesschau API news extractor."""
|
||||||
import requests
|
import requests
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
@ -10,7 +10,7 @@ from aki_prj23_transparenzregister.utils.data_extraction.news.base import (
|
|||||||
|
|
||||||
|
|
||||||
class TagesschauAPI(BaseNewsExtractor):
|
class TagesschauAPI(BaseNewsExtractor):
|
||||||
"""Tageschau API news extractor."""
|
"""Tagesschau API news extractor."""
|
||||||
|
|
||||||
def __init__(self) -> None:
|
def __init__(self) -> None:
|
||||||
"""Constructor."""
|
"""Constructor."""
|
||||||
@ -18,7 +18,7 @@ class TagesschauAPI(BaseNewsExtractor):
|
|||||||
|
|
||||||
@logger.catch(reraise=True)
|
@logger.catch(reraise=True)
|
||||||
def get_news_for_category(self, category: str = "wirtschaft") -> list[News] | None:
|
def get_news_for_category(self, category: str = "wirtschaft") -> list[News] | None:
|
||||||
"""Retrieve news for the given category from the Tageschau API.
|
"""Retrieve news for the given category from the Tagesschau API.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
category (str, optional): Category to search for. Defaults to "wirtschaft".
|
category (str, optional): Category to search for. Defaults to "wirtschaft".
|
||||||
|
@ -134,7 +134,7 @@ class BaseTransformer(metaclass=abc.ABCMeta):
|
|||||||
|
|
||||||
@abc.abstractmethod
|
@abc.abstractmethod
|
||||||
def parse_date_of_birth(self, data: dict) -> str | None:
|
def parse_date_of_birth(self, data: dict) -> str | None:
|
||||||
"""Retreives the date of birth from a stakeholder entry if possible.
|
"""Retrieves the date of birth from a stakeholder entry if possible.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
data (dict): Stakeholder data
|
data (dict): Stakeholder data
|
||||||
|
@ -24,7 +24,7 @@ from aki_prj23_transparenzregister.utils.data_extraction.unternehmensregister.tr
|
|||||||
normalize_street,
|
normalize_street,
|
||||||
)
|
)
|
||||||
from aki_prj23_transparenzregister.utils.string_tools import (
|
from aki_prj23_transparenzregister.utils.string_tools import (
|
||||||
remove_traling_and_leading_quotes,
|
remove_trailing_and_leading_quotes,
|
||||||
transform_date_to_iso,
|
transform_date_to_iso,
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -33,7 +33,7 @@ class V1_Transformer(BaseTransformer): # noqa: N801
|
|||||||
"""Transformer for data exports from Unternehmensregister (v1)."""
|
"""Transformer for data exports from Unternehmensregister (v1)."""
|
||||||
|
|
||||||
def parse_date_of_birth(self, data: dict) -> str | None:
|
def parse_date_of_birth(self, data: dict) -> str | None:
|
||||||
"""Retreives the date of birth from a stakeholder entry if possible.
|
"""Retrieves the date of birth from a stakeholder entry if possible.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
data (dict): Stakeholder data
|
data (dict): Stakeholder data
|
||||||
@ -64,7 +64,7 @@ class V1_Transformer(BaseTransformer): # noqa: N801
|
|||||||
):
|
):
|
||||||
return CompanyToCompanyRelationship(
|
return CompanyToCompanyRelationship(
|
||||||
**{ # type: ignore
|
**{ # type: ignore
|
||||||
"name": remove_traling_and_leading_quotes(
|
"name": remove_trailing_and_leading_quotes(
|
||||||
data["Beteiligter"]["Natuerliche_Person"]["Voller_Name"][
|
data["Beteiligter"]["Natuerliche_Person"]["Voller_Name"][
|
||||||
"Nachname"
|
"Nachname"
|
||||||
]
|
]
|
||||||
@ -130,7 +130,7 @@ class V1_Transformer(BaseTransformer): # noqa: N801
|
|||||||
"role": RelationshipRoleEnum(
|
"role": RelationshipRoleEnum(
|
||||||
data["Rolle"]["Rollenbezeichnung"]["content"]
|
data["Rolle"]["Rollenbezeichnung"]["content"]
|
||||||
),
|
),
|
||||||
"name": remove_traling_and_leading_quotes(
|
"name": remove_trailing_and_leading_quotes(
|
||||||
data["Beteiligter"]["Organisation"]["Bezeichnung"][
|
data["Beteiligter"]["Organisation"]["Bezeichnung"][
|
||||||
"Bezeichnung_Aktuell"
|
"Bezeichnung_Aktuell"
|
||||||
]
|
]
|
||||||
@ -213,7 +213,7 @@ class V1_Transformer(BaseTransformer): # noqa: N801
|
|||||||
name = data["XJustiz_Daten"]["Grunddaten"]["Verfahrensdaten"]["Beteiligung"][0][
|
name = data["XJustiz_Daten"]["Grunddaten"]["Verfahrensdaten"]["Beteiligung"][0][
|
||||||
"Beteiligter"
|
"Beteiligter"
|
||||||
]["Organisation"]["Bezeichnung"]["Bezeichnung_Aktuell"]
|
]["Organisation"]["Bezeichnung"]["Bezeichnung_Aktuell"]
|
||||||
return remove_traling_and_leading_quotes(name)
|
return remove_trailing_and_leading_quotes(name)
|
||||||
|
|
||||||
def map_rechtsform(self, company_name: str, data: dict) -> CompanyTypeEnum | None:
|
def map_rechtsform(self, company_name: str, data: dict) -> CompanyTypeEnum | None:
|
||||||
"""Extracts the company type from a given Unternehmensregister export.
|
"""Extracts the company type from a given Unternehmensregister export.
|
||||||
|
@ -28,7 +28,7 @@ from aki_prj23_transparenzregister.utils.data_extraction.unternehmensregister.tr
|
|||||||
RoleMapper,
|
RoleMapper,
|
||||||
)
|
)
|
||||||
from aki_prj23_transparenzregister.utils.string_tools import (
|
from aki_prj23_transparenzregister.utils.string_tools import (
|
||||||
remove_traling_and_leading_quotes,
|
remove_trailing_and_leading_quotes,
|
||||||
transform_date_to_iso,
|
transform_date_to_iso,
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -37,7 +37,7 @@ class V3_Transformer(BaseTransformer): # noqa: N801
|
|||||||
"""Transformer for data exports from Unternehmensregister (v3)."""
|
"""Transformer for data exports from Unternehmensregister (v3)."""
|
||||||
|
|
||||||
def parse_date_of_birth(self, data: dict) -> str | None:
|
def parse_date_of_birth(self, data: dict) -> str | None:
|
||||||
"""Retreives the date of birth from a stakeholder entry if possible.
|
"""Retrieves the date of birth from a stakeholder entry if possible.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
data (dict): Stakeholder data
|
data (dict): Stakeholder data
|
||||||
@ -89,7 +89,7 @@ class V3_Transformer(BaseTransformer): # noqa: N801
|
|||||||
):
|
):
|
||||||
return CompanyToCompanyRelationship(
|
return CompanyToCompanyRelationship(
|
||||||
**{ # type: ignore
|
**{ # type: ignore
|
||||||
"name": remove_traling_and_leading_quotes(
|
"name": remove_trailing_and_leading_quotes(
|
||||||
data["tns:beteiligter"]["tns:auswahl_beteiligter"][
|
data["tns:beteiligter"]["tns:auswahl_beteiligter"][
|
||||||
"tns:natuerlichePerson"
|
"tns:natuerlichePerson"
|
||||||
]["tns:vollerName"]["tns:nachname"]
|
]["tns:vollerName"]["tns:nachname"]
|
||||||
@ -160,7 +160,6 @@ class V3_Transformer(BaseTransformer): # noqa: N801
|
|||||||
"tns:organisation"
|
"tns:organisation"
|
||||||
]
|
]
|
||||||
|
|
||||||
location = None
|
|
||||||
if "tns:anschrift" in base:
|
if "tns:anschrift" in base:
|
||||||
location = Location(
|
location = Location(
|
||||||
**{
|
**{
|
||||||
@ -197,7 +196,7 @@ class V3_Transformer(BaseTransformer): # noqa: N801
|
|||||||
"role": self.map_role_id_to_enum(
|
"role": self.map_role_id_to_enum(
|
||||||
data["tns:rolle"]["tns:rollenbezeichnung"]["code"]
|
data["tns:rolle"]["tns:rollenbezeichnung"]["code"]
|
||||||
),
|
),
|
||||||
"name": remove_traling_and_leading_quotes(
|
"name": remove_trailing_and_leading_quotes(
|
||||||
base["tns:bezeichnung"]["tns:bezeichnung.aktuell"]
|
base["tns:bezeichnung"]["tns:bezeichnung.aktuell"]
|
||||||
),
|
),
|
||||||
"location": location,
|
"location": location,
|
||||||
@ -273,7 +272,7 @@ class V3_Transformer(BaseTransformer): # noqa: N801
|
|||||||
"tns:bezeichnung.aktuell",
|
"tns:bezeichnung.aktuell",
|
||||||
]
|
]
|
||||||
name = traversal(data, path)
|
name = traversal(data, path)
|
||||||
return remove_traling_and_leading_quotes(name)
|
return remove_trailing_and_leading_quotes(name)
|
||||||
|
|
||||||
def map_rechtsform(self, company_name: str, data: dict) -> CompanyTypeEnum | None:
|
def map_rechtsform(self, company_name: str, data: dict) -> CompanyTypeEnum | None:
|
||||||
"""Extracts the company type from a given Unternehmensregister export.
|
"""Extracts the company type from a given Unternehmensregister export.
|
||||||
|
@ -35,7 +35,7 @@ def cli() -> None: # pragma: no cover
|
|||||||
"""A cli interface for the data transfer."""
|
"""A cli interface for the data transfer."""
|
||||||
parser = argparse.ArgumentParser(
|
parser = argparse.ArgumentParser(
|
||||||
prog="Process and transform data",
|
prog="Process and transform data",
|
||||||
description="Process the raw data from the MongoDB with AI models and match and transform the data from the MongoDB when transfering into the SQL DB.",
|
description="Process the raw data from the MongoDB with AI models and match and transform the data from the MongoDB when transferring into the SQL DB.",
|
||||||
epilog="Example: 'data-processing secrets.json' or 'data-processing ENV'",
|
epilog="Example: 'data-processing secrets.json' or 'data-processing ENV'",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
|
@ -99,7 +99,7 @@ class CompanyMongoService:
|
|||||||
return any(not re.match("^[0-9]{4}$", key) for key in data["yearly_results"])
|
return any(not re.match("^[0-9]{4}$", key) for key in data["yearly_results"])
|
||||||
|
|
||||||
def is_self_referencing_auditors(self, data: dict) -> bool:
|
def is_self_referencing_auditors(self, data: dict) -> bool:
|
||||||
"""Does the entry contain yearly_resutls which are self-referencing?
|
"""Does the entry contain yearly_results which are self-referencing?
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
data (dict): Entry from MongoDB
|
data (dict): Entry from MongoDB
|
||||||
|
@ -15,7 +15,7 @@ def create_2d_graph( # noqa PLR0913
|
|||||||
edge_annotation: bool,
|
edge_annotation: bool,
|
||||||
edge_thickness: int,
|
edge_thickness: int,
|
||||||
) -> go.Figure:
|
) -> go.Figure:
|
||||||
"""This Method creates a 2d Network in Plotly with a Scatter Graph and retuns it.
|
"""This Method creates a 2d Network in Plotly with a Scatter Graph and returns it.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
graph: NetworkX Graph.
|
graph: NetworkX Graph.
|
||||||
|
@ -15,7 +15,7 @@ def create_3d_graph( # noqa : PLR0913
|
|||||||
edge_annotation: bool,
|
edge_annotation: bool,
|
||||||
edge_thickness: int,
|
edge_thickness: int,
|
||||||
) -> go.Figure:
|
) -> go.Figure:
|
||||||
"""This Method creates a 3D Network in Plotly with a Scatter Graph and retuns it.
|
"""This Method creates a 3D Network in Plotly with a Scatter Graph and returns it.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
graph: NetworkX Graph.
|
graph: NetworkX Graph.
|
||||||
|
@ -4,7 +4,7 @@ import pandas as pd
|
|||||||
|
|
||||||
|
|
||||||
def initialize_network(edges: list, nodes: dict) -> tuple[nx.Graph, pd.DataFrame]:
|
def initialize_network(edges: list, nodes: dict) -> tuple[nx.Graph, pd.DataFrame]:
|
||||||
"""This Method creates a Network from the Framework NetworkX with the help of a Node and Edge List. Furthemore it creates a DataFrame with the most important Metrics.
|
"""This Method creates a Network from the Framework NetworkX with the help of a Node and Edge List. Furthermore it creates a DataFrame with the most important Metrics.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
edges (list): List with the connections between Nodes.
|
edges (list): List with the connections between Nodes.
|
||||||
@ -50,7 +50,7 @@ def initialize_network(edges: list, nodes: dict) -> tuple[nx.Graph, pd.DataFrame
|
|||||||
def initialize_network_with_reduced_metrics(
|
def initialize_network_with_reduced_metrics(
|
||||||
edges: list, nodes: dict
|
edges: list, nodes: dict
|
||||||
) -> tuple[nx.Graph, pd.DataFrame]:
|
) -> tuple[nx.Graph, pd.DataFrame]:
|
||||||
"""This Method creates a Network from the Framework NetworkX with the help of a Node and Edge List. Furthemore it creates a DataFrame with the most important Metrics.
|
"""This Method creates a Network from the Framework NetworkX with the help of a Node and Edge List. Furthermore it creates a DataFrame with the most important Metrics.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
edges: List with the connections between Nodes.
|
edges: List with the connections between Nodes.
|
||||||
@ -58,7 +58,7 @@ def initialize_network_with_reduced_metrics(
|
|||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Graph: Plotly Figure
|
Graph: NetworkX Graph
|
||||||
Metrices: DataFrame with Metrics
|
Metrics: DataFrame with Metrics
|
||||||
"""
|
"""
|
||||||
# create edge dataframe
|
# create edge dataframe
|
||||||
df_edges = pd.DataFrame(edges, columns=["from", "to", "type"])
|
df_edges = pd.DataFrame(edges, columns=["from", "to", "type"])
|
||||||
|
@ -36,15 +36,8 @@ def transform_date_to_iso(date: str) -> str:
|
|||||||
return date_temp.strftime("%Y-%m-%d")
|
return date_temp.strftime("%Y-%m-%d")
|
||||||
|
|
||||||
|
|
||||||
def remove_traling_and_leading_quotes(value: str) -> str:
|
def remove_trailing_and_leading_quotes(value: str) -> str:
|
||||||
"""Removes trailing and leading double-quotes from given string if present.
|
"""Removes trailing and leading double-quotes from given string if present."""
|
||||||
|
|
||||||
Args:
|
|
||||||
value (str): _description_
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
str: _description_
|
|
||||||
"""
|
|
||||||
if value is not None:
|
if value is not None:
|
||||||
count_quotes = value.count('"')
|
count_quotes = value.count('"')
|
||||||
if count_quotes > 0:
|
if count_quotes > 0:
|
||||||
|
@ -12,9 +12,6 @@ from aki_prj23_transparenzregister.config.config_template import MongoConnection
|
|||||||
def mock_mongo_connection() -> MongoConnection:
|
def mock_mongo_connection() -> MongoConnection:
|
||||||
"""Mock MongoConnector class.
|
"""Mock MongoConnector class.
|
||||||
|
|
||||||
Args:
|
|
||||||
mocker (any): Library mocker
|
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Mock: Mocked MongoConnector
|
Mock: Mocked MongoConnector
|
||||||
"""
|
"""
|
||||||
|
@ -7,7 +7,7 @@ def test_ner_spacy() -> None:
|
|||||||
"""Mock TestNerService."""
|
"""Mock TestNerService."""
|
||||||
# Create instance of NerAnalysisService with use_spacy=True
|
# Create instance of NerAnalysisService with use_spacy=True
|
||||||
ner_service = NerAnalysisService(
|
ner_service = NerAnalysisService(
|
||||||
use_spacy=True, use_transformer=False, use_companylist=False
|
use_spacy=True, use_transformer=False, use_company_list=False
|
||||||
)
|
)
|
||||||
# 1st testing
|
# 1st testing
|
||||||
doc = {"title": "Siemens ist ein Unternehmen."}
|
doc = {"title": "Siemens ist ein Unternehmen."}
|
||||||
@ -24,7 +24,7 @@ def test_ner_company_list() -> None:
|
|||||||
"""Mock test_ner_company."""
|
"""Mock test_ner_company."""
|
||||||
# Create instance of NerAnalysisService with use_companylist=True
|
# Create instance of NerAnalysisService with use_company_list=True
|
||||||
ner_service = NerAnalysisService(
|
ner_service = NerAnalysisService(
|
||||||
use_spacy=False, use_transformer=False, use_companylist=True
|
use_spacy=False, use_transformer=False, use_company_list=True
|
||||||
)
|
)
|
||||||
|
|
||||||
doc = {"title": "Siemens ist ein Unternehmen."}
|
doc = {"title": "Siemens ist ein Unternehmen."}
|
||||||
@ -41,7 +41,7 @@ def test_ner_transformer() -> None:
|
|||||||
"""Mock test_ner_company."""
|
"""Mock test_ner_company."""
|
||||||
# Create instance of NerAnalysisService with use_use_companylist=True
|
# Create instance of NerAnalysisService with use_transformer=True
|
||||||
ner_service = NerAnalysisService(
|
ner_service = NerAnalysisService(
|
||||||
use_spacy=False, use_transformer=True, use_companylist=False
|
use_spacy=False, use_transformer=True, use_company_list=False
|
||||||
)
|
)
|
||||||
|
|
||||||
doc = {"title": "Siemens ist ein Unternehmen."}
|
doc = {"title": "Siemens ist ein Unternehmen."}
|
||||||
|
@ -14,9 +14,6 @@ from aki_prj23_transparenzregister.config.config_template import MongoConnection
|
|||||||
def mock_mongo_connection() -> MongoConnection:
|
def mock_mongo_connection() -> MongoConnection:
|
||||||
"""Mock MongoConnector class.
|
"""Mock MongoConnector class.
|
||||||
|
|
||||||
Args:
|
|
||||||
mocker (any): Library mocker
|
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Mock: Mocked MongoConnector
|
Mock: Mocked MongoConnector
|
||||||
"""
|
"""
|
||||||
|
@ -20,6 +20,10 @@ def test_work(
|
|||||||
company_mongo_service_mock: Mock,
|
company_mongo_service_mock: Mock,
|
||||||
mongo_connector_mock: Mock,
|
mongo_connector_mock: Mock,
|
||||||
) -> None:
|
) -> None:
|
||||||
|
_ = connector_mock
|
||||||
|
_ = mongo_connector_mock
|
||||||
|
_ = company_mongo_service_mock
|
||||||
|
|
||||||
config_provider_mock = Mock()
|
config_provider_mock = Mock()
|
||||||
config_provider_mock.session.return_value = Mock()
|
config_provider_mock.session.return_value = Mock()
|
||||||
|
|
||||||
|
@ -91,7 +91,7 @@ def test_by_id_result(mock_mongo_connector: Mock, mock_collection: Mock) -> None
|
|||||||
"""
|
"""
|
||||||
mock_mongo_connector.database = {"companies": mock_collection}
|
mock_mongo_connector.database = {"companies": mock_collection}
|
||||||
service = CompanyMongoService(mock_mongo_connector)
|
service = CompanyMongoService(mock_mongo_connector)
|
||||||
mock_entry = {"id": "Does exist", "vaue": 42}
|
mock_entry = {"id": "Does exist", "value": 42}
|
||||||
mock_collection.find.return_value = [mock_entry]
|
mock_collection.find.return_value = [mock_entry]
|
||||||
id = CompanyID(DistrictCourt("a", "b"), "c").to_dict()
|
id = CompanyID(DistrictCourt("a", "b"), "c").to_dict()
|
||||||
assert service.get_by_id(id) == mock_entry
|
assert service.get_by_id(id) == mock_entry
|
||||||
@ -154,7 +154,7 @@ def test_get_where_financial_results(
|
|||||||
assert service.get_where_yearly_results() == mock_result
|
assert service.get_where_yearly_results() == mock_result
|
||||||
|
|
||||||
|
|
||||||
def test_add_yearly_reslults(mock_mongo_connector: Mock, mock_collection: Mock) -> None:
|
def test_add_yearly_results(mock_mongo_connector: Mock, mock_collection: Mock) -> None:
|
||||||
mock_mongo_connector.database = {"companies": mock_collection}
|
mock_mongo_connector.database = {"companies": mock_collection}
|
||||||
service = CompanyMongoService(mock_mongo_connector)
|
service = CompanyMongoService(mock_mongo_connector)
|
||||||
mock_result: list = [{"_id": "abc", "brille?": "Fielmann", "Hotel?": "Trivago"}]
|
mock_result: list = [{"_id": "abc", "brille?": "Fielmann", "Hotel?": "Trivago"}]
|
||||||
|
@ -81,5 +81,5 @@ def test_initialize_network() -> None:
|
|||||||
"id",
|
"id",
|
||||||
]
|
]
|
||||||
|
|
||||||
graph = initialize_network_without_metrics(edges=edges, nodes=nodes)
|
initialize_network_without_metrics(edges=edges, nodes=nodes)
|
||||||
assert isinstance(graph_reduced, nx.Graph)
|
assert isinstance(graph_reduced, nx.Graph)
|
||||||
|
@ -57,5 +57,5 @@ def test_transform_date_to_iso(value: str, expected: str) -> None:
|
|||||||
],
|
],
|
||||||
)
|
)
|
||||||
def test_remove_trailing_and_leading_quotes(value: str, expected_result: str) -> None:
|
def test_remove_trailing_and_leading_quotes(value: str, expected_result: str) -> None:
|
||||||
result = string_tools.remove_traling_and_leading_quotes(value)
|
result = string_tools.remove_trailing_and_leading_quotes(value)
|
||||||
assert result == expected_result
|
assert result == expected_result
|
||||||
|
Reference in New Issue
Block a user