mirror of
https://github.com/fhswf/aki_prj23_transparenzregister.git
synced 2025-06-21 23:33:54 +02:00
A lot of spelling (#512)
This commit is contained in:
@ -48,21 +48,21 @@ class EntityPipeline:
|
||||
# spaCy
|
||||
if ner_method == "spacy":
|
||||
ner_service_instance = ner_service.NerAnalysisService(
|
||||
use_spacy=True, use_transformer=False, use_companylist=False
|
||||
use_spacy=True, use_transformer=False, use_company_list=False
|
||||
)
|
||||
ner_service_func = ner_service_instance.ner_spacy
|
||||
|
||||
# company list
|
||||
elif ner_method == "company_list":
|
||||
ner_service_instance = ner_service.NerAnalysisService(
|
||||
use_spacy=False, use_transformer=False, use_companylist=True
|
||||
use_spacy=False, use_transformer=False, use_company_list=True
|
||||
)
|
||||
ner_service_func = ner_service_instance.ner_company_list
|
||||
|
||||
# transformer
|
||||
elif ner_method == "transformer":
|
||||
ner_service_instance = ner_service.NerAnalysisService(
|
||||
use_spacy=False, use_transformer=True, use_companylist=False
|
||||
use_spacy=False, use_transformer=True, use_company_list=False
|
||||
)
|
||||
ner_service_func = ner_service_instance.ner_transformer
|
||||
else:
|
||||
|
@ -15,14 +15,14 @@ class NerAnalysisService:
|
||||
self,
|
||||
use_spacy: bool = False,
|
||||
use_transformer: bool = False,
|
||||
use_companylist: bool = False,
|
||||
use_company_list: bool = False,
|
||||
) -> None:
|
||||
"""Method to check which sentiment model is chosen."""
|
||||
if use_spacy:
|
||||
self.init_spacy()
|
||||
if use_transformer:
|
||||
self.init_transformer()
|
||||
if use_companylist:
|
||||
if use_company_list:
|
||||
self.init_companylist()
|
||||
|
||||
def init_spacy(self) -> None:
|
||||
|
@ -79,14 +79,13 @@ class SentimentAnalysisService:
|
||||
|
||||
Args:
|
||||
doc: a document which is processed with spacy
|
||||
docAttrib: which attribute of the document has to be processed: text or title
|
||||
|
||||
Returns:
|
||||
label: positive, negative, neutral.
|
||||
"""
|
||||
# set limits for sentiments
|
||||
_upperlimit = 0.1
|
||||
_lowerlimit = -0.1
|
||||
_upper_limit = 0.1
|
||||
_lower_limit = -0.1
|
||||
|
||||
_doc = self.nlp(doc)
|
||||
_score = None
|
||||
@ -108,9 +107,9 @@ class SentimentAnalysisService:
|
||||
# Normalize the score to the range 0..1
|
||||
_normalized_score = (_pos - abs(_neg)) / _max_score if _max_score > 0 else 0
|
||||
|
||||
if _normalized_score > _upperlimit:
|
||||
if _normalized_score > _upper_limit:
|
||||
_sent = "positive"
|
||||
elif _normalized_score < _lowerlimit:
|
||||
elif _normalized_score < _lower_limit:
|
||||
_sent = "negative"
|
||||
else:
|
||||
_sent = "neutral"
|
||||
|
@ -26,7 +26,7 @@ from aki_prj23_transparenzregister.utils.mongo.connector import MongoConnector
|
||||
def cli() -> None: # pragma: no cover
|
||||
"""CLI entry point."""
|
||||
parser = argparse.ArgumentParser(
|
||||
prog="Transparenzregister enriching companies with mising financial data",
|
||||
prog="Transparenzregister enriching companies with missing financial data",
|
||||
description="Filters all raw companies with missing financial info from the MongoDB and enriches them with yearly result data from the Bundesanzeiger.",
|
||||
epilog="Example: enrich-company-financials --log-level ERROR --log-path print.log",
|
||||
)
|
||||
|
@ -77,15 +77,15 @@ def main(config_provider: ConfigProvider) -> int:
|
||||
logger.error("Error while fetching news from Handelsblatt")
|
||||
news_handelsblatt = []
|
||||
|
||||
news_tageschau = tagesschau.get_news_for_category()
|
||||
if news_tageschau is None:
|
||||
news_tagesschau = tagesschau.get_news_for_category()
|
||||
if news_tagesschau is None:
|
||||
logger.error("Error while fetching news from Tagesschau")
|
||||
news_tageschau = []
|
||||
news_tagesschau = []
|
||||
|
||||
logger.info(f"Found {len(news_handelsblatt)} news articles from Handelsblatt")
|
||||
logger.info(f"Found {len(news_tageschau)} news articles from Tagesschau")
|
||||
logger.info(f"Found {len(news_tagesschau)} news articles from Tagesschau")
|
||||
|
||||
news_joined = news_handelsblatt + news_tageschau
|
||||
news_joined = news_handelsblatt + news_tagesschau
|
||||
|
||||
count_new_documents = 0
|
||||
count_duplicate_documents = 0
|
||||
|
@ -25,7 +25,7 @@ from aki_prj23_transparenzregister.utils.logger_config import (
|
||||
|
||||
|
||||
def load_schedule(schedule_file: str) -> dict:
|
||||
"""Load scheudle data from file.
|
||||
"""Load schedule data from file.
|
||||
|
||||
Returns:
|
||||
dict: Schedule data
|
||||
@ -52,7 +52,7 @@ def cli() -> None: # pragma: no cover
|
||||
"""CLI entry point."""
|
||||
parser = argparse.ArgumentParser(
|
||||
prog="Transparenzregister Company ingestion",
|
||||
description="Ingests all missing companies and enriches them with finandcial data - runs on scheulde.",
|
||||
description="Ingests all missing companies and enriches them with financial data - runs on schedule.",
|
||||
epilog="Example: ingest --log-level ERROR --log-path print.log",
|
||||
)
|
||||
parser.add_argument(
|
||||
@ -73,7 +73,7 @@ def cli() -> None: # pragma: no cover
|
||||
|
||||
# Schedule tasks or resume scheduling based on last execution times
|
||||
every(6).hours.do(fetch_news.main, config_provider).tag("fetch_news")
|
||||
every(3).hours.do(main, config_provider).tag("missing_compnies_and_financials")
|
||||
every(3).hours.do(main, config_provider).tag("missing_companies_and_financials")
|
||||
|
||||
# Run the scheduler in a persistent loops
|
||||
while True:
|
||||
|
@ -20,7 +20,7 @@ from aki_prj23_transparenzregister.config.config_template import (
|
||||
HELP_TEXT_CONFIG: Final[str] = (
|
||||
"Database configuration. "
|
||||
"Either give the paths to a *.json containing the secrets. "
|
||||
"Alternativly specify the use of enviromental vairables by entering the ENV or the einviromental prefix ending with a '_'."
|
||||
"Alternatively specify the use of environmental vairables by entering the ENV or the environmental prefix ending with a '_'."
|
||||
)
|
||||
|
||||
|
||||
|
@ -20,5 +20,5 @@ def add_auth(app: Dash) -> None:
|
||||
return
|
||||
logger.info("The password protection is not or only partially configured!")
|
||||
logger.debug(
|
||||
"The enviromental variables PYTHON_DASH_LOGIN_USERNAME and PYTHON_DASH_LOGIN_PW should be used to activate this feature."
|
||||
"The environmental variables PYTHON_DASH_LOGIN_USERNAME and PYTHON_DASH_LOGIN_PW should be used to activate this feature."
|
||||
)
|
||||
|
@ -25,7 +25,7 @@ class BaseNewsExtractor(metaclass=abc.ABCMeta):
|
||||
category (str): News category to retrieve.
|
||||
|
||||
Returns:
|
||||
list[News] | None: List of news or None if an error occured.
|
||||
list[News] | None: List of news or None if an error occurred.
|
||||
"""
|
||||
|
||||
@abc.abstractmethod
|
||||
|
@ -1,4 +1,4 @@
|
||||
"""Tageschau API news extractor."""
|
||||
"""Tagesschau API news extractor."""
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
from loguru import logger
|
||||
@ -10,7 +10,7 @@ from aki_prj23_transparenzregister.utils.data_extraction.news.base import (
|
||||
|
||||
|
||||
class TagesschauAPI(BaseNewsExtractor):
|
||||
"""Tageschau API news extractor."""
|
||||
"""Tagesschau API news extractor."""
|
||||
|
||||
def __init__(self) -> None:
|
||||
"""Constructor."""
|
||||
@ -18,7 +18,7 @@ class TagesschauAPI(BaseNewsExtractor):
|
||||
|
||||
@logger.catch(reraise=True)
|
||||
def get_news_for_category(self, category: str = "wirtschaft") -> list[News] | None:
|
||||
"""Retrieve news for the given category from the Tageschau API.
|
||||
"""Retrieve news for the given category from the Tagesschau API.
|
||||
|
||||
Args:
|
||||
category (str, optional): Category to search for. Defaults to "wirtschaft".
|
||||
|
@ -134,7 +134,7 @@ class BaseTransformer(metaclass=abc.ABCMeta):
|
||||
|
||||
@abc.abstractmethod
|
||||
def parse_date_of_birth(self, data: dict) -> str | None:
|
||||
"""Retreives the date of birth from a stakeholder entry if possible.
|
||||
"""Retrieves the date of birth from a stakeholder entry if possible.
|
||||
|
||||
Args:
|
||||
data (dict): Stakeholder data
|
||||
|
@ -24,7 +24,7 @@ from aki_prj23_transparenzregister.utils.data_extraction.unternehmensregister.tr
|
||||
normalize_street,
|
||||
)
|
||||
from aki_prj23_transparenzregister.utils.string_tools import (
|
||||
remove_traling_and_leading_quotes,
|
||||
remove_trailing_and_leading_quotes,
|
||||
transform_date_to_iso,
|
||||
)
|
||||
|
||||
@ -33,7 +33,7 @@ class V1_Transformer(BaseTransformer): # noqa: N801
|
||||
"""Transformer for data exports from Unternehmensregister (v1)."""
|
||||
|
||||
def parse_date_of_birth(self, data: dict) -> str | None:
|
||||
"""Retreives the date of birth from a stakeholder entry if possible.
|
||||
"""Retrieves the date of birth from a stakeholder entry if possible.
|
||||
|
||||
Args:
|
||||
data (dict): Stakeholder data
|
||||
@ -64,7 +64,7 @@ class V1_Transformer(BaseTransformer): # noqa: N801
|
||||
):
|
||||
return CompanyToCompanyRelationship(
|
||||
**{ # type: ignore
|
||||
"name": remove_traling_and_leading_quotes(
|
||||
"name": remove_trailing_and_leading_quotes(
|
||||
data["Beteiligter"]["Natuerliche_Person"]["Voller_Name"][
|
||||
"Nachname"
|
||||
]
|
||||
@ -130,7 +130,7 @@ class V1_Transformer(BaseTransformer): # noqa: N801
|
||||
"role": RelationshipRoleEnum(
|
||||
data["Rolle"]["Rollenbezeichnung"]["content"]
|
||||
),
|
||||
"name": remove_traling_and_leading_quotes(
|
||||
"name": remove_trailing_and_leading_quotes(
|
||||
data["Beteiligter"]["Organisation"]["Bezeichnung"][
|
||||
"Bezeichnung_Aktuell"
|
||||
]
|
||||
@ -213,7 +213,7 @@ class V1_Transformer(BaseTransformer): # noqa: N801
|
||||
name = data["XJustiz_Daten"]["Grunddaten"]["Verfahrensdaten"]["Beteiligung"][0][
|
||||
"Beteiligter"
|
||||
]["Organisation"]["Bezeichnung"]["Bezeichnung_Aktuell"]
|
||||
return remove_traling_and_leading_quotes(name)
|
||||
return remove_trailing_and_leading_quotes(name)
|
||||
|
||||
def map_rechtsform(self, company_name: str, data: dict) -> CompanyTypeEnum | None:
|
||||
"""Extracts the company type from a given Unternehmensregister export.
|
||||
|
@ -28,7 +28,7 @@ from aki_prj23_transparenzregister.utils.data_extraction.unternehmensregister.tr
|
||||
RoleMapper,
|
||||
)
|
||||
from aki_prj23_transparenzregister.utils.string_tools import (
|
||||
remove_traling_and_leading_quotes,
|
||||
remove_trailing_and_leading_quotes,
|
||||
transform_date_to_iso,
|
||||
)
|
||||
|
||||
@ -37,7 +37,7 @@ class V3_Transformer(BaseTransformer): # noqa: N801
|
||||
"""Transformer for data exports from Unternehmensregister (v3)."""
|
||||
|
||||
def parse_date_of_birth(self, data: dict) -> str | None:
|
||||
"""Retreives the date of birth from a stakeholder entry if possible.
|
||||
"""Retrieves the date of birth from a stakeholder entry if possible.
|
||||
|
||||
Args:
|
||||
data (dict): Stakeholder data
|
||||
@ -89,7 +89,7 @@ class V3_Transformer(BaseTransformer): # noqa: N801
|
||||
):
|
||||
return CompanyToCompanyRelationship(
|
||||
**{ # type: ignore
|
||||
"name": remove_traling_and_leading_quotes(
|
||||
"name": remove_trailing_and_leading_quotes(
|
||||
data["tns:beteiligter"]["tns:auswahl_beteiligter"][
|
||||
"tns:natuerlichePerson"
|
||||
]["tns:vollerName"]["tns:nachname"]
|
||||
@ -160,7 +160,6 @@ class V3_Transformer(BaseTransformer): # noqa: N801
|
||||
"tns:organisation"
|
||||
]
|
||||
|
||||
location = None
|
||||
if "tns:anschrift" in base:
|
||||
location = Location(
|
||||
**{
|
||||
@ -197,7 +196,7 @@ class V3_Transformer(BaseTransformer): # noqa: N801
|
||||
"role": self.map_role_id_to_enum(
|
||||
data["tns:rolle"]["tns:rollenbezeichnung"]["code"]
|
||||
),
|
||||
"name": remove_traling_and_leading_quotes(
|
||||
"name": remove_trailing_and_leading_quotes(
|
||||
base["tns:bezeichnung"]["tns:bezeichnung.aktuell"]
|
||||
),
|
||||
"location": location,
|
||||
@ -273,7 +272,7 @@ class V3_Transformer(BaseTransformer): # noqa: N801
|
||||
"tns:bezeichnung.aktuell",
|
||||
]
|
||||
name = traversal(data, path)
|
||||
return remove_traling_and_leading_quotes(name)
|
||||
return remove_trailing_and_leading_quotes(name)
|
||||
|
||||
def map_rechtsform(self, company_name: str, data: dict) -> CompanyTypeEnum | None:
|
||||
"""Extracts the company type from a given Unternehmensregister export.
|
||||
|
@ -35,7 +35,7 @@ def cli() -> None: # pragma: no cover
|
||||
"""A cli interface for the data transfer."""
|
||||
parser = argparse.ArgumentParser(
|
||||
prog="Process and transform data",
|
||||
description="Process the raw data from the MongoDB with AI models and match and transform the data from the MongoDB when transfering into the SQL DB.",
|
||||
description="Process the raw data from the MongoDB with AI models and match and transform the data from the MongoDB when transferring into the SQL DB.",
|
||||
epilog="Example: 'data-processing secrets.json' or 'data-processing ENV'",
|
||||
)
|
||||
parser.add_argument(
|
||||
|
@ -99,7 +99,7 @@ class CompanyMongoService:
|
||||
return any(not re.match("^[0-9]{4}$", key) for key in data["yearly_results"])
|
||||
|
||||
def is_self_referencing_auditors(self, data: dict) -> bool:
|
||||
"""Does the entry contain yearly_resutls which are self-referencing?
|
||||
"""Does the entry contain yearly_results which are self-referencing?
|
||||
|
||||
Args:
|
||||
data (dict): Entry from MongoDB
|
||||
|
@ -15,7 +15,7 @@ def create_2d_graph( # noqa PLR0913
|
||||
edge_annotation: bool,
|
||||
edge_thickness: int,
|
||||
) -> go.Figure:
|
||||
"""This Method creates a 2d Network in Plotly with a Scatter Graph and retuns it.
|
||||
"""This Method creates a 2d Network in Plotly with a Scatter Graph and returns it.
|
||||
|
||||
Args:
|
||||
graph: NetworkX Graph.
|
||||
|
@ -15,7 +15,7 @@ def create_3d_graph( # noqa : PLR0913
|
||||
edge_annotation: bool,
|
||||
edge_thickness: int,
|
||||
) -> go.Figure:
|
||||
"""This Method creates a 3D Network in Plotly with a Scatter Graph and retuns it.
|
||||
"""This Method creates a 3D Network in Plotly with a Scatter Graph and returns it.
|
||||
|
||||
Args:
|
||||
graph: NetworkX Graph.
|
||||
|
@ -4,7 +4,7 @@ import pandas as pd
|
||||
|
||||
|
||||
def initialize_network(edges: list, nodes: dict) -> tuple[nx.Graph, pd.DataFrame]:
|
||||
"""This Method creates a Network from the Framework NetworkX with the help of a Node and Edge List. Furthemore it creates a DataFrame with the most important Metrics.
|
||||
"""This Method creates a Network from the Framework NetworkX with the help of a Node and Edge List. Furthermore it creates a DataFrame with the most important Metrics.
|
||||
|
||||
Args:
|
||||
edges (list): List with the connections between Nodes.
|
||||
@ -50,7 +50,7 @@ def initialize_network(edges: list, nodes: dict) -> tuple[nx.Graph, pd.DataFrame
|
||||
def initialize_network_with_reduced_metrics(
|
||||
edges: list, nodes: dict
|
||||
) -> tuple[nx.Graph, pd.DataFrame]:
|
||||
"""This Method creates a Network from the Framework NetworkX with the help of a Node and Edge List. Furthemore it creates a DataFrame with the most important Metrics.
|
||||
"""This Method creates a Network from the Framework NetworkX with the help of a Node and Edge List. Furthermore it creates a DataFrame with the most important Metrics.
|
||||
|
||||
Args:
|
||||
edges: List with the connections between Nodes.
|
||||
@ -58,7 +58,7 @@ def initialize_network_with_reduced_metrics(
|
||||
|
||||
Returns:
|
||||
Graph: Plotly Figure
|
||||
Metrices: DataFrame with Metrics
|
||||
Metrics: DataFrame with Metrics
|
||||
"""
|
||||
# create edge dataframe
|
||||
df_edges = pd.DataFrame(edges, columns=["from", "to", "type"])
|
||||
|
@ -36,15 +36,8 @@ def transform_date_to_iso(date: str) -> str:
|
||||
return date_temp.strftime("%Y-%m-%d")
|
||||
|
||||
|
||||
def remove_traling_and_leading_quotes(value: str) -> str:
|
||||
"""Removes trailing and leading double-quotes from given string if present.
|
||||
|
||||
Args:
|
||||
value (str): _description_
|
||||
|
||||
Returns:
|
||||
str: _description_
|
||||
"""
|
||||
def remove_trailing_and_leading_quotes(value: str) -> str:
|
||||
"""Removes trailing and leading double-quotes from given string if present."""
|
||||
if value is not None:
|
||||
count_quotes = value.count('"')
|
||||
if count_quotes > 0:
|
||||
|
@ -12,9 +12,6 @@ from aki_prj23_transparenzregister.config.config_template import MongoConnection
|
||||
def mock_mongo_connection() -> MongoConnection:
|
||||
"""Mock MongoConnector class.
|
||||
|
||||
Args:
|
||||
mocker (any): Library mocker
|
||||
|
||||
Returns:
|
||||
Mock: Mocked MongoConnector
|
||||
"""
|
||||
|
@ -7,7 +7,7 @@ def test_ner_spacy() -> None:
|
||||
"""Mock TestNerService."""
|
||||
# Create instance of NerAnalysisService with use_spacy=True
|
||||
ner_service = NerAnalysisService(
|
||||
use_spacy=True, use_transformer=False, use_companylist=False
|
||||
use_spacy=True, use_transformer=False, use_company_list=False
|
||||
)
|
||||
# 1st testing
|
||||
doc = {"title": "Siemens ist ein Unternehmen."}
|
||||
@ -24,7 +24,7 @@ def test_ner_company_list() -> None:
|
||||
"""Mock test_ner_company."""
|
||||
# Create instance of NerAnalysisService with use_companylist=True
|
||||
ner_service = NerAnalysisService(
|
||||
use_spacy=False, use_transformer=False, use_companylist=True
|
||||
use_spacy=False, use_transformer=False, use_company_list=True
|
||||
)
|
||||
|
||||
doc = {"title": "Siemens ist ein Unternehmen."}
|
||||
@ -41,7 +41,7 @@ def test_ner_transformer() -> None:
|
||||
"""Mock test_ner_company."""
|
||||
# Create instance of NerAnalysisService with use_use_companylist=True
|
||||
ner_service = NerAnalysisService(
|
||||
use_spacy=False, use_transformer=True, use_companylist=False
|
||||
use_spacy=False, use_transformer=True, use_company_list=False
|
||||
)
|
||||
|
||||
doc = {"title": "Siemens ist ein Unternehmen."}
|
||||
|
@ -14,9 +14,6 @@ from aki_prj23_transparenzregister.config.config_template import MongoConnection
|
||||
def mock_mongo_connection() -> MongoConnection:
|
||||
"""Mock MongoConnector class.
|
||||
|
||||
Args:
|
||||
mocker (any): Library mocker
|
||||
|
||||
Returns:
|
||||
Mock: Mocked MongoConnector
|
||||
"""
|
||||
|
@ -20,6 +20,10 @@ def test_work(
|
||||
company_mongo_service_mock: Mock,
|
||||
mongo_connector_mock: Mock,
|
||||
) -> None:
|
||||
_ = connector_mock
|
||||
_ = mongo_connector_mock
|
||||
_ = company_mongo_service_mock
|
||||
|
||||
config_provider_mock = Mock()
|
||||
config_provider_mock.session.return_value = Mock()
|
||||
|
||||
|
@ -91,7 +91,7 @@ def test_by_id_result(mock_mongo_connector: Mock, mock_collection: Mock) -> None
|
||||
"""
|
||||
mock_mongo_connector.database = {"companies": mock_collection}
|
||||
service = CompanyMongoService(mock_mongo_connector)
|
||||
mock_entry = {"id": "Does exist", "vaue": 42}
|
||||
mock_entry = {"id": "Does exist", "value": 42}
|
||||
mock_collection.find.return_value = [mock_entry]
|
||||
id = CompanyID(DistrictCourt("a", "b"), "c").to_dict()
|
||||
assert service.get_by_id(id) == mock_entry
|
||||
@ -154,7 +154,7 @@ def test_get_where_financial_results(
|
||||
assert service.get_where_yearly_results() == mock_result
|
||||
|
||||
|
||||
def test_add_yearly_reslults(mock_mongo_connector: Mock, mock_collection: Mock) -> None:
|
||||
def test_add_yearly_results(mock_mongo_connector: Mock, mock_collection: Mock) -> None:
|
||||
mock_mongo_connector.database = {"companies": mock_collection}
|
||||
service = CompanyMongoService(mock_mongo_connector)
|
||||
mock_result: list = [{"_id": "abc", "brille?": "Fielmann", "Hotel?": "Trivago"}]
|
||||
|
@ -81,5 +81,5 @@ def test_initialize_network() -> None:
|
||||
"id",
|
||||
]
|
||||
|
||||
graph = initialize_network_without_metrics(edges=edges, nodes=nodes)
|
||||
initialize_network_without_metrics(edges=edges, nodes=nodes)
|
||||
assert isinstance(graph_reduced, nx.Graph)
|
||||
|
@ -57,5 +57,5 @@ def test_transform_date_to_iso(value: str, expected: str) -> None:
|
||||
],
|
||||
)
|
||||
def test_remove_trailing_and_leading_quotes(value: str, expected_result: str) -> None:
|
||||
result = string_tools.remove_traling_and_leading_quotes(value)
|
||||
result = string_tools.remove_trailing_and_leading_quotes(value)
|
||||
assert result == expected_result
|
||||
|
Reference in New Issue
Block a user