A lot of spelling (#512)

This commit is contained in:
2024-01-04 18:01:59 +01:00
committed by GitHub
parent a0ba5d5027
commit 9ea3771f18
26 changed files with 57 additions and 68 deletions

View File

@ -48,21 +48,21 @@ class EntityPipeline:
# spaCy
if ner_method == "spacy":
ner_service_instance = ner_service.NerAnalysisService(
use_spacy=True, use_transformer=False, use_companylist=False
use_spacy=True, use_transformer=False, use_company_list=False
)
ner_service_func = ner_service_instance.ner_spacy
# company list
elif ner_method == "company_list":
ner_service_instance = ner_service.NerAnalysisService(
use_spacy=False, use_transformer=False, use_companylist=True
use_spacy=False, use_transformer=False, use_company_list=True
)
ner_service_func = ner_service_instance.ner_company_list
# transformer
elif ner_method == "transformer":
ner_service_instance = ner_service.NerAnalysisService(
use_spacy=False, use_transformer=True, use_companylist=False
use_spacy=False, use_transformer=True, use_company_list=False
)
ner_service_func = ner_service_instance.ner_transformer
else:

View File

@ -15,14 +15,14 @@ class NerAnalysisService:
self,
use_spacy: bool = False,
use_transformer: bool = False,
use_companylist: bool = False,
use_company_list: bool = False,
) -> None:
"""Method to check which sentiment model is chosen."""
if use_spacy:
self.init_spacy()
if use_transformer:
self.init_transformer()
if use_companylist:
if use_company_list:
self.init_companylist()
def init_spacy(self) -> None:

View File

@ -79,14 +79,13 @@ class SentimentAnalysisService:
Args:
doc: a document which is processed with spacy
docAttrib: which attribute of the document has to be processed: text or title
Returns:
label: positive, negative, neutral.
"""
# set limits for sentiments
_upperlimit = 0.1
_lowerlimit = -0.1
_upper_limit = 0.1
_lower_limit = -0.1
_doc = self.nlp(doc)
_score = None
@ -108,9 +107,9 @@ class SentimentAnalysisService:
# Normalize the score to the range 0..1
_normalized_score = (_pos - abs(_neg)) / _max_score if _max_score > 0 else 0
if _normalized_score > _upperlimit:
if _normalized_score > _upper_limit:
_sent = "positive"
elif _normalized_score < _lowerlimit:
elif _normalized_score < _lower_limit:
_sent = "negative"
else:
_sent = "neutral"

View File

@ -26,7 +26,7 @@ from aki_prj23_transparenzregister.utils.mongo.connector import MongoConnector
def cli() -> None: # pragma: no cover
"""CLI entry point."""
parser = argparse.ArgumentParser(
prog="Transparenzregister enriching companies with mising financial data",
prog="Transparenzregister enriching companies with missing financial data",
description="Filters all raw companies with missing financial info from the MongoDB and enriches them with yearly result data from the Bundesanzeiger.",
epilog="Example: enrich-company-financials --log-level ERROR --log-path print.log",
)

View File

@ -77,15 +77,15 @@ def main(config_provider: ConfigProvider) -> int:
logger.error("Error while fetching news from Handelsblatt")
news_handelsblatt = []
news_tageschau = tagesschau.get_news_for_category()
if news_tageschau is None:
news_tagesschau = tagesschau.get_news_for_category()
if news_tagesschau is None:
logger.error("Error while fetching news from Tagesschau")
news_tageschau = []
news_tagesschau = []
logger.info(f"Found {len(news_handelsblatt)} news articles from Handelsblatt")
logger.info(f"Found {len(news_tageschau)} news articles from Tagesschau")
logger.info(f"Found {len(news_tagesschau)} news articles from Tagesschau")
news_joined = news_handelsblatt + news_tageschau
news_joined = news_handelsblatt + news_tagesschau
count_new_documents = 0
count_duplicate_documents = 0

View File

@ -25,7 +25,7 @@ from aki_prj23_transparenzregister.utils.logger_config import (
def load_schedule(schedule_file: str) -> dict:
"""Load scheudle data from file.
"""Load schedule data from file.
Returns:
dict: Schedule data
@ -52,7 +52,7 @@ def cli() -> None: # pragma: no cover
"""CLI entry point."""
parser = argparse.ArgumentParser(
prog="Transparenzregister Company ingestion",
description="Ingests all missing companies and enriches them with finandcial data - runs on scheulde.",
description="Ingests all missing companies and enriches them with financial data - runs on schedule.",
epilog="Example: ingest --log-level ERROR --log-path print.log",
)
parser.add_argument(
@ -73,7 +73,7 @@ def cli() -> None: # pragma: no cover
# Schedule tasks or resume scheduling based on last execution times
every(6).hours.do(fetch_news.main, config_provider).tag("fetch_news")
every(3).hours.do(main, config_provider).tag("missing_compnies_and_financials")
every(3).hours.do(main, config_provider).tag("missing_companies_and_financials")
# Run the scheduler in a persistent loop
while True:

View File

@ -20,7 +20,7 @@ from aki_prj23_transparenzregister.config.config_template import (
HELP_TEXT_CONFIG: Final[str] = (
"Database configuration. "
"Either give the paths to a *.json containing the secrets. "
"Alternativly specify the use of enviromental vairables by entering the ENV or the einviromental prefix ending with a '_'."
"Alternatively specify the use of environmental vairables by entering the ENV or the environmental prefix ending with a '_'."
)

View File

@ -20,5 +20,5 @@ def add_auth(app: Dash) -> None:
return
logger.info("The password protection is not or only partially configured!")
logger.debug(
"The enviromental variables PYTHON_DASH_LOGIN_USERNAME and PYTHON_DASH_LOGIN_PW should be used to activate this feature."
"The environmental variables PYTHON_DASH_LOGIN_USERNAME and PYTHON_DASH_LOGIN_PW should be used to activate this feature."
)

View File

@ -25,7 +25,7 @@ class BaseNewsExtractor(metaclass=abc.ABCMeta):
category (str): News category to retrieve.
Returns:
list[News] | None: List of news or None if an error occured.
list[News] | None: List of news or None if an error occurred.
"""
@abc.abstractmethod

View File

@ -1,4 +1,4 @@
"""Tageschau API news extractor."""
"""Tagesschau API news extractor."""
import requests
from bs4 import BeautifulSoup
from loguru import logger
@ -10,7 +10,7 @@ from aki_prj23_transparenzregister.utils.data_extraction.news.base import (
class TagesschauAPI(BaseNewsExtractor):
"""Tageschau API news extractor."""
"""Tagesschau API news extractor."""
def __init__(self) -> None:
"""Constructor."""
@ -18,7 +18,7 @@ class TagesschauAPI(BaseNewsExtractor):
@logger.catch(reraise=True)
def get_news_for_category(self, category: str = "wirtschaft") -> list[News] | None:
"""Retrieve news for the given category from the Tageschau API.
"""Retrieve news for the given category from the Tagesschau API.
Args:
category (str, optional): Category to search for. Defaults to "wirtschaft".

View File

@ -134,7 +134,7 @@ class BaseTransformer(metaclass=abc.ABCMeta):
@abc.abstractmethod
def parse_date_of_birth(self, data: dict) -> str | None:
"""Retreives the date of birth from a stakeholder entry if possible.
"""Retrieves the date of birth from a stakeholder entry if possible.
Args:
data (dict): Stakeholder data

View File

@ -24,7 +24,7 @@ from aki_prj23_transparenzregister.utils.data_extraction.unternehmensregister.tr
normalize_street,
)
from aki_prj23_transparenzregister.utils.string_tools import (
remove_traling_and_leading_quotes,
remove_trailing_and_leading_quotes,
transform_date_to_iso,
)
@ -33,7 +33,7 @@ class V1_Transformer(BaseTransformer): # noqa: N801
"""Transformer for data exports from Unternehmensregister (v1)."""
def parse_date_of_birth(self, data: dict) -> str | None:
"""Retreives the date of birth from a stakeholder entry if possible.
"""Retrieves the date of birth from a stakeholder entry if possible.
Args:
data (dict): Stakeholder data
@ -64,7 +64,7 @@ class V1_Transformer(BaseTransformer): # noqa: N801
):
return CompanyToCompanyRelationship(
**{ # type: ignore
"name": remove_traling_and_leading_quotes(
"name": remove_trailing_and_leading_quotes(
data["Beteiligter"]["Natuerliche_Person"]["Voller_Name"][
"Nachname"
]
@ -130,7 +130,7 @@ class V1_Transformer(BaseTransformer): # noqa: N801
"role": RelationshipRoleEnum(
data["Rolle"]["Rollenbezeichnung"]["content"]
),
"name": remove_traling_and_leading_quotes(
"name": remove_trailing_and_leading_quotes(
data["Beteiligter"]["Organisation"]["Bezeichnung"][
"Bezeichnung_Aktuell"
]
@ -213,7 +213,7 @@ class V1_Transformer(BaseTransformer): # noqa: N801
name = data["XJustiz_Daten"]["Grunddaten"]["Verfahrensdaten"]["Beteiligung"][0][
"Beteiligter"
]["Organisation"]["Bezeichnung"]["Bezeichnung_Aktuell"]
return remove_traling_and_leading_quotes(name)
return remove_trailing_and_leading_quotes(name)
def map_rechtsform(self, company_name: str, data: dict) -> CompanyTypeEnum | None:
"""Extracts the company type from a given Unternehmensregister export.

View File

@ -28,7 +28,7 @@ from aki_prj23_transparenzregister.utils.data_extraction.unternehmensregister.tr
RoleMapper,
)
from aki_prj23_transparenzregister.utils.string_tools import (
remove_traling_and_leading_quotes,
remove_trailing_and_leading_quotes,
transform_date_to_iso,
)
@ -37,7 +37,7 @@ class V3_Transformer(BaseTransformer): # noqa: N801
"""Transformer for data exports from Unternehmensregister (v3)."""
def parse_date_of_birth(self, data: dict) -> str | None:
"""Retreives the date of birth from a stakeholder entry if possible.
"""Retrieves the date of birth from a stakeholder entry if possible.
Args:
data (dict): Stakeholder data
@ -89,7 +89,7 @@ class V3_Transformer(BaseTransformer): # noqa: N801
):
return CompanyToCompanyRelationship(
**{ # type: ignore
"name": remove_traling_and_leading_quotes(
"name": remove_trailing_and_leading_quotes(
data["tns:beteiligter"]["tns:auswahl_beteiligter"][
"tns:natuerlichePerson"
]["tns:vollerName"]["tns:nachname"]
@ -160,7 +160,6 @@ class V3_Transformer(BaseTransformer): # noqa: N801
"tns:organisation"
]
location = None
if "tns:anschrift" in base:
location = Location(
**{
@ -197,7 +196,7 @@ class V3_Transformer(BaseTransformer): # noqa: N801
"role": self.map_role_id_to_enum(
data["tns:rolle"]["tns:rollenbezeichnung"]["code"]
),
"name": remove_traling_and_leading_quotes(
"name": remove_trailing_and_leading_quotes(
base["tns:bezeichnung"]["tns:bezeichnung.aktuell"]
),
"location": location,
@ -273,7 +272,7 @@ class V3_Transformer(BaseTransformer): # noqa: N801
"tns:bezeichnung.aktuell",
]
name = traversal(data, path)
return remove_traling_and_leading_quotes(name)
return remove_trailing_and_leading_quotes(name)
def map_rechtsform(self, company_name: str, data: dict) -> CompanyTypeEnum | None:
"""Extracts the company type from a given Unternehmensregister export.

View File

@ -35,7 +35,7 @@ def cli() -> None: # pragma: no cover
"""A cli interface for the data transfer."""
parser = argparse.ArgumentParser(
prog="Process and transform data",
description="Process the raw data from the MongoDB with AI models and match and transform the data from the MongoDB when transfering into the SQL DB.",
description="Process the raw data from the MongoDB with AI models and match and transform the data from the MongoDB when transferring into the SQL DB.",
epilog="Example: 'data-processing secrets.json' or 'data-processing ENV'",
)
parser.add_argument(

View File

@ -99,7 +99,7 @@ class CompanyMongoService:
return any(not re.match("^[0-9]{4}$", key) for key in data["yearly_results"])
def is_self_referencing_auditors(self, data: dict) -> bool:
"""Does the entry contain yearly_resutls which are self-referencing?
"""Does the entry contain yearly_results which are self-referencing?
Args:
data (dict): Entry from MongoDB

View File

@ -15,7 +15,7 @@ def create_2d_graph( # noqa PLR0913
edge_annotation: bool,
edge_thickness: int,
) -> go.Figure:
"""This Method creates a 2d Network in Plotly with a Scatter Graph and retuns it.
"""This Method creates a 2d Network in Plotly with a Scatter Graph and returns it.
Args:
graph: NetworkX Graph.

View File

@ -15,7 +15,7 @@ def create_3d_graph( # noqa : PLR0913
edge_annotation: bool,
edge_thickness: int,
) -> go.Figure:
"""This Method creates a 3D Network in Plotly with a Scatter Graph and retuns it.
"""This Method creates a 3D Network in Plotly with a Scatter Graph and returns it.
Args:
graph: NetworkX Graph.

View File

@ -4,7 +4,7 @@ import pandas as pd
def initialize_network(edges: list, nodes: dict) -> tuple[nx.Graph, pd.DataFrame]:
"""This Method creates a Network from the Framework NetworkX with the help of a Node and Edge List. Furthemore it creates a DataFrame with the most important Metrics.
"""This Method creates a Network from the Framework NetworkX with the help of a Node and Edge List. Furthermore it creates a DataFrame with the most important Metrics.
Args:
edges (list): List with the connections between Nodes.
@ -50,7 +50,7 @@ def initialize_network(edges: list, nodes: dict) -> tuple[nx.Graph, pd.DataFrame
def initialize_network_with_reduced_metrics(
edges: list, nodes: dict
) -> tuple[nx.Graph, pd.DataFrame]:
"""This Method creates a Network from the Framework NetworkX with the help of a Node and Edge List. Furthemore it creates a DataFrame with the most important Metrics.
"""This Method creates a Network from the Framework NetworkX with the help of a Node and Edge List. Furthermore it creates a DataFrame with the most important Metrics.
Args:
edges: List with the connections between Nodes.
@ -58,7 +58,7 @@ def initialize_network_with_reduced_metrics(
Returns:
Graph: Plotly Figure
Metrices: DataFrame with Metrics
Metrics: DataFrame with Metrics
"""
# create edge dataframe
df_edges = pd.DataFrame(edges, columns=["from", "to", "type"])

View File

@ -36,15 +36,8 @@ def transform_date_to_iso(date: str) -> str:
return date_temp.strftime("%Y-%m-%d")
def remove_traling_and_leading_quotes(value: str) -> str:
"""Removes trailing and leading double-quotes from given string if present.
Args:
value (str): _description_
Returns:
str: _description_
"""
def remove_trailing_and_leading_quotes(value: str) -> str:
"""Removes trailing and leading double-quotes from given string if present."""
if value is not None:
count_quotes = value.count('"')
if count_quotes > 0:

View File

@ -12,9 +12,6 @@ from aki_prj23_transparenzregister.config.config_template import MongoConnection
def mock_mongo_connection() -> MongoConnection:
"""Mock MongoConnector class.
Args:
mocker (any): Library mocker
Returns:
Mock: Mocked MongoConnector
"""

View File

@ -7,7 +7,7 @@ def test_ner_spacy() -> None:
"""Mock TestNerService."""
# Create instance of NerAnalysisService with use_spacy=True
ner_service = NerAnalysisService(
use_spacy=True, use_transformer=False, use_companylist=False
use_spacy=True, use_transformer=False, use_company_list=False
)
# 1st testing
doc = {"title": "Siemens ist ein Unternehmen."}
@ -24,7 +24,7 @@ def test_ner_company_list() -> None:
"""Mock test_ner_company."""
# Create instance of NerAnalysisService with use_company_list=True
ner_service = NerAnalysisService(
use_spacy=False, use_transformer=False, use_companylist=True
use_spacy=False, use_transformer=False, use_company_list=True
)
doc = {"title": "Siemens ist ein Unternehmen."}
@ -41,7 +41,7 @@ def test_ner_transformer() -> None:
"""Mock test_ner_company."""
# Create instance of NerAnalysisService with use_transformer=True
ner_service = NerAnalysisService(
use_spacy=False, use_transformer=True, use_companylist=False
use_spacy=False, use_transformer=True, use_company_list=False
)
doc = {"title": "Siemens ist ein Unternehmen."}

View File

@ -14,9 +14,6 @@ from aki_prj23_transparenzregister.config.config_template import MongoConnection
def mock_mongo_connection() -> MongoConnection:
"""Mock MongoConnector class.
Args:
mocker (any): Library mocker
Returns:
Mock: Mocked MongoConnector
"""

View File

@ -20,6 +20,10 @@ def test_work(
company_mongo_service_mock: Mock,
mongo_connector_mock: Mock,
) -> None:
_ = connector_mock
_ = mongo_connector_mock
_ = company_mongo_service_mock
config_provider_mock = Mock()
config_provider_mock.session.return_value = Mock()

View File

@ -91,7 +91,7 @@ def test_by_id_result(mock_mongo_connector: Mock, mock_collection: Mock) -> None
"""
mock_mongo_connector.database = {"companies": mock_collection}
service = CompanyMongoService(mock_mongo_connector)
mock_entry = {"id": "Does exist", "vaue": 42}
mock_entry = {"id": "Does exist", "value": 42}
mock_collection.find.return_value = [mock_entry]
id = CompanyID(DistrictCourt("a", "b"), "c").to_dict()
assert service.get_by_id(id) == mock_entry
@ -154,7 +154,7 @@ def test_get_where_financial_results(
assert service.get_where_yearly_results() == mock_result
def test_add_yearly_reslults(mock_mongo_connector: Mock, mock_collection: Mock) -> None:
def test_add_yearly_results(mock_mongo_connector: Mock, mock_collection: Mock) -> None:
mock_mongo_connector.database = {"companies": mock_collection}
service = CompanyMongoService(mock_mongo_connector)
mock_result: list = [{"_id": "abc", "brille?": "Fielmann", "Hotel?": "Trivago"}]

View File

@ -81,5 +81,5 @@ def test_initialize_network() -> None:
"id",
]
graph = initialize_network_without_metrics(edges=edges, nodes=nodes)
initialize_network_without_metrics(edges=edges, nodes=nodes)
assert isinstance(graph_reduced, nx.Graph)

View File

@ -57,5 +57,5 @@ def test_transform_date_to_iso(value: str, expected: str) -> None:
],
)
def test_remove_trailing_and_leading_quotes(value: str, expected_result: str) -> None:
result = string_tools.remove_traling_and_leading_quotes(value)
result = string_tools.remove_trailing_and_leading_quotes(value)
assert result == expected_result