KM-R 4e83c83374
Visualize company sentiment (#422)
Updates the tab and widget "Stimmung"
2023-11-22 22:32:38 +01:00

467 lines
18 KiB
Python

"""Global configurations and definitions for pytest."""
import datetime
from collections.abc import Generator
from inspect import getmembers, isfunction
from typing import Any
import pytest
from sqlalchemy.orm import Session, sessionmaker
from aki_prj23_transparenzregister.config.config_template import SQLiteConnectionString
from aki_prj23_transparenzregister.models.company import CapitalTypeEnum
from aki_prj23_transparenzregister.utils import data_transfer
from aki_prj23_transparenzregister.utils.enum_types import SentimentLabel
from aki_prj23_transparenzregister.utils.sql import entities
from aki_prj23_transparenzregister.utils.sql.connector import (
get_engine,
init_db,
)
@pytest.fixture(autouse=True)
def _clear_caches() -> Generator[None, None, None]:
    """Empty the caches of all cached functions once a test has finished.

    The fixture is applied to every test automatically. Only the modules named
    in ``cached_modules`` are scanned, so any module that contains cached
    functions has to be listed there.
    """
    yield
    # Everything below runs after the test body.
    # https://stackoverflow.com/a/139198/11003343
    cached_modules = (data_transfer,)
    cached_functions = []
    for module in cached_modules:
        for _name, candidate in getmembers(module, isfunction):
            # Only functions carrying a non-None ``cache`` attribute are of interest.
            if vars(candidate).get("cache") is not None:
                cached_functions.append(candidate)
    # https://cachetools.readthedocs.io/en/stable/?highlight=clear#memoizing-decorators
    for cached_function in cached_functions:
        cached_function.cache.clear()  # type: ignore
@pytest.fixture()
def empty_db() -> Generator[Session, None, None]:
    """Yield a db Session connected to an in-memory SQLite database.

    The session is passed through ``init_db`` before it is handed to the test.
    Afterwards the session is closed and the engine disposed; this cleanup also
    runs when ``init_db`` itself raises, so a failing setup does not leave the
    session or engine open.

    Yields:
        The initialised session.
    """
    engine = get_engine(SQLiteConnectionString(":memory:"))
    try:
        db = sessionmaker(autocommit=False, autoflush=False, bind=engine)()
        try:
            init_db(db)
            yield db
        finally:
            # Close the session before the engine it is bound to is disposed.
            db.close()
    finally:
        engine.dispose()
@pytest.fixture()
def finance_statements() -> list[dict[str, Any]]:
    """Provide two sample finance statements as plain dictionaries.

    Both entries belong to ``company_id`` 1. The first one is dated 2023-01-01,
    the second one 2022-01-01. The second entry uses NaN for ``net_income`` and
    ``dividends`` and does not carry exactly the same set of keys as the first.
    """
    statement_2023: dict[str, Any] = {
        "id": 1,
        "company_id": 1,
        "date": datetime.date.fromisoformat("2023-01-01"),
        "ebit": 1000.0,
        "ebitda": 1000.0,
        "gross_profit": 1000.0,
        "equity": 1000.0,
        "current_liabilities": 1000.0,
        "assets": 1000.0,
        "net_income": 100.0,
        "long_term_debt": 1000.0,
        "short_term_debt": 1000.0,
        "revenue": 1000.0,
        "cash_flow": 1000.0,
        "current_assets": 1000.0,
        "liabilities": 0.0,
        "cash_and_cash_equivalents": 1.0,
        "dividends": 0.0,
    }
    statement_2022: dict[str, Any] = {
        "id": 2,
        "company_id": 1,
        "date": datetime.date.fromisoformat("2022-01-01"),
        "revenue": 1100.0,
        "net_income": float("NaN"),
        "ebit": 1100.0,
        "ebitda": 1100.0,
        "gross_profit": 1100.0,
        "equity": 1100.0,
        "current_liabilities": 1100.0,
        "dividends": float("NaN"),
        "assets": 1100.0,
        "long_term_debt": 1100.0,
        "short_term_debt": 1100.0,
        "cash_flow": 1100.0,
        "current_assets": 1100.0,
        "operating_profit": 1.0,
    }
    return [statement_2023, statement_2022]
@pytest.fixture()
def full_db(empty_db: Session, finance_statements: list[dict[str, Any]]) -> Session:
    """Populate the empty database with sample data and return the same session.

    The data is written in three commits: district courts and persons first,
    then companies, and finally the annual finance statements together with one
    missing-company entry.
    """
    courts = [
        entities.DistrictCourt(name="Amtsgericht Bochum", city="Bochum"),
        entities.DistrictCourt(name="Amtsgericht Dortmund", city="Dortmund"),
    ]
    persons = [
        entities.Person(
            firstname=firstname,
            lastname=lastname,
            date_of_birth=date_of_birth,
        )
        for firstname, lastname, date_of_birth in (
            ("Max", "Mustermann", datetime.date(2023, 1, 1)),
            ("Sabine", "Mustermann", datetime.date(2023, 1, 1)),
            ("Some Firstname", "Some Surname", datetime.date(2023, 1, 1)),
            ("Some Firstname", "Some Surname", datetime.date(2023, 1, 2)),
            ("Other Firstname", "Other Surname", datetime.date(2023, 1, 2)),
        )
    ]
    empty_db.add_all([*courts, *persons])
    empty_db.commit()

    # The companies reference the district courts through ``court_id``.
    some_company = entities.Company(
        hr="HRB 123",
        court_id=2,
        name="Some Company GmbH",
        street="Sesamstr.",
        house_number="1",
        zip_code="58644",
        city="TV City",
        last_update=datetime.date.fromisoformat("2023-01-01"),
        latitude=51.3246,
        longitude=7.6968,
        pos_accuracy=4.0,
        founding_date=datetime.date(2010, 8, 7),
        capital_value=1000000,
        original_currency="DM",
        capital_type=CapitalTypeEnum.HAFTEINLAGE,
        business_purpose='Say "Hello World"',
    )
    other_company = entities.Company(
        hr="HRB 123",
        court_id=1,
        name="Other Company GmbH",
        street="Sesamstr.",
        house_number="2",
        zip_code="58636",
        city="TV City",
        last_update=datetime.date.fromisoformat("2023-01-01"),
        latitude=51.38,
        longitude=7.7032,
        pos_accuracy=4.0,
        business_purpose="Some purpose",
    )
    third_company = entities.Company(
        hr="HRB 12",
        court_id=2,
        name="Third Company GmbH",
        last_update=datetime.date.fromisoformat("2023-01-01"),
        sector="Electronic",
        capital_value=10000,
        original_currency="EUR",
        capital_type=CapitalTypeEnum.GRUNDKAPITAL,
    )
    empty_db.add_all([some_company, other_company, third_company])
    empty_db.commit()

    statements = [
        entities.AnnualFinanceStatement(**values) for values in finance_statements
    ]
    missing_company = entities.MissingCompany(
        name="Some company missing", zip_code="", city=""
    )
    empty_db.add_all(statements)
    empty_db.add(missing_company)
    empty_db.commit()
    return empty_db
@pytest.fixture()
def news_db(full_db: Session) -> Session:
    """Extend the filled database with news articles and company sentiments.

    One article with three sentiments (companies 1, 2 and 3) is stored first.
    Eleven further articles follow, and ten of them receive sentiments that
    mostly refer to company 1. The session passed in is returned.
    """
    first_article = entities.News(
        title="AI Revolution in Tech",
        timestamp=datetime.datetime(2023, 11, 1, 15, 30),
        text="The latest advancements in AI are transforming the tech industry.",
        source_url="http://example-news.com/ai-revolution",
        source_domain="example-news.com",
        overall_sentiment_label=SentimentLabel.POSITIVE,  # type: ignore
        overall_sentiment_certainty=0.95,
        number_of_companies=2,
        sum_of_times_named=5,
    )
    full_db.add(first_article)
    # Committed before the sentiments are built because they read the article's id.
    full_db.commit()

    # One sentiment per company for the first article:
    # (company_id, times_named, label, score)
    first_article_sentiments = [
        entities.Sentiment(
            company_id=company_id,
            article_id=first_article.id,
            times_named=times_named,
            specific_sentiment_label=label,  # type: ignore
            specific_sentiment_score=score,
        )
        for company_id, times_named, label, score in (
            (1, 3, SentimentLabel.NEUTRAL, 0.5),
            (2, 2, SentimentLabel.NEGATIVE, 0.2),
            (3, 1, SentimentLabel.POSITIVE, 0.8),
        )
    ]
    full_db.add_all(first_article_sentiments)

    further_articles = [
        entities.News(
            title="Breakthrough in Renewable Energy",
            timestamp=datetime.datetime(2023, 11, 1, 15, 30),
            text="Innovative solar panels are expected to revolutionize the energy sector.",
            source_url="http://example-news.com/renewable-breakthrough",
            source_domain="eco-news.com",
            overall_sentiment_label=SentimentLabel.POSITIVE,  # type: ignore
            overall_sentiment_certainty=0.9,
            number_of_companies=1,
            sum_of_times_named=3,
        ),
        entities.News(
            title="Global Economic Outlook",
            timestamp=datetime.datetime(2023, 11, 2, 10, 0),
            text="Economists predict a challenging year ahead for global markets.",
            source_url="http://example-news.com/economic-outlook",
            source_domain="finance-world.com",
            overall_sentiment_label=SentimentLabel.NEGATIVE,  # type: ignore
            overall_sentiment_certainty=0.95,
            number_of_companies=3,
            sum_of_times_named=7,
        ),
        entities.News(
            title="Tech Giants Merge",
            timestamp=datetime.datetime(2023, 11, 3, 12, 45),
            text="Two leading tech companies have announced a merger, sparking industry-wide discussions.",
            source_url="http://example-news.com/tech-merger",
            source_domain="tech-news.com",
            overall_sentiment_label=SentimentLabel.NEUTRAL,  # type: ignore
            overall_sentiment_certainty=0.91,
            number_of_companies=2,
            sum_of_times_named=10,
        ),
        entities.News(
            title="Deutsche Bank announces huge job loss plan",
            timestamp=datetime.datetime(2023, 11, 5, 12, 45),
            text="Deutsche Bank has announced plans for major job losses and will also scale down massively its operations worldwide, as part of a major shake-up of.",
            source_url="http://example-news.com/deutsche-bank",
            source_domain="tech-news.com",
            overall_sentiment_label=SentimentLabel.NEGATIVE,  # type: ignore
            overall_sentiment_certainty=0.9,
            number_of_companies=1,
            sum_of_times_named=8,
        ),
        entities.News(
            title="Germany approves reforms to help its tech industry compete with Silicon Valley",
            timestamp=datetime.datetime(2023, 11, 6, 9, 45),
            text="Germany on Friday approved a litany of changes to its rules for stock-based compensation at tech startups, listing of companies and taxation.",
            source_url="http://example-news.com/german-tech-industry",
            source_domain="tech-news.com",
            overall_sentiment_label=SentimentLabel.POSITIVE,  # type: ignore
            overall_sentiment_certainty=0.97,
            number_of_companies=3,
            sum_of_times_named=5,
        ),
        entities.News(
            title="German housebuilding is collapsing ",
            timestamp=datetime.datetime(2023, 11, 8, 12, 30),
            text="German housebuilding is on the brink of collapse as construction projects are being canceled and orders are slowing.",
            source_url="http://example-news.com/german-housing",
            source_domain="tech-news.com",
            overall_sentiment_label=SentimentLabel.NEGATIVE,  # type: ignore
            overall_sentiment_certainty=0.96,
            number_of_companies=3,
            sum_of_times_named=2,
        ),
        entities.News(
            title="The Berlin start-ups tearing up capitalism and putting workers first",
            timestamp=datetime.datetime(2023, 11, 10, 12, 45),
            text="Berlin is one of Europe's hottest start-up hubs. But companies setting up there are doing things differently and it starts with where the power lies.",
            source_url="http://example-news.com/tech-merger",
            source_domain="tech-news.com",
            overall_sentiment_label=SentimentLabel.POSITIVE,  # type: ignore
            overall_sentiment_certainty=0.91,
            number_of_companies=2,
            sum_of_times_named=10,
        ),
        entities.News(
            title="Air traffic across Germany disrupted due to mass walkouts",
            timestamp=datetime.datetime(2023, 11, 3, 12, 45),
            text="Security workers are staging a full-day walkout at airports across Germany over pay disputes.",
            source_url="http://example-news.com/tech-merger",
            source_domain="tech-news.com",
            overall_sentiment_label=SentimentLabel.NEUTRAL,  # type: ignore
            overall_sentiment_certainty=0.95,
            number_of_companies=3,
            sum_of_times_named=17,
        ),
        entities.News(
            title="Car makers face fraud claims",
            timestamp=datetime.datetime(2023, 11, 12, 12, 45),
            text="The German economy grew 0.6 percent in Q1, quarter on quarter, helped by strong exports, booming construction and higher household and state spending.",
            source_url="http://example-news.com/tech-merger",
            source_domain="tech-news.com",
            overall_sentiment_label=SentimentLabel.NEUTRAL,  # type: ignore
            overall_sentiment_certainty=0.9,
            number_of_companies=2,
            sum_of_times_named=10,
        ),
        entities.News(
            title="Inflation rises, unemployment falls",
            timestamp=datetime.datetime(2023, 11, 15, 12, 45),
            text="German inflation rose in February, reaching its highest level in four-and-a-half years, while unemployment fell more than expected.",
            source_url="http://example-news.com/inflation",
            source_domain="tech-news.com",
            overall_sentiment_label=SentimentLabel.POSITIVE,  # type: ignore
            overall_sentiment_certainty=0.98,
            number_of_companies=2,
            sum_of_times_named=10,
        ),
        entities.News(
            title="Exports struggle on slow factory output",
            timestamp=datetime.datetime(2023, 11, 16, 12, 45),
            text="Exports rebounded by less than expected in October, according to latest official figures.",
            source_url="http://example-news.com/exports",
            source_domain="tech-news.com",
            overall_sentiment_label=SentimentLabel.NEGATIVE,  # type: ignore
            overall_sentiment_certainty=0.98,
            number_of_companies=3,
            sum_of_times_named=4,
        ),
    ]
    full_db.add_all(further_articles)
    # Committed before the sentiments are built because they read the articles' ids.
    full_db.commit()

    # (company_id, index into further_articles, times_named, label, score)
    further_sentiment_rows = [
        (1, 2, 1, SentimentLabel.NEUTRAL, 0.5),
        (3, 1, 4, SentimentLabel.NEGATIVE, 0.3),
        (1, 1, 4, SentimentLabel.NEGATIVE, 0.3),
        # Company 1 gets the same negative sentiment for the articles at indices 3 to 10.
        *(
            (1, article_index, 4, SentimentLabel.NEGATIVE, 0.3)
            for article_index in range(3, 11)
        ),
    ]
    further_sentiments = [
        entities.Sentiment(
            company_id=company_id,
            article_id=further_articles[article_index].id,
            times_named=times_named,
            specific_sentiment_label=label,  # type: ignore
            specific_sentiment_score=score,
        )
        for company_id, article_index, times_named, label, score in further_sentiment_rows
    ]
    full_db.add_all(further_sentiments)
    full_db.commit()
    return full_db