improve financial fetch, bind financial KPIs to enum

This commit is contained in:
TrisNol 2023-08-29 17:10:42 +02:00
parent d1c09d51a2
commit ac07bd8e8e
4 changed files with 73 additions and 30 deletions

View File

@ -48,6 +48,10 @@ seaborn = "^0.12.2"
selenium = "^4.10.0"
tqdm = "^4.65.0"
# TODO Add dependent libraries (e.g., deutschland, plotly, etc.)
[tool.poetry.extras]
ingest = ["selenium"]
[tool.poetry.group.develop.dependencies]
black = {extras = ["jupyter"], version = "^23.3.0"}
jupyterlab = "^4.0.0"
@ -85,6 +89,10 @@ pytest-cov = "^4.1.0"
pytest-mock = "^3.10.0"
pytest-repeat = "^0.9.1"
# TODO Add enrich_company_financials
[tool.poetry.scripts]
mein_test = "aki_prj23_transparenzregister.utils.postgres.connector:init_db"
[tool.ruff]
exclude = [
".bzr",

View File

@ -18,9 +18,9 @@ def work(company: typing.Any, company_service: CompanyMongoService) -> None:
company (dict): _description_
company_service (CompanyMongoService): _description_
"""
# print(company["name"])
yearly_results = Bundesanzeiger().get_information(company_name=company["name"])
# print(len(yearly_results))
yearly_results = Bundesanzeiger().get_information(
company["name"], company["location"]["city"]
)
yearly_results_data = {}
for _index, row in yearly_results.iterrows():
yearly_results_data[row.jahr] = {

View File

@ -45,21 +45,54 @@ class CompanyRelationship(ABC):
location: Location
class FinancialKPIEnum(Enum):
"""Financial KPI keys.

Closed set of identifiers for the financial figures handled by this package.
Each member's value is its own name in lower case.
"""
# Revenue / sales (German: Umsatz, Erlöse)
REVENUE = "revenue"
# Net income / annual surplus (German: Jahresüberschuss, Nettoeinkommen)
NET_INCOME = "net_income"
# Earnings before interest and taxes (EBIT)
EBIT = "ebit"
# Earnings before interest, taxes, depreciation and amortization (EBITDA)
EBITDA = "ebitda"
# Gross profit (German: Bruttogewinn)
GROSS_PROFIT = "gross_profit"
# Operating profit (German: Betriebsgewinn)
OPERATING_PROFIT = "operating_profit"
# Total assets / balance sheet total (German: Bilanzsumme)
ASSETS = "assets"
# Total liabilities (German: Gesamtverbindlichkeiten)
LIABILITIES = "liabilities"
# Shareholders' equity (German: Eigenkapital)
EQUITY = "equity"
# Current assets (German: Umlaufvermögen)
CURRENT_ASSETS = "current_assets"
# Current liabilities (German: Kurzfristige Verbindlichkeiten)
CURRENT_LIABILITIES = "current_liabilities"
# Long-term debt (German: Langfristige Verbindlichkeiten)
LONG_TERM_DEBT = "long_term_debt"
# Short-term debt (German: Kurzfristige Verbindlichkeiten)
# NOTE(review): same German label as CURRENT_LIABILITIES above — confirm the
# two keys are meant to be distinct.
SHORT_TERM_DEBT = "short_term_debt"
# Cash and cash equivalents (German: Barmittel)
CASH_AND_CASH_EQUIVALENTS = "cash_and_cash_equivalents"
# Dividends (German: Dividende)
DIVIDENDS = "dividends"
# Cash flow
CASH_FLOW = "cash_flow"
@dataclass
class FinancialResults:
"""_summary_."""
class YearlyResult:
"""Company yearly result."""
year: int
kpis: dict[str, float]
kpis: dict[FinancialKPIEnum, float]
@dataclass
class Company:
"""_summary_.
Returns:
_type_: _description_
"""
"""Company dataclass."""
id: CompanyID
location: Location

View File

@ -6,23 +6,25 @@ from bs4 import BeautifulSoup
from deutschland.bundesanzeiger import Bundesanzeiger as Ba
from aki_prj23_transparenzregister.models.auditor import Auditor
from aki_prj23_transparenzregister.models.company import FinancialKPIEnum
class Bundesanzeiger:
"""Bundesanzeiger wrapper to export relevant information."""
def get_information(self, company_name: str) -> pd.DataFrame:
def get_information(self, company_name: str, city: str | None) -> pd.DataFrame:
"""Extract relevant information from all found yearly results for the given company.
Args:
company_name (str): Name of the company to search for
city (Optional[str]): City where the company is registered
Returns:
pd.DataFrame: Result
"""
ba = Ba()
# Get Bundesanzeiger entries for company
reports = ba.get_reports(company_name)
reports = ba.get_reports(f"{company_name} {city}")
# Transform to list of data
report_contents = []
for key in reports:
@ -113,22 +115,22 @@ class Bundesanzeiger:
# Define KPI patterns to search for
kpi_patterns = {
"revenue": r"(?:revenue|umsatz|erlöse)[:\s]*([\d,.]+[mmb]?)",
"net_income": r"(?:net income|jahresüberschuss|nettoeinkommen|Ergebnis nach Steuern)[:\s]*([\d,.]+[mmb]?)",
"ebit": r"(?:ebit|operating income)[:\s]*([\d,.]+[mmb]?)",
"ebitda": r"(?:ebitda)[:\s]*([\d,.]+[mmb]?)",
"gross_profit": r"(?:gross profit|bruttogewinn)[:\s]*([\d,.]+[mmb]?)",
"operating_profit": r"(?:operating profit|betriebsgewinn)[:\s]*([\d,.]+[mmb]?)",
"assets": r"(?:total assets|bilanzsumme)[:\s]*([\d,.]+[mmb]?)",
"liabilities": r"(?:total liabilities|gesamtverbindlichkeiten)[:\s]*([\d,.]+[mmb]?)",
"equity": r"(?:shareholders'? equity|eigenkapital)[:\s]*([\d,.]+[mmb]?)",
"current_assets": r"(?:current assets|umlaufvermögen)[:\s]*([\d,.]+[mmb]?)",
"current_liabilities": r"(?:current liabilities|kurzfristige verbindlichkeiten)[:\s]*([\d,.]+[mmb]?)",
"long_term_debt": r"(?:long[-\s]?term debt|langfristige verbindlichkeiten)[:\s]*([\d,.]+[mmb]?)",
"short_term_debt": r"(?:short[-\s]?term debt|kurzfristige verbindlichkeiten)[:\s]*([\d,.]+[mmb]?)",
"cash_and_cash_equivalents": r"(?:cash (?:and cash equivalents)?|barmittel)[:\s]*([\d,.]+[mmb]?)",
"dividends": r"(?:dividends?|dividende)[:\s]*([\d,.]+[mmb]?)",
"cash_flow": r"(?:cash flow|cashflow|cash flow from operating activities)[:\s]*([\d,.]+[mmb]?)",
FinancialKPIEnum.REVENUE: r"(?:revenue|umsatz|erlöse)[:\s]*([\d,.]+[mmb]?)",
FinancialKPIEnum.NET_INCOME: r"(?:net income|jahresüberschuss|nettoeinkommen|Ergebnis nach Steuern)[:\s]*([\d,.]+[mmb]?)",
FinancialKPIEnum.EBIT: r"(?:ebit|operating income)[:\s]*([\d,.]+[mmb]?)",
FinancialKPIEnum.EBITDA: r"(?:ebitda)[:\s]*([\d,.]+[mmb]?)",
FinancialKPIEnum.GROSS_PROFIT: r"(?:gross profit|bruttogewinn)[:\s]*([\d,.]+[mmb]?)",
FinancialKPIEnum.OPERATING_PROFIT: r"(?:operating profit|betriebsgewinn)[:\s]*([\d,.]+[mmb]?)",
FinancialKPIEnum.ASSETS: r"(?:total assets|bilanzsumme)[:\s]*([\d,.]+[mmb]?)",
FinancialKPIEnum.LIABILITIES: r"(?:total liabilities|gesamtverbindlichkeiten)[:\s]*([\d,.]+[mmb]?)",
FinancialKPIEnum.EQUITY: r"(?:shareholders'? equity|eigenkapital)[:\s]*([\d,.]+[mmb]?)",
FinancialKPIEnum.CURRENT_ASSETS: r"(?:current assets|umlaufvermögen)[:\s]*([\d,.]+[mmb]?)",
FinancialKPIEnum.CURRENT_LIABILITIES: r"(?:current liabilities|kurzfristige verbindlichkeiten)[:\s]*([\d,.]+[mmb]?)",
FinancialKPIEnum.LONG_TERM_DEBT: r"(?:long[-\s]?term debt|langfristige verbindlichkeiten)[:\s]*([\d,.]+[mmb]?)",
FinancialKPIEnum.SHORT_TERM_DEBT: r"(?:short[-\s]?term debt|kurzfristige verbindlichkeiten)[:\s]*([\d,.]+[mmb]?)",
FinancialKPIEnum.CASH_AND_CASH_EQUIVALENTS: r"(?:cash (?:and cash equivalents)?|barmittel)[:\s]*([\d,.]+[mmb]?)",
FinancialKPIEnum.DIVIDENDS: r"(?:dividends?|dividende)[:\s]*([\d,.]+[mmb]?)",
FinancialKPIEnum.CASH_FLOW: r"(?:cash flow|cashflow|cash flow from operating activities)[:\s]*([\d,.]+[mmb]?)",
}
for kpi, pattern in kpi_patterns.items():
@ -176,4 +178,4 @@ class Bundesanzeiger:
if __name__ == "__main__":
ba_wrapper = Bundesanzeiger()
ba_wrapper.get_information("Atos IT-Dienstleistung und Beratung GmbH")
ba_wrapper.get_information("Atos IT-Dienstleistung und Beratung GmbH", None)