diff --git a/pyproject.toml b/pyproject.toml index c225bbb..5ce1904 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -48,6 +48,10 @@ seaborn = "^0.12.2" selenium = "^4.10.0" tqdm = "^4.65.0" +# TODO Add dependent libraries (e.g., deutschland, plotly, etc.) +[tool.poetry.extras] +ingest = ["selenium"] + [tool.poetry.group.develop.dependencies] black = {extras = ["jupyter"], version = "^23.3.0"} jupyterlab = "^4.0.0" @@ -85,6 +89,10 @@ pytest-cov = "^4.1.0" pytest-mock = "^3.10.0" pytest-repeat = "^0.9.1" +# TODO Add enrich_company_financials +[tool.poetry.scripts] +mein_test = "aki_prj23_transparenzregister.utils.postgres.connector:init_db" + [tool.ruff] exclude = [ ".bzr", diff --git a/src/aki_prj23_transparenzregister/apps/enrich_company_financials.py b/src/aki_prj23_transparenzregister/apps/enrich_company_financials.py index 47bca4e..0667a87 100644 --- a/src/aki_prj23_transparenzregister/apps/enrich_company_financials.py +++ b/src/aki_prj23_transparenzregister/apps/enrich_company_financials.py @@ -18,9 +18,9 @@ def work(company: typing.Any, company_service: CompanyMongoService) -> None: company (dict): _description_ company_service (CompanyMongoService): _description_ """ - # print(company["name"]) - yearly_results = Bundesanzeiger().get_information(company_name=company["name"]) - # print(len(yearly_results)) + yearly_results = Bundesanzeiger().get_information( + company["name"], company["location"]["city"] + ) yearly_results_data = {} for _index, row in yearly_results.iterrows(): yearly_results_data[row.jahr] = { diff --git a/src/aki_prj23_transparenzregister/models/company.py b/src/aki_prj23_transparenzregister/models/company.py index 8905513..d160826 100644 --- a/src/aki_prj23_transparenzregister/models/company.py +++ b/src/aki_prj23_transparenzregister/models/company.py @@ -45,21 +45,54 @@ class CompanyRelationship(ABC): location: Location +class FinancialKPIEnum(Enum): + """Financial KPI keys.""" + + # Umsatz || Erlöse + REVENUE = "revenue" + # 
Jahresüberschuss || Nettoeinkommen + NET_INCOME = "net_income" + # Ebit + EBIT = "ebit" + # Ebitda + EBITDA = "ebitda" + # Bruttogewinn + GROSS_PROFIT = "gross_profit" + # Betriebsgewinn + OPERATING_PROFIT = "operating_profit" + # Bilanzsumme + ASSETS = "assets" + # Gesamtverbindlichkeiten + LIABILITIES = "liabilities" + # Eigenkapital + EQUITY = "equity" + # Umlaufvermögen + CURRENT_ASSETS = "current_assets" + # Kurzfristige Verbindlichkeiten + CURRENT_LIABILITIES = "current_liabilities" + # Langfristige Verbindlichkeiten + LONG_TERM_DEBT = "long_term_debt" + # Kurzfristige Verbindlichkeiten + SHORT_TERM_DEBT = "short_term_debt" + # Barmittel + CASH_AND_CASH_EQUIVALENTS = "cash_and_cash_equivalents" + # Dividende + DIVIDENDS = "dividends" + # Cash Flow + CASH_FLOW = "cash_flow" + + @dataclass -class FinancialResults: - """_summary_.""" +class YearlyResult: + """Company yearly result.""" year: int - kpis: dict[str, float] + kpis: dict[FinancialKPIEnum, float] @dataclass class Company: - """_summary_. 
- - Returns: - _type_: _description_ - """ + """Company dataclass.""" id: CompanyID location: Location diff --git a/src/aki_prj23_transparenzregister/utils/data_extraction/bundesanzeiger.py b/src/aki_prj23_transparenzregister/utils/data_extraction/bundesanzeiger.py index c00f8e4..c2c28d7 100644 --- a/src/aki_prj23_transparenzregister/utils/data_extraction/bundesanzeiger.py +++ b/src/aki_prj23_transparenzregister/utils/data_extraction/bundesanzeiger.py @@ -6,23 +6,25 @@ from bs4 import BeautifulSoup from deutschland.bundesanzeiger import Bundesanzeiger as Ba from aki_prj23_transparenzregister.models.auditor import Auditor +from aki_prj23_transparenzregister.models.company import FinancialKPIEnum class Bundesanzeiger: """Bundesanzeiger wrapper to export relevant information.""" - def get_information(self, company_name: str) -> pd.DataFrame: + def get_information(self, company_name: str, city: str | None) -> pd.DataFrame: """Extract relevant information from all found yearly results for the given company. 
Args: company_name (str): Name of the company to search for + city (Optional[str]): City where the company is registered Returns: pd.DataFrame: Result """ ba = Ba() # Get Bundesanzeiger entries for company - reports = ba.get_reports(company_name) + reports = ba.get_reports(f"{company_name} {city}") # Transform to list of data report_contents = [] for key in reports: @@ -113,22 +115,22 @@ class Bundesanzeiger: # Define KPI patterns to search for kpi_patterns = { - "revenue": r"(?:revenue|umsatz|erlöse)[:\s]*([\d,.]+[mmb]?)", - "net_income": r"(?:net income|jahresüberschuss|nettoeinkommen|Ergebnis nach Steuern)[:\s]*([\d,.]+[mmb]?)", - "ebit": r"(?:ebit|operating income)[:\s]*([\d,.]+[mmb]?)", - "ebitda": r"(?:ebitda)[:\s]*([\d,.]+[mmb]?)", - "gross_profit": r"(?:gross profit|bruttogewinn)[:\s]*([\d,.]+[mmb]?)", - "operating_profit": r"(?:operating profit|betriebsgewinn)[:\s]*([\d,.]+[mmb]?)", - "assets": r"(?:total assets|bilanzsumme)[:\s]*([\d,.]+[mmb]?)", - "liabilities": r"(?:total liabilities|gesamtverbindlichkeiten)[:\s]*([\d,.]+[mmb]?)", - "equity": r"(?:shareholders'? 
equity|eigenkapital)[:\s]*([\d,.]+[mmb]?)", - "current_assets": r"(?:current assets|umlaufvermögen)[:\s]*([\d,.]+[mmb]?)", - "current_liabilities": r"(?:current liabilities|kurzfristige verbindlichkeiten)[:\s]*([\d,.]+[mmb]?)", - "long_term_debt": r"(?:long[-\s]?term debt|langfristige verbindlichkeiten)[:\s]*([\d,.]+[mmb]?)", - "short_term_debt": r"(?:short[-\s]?term debt|kurzfristige verbindlichkeiten)[:\s]*([\d,.]+[mmb]?)", - "cash_and_cash_equivalents": r"(?:cash (?:and cash equivalents)?|barmittel)[:\s]*([\d,.]+[mmb]?)", - "dividends": r"(?:dividends?|dividende)[:\s]*([\d,.]+[mmb]?)", - "cash_flow": r"(?:cash flow|cashflow|cash flow from operating activities)[:\s]*([\d,.]+[mmb]?)", + FinancialKPIEnum.REVENUE: r"(?:revenue|umsatz|erlöse)[:\s]*([\d,.]+[mmb]?)", + FinancialKPIEnum.NET_INCOME: r"(?:net income|jahresüberschuss|nettoeinkommen|Ergebnis nach Steuern)[:\s]*([\d,.]+[mmb]?)", + FinancialKPIEnum.EBIT: r"(?:ebit|operating income)[:\s]*([\d,.]+[mmb]?)", + FinancialKPIEnum.EBITDA: r"(?:ebitda)[:\s]*([\d,.]+[mmb]?)", + FinancialKPIEnum.GROSS_PROFIT: r"(?:gross profit|bruttogewinn)[:\s]*([\d,.]+[mmb]?)", + FinancialKPIEnum.OPERATING_PROFIT: r"(?:operating profit|betriebsgewinn)[:\s]*([\d,.]+[mmb]?)", + FinancialKPIEnum.ASSETS: r"(?:total assets|bilanzsumme)[:\s]*([\d,.]+[mmb]?)", + FinancialKPIEnum.LIABILITIES: r"(?:total liabilities|gesamtverbindlichkeiten)[:\s]*([\d,.]+[mmb]?)", + FinancialKPIEnum.EQUITY: r"(?:shareholders'? 
equity|eigenkapital)[:\s]*([\d,.]+[mmb]?)", + FinancialKPIEnum.CURRENT_ASSETS: r"(?:current assets|umlaufvermögen)[:\s]*([\d,.]+[mmb]?)", + FinancialKPIEnum.CURRENT_LIABILITIES: r"(?:current liabilities|kurzfristige verbindlichkeiten)[:\s]*([\d,.]+[mmb]?)", + FinancialKPIEnum.LONG_TERM_DEBT: r"(?:long[-\s]?term debt|langfristige verbindlichkeiten)[:\s]*([\d,.]+[mmb]?)", + FinancialKPIEnum.SHORT_TERM_DEBT: r"(?:short[-\s]?term debt|kurzfristige verbindlichkeiten)[:\s]*([\d,.]+[mmb]?)", + FinancialKPIEnum.CASH_AND_CASH_EQUIVALENTS: r"(?:cash (?:and cash equivalents)?|barmittel)[:\s]*([\d,.]+[mmb]?)", + FinancialKPIEnum.DIVIDENDS: r"(?:dividends?|dividende)[:\s]*([\d,.]+[mmb]?)", + FinancialKPIEnum.CASH_FLOW: r"(?:cash flow|cashflow|cash flow from operating activities)[:\s]*([\d,.]+[mmb]?)", } for kpi, pattern in kpi_patterns.items(): @@ -176,4 +178,4 @@ class Bundesanzeiger: if __name__ == "__main__": ba_wrapper = Bundesanzeiger() - ba_wrapper.get_information("Atos IT-Dienstleistung und Beratung GmbH") + ba_wrapper.get_information("Atos IT-Dienstleistung und Beratung GmbH", None)