mirror of
https://github.com/fhswf/aki_prj23_transparenzregister.git
synced 2025-04-24 16:52:34 +02:00
feat: Introduce switch for different financial extraction routines
This commit is contained in:
parent
9edf5b1dce
commit
e1b8397f9e
@ -16,12 +16,15 @@ pd.options.mode.chained_assignment = None # type: ignore
|
||||
class Bundesanzeiger:
|
||||
"""Bundesanzeiger wrapper to export relevant information."""
|
||||
|
||||
def get_information(self, company_name: str, city: str | None) -> pd.DataFrame:
|
||||
def get_information(
|
||||
self, company_name: str, city: str | None, finance_from_tables: bool = False
|
||||
) -> pd.DataFrame:
|
||||
"""Extract relevant information from all found yearly results for the given company.
|
||||
|
||||
Args:
|
||||
company_name (str): Name of the company to search for
|
||||
city (Optional[str]): City where the company is registered
|
||||
finance_from_tables (bool, optional): If True, financial information is extracted from tables. If False, financial information will be extracted from text via RegEx. Defaults to False.
|
||||
|
||||
Returns:
|
||||
pd.DataFrame: Result
|
||||
@ -51,9 +54,14 @@ class Bundesanzeiger:
|
||||
df_data["auditors"] = audits
|
||||
|
||||
# Add Financial information
|
||||
df_data["financial_results"] = df_data.raw_report.apply(
|
||||
self.parse_tables_to_kpis
|
||||
)
|
||||
if finance_from_tables is True:
|
||||
df_data["financial_results"] = df_data.raw_report.apply(
|
||||
self.parse_tables_to_kpis
|
||||
)
|
||||
else:
|
||||
df_data["financial_results"] = df_data.raw_report.apply(
|
||||
self.extract_financial_results
|
||||
)
|
||||
|
||||
# Remove irrelevant columns
|
||||
return df_data
|
||||
@ -262,27 +270,31 @@ class Bundesanzeiger:
|
||||
)
|
||||
break
|
||||
else:
|
||||
for x, factor in converter.items():
|
||||
for x, factor in converter.items(): # noqa: PLW2901
|
||||
parts = str(column).split(" ")
|
||||
for y in parts:
|
||||
if re.match(x, y):
|
||||
table[column] = table[column].apply(
|
||||
lambda x, factor=factor: apply_factor(x, factor)
|
||||
)
|
||||
table = table.rename({column: parts[0]}, axis=1)
|
||||
table = table.rename( # noqa: PLW2901
|
||||
{column: parts[0]}, axis=1
|
||||
)
|
||||
break
|
||||
|
||||
table = table.dropna(axis=0, how="all")
|
||||
table = table.dropna(axis=1, how="all")
|
||||
table = table.dropna(axis=0, how="all") # noqa: PLW2901
|
||||
table = table.dropna(axis=1, how="all") # noqa: PLW2901
|
||||
|
||||
columns_to_prune = []
|
||||
for column_index, column_type in enumerate(table.dtypes[1:]):
|
||||
if column_type in ["object", "str"]:
|
||||
columns_to_prune.append(column_index + 1)
|
||||
|
||||
table = table.drop(table.columns[columns_to_prune], axis="columns")
|
||||
table = table.replace(to_replace="None", value=np.nan)
|
||||
table = table.dropna()
|
||||
table = table.drop( # noqa: PLW2901
|
||||
table.columns[columns_to_prune], axis="columns"
|
||||
)
|
||||
table = table.replace(to_replace="None", value=np.nan) # noqa: PLW2901
|
||||
table = table.dropna() # noqa: PLW2901
|
||||
if len(table.columns) <= 1:
|
||||
continue
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user