mirror of
https://github.com/fhswf/aki_prj23_transparenzregister.git
synced 2025-04-25 16:42:34 +02:00
feat: Introduce switch for different financial extraction routines
This commit is contained in:
parent
9edf5b1dce
commit
e1b8397f9e
@ -16,12 +16,15 @@ pd.options.mode.chained_assignment = None # type: ignore
|
|||||||
class Bundesanzeiger:
|
class Bundesanzeiger:
|
||||||
"""Bundesanzeiger wrapper to export relevant information."""
|
"""Bundesanzeiger wrapper to export relevant information."""
|
||||||
|
|
||||||
def get_information(self, company_name: str, city: str | None) -> pd.DataFrame:
|
def get_information(
|
||||||
|
self, company_name: str, city: str | None, finance_from_tables: bool = False
|
||||||
|
) -> pd.DataFrame:
|
||||||
"""Extract relevant information from all found yearly results for the given company.
|
"""Extract relevant information from all found yearly results for the given company.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
company_name (str): Name of the company to search for
|
company_name (str): Name of the company to search for
|
||||||
city (Optional[str]): City where the company is registered
|
city (Optional[str]): City where the company is registered
|
||||||
|
finance_from_tables (bool, optional): If True, financial information is extracted from tables. If False, financial information will be extracted from text via RegEx. Defaults to False.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
pd.DataFrame: Result
|
pd.DataFrame: Result
|
||||||
@ -51,9 +54,14 @@ class Bundesanzeiger:
|
|||||||
df_data["auditors"] = audits
|
df_data["auditors"] = audits
|
||||||
|
|
||||||
# Add Financial information
|
# Add Financial information
|
||||||
df_data["financial_results"] = df_data.raw_report.apply(
|
if finance_from_tables is True:
|
||||||
self.parse_tables_to_kpis
|
df_data["financial_results"] = df_data.raw_report.apply(
|
||||||
)
|
self.parse_tables_to_kpis
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
df_data["financial_results"] = df_data.raw_report.apply(
|
||||||
|
self.extract_financial_results
|
||||||
|
)
|
||||||
|
|
||||||
# Remove irrelevant columns
|
# Remove irrelevant columns
|
||||||
return df_data
|
return df_data
|
||||||
@ -262,27 +270,31 @@ class Bundesanzeiger:
|
|||||||
)
|
)
|
||||||
break
|
break
|
||||||
else:
|
else:
|
||||||
for x, factor in converter.items():
|
for x, factor in converter.items(): # noqa: PLW2901
|
||||||
parts = str(column).split(" ")
|
parts = str(column).split(" ")
|
||||||
for y in parts:
|
for y in parts:
|
||||||
if re.match(x, y):
|
if re.match(x, y):
|
||||||
table[column] = table[column].apply(
|
table[column] = table[column].apply(
|
||||||
lambda x, factor=factor: apply_factor(x, factor)
|
lambda x, factor=factor: apply_factor(x, factor)
|
||||||
)
|
)
|
||||||
table = table.rename({column: parts[0]}, axis=1)
|
table = table.rename( # noqa: PLW2901
|
||||||
|
{column: parts[0]}, axis=1
|
||||||
|
)
|
||||||
break
|
break
|
||||||
|
|
||||||
table = table.dropna(axis=0, how="all")
|
table = table.dropna(axis=0, how="all") # noqa: PLW2901
|
||||||
table = table.dropna(axis=1, how="all")
|
table = table.dropna(axis=1, how="all") # noqa: PLW2901
|
||||||
|
|
||||||
columns_to_prune = []
|
columns_to_prune = []
|
||||||
for column_index, column_type in enumerate(table.dtypes[1:]):
|
for column_index, column_type in enumerate(table.dtypes[1:]):
|
||||||
if column_type in ["object", "str"]:
|
if column_type in ["object", "str"]:
|
||||||
columns_to_prune.append(column_index + 1)
|
columns_to_prune.append(column_index + 1)
|
||||||
|
|
||||||
table = table.drop(table.columns[columns_to_prune], axis="columns")
|
table = table.drop( # noqa: PLW2901
|
||||||
table = table.replace(to_replace="None", value=np.nan)
|
table.columns[columns_to_prune], axis="columns"
|
||||||
table = table.dropna()
|
)
|
||||||
|
table = table.replace(to_replace="None", value=np.nan) # noqa: PLW2901
|
||||||
|
table = table.dropna() # noqa: PLW2901
|
||||||
if len(table.columns) <= 1:
|
if len(table.columns) <= 1:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user