mirror of
https://github.com/fhswf/aki_prj23_transparenzregister.git
synced 2025-06-22 07:53:55 +02:00
fix(data-extraction): Resolve regex issue in detecting auditors
This commit is contained in:
@ -102,12 +102,9 @@ class Bundesanzeiger:
|
|||||||
list[Auditor]: List of Auditors found in the given report
|
list[Auditor]: List of Auditors found in the given report
|
||||||
"""
|
"""
|
||||||
auditor_company = self.extract_auditor_company(report)
|
auditor_company = self.extract_auditor_company(report)
|
||||||
auditor_regex = r"[a-z A-Z,.'-]+, Wirtschaftsprüfer"
|
auditor_regex = r"([a-z A-ZÄäÜüÖö,.'-]+), Wirtschaftsprüfer(in)?"
|
||||||
hits = re.findall(auditor_regex, report)
|
hits = re.findall(auditor_regex, report)
|
||||||
return [
|
return [Auditor(hit[0].strip(), auditor_company) for hit in hits]
|
||||||
Auditor(hit.replace(", Wirtschaftsprüfer", "").lstrip(), auditor_company)
|
|
||||||
for hit in hits
|
|
||||||
]
|
|
||||||
|
|
||||||
def __extract_kpis__(self, report: str) -> dict:
|
def __extract_kpis__(self, report: str) -> dict:
|
||||||
"""Source: https://github.com/bundesAPI/deutschland/pull/87/files#diff-f5b9db5384cf523fcc677056065041e7793bfc4da9cf74c4eebd6fab732739bd.
|
"""Source: https://github.com/bundesAPI/deutschland/pull/87/files#diff-f5b9db5384cf523fcc677056065041e7793bfc4da9cf74c4eebd6fab732739bd.
|
||||||
|
Reference in New Issue
Block a user