fix(data-extraction): Resolve regex issue in detecting auditors

This commit is contained in:
TrisNol 2023-10-17 17:56:26 +02:00
parent c680ac9759
commit 4058824f15

View File

@@ -102,12 +102,9 @@ class Bundesanzeiger:
list[Auditor]: List of Auditors found in the given report
"""
auditor_company = self.extract_auditor_company(report)
auditor_regex = r"[a-z A-Z,.'-]+, Wirtschaftsprüfer"
auditor_regex = r"([a-z A-ZÄäÜüÖö,.'-]+), Wirtschaftsprüfer(in)?"
hits = re.findall(auditor_regex, report)
return [
Auditor(hit.replace(", Wirtschaftsprüfer", "").lstrip(), auditor_company)
for hit in hits
]
return [Auditor(hit[0].strip(), auditor_company) for hit in hits]
def __extract_kpis__(self, report: str) -> dict:
"""Source: https://github.com/bundesAPI/deutschland/pull/87/files#diff-f5b9db5384cf523fcc677056065041e7793bfc4da9cf74c4eebd6fab732739bd.