Mirror of https://github.com/fhswf/aki_prj23_transparenzregister.git (synced 2025-04-24 21:02:33 +02:00)
fix(data-extraction): Resolve regex issue in detecting auditors
This commit is contained in:
parent c680ac9759
commit 4058824f15
@@ -102,12 +102,9 @@ class Bundesanzeiger:
  list[Auditor]: List of Auditors found in the given report
  """
  auditor_company = self.extract_auditor_company(report)
- auditor_regex = r"[a-z A-Z,.'-]+, Wirtschaftsprüfer"
+ auditor_regex = r"([a-z A-ZÄäÜüÖö,.'-]+), Wirtschaftsprüfer(in)?"
  hits = re.findall(auditor_regex, report)
- return [
- Auditor(hit.replace(", Wirtschaftsprüfer", "").lstrip(), auditor_company)
- for hit in hits
- ]
+ return [Auditor(hit[0].strip(), auditor_company) for hit in hits]

  def __extract_kpis__(self, report: str) -> dict:
  """Source: https://github.com/bundesAPI/deutschland/pull/87/files#diff-f5b9db5384cf523fcc677056065041e7793bfc4da9cf74c4eebd6fab732739bd.
Loading…
x
Reference in New Issue
Block a user