From eba5235dff56880efddafa34fabe27f2539ed22f Mon Sep 17 00:00:00 2001 From: TrisNol Date: Sun, 15 Oct 2023 12:05:25 +0200 Subject: [PATCH] refactor: Implement PR feedback --- .../unternehmensregister/transform.py | 16 +++++++++------- .../unternehmensregister/transform_test.py | 4 ++++ 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/src/aki_prj23_transparenzregister/utils/data_extraction/unternehmensregister/transform.py b/src/aki_prj23_transparenzregister/utils/data_extraction/unternehmensregister/transform.py index 926fe42..468fdee 100644 --- a/src/aki_prj23_transparenzregister/utils/data_extraction/unternehmensregister/transform.py +++ b/src/aki_prj23_transparenzregister/utils/data_extraction/unternehmensregister/transform.py @@ -377,19 +377,21 @@ def extract_date_from_string(value: str) -> str | None: str | None: Date in ISO format, None if not found """ date_regex = [ # type: ignore - {"regex": r"\d{1,2}\.\d{1,2}\.(19|20)?\d{2}", "mapper": transform_date_to_iso}, - - {"regex": r"(20|19)\d{2}-\d{1,2}-\d{1,2}", "mapper": None}, - + {"regex": r"\d{1,2}\.\d{1,2}\.\d{4}", "mapper": transform_date_to_iso}, + {"regex": r"\d{4}-\d{1,2}-\d{1,2}", "mapper": None}, ] + results = [] for regex in date_regex: result = re.findall(regex["regex"], value) # type: ignore if len(result) == 1: relevant_data = result[0] if regex["mapper"] is not None: # type: ignore - return regex["mapper"](relevant_data) # type: ignore - return relevant_data - return None + results.append(regex["mapper"](relevant_data)) # type: ignore + else: + results.append(relevant_data) + if len(results) != 1: + return None + return results[0] def map_founding_date(data: dict) -> str | None: diff --git a/tests/utils/data_extraction/unternehmensregister/transform_test.py b/tests/utils/data_extraction/unternehmensregister/transform_test.py index 17da71e..a312572 100644 --- a/tests/utils/data_extraction/unternehmensregister/transform_test.py +++ b/tests/utils/data_extraction/unternehmensregister/transform_test.py @@ -612,6 +612,10 @@ def test_map_business_purpose_no_result() -> None: ("Gesellschaftsvertrag vom 06.04.2016 Hallo Welt", "2016-04-06"), ("Str. des Tests vom 1999-04-05", "1999-04-05"), ("Once upon a midnight dreary while I pondered weak and weary...", None), + ( + "This company was first founded in 2016-06-10 and then again on 1.5.2004", + None, + ), ], ) def test_extract_date_from_string(value: str, expected_result: str) -> None: