refactor(data-extraction): Move date_to_iso function to string_tools

This commit is contained in:
TrisNol 2023-09-23 10:51:54 +02:00
parent 77f08cd901
commit 1e23a8d5a3
5 changed files with 33 additions and 31 deletions

View File

@ -5,7 +5,6 @@ import json
import os import os
import re import re
import sys import sys
from datetime import datetime
import xmltodict import xmltodict
from tqdm import tqdm from tqdm import tqdm
@ -26,6 +25,7 @@ from aki_prj23_transparenzregister.models.company import (
PersonToCompanyRelationship, PersonToCompanyRelationship,
RelationshipRoleEnum, RelationshipRoleEnum,
) )
from aki_prj23_transparenzregister.utils.string_tools import transform_date_to_iso
def transform_xml_to_json(source_dir: str, target_dir: str) -> None: def transform_xml_to_json(source_dir: str, target_dir: str) -> None:
@ -329,22 +329,6 @@ def map_business_purpose(data: dict) -> str | None:
return None return None
def transform_date_to_iso(date: str) -> str:
    """Transform a date in `DD.MM.YY(YY)` to `YYYY-MM-DD`.

    A date with a two-digit year is parsed with `%y` (e.g. `98` becomes
    `1998`); every other input is parsed with `%Y` as a four-digit year.

    Args:
        date (str): Input date

    Returns:
        str: ISO date

    Raises:
        ValueError: If `date` does not match the selected input format.
    """
    regex_yy = r"^\d{1,2}\.\d{1,2}\.\d{2}$"
    input_format = "%d.%m.%y" if re.match(regex_yy, date) else "%d.%m.%Y"
    # `isoformat()` always zero-pads the year to four digits, whereas
    # `strftime("%Y")` may drop the padding for years below 1000 on some
    # platforms (e.g. glibc), which would break the `YYYY-MM-DD` contract.
    return datetime.strptime(date, input_format).date().isoformat()
def map_founding_date(data: dict) -> str | None: def map_founding_date(data: dict) -> str | None:
"""Extracts the founding date from a given Unternehmensregister export. """Extracts the founding date from a given Unternehmensregister export.

View File

@ -1,4 +1,6 @@
"""Contains functions fot string manipulation.""" """Contains functions fot string manipulation."""
import re
from datetime import datetime
def simplify_string(string_to_simplify: str | None) -> str | None: def simplify_string(string_to_simplify: str | None) -> str | None:
@ -16,3 +18,19 @@ def simplify_string(string_to_simplify: str | None) -> str | None:
else: else:
raise TypeError("The string to simplify is not a string.") raise TypeError("The string to simplify is not a string.")
return string_to_simplify if string_to_simplify else None return string_to_simplify if string_to_simplify else None
def transform_date_to_iso(date: str) -> str:
    """Transform a date in `DD.MM.YY(YY)` to `YYYY-MM-DD`.

    A date with a two-digit year is parsed with `%y` (e.g. `98` becomes
    `1998`); every other input is parsed with `%Y` as a four-digit year.

    Args:
        date (str): Input date

    Returns:
        str: ISO date

    Raises:
        ValueError: If `date` does not match the selected input format.
    """
    regex_yy = r"^\d{1,2}\.\d{1,2}\.\d{2}$"
    input_format = "%d.%m.%y" if re.match(regex_yy, date) else "%d.%m.%Y"
    # `isoformat()` always zero-pads the year to four digits, whereas
    # `strftime("%Y")` may drop the padding for years below 1000 on some
    # platforms (e.g. glibc), which would break the `YYYY-MM-DD` contract.
    return datetime.strptime(date, input_format).date().isoformat()

View File

@ -1,3 +1,4 @@
"""Testing utisl/data_extraction/unternehmensregister/extract.py."""
import os import os
from tempfile import TemporaryDirectory from tempfile import TemporaryDirectory

View File

@ -1,3 +1,4 @@
"""Testing utils/data_extraction/unternehmensregister/transform.py."""
import json import json
import os import os
from tempfile import TemporaryDirectory from tempfile import TemporaryDirectory
@ -435,20 +436,6 @@ def test_map_business_purpose_no_result() -> None:
assert result is None assert result is None
def test_transform_date_to_iso() -> None:
    """A date with a four-digit year is converted to ISO format."""
    assert transform.transform_date_to_iso("10.10.1111") == "1111-10-10"
def test_transform_date_to_iso_2_char_year() -> None:
    """A date with a two-digit year is expanded to a four-digit ISO year."""
    assert transform.transform_date_to_iso("10.10.98") == "1998-10-10"
def test_map_founding_date_from_tag_der_ersten_eintragung() -> None: def test_map_founding_date_from_tag_der_ersten_eintragung() -> None:
data = { data = {
"some entry": "Tag der ersten Eintragung: 01.05.2004", "some entry": "Tag der ersten Eintragung: 01.05.2004",

View File

@ -33,3 +33,15 @@ def test_simplify_string_type_error(value: Any) -> None:
"""Tests if the type error is thrown when the value is the wrong type.""" """Tests if the type error is thrown when the value is the wrong type."""
with pytest.raises(TypeError): with pytest.raises(TypeError):
assert string_tools.simplify_string(value) assert string_tools.simplify_string(value)
@pytest.mark.parametrize(
    ("value", "expected"),
    [
        ("10.10.1111", "1111-10-10"),
        ("10.10.98", "1998-10-10"),
    ],
)
def test_transform_date_to_iso(value: str, expected: str) -> None:
    """Tests that dates with four- and two-digit years are converted to ISO."""
    assert string_tools.transform_date_to_iso(value) == expected