diff --git a/Jupyter/API-tests/Unternehmensregister/notebook.ipynb b/Jupyter/API-tests/Unternehmensregister/notebook.ipynb index e02e63c..f69136c 100644 --- a/Jupyter/API-tests/Unternehmensregister/notebook.ipynb +++ b/Jupyter/API-tests/Unternehmensregister/notebook.ipynb @@ -3963,7 +3963,7 @@ }, { "cell_type": "code", - "execution_count": 52, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ @@ -4213,19 +4213,34 @@ " return None\n", "\n", "\n", + "from datetime import datetime\n", + "\n", + "\n", + "def transform_date_to_iso(date: str) -> str:\n", + " regex_yy = r\"^\\d{1,2}\\.\\d{1,2}\\.\\d{2}$\"\n", + "\n", + " if re.match(regex_yy, date):\n", + " input_format = \"%d.%m.%y\"\n", + " else:\n", + " input_format = \"%d.%m.%Y\"\n", + " date_temp = datetime.strptime(date, input_format)\n", + " return date_temp.strftime(\"%Y-%m-%d\")\n", + "\n", + "\n", + "# TODO transform date to iso format (YYYY-MM-DD)\n", "def map_founding_date(data: dict) -> str:\n", " text = str(data)\n", " entry_date = re.findall(\n", " r\".Tag der ersten Eintragung:(\\\\n| )?(\\d{1,2}\\.\\d{1,2}\\.\\d{2,4})\", text\n", " )\n", " if len(entry_date) == 1:\n", - " return entry_date[0][1]\n", + " return transform_date_to_iso(entry_date[0][1])\n", "\n", " entry_date = re.findall(\n", " r\".Gesellschaftsvertrag vom (\\d{1,2}\\.\\d{1,2}\\.\\d{2,4})\", text\n", " )\n", " if len(entry_date) == 1:\n", - " return entry_date[0]\n", + " return transform_date_to_iso(entry_date[0])\n", "\n", " if \"Eintragungstext\" in data[\"XJustiz_Daten\"][\"Fachdaten_Register\"][\"Auszug\"]:\n", " if (\n", @@ -4239,7 +4254,7 @@ " ][0][\"Text\"]\n", " results = re.findall(r\"\\d{1,2}\\.\\d{1,2}\\.\\d{2,4}\", temp)\n", " if len(temp) == 1:\n", - " return results[0]\n", + " return transform_date_to_iso(results[0])\n", " if (\n", " \"Gruendungsmetadaten\"\n", " in data[\"XJustiz_Daten\"][\"Fachdaten_Register\"][\"Basisdaten_Register\"]\n", @@ -4247,7 +4262,7 @@ " temp = data[\"XJustiz_Daten\"][\"Fachdaten_Register\"][\"Basisdaten_Register\"][\n", " \"Gruendungsmetadaten\"\n", " ][\"Gruendungsdatum\"]\n", - " return \".\".join(reversed(temp.split(\"-\")))\n", + " return temp\n", " # No reliable answer\n", " # raise ValueError()\n", " return None\n", @@ -4314,27 +4329,21 @@ "source": [ "import os\n", "import json\n", + "import glob\n", "import dataclasses\n", "from tqdm import tqdm" ] }, { "cell_type": "code", - "execution_count": 53, + "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - " 0%| | 0/3381 [00:00