dockerized mongodb as staging DB

This commit is contained in:
TrisNol 2023-06-15 20:24:39 +02:00
parent 3e737fbac5
commit d3d8adabad
2 changed files with 77 additions and 47 deletions

View File

@ -18,7 +18,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 59, "execution_count": 8,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -46,7 +46,9 @@
" content = requests.get(url)\n", " content = requests.get(url)\n",
" soup = BeautifulSoup(content.text, features=\"html.parser\")\n", " soup = BeautifulSoup(content.text, features=\"html.parser\")\n",
"\n", "\n",
" return \" \".join([elem.text for elem in soup.find_all(\"p\")])\n", " return \" \".join(\n",
" [elem.text.replace(\"\\n\", \" \") for elem in soup.find_all(\"p\")][1:]\n",
" )\n",
"\n", "\n",
"\n", "\n",
"tagesschau = TagesschauAPI()\n", "tagesschau = TagesschauAPI()\n",
@ -58,14 +60,14 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 54, "execution_count": 9,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
"name": "stderr", "name": "stderr",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"100%|██████████| 51/51 [00:14<00:00, 3.64it/s]\n" "100%|██████████| 50/50 [00:15<00:00, 3.33it/s]\n"
] ]
}, },
{ {
@ -98,38 +100,38 @@
" <tbody>\n", " <tbody>\n",
" <tr>\n", " <tr>\n",
" <th>0</th>\n", " <th>0</th>\n",
" <td>836120ce-9602-4296-9b04-8eff4da34be5</td>\n", " <td>873f8c93-c996-4e08-a077-7f2b182197aa</td>\n",
" <td>Befeuert Beyoncé die schwedische Inflation?</td>\n", " <td>Netzagentur versteigert Flächen für Offshore-W...</td>\n",
" <td>2023-06-15T16:31:26.141+02:00</td>\n", " <td>2023-06-15T19:24:04.940+02:00</td>\n",
" <td>Stand: 15.06.2023 16:31 Uhr \\nDie Inflationsra...</td>\n", " <td>Mehrere Unternehmen bewerben sich um Flächen ...</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>1</th>\n", " <th>1</th>\n",
" <td>5cc61bcb-d290-4114-b608-0d5aba426f27</td>\n", " <td>08a99fad-b0be-4481-ac87-b71a8eeeb95e</td>\n",
" <td>EZB-Zinsentscheid mit fadem Beigeschmack</td>\n", " <td>Die neue Asien-Strategie von Siemens</td>\n",
" <td>2023-06-15T16:18:19.844+02:00</td>\n", " <td>2023-06-15T19:23:48.142+02:00</td>\n",
" <td>Stand: 15.06.2023 16:18 Uhr \\nNach der Zinserh...</td>\n", " <td>Der Siemens-Konzern hat neue Investitionen in...</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>2</th>\n", " <th>2</th>\n",
" <td>091386f3-78b5-4180-8d5b-7c5b371aed93</td>\n", " <td>5cc61bcb-d290-4114-b608-0d5aba426f27</td>\n",
" <td>Leitzins in der Eurozone steigt auf vier Prozent</td>\n", " <td>DAX zeigt Stärke</td>\n",
" <td>2023-06-15T14:16:40.611+02:00</td>\n", " <td>2023-06-15T18:26:25.866+02:00</td>\n",
" <td>Stand: 15.06.2023 14:16 Uhr \\nDie Europäische ...</td>\n", " <td>Der DAX hat die heutige Zinserhöhung erstaunl...</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>3</th>\n", " <th>3</th>\n",
" <td>add3a48e-d847-48eb-9e0d-2a1db3daf00c</td>\n", " <td>836120ce-9602-4296-9b04-8eff4da34be5</td>\n",
" <td>Überschwemmte Äcker, bedrohte Ernten</td>\n", " <td>Befeuert Beyoncé die schwedische Inflation?</td>\n",
" <td>2023-06-15T14:05:59.591+02:00</td>\n", " <td>2023-06-15T16:31:26.141+02:00</td>\n",
" <td>Stand: 15.06.2023 14:05 Uhr \\nVom Kachowka-Sta...</td>\n", " <td>Die Inflationsrate in Schweden ist zuletzt ni...</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>4</th>\n", " <th>4</th>\n",
" <td>54453278-1e7d-4bc3-b33f-ea84f2daa7ac</td>\n", " <td>091386f3-78b5-4180-8d5b-7c5b371aed93</td>\n",
" <td>Forscher erwarten 2023 schrumpfende Wirtschaft</td>\n", " <td>Leitzins in der Eurozone steigt auf vier Prozent</td>\n",
" <td>2023-06-15T12:52:14.804+02:00</td>\n", " <td>2023-06-15T14:16:40.611+02:00</td>\n",
" <td>Stand: 15.06.2023 12:52 Uhr \\nMehrere Wirtscha...</td>\n", " <td>Die Europäische Zentralbank hat den Leitzins ...</td>\n",
" </tr>\n", " </tr>\n",
" </tbody>\n", " </tbody>\n",
"</table>\n", "</table>\n",
@ -137,35 +139,35 @@
], ],
"text/plain": [ "text/plain": [
" id \\\n", " id \\\n",
"0 836120ce-9602-4296-9b04-8eff4da34be5 \n", "0 873f8c93-c996-4e08-a077-7f2b182197aa \n",
"1 5cc61bcb-d290-4114-b608-0d5aba426f27 \n", "1 08a99fad-b0be-4481-ac87-b71a8eeeb95e \n",
"2 091386f3-78b5-4180-8d5b-7c5b371aed93 \n", "2 5cc61bcb-d290-4114-b608-0d5aba426f27 \n",
"3 add3a48e-d847-48eb-9e0d-2a1db3daf00c \n", "3 836120ce-9602-4296-9b04-8eff4da34be5 \n",
"4 54453278-1e7d-4bc3-b33f-ea84f2daa7ac \n", "4 091386f3-78b5-4180-8d5b-7c5b371aed93 \n",
"\n", "\n",
" title \\\n", " title \\\n",
"0 Befeuert Beyoncé die schwedische Inflation? \n", "0 Netzagentur versteigert Flächen für Offshore-W... \n",
"1 EZB-Zinsentscheid mit fadem Beigeschmack \n", "1 Die neue Asien-Strategie von Siemens \n",
"2 Leitzins in der Eurozone steigt auf vier Prozent \n", "2 DAX zeigt Stärke \n",
"3 Überschwemmte Äcker, bedrohte Ernten \n", "3 Befeuert Beyoncé die schwedische Inflation? \n",
"4 Forscher erwarten 2023 schrumpfende Wirtschaft \n", "4 Leitzins in der Eurozone steigt auf vier Prozent \n",
"\n", "\n",
" date \\\n", " date \\\n",
"0 2023-06-15T16:31:26.141+02:00 \n", "0 2023-06-15T19:24:04.940+02:00 \n",
"1 2023-06-15T16:18:19.844+02:00 \n", "1 2023-06-15T19:23:48.142+02:00 \n",
"2 2023-06-15T14:16:40.611+02:00 \n", "2 2023-06-15T18:26:25.866+02:00 \n",
"3 2023-06-15T14:05:59.591+02:00 \n", "3 2023-06-15T16:31:26.141+02:00 \n",
"4 2023-06-15T12:52:14.804+02:00 \n", "4 2023-06-15T14:16:40.611+02:00 \n",
"\n", "\n",
" text \n", " text \n",
"0 Stand: 15.06.2023 16:31 Uhr \\nDie Inflationsra... \n", "0 Mehrere Unternehmen bewerben sich um Flächen ... \n",
"1 Stand: 15.06.2023 16:18 Uhr \\nNach der Zinserh... \n", "1 Der Siemens-Konzern hat neue Investitionen in... \n",
"2 Stand: 15.06.2023 14:16 Uhr \\nDie Europäische ... \n", "2 Der DAX hat die heutige Zinserhöhung erstaunl... \n",
"3 Stand: 15.06.2023 14:05 Uhr \\nVom Kachowka-Sta... \n", "3 Die Inflationsrate in Schweden ist zuletzt ni... \n",
"4 Stand: 15.06.2023 12:52 Uhr \\nMehrere Wirtscha... " "4 Die Europäische Zentralbank hat den Leitzins ... "
] ]
}, },
"execution_count": 54, "execution_count": 9,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -191,7 +193,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 55, "execution_count": 10,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [

View File

@ -0,0 +1,28 @@
# Staging database stack: one MongoDB container plus the mongo-express
# admin interface that connects to it.
#
# NOTE(review): the root credentials below are committed in plain text and
# duplicated across both services. Acceptable only for a throwaway staging
# setup; move them to an env file or a secret store before wider use.
version: '3.8'
services:
  mongodb:
    image: mongo:6.0.6
    container_name: mongodb
    restart: unless-stopped
    environment:
      MONGO_INITDB_ROOT_USERNAME: 'root'
      MONGO_INITDB_ROOT_PASSWORD: 'pR0R0v2e2'
      MONGO_INITDB_DATABASE: 'transparenzregister'
    ports:
      # Quoted so the mapping is always read as a string, never as a
      # colon-separated (base-60) number by YAML 1.1 parsers.
      - '27017:27017'
    volumes:
      # Named volume (declared at the bottom) mounted at /data/db.
      - mongodb_data:/data/db
  mongo-express:
    image: mongo-express:1.0.0-alpha
    container_name: mongo-express
    restart: unless-stopped
    # Start the database container before the UI that connects to it.
    depends_on:
      - mongodb
    ports:
      - '8081:8081'
    environment:
      # Host name is the `mongodb` service defined above.
      ME_CONFIG_MONGODB_SERVER: 'mongodb'
      # Must match the root credentials configured on the mongodb service.
      ME_CONFIG_MONGODB_ADMINUSERNAME: 'root'
      ME_CONFIG_MONGODB_ADMINPASSWORD: 'pR0R0v2e2'
volumes:
  # Explicit empty mapping: volume with default settings.
  mongodb_data: {}