{ "cells": [ { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "# News" ] }, { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "## Tagesschau API" ] }, { "cell_type": "code", "execution_count": 59, "metadata": {}, "outputs": [], "source": [
"import json\n",
"from pathlib import Path\n",
"\n",
"import requests\n",
"from bs4 import BeautifulSoup\n",
"\n",
"\n",
"class TagesschauAPI:\n",
"    \"\"\"Small client for the Tagesschau JSON API and its article pages.\"\"\"\n",
"\n",
"    # Codes sent in the `regions` filter. The API numbers the German federal\n",
"    # states 1..16; range() excludes its end, so 17 is needed to include 16.\n",
"    REGION_IDS = range(1, 17)\n",
"\n",
"    # Seconds to wait for a response when no timeout is passed to __init__.\n",
"    DEFAULT_TIMEOUT = 10.0\n",
"\n",
"    def __init__(self, timeout: float = DEFAULT_TIMEOUT):\n",
"        \"\"\"Create a client.\n",
"\n",
"        Args:\n",
"            timeout: Seconds to wait for each HTTP response. Without a timeout\n",
"                a stalled connection would block the kernel indefinitely.\n",
"        \"\"\"\n",
"        self.base_url = \"https://www.tagesschau.de/api2\"\n",
"        self.timeout = timeout\n",
"\n",
"    def _get(self, url: str, params: \"dict | None\" = None) -> requests.Response:\n",
"        \"\"\"Send a GET request using the client timeout and fail on HTTP errors.\n",
"\n",
"        Raises:\n",
"            requests.HTTPError: If the server answers with a 4xx/5xx status.\n",
"            requests.Timeout: If no response arrives within ``self.timeout``.\n",
"        \"\"\"\n",
"        response = requests.get(url=url, params=params, timeout=self.timeout)\n",
"        # Fail loudly instead of parsing an error page as if it were news data.\n",
"        response.raise_for_status()\n",
"        return response\n",
"\n",
"    def get_news_for_sector(self, sector: str) -> dict:\n",
"        \"\"\"Fetch the news listing for one ressort across all regions.\n",
"\n",
"        Args:\n",
"            sector: Value for the ``ressort`` query parameter,\n",
"                e.g. ``\"wirtschaft\"``.\n",
"\n",
"        Returns:\n",
"            The decoded JSON body of the ``/news/`` endpoint.\n",
"        \"\"\"\n",
"        url = f\"{self.base_url}/news/\"\n",
"        regions = \",\".join(str(region_id) for region_id in self.REGION_IDS)\n",
"        result = self._get(url, params={\"regions\": regions, \"ressort\": sector})\n",
"        return result.json()\n",
"\n",
"    def custom_search(self, query: str) -> dict:\n",
"        \"\"\"Run a full-text search.\n",
"\n",
"        Args:\n",
"            query: Text sent as the ``searchText`` query parameter.\n",
"\n",
"        Returns:\n",
"            The decoded JSON body of the ``/search/`` endpoint.\n",
"        \"\"\"\n",
"        url = f\"{self.base_url}/search/\"\n",
"        result = self._get(url, params={\"searchText\": query})\n",
"        return result.json()\n",
"\n",
"    def get_news_details_text(self, url: str) -> str:\n",
"        \"\"\"Download a page and return the text of its paragraphs.\n",
"\n",
"        Args:\n",
"            url: Address of the HTML page to download.\n",
"\n",
"        Returns:\n",
"            The text of every ``<p>`` element on the page, joined with single\n",
"            spaces (an empty string if the page has no paragraphs).\n",
"        \"\"\"\n",
"        content = self._get(url)\n",
"        soup = BeautifulSoup(content.text, features=\"html.parser\")\n",
"\n",
"        return \" \".join(elem.text for elem in soup.find_all(\"p\"))\n",
"\n",
"\n",
"tagesschau = TagesschauAPI()\n",
"\n",
"data = tagesschau.get_news_for_sector(\"wirtschaft\")\n",
"\n",
"output_path = Path(\"./data/temp.json\")\n",
"# Create ./data if it is missing so a fresh checkout can run this cell.\n",
"output_path.parent.mkdir(parents=True, exist_ok=True)\n",
"with output_path.open(\"w\", encoding=\"utf-8\") as file:\n",
"    json.dump(data, file, ensure_ascii=False)"
] }, { "cell_type": "code", "execution_count": 54, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "100%|██████████| 51/51 [00:14<00:00, 3.64it/s]\n" ] }, { "data": { "text/html": [ "
\n", " | id | \n", "title | \n", "date | \n", "text | \n", "
---|---|---|---|---|
0 | \n", "836120ce-9602-4296-9b04-8eff4da34be5 | \n", "Befeuert Beyoncé die schwedische Inflation? | \n", "2023-06-15T16:31:26.141+02:00 | \n", "Stand: 15.06.2023 16:31 Uhr \\nDie Inflationsra... | \n", "
1 | \n", "5cc61bcb-d290-4114-b608-0d5aba426f27 | \n", "EZB-Zinsentscheid mit fadem Beigeschmack | \n", "2023-06-15T16:18:19.844+02:00 | \n", "Stand: 15.06.2023 16:18 Uhr \\nNach der Zinserh... | \n", "
2 | \n", "091386f3-78b5-4180-8d5b-7c5b371aed93 | \n", "Leitzins in der Eurozone steigt auf vier Prozent | \n", "2023-06-15T14:16:40.611+02:00 | \n", "Stand: 15.06.2023 14:16 Uhr \\nDie Europäische ... | \n", "
3 | \n", "add3a48e-d847-48eb-9e0d-2a1db3daf00c | \n", "Überschwemmte Äcker, bedrohte Ernten | \n", "2023-06-15T14:05:59.591+02:00 | \n", "Stand: 15.06.2023 14:05 Uhr \\nVom Kachowka-Sta... | \n", "
4 | \n", "54453278-1e7d-4bc3-b33f-ea84f2daa7ac | \n", "Forscher erwarten 2023 schrumpfende Wirtschaft | \n", "2023-06-15T12:52:14.804+02:00 | \n", "Stand: 15.06.2023 12:52 Uhr \\nMehrere Wirtscha... | \n", "