# News
# Tagesschau API
import json
from pathlib import Path
from typing import Optional

import requests
from bs4 import BeautifulSoup


class TagesschauAPI:
    """Small client for the public tagesschau.de JSON API (``/api2``).

    All HTTP calls go through :meth:`_get`, which applies a timeout and
    raises on HTTP error statuses instead of handing an error page to the
    JSON decoder or the HTML scraper.
    """

    # Seconds to wait for the server. ``requests`` has no default timeout,
    # so without this a stalled connection would hang the notebook forever.
    DEFAULT_TIMEOUT = 10

    # Region ids sent with "all regions" queries. The API documentation lists
    # ids 1-16 (one per German federal state); the previous ``range(1, 16)``
    # stopped at 15 and therefore left out id 16.
    REGION_IDS = range(1, 17)

    def __init__(self, timeout: float = DEFAULT_TIMEOUT):
        """Create a client.

        Args:
            timeout: Per-request timeout in seconds.
        """
        self.base_url = "https://www.tagesschau.de/api2"
        self.timeout = timeout

    def _get(self, url: str, params: Optional[dict] = None) -> requests.Response:
        """Issue a GET request with the client's timeout and check the status.

        Raises:
            requests.HTTPError: If the server answers with a 4xx/5xx status.
            requests.Timeout: If the server does not answer within ``timeout``.
        """
        response = requests.get(url=url, params=params, timeout=self.timeout)
        response.raise_for_status()
        return response

    def get_news_for_sector(self, sector: str) -> dict:
        """Fetch current news of one ressort across all regions.

        Args:
            sector: Value for the API's ``ressort`` query parameter,
                e.g. ``"wirtschaft"``.

        Returns:
            The decoded JSON body of the ``/news/`` endpoint.
        """
        url = f"{self.base_url}/news/"
        regions = ",".join(str(region_id) for region_id in self.REGION_IDS)
        result = self._get(url, params={"regions": regions, "ressort": sector})
        return result.json()

    def custom_search(self, query: str) -> dict:
        """Run a free-text search.

        Args:
            query: Text sent as the ``searchText`` query parameter.

        Returns:
            The decoded JSON body of the ``/search/`` endpoint.
        """
        url = f"{self.base_url}/search/"
        result = self._get(url, params={"searchText": query})
        return result.json()

    def get_news_details_text(self, url: str) -> str:
        """Download an article page and return its paragraph text.

        Args:
            url: Address of the HTML page to scrape.

        Returns:
            The text of every ``<p>`` element except the first, with newlines
            inside a paragraph replaced by spaces, joined by single spaces.
        """
        content = self._get(url)
        soup = BeautifulSoup(content.text, features="html.parser")

        paragraphs = [elem.text.replace("\n", " ") for elem in soup.find_all("p")]
        # The first <p> is skipped, as in the original implementation.
        # NOTE(review): presumably it is not part of the article body - confirm.
        return " ".join(paragraphs[1:])


tagesschau = TagesschauAPI()

data = tagesschau.get_news_for_sector("wirtschaft")

# Create the target directory first so the cell also works on a fresh checkout.
output_path = Path("./data/temp.json")
output_path.parent.mkdir(parents=True, exist_ok=True)
with open(output_path, "w", encoding="utf-8") as file:
    # ensure_ascii=False keeps umlauts readable in the dumped file.
    json.dump(data, file, ensure_ascii=False)
\n", " | id | \n", "title | \n", "date | \n", "text | \n", "
---|---|---|---|---|
0 | \n", "873f8c93-c996-4e08-a077-7f2b182197aa | \n", "Netzagentur versteigert Flächen für Offshore-W... | \n", "2023-06-15T19:24:04.940+02:00 | \n", "Mehrere Unternehmen bewerben sich um Flächen ... | \n", "
1 | \n", "08a99fad-b0be-4481-ac87-b71a8eeeb95e | \n", "Die neue Asien-Strategie von Siemens | \n", "2023-06-15T19:23:48.142+02:00 | \n", "Der Siemens-Konzern hat neue Investitionen in... | \n", "
2 | \n", "5cc61bcb-d290-4114-b608-0d5aba426f27 | \n", "DAX zeigt Stärke | \n", "2023-06-15T18:26:25.866+02:00 | \n", "Der DAX hat die heutige Zinserhöhung erstaunl... | \n", "
3 | \n", "836120ce-9602-4296-9b04-8eff4da34be5 | \n", "Befeuert Beyoncé die schwedische Inflation? | \n", "2023-06-15T16:31:26.141+02:00 | \n", "Die Inflationsrate in Schweden ist zuletzt ni... | \n", "
4 | \n", "091386f3-78b5-4180-8d5b-7c5b371aed93 | \n", "Leitzins in der Eurozone steigt auf vier Prozent | \n", "2023-06-15T14:16:40.611+02:00 | \n", "Die Europäische Zentralbank hat den Leitzins ... | \n", "