class TagesschauAPI:
    """Small client for the public Tagesschau JSON API (``/api2``).

    All HTTP traffic goes through :meth:`_get`, which applies a request
    timeout and raises on HTTP error statuses, so callers never receive an
    error payload disguised as data.
    """

    # Region IDs sent with the ``regions`` filter of the news endpoint.
    # The API numbers the German federal states 1..16, so the upper bound of
    # the range must be 17; ``range(1, 16)`` would silently drop region 16.
    REGION_IDS = range(1, 17)

    def __init__(self, timeout: float = 10.0):
        """Create a client.

        Args:
            timeout: Seconds to wait for the server before giving up on a
                request. Without a timeout ``requests`` can block forever.
        """
        self.base_url = "https://www.tagesschau.de/api2"
        self.timeout = timeout

    def _get(self, url: str, params=None) -> "requests.Response":
        """Issue a GET request with the client's timeout and fail on HTTP errors.

        Raises:
            requests.HTTPError: If the server answers with a 4xx/5xx status.
            requests.Timeout: If no answer arrives within ``self.timeout``.
        """
        response = requests.get(url=url, params=params, timeout=self.timeout)
        response.raise_for_status()
        return response

    def get_news_for_sector(self, sector: str) -> dict:
        """Fetch the news feed for one ``ressort``, across every region.

        Args:
            sector: Value for the ``ressort`` query parameter
                (e.g. ``"wirtschaft"``).

        Returns:
            The decoded JSON body of the ``/news/`` endpoint.
        """
        url = f"{self.base_url}/news/"
        regions = ",".join(str(region_id) for region_id in self.REGION_IDS)
        result = self._get(url, params={"regions": regions, "ressort": sector})
        return result.json()

    def custom_search(self, query: str) -> dict:
        """Run a full-text search.

        Args:
            query: Text sent as the ``searchText`` query parameter.

        Returns:
            The decoded JSON body of the ``/search/`` endpoint.
        """
        url = f"{self.base_url}/search/"
        result = self._get(url, params={"searchText": query})
        return result.json()

    def get_news_details_text(self, url: str) -> str:
        """Download an article page and return its paragraph text as one string.

        Args:
            url: Address of the HTML page to download.

        Returns:
            The text of every ``<p>`` element except the first, with newlines
            replaced by spaces, joined by single spaces.
        """
        content = self._get(url)
        soup = BeautifulSoup(content.text, features="html.parser")
        # The first <p> is skipped on purpose (kept from the original code);
        # presumably it is not part of the article body -- TODO confirm.
        paragraphs = soup.find_all("p")[1:]
        return " ".join(elem.text.replace("\n", " ") for elem in paragraphs)
\n", " | id | \n", "title | \n", "date | \n", "source_url | \n", "text | \n", "
---|---|---|---|---|---|
0 | \n", "f6f64332-a721-43b2-a6e1-0d64be343712 | \n", "Bezahlen mit der Karte - bald ohne Maestro | \n", "2023-06-17T07:14:11.918+02:00 | \n", "https://www.tagesschau.de/wirtschaft/verbrauch... | \n", "Das Maestro-Logo gehörte bislang normalerweis... | \n", "
1 | \n", "20869ff8-54ac-436d-964d-8d45032467a9 | \n", "Durchschnaufen an der Wall Street | \n", "2023-06-16T22:22:05.873+02:00 | \n", "https://www.tagesschau.de/wirtschaft/finanzen/... | \n", "Nach einer ereignisreichen Woche haben die US... | \n", "
2 | \n", "467c0c06-6332-4ab6-8678-666c35388850 | \n", "EVG und Deutsche Bahn vertagen sich auf kommen... | \n", "2023-06-16T21:53:30.404+02:00 | \n", "https://www.tagesschau.de/wirtschaft/unternehm... | \n", "Die Tarifverhandlungen zwischen Deutscher Bah... | \n", "
3 | \n", "83e6a13f-2731-41e8-8298-212263e59887 | \n", "Wie der Euro von steigenden Zinsen profitiert | \n", "2023-06-16T14:57:03.999+02:00 | \n", "https://www.tagesschau.de/wirtschaft/finanzen/... | \n", "Nach der jüngsten Zinserhöhung der EZB ist de... | \n", "
4 | \n", "a59fb7be-04d1-4c5b-ba8b-b2cafbd6aba7 | \n", "Intel will Chipfabrik im polnischen Breslau bauen | \n", "2023-06-16T14:00:01.950+02:00 | \n", "https://www.tagesschau.de/wirtschaft/unternehm... | \n", "Der US-Konzern Intel plant eine neue Chipfabr... | \n", "