{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# FinBERT\n", "\n", "## Sources\n", "\n", "- [Hugging Face](https://huggingface.co/ProsusAI/finbert)\n", "- [Tutorial](https://medium.com/codex/stocks-news-sentiment-analysis-with-deep-learning-transformers-and-machine-learning-cdcdb827fc06)" ] },
{ "cell_type": "code", "execution_count": 13, "metadata": { "ExecuteTime": { "end_time": "2023-04-30T21:54:44.056694Z", "start_time": "2023-04-30T21:53:45.027971Z" }, "collapsed": false, "jupyter": { "outputs_hidden": false }, "tags": [] }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "ERROR: To modify pip, please run the following command:\n", "C:\\Users\\phhor\\PycharmProjects\\aki_prj23_transparenzregister\\venv\\Scripts\\python.exe -m pip install transformers tqdm pandas numpy torch torchvision torchaudio pip -Uq\n", "\n", "[notice] A new release of pip is available: 23.0.1 -> 23.1.2\n", "[notice] To update, run: python.exe -m pip install --upgrade pip\n" ] } ], "source": [ "!pip install transformers tqdm pandas numpy torch torchvision torchaudio pip -Uq" ] },
{ "cell_type": "code", "execution_count": 16, "metadata": { "collapsed": false, "jupyter": { "outputs_hidden": false }, "tags": [] }, "outputs": [], "source": [ "import pandas as pd\n", "import torch\n", "from transformers import AutoTokenizer, AutoModelForSequenceClassification\n", "\n", "# create a tokenizer object\n", "tokenizer = AutoTokenizer.from_pretrained(\"ProsusAI/finbert\")\n", "\n", "# fetch the pretrained model\n", "model = AutoModelForSequenceClassification.from_pretrained(\"ProsusAI/finbert\")" ] },
{ "cell_type": "code", "execution_count": 12, "metadata": { "collapsed": false, "jupyter": { "outputs_hidden": false }, "tags": [] }, "outputs": [ { "data": { "text/plain": [ "tensor([[0.0535, 0.0279, 0.9185]], grad_fn=)" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# A headline to be used as input\n", "headline = \"Microsoft fails to hit profit expectations\"\n", "headline2 = (\n", "    \"Am Aktienmarkt überwieg weiter die Zuversicht, wie der Kursverlauf des DAX zeigt.\"\n", ")\n", "\n", "# Pre-process input phrase\n", "input_tokens = tokenizer(headline2, padding=True, truncation=True, return_tensors=\"pt\")\n", "# Run inference on the tokenized phrase\n", "output = model(**input_tokens)\n", "\n", "# Pass model output logits through a softmax layer.\n", "sentim_scores = torch.nn.functional.softmax(output.logits, dim=-1)\n", "sentim_scores" ] },
{ "cell_type": "code", "execution_count": 56, "metadata": { "collapsed": false, "jupyter": { "outputs_hidden": false } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Microsoft fails to hit profit expectations\n" ] }, { "data": { "text/plain": [ "+ 0.034084\n", "0 0.932933\n", "- 0.032982\n", "dtype: float32" ] }, "execution_count": 56, "metadata": {}, "output_type": "execute_result" } ], "source": [ "def analyze_sentiment(text: str) -> pd.Series:\n", "    \"\"\"Score one text with FinBERT and return its sentiment probabilities.\n", "\n", "    Args:\n", "        text: Headline or sentence to classify.\n", "\n", "    Returns:\n", "        Series indexed \"+\", \"0\", \"-\" holding the positive, neutral and\n", "        negative probability, in that order.\n", "    \"\"\"\n", "    print(text)\n", "    input_tokens = tokenizer(text, padding=True, truncation=True, return_tensors=\"pt\")\n", "    # Inference only, so no autograd graph is needed.\n", "    with torch.no_grad():\n", "        output = model(**input_tokens)\n", "    scores = torch.nn.functional.softmax(output.logits, dim=-1)[0]\n", "    # The column order of the logits is defined by the checkpoint, so each score is\n", "    # named via model.config.id2label rather than by a hard-coded position.\n", "    symbols = {\"positive\": \"+\", \"neutral\": \"0\", \"negative\": \"-\"}\n", "    labels = [symbols[model.config.id2label[i].lower()] for i in range(len(scores))]\n", "    return pd.Series(scores.numpy(), index=labels)[[\"+\", \"0\", \"-\"]]\n", "\n", "\n", "tf = analyze_sentiment(headline)\n", "tf" ] },
{ "cell_type": "code", "execution_count": 80, "metadata": { "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
textlan
0Microsoft fails to hit profit expectationsen
1Am Aktienmarkt überwieg weiter die Zuversicht,...de
2Stocks rallied and the British pound gained.en
3Meyer Burger bedient ab sofort australischen M...de
4Meyer Burger enters Australian market and exhi...en
5J&T Express Vietnam hilft lokalen Handwerksdör...en
67 Experten empfehlen die Aktie zum Kauf, 1 Exp...de
7Microsoft aktie fällt.de
8Microsoft aktie steigt.de
\n", "
" ], "text/plain": [ " text lan\n", "0 Microsoft fails to hit profit expectations en\n", "1 Am Aktienmarkt überwieg weiter die Zuversicht,... de\n", "2 Stocks rallied and the British pound gained. en\n", "3 Meyer Burger bedient ab sofort australischen M... de\n", "4 Meyer Burger enters Australian market and exhi... en\n", "5 J&T Express Vietnam hilft lokalen Handwerksdör... en\n", "6 7 Experten empfehlen die Aktie zum Kauf, 1 Exp... de\n", "7 Microsoft aktie fällt. de\n", "8 Microsoft aktie steigt. de" ] }, "execution_count": 80, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Sample headlines, each tagged with the language it is written in.\n", "text_df = pd.DataFrame(\n", "    [\n", "        {\"text\": \"Microsoft fails to hit profit expectations\", \"lan\": \"en\"},\n", "        {\n", "            \"text\": \"Am Aktienmarkt überwieg weiter die Zuversicht, wie der Kursverlauf des DAX zeigt.\",\n", "            \"lan\": \"de\",\n", "        },\n", "        {\"text\": \"Stocks rallied and the British pound gained.\", \"lan\": \"en\"},\n", "        {\n", "            \"text\": \"Meyer Burger bedient ab sofort australischen Markt und präsentiert sich auf Smart Energy Expo in Sydney.\",\n", "            \"lan\": \"de\",\n", "        },\n", "        {\n", "            \"text\": \"Meyer Burger enters Australian market and exhibits at Smart Energy Expo in Sydney.\",\n", "            \"lan\": \"en\",\n", "        },\n", "        {\n", "            \"text\": \"J&T Express Vietnam hilft lokalen Handwerksdörfern, ihre Reichweite zu vergrößern.\",\n", "            \"lan\": \"de\",\n", "        },\n", "        {\n", "            \"text\": \"7 Experten empfehlen die Aktie zum Kauf, 1 Experte empfiehlt, die Aktie zu halten.\",\n", "            \"lan\": \"de\",\n", "        },\n", "        {\"text\": \"Microsoft aktie fällt.\", \"lan\": \"de\"},\n", "        {\"text\": \"Microsoft aktie steigt.\", \"lan\": \"de\"},\n", "    ]\n", ")\n", "text_df" ] }, { "cell_type": "code", "execution_count": 81, "metadata": { "collapsed": false, "jupyter": { "outputs_hidden": false } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Microsoft fails to hit profit expectations\n", "Am Aktienmarkt überwieg weiter die Zuversicht, wie der 
Kursverlauf des DAX zeigt.\n", "Stocks rallied and the British pound gained.\n", "Meyer Burger bedient ab sofort australischen Markt und präsentiert sich auf Smart Energy Expo in Sydney.\n", "Meyer Burger enters Australian market and exhibits at Smart Energy Expo in Sydney.\n", "J&T Express Vietnam hilft lokalen Handwerksdörfern, ihre Reichweite zu vergrößern.\n", "7 Experten empfehlen die Aktie zum Kauf, 1 Experte empfiehlt, die Aktie zu halten.\n", "Microsoft aktie fällt.\n", "Microsoft aktie steigt.\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
textlan+0-
0Microsoft fails to hit profit expectationsen0.0340840.9329330.032982
1Am Aktienmarkt überwieg weiter die Zuversicht,...de0.0535280.0279500.918522
2Stocks rallied and the British pound gained.en0.8983610.0344740.067165
3Meyer Burger bedient ab sofort australischen M...de0.1165970.0127900.870613
4Meyer Burger enters Australian market and exhi...en0.1875270.0088460.803627
5J&T Express Vietnam hilft lokalen Handwerksdör...en0.0662770.0206080.913115
67 Experten empfehlen die Aktie zum Kauf, 1 Exp...de0.0503460.0220040.927650
7Microsoft aktie fällt.de0.0660610.0164400.917498
8Microsoft aktie steigt.de0.0414490.0184710.940080
\n", "
" ], "text/plain": [ " text lan + 0 \n", "0 Microsoft fails to hit profit expectations en 0.034084 0.932933 \\\n", "1 Am Aktienmarkt überwieg weiter die Zuversicht,... de 0.053528 0.027950 \n", "2 Stocks rallied and the British pound gained. en 0.898361 0.034474 \n", "3 Meyer Burger bedient ab sofort australischen M... de 0.116597 0.012790 \n", "4 Meyer Burger enters Australian market and exhi... en 0.187527 0.008846 \n", "5 J&T Express Vietnam hilft lokalen Handwerksdör... en 0.066277 0.020608 \n", "6 7 Experten empfehlen die Aktie zum Kauf, 1 Exp... de 0.050346 0.022004 \n", "7 Microsoft aktie fällt. de 0.066061 0.016440 \n", "8 Microsoft aktie steigt. de 0.041449 0.018471 \n", "\n", " - \n", "0 0.032982 \n", "1 0.918522 \n", "2 0.067165 \n", "3 0.870613 \n", "4 0.803627 \n", "5 0.913115 \n", "6 0.927650 \n", "7 0.917498 \n", "8 0.940080 " ] }, "execution_count": 81, "metadata": {}, "output_type": "execute_result" } ], "source": [ "def analyse_sentiments(texts: pd.DataFrame) -> pd.DataFrame:\n", "    \"\"\"Append FinBERT sentiment scores to a frame of texts.\n", "\n", "    Args:\n", "        texts: Frame with a \"text\" column holding the strings to score.\n", "\n", "    Returns:\n", "        A new frame equal to ``texts`` with the columns \"+\", \"0\" and \"-\"\n", "        added (or replaced); the frame passed in is not modified.\n", "    \"\"\"\n", "    scores = texts[\"text\"].apply(analyze_sentiment)\n", "    # Pick the score columns by name so the result does not depend on the\n", "    # order in which analyze_sentiment lists them.\n", "    return texts.assign(**{label: scores[label] for label in (\"+\", \"0\", \"-\")})\n", "\n", "\n", "analyse_sentiments(text_df)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.0" } }, "nbformat": 4, "nbformat_minor": 4 }