mirror of
https://github.com/fhswf/aki_prj23_transparenzregister.git
synced 2025-04-22 08:02:53 +02:00
494 lines
15 KiB
Plaintext
494 lines
15 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"# FinBert\n",
|
|
"\n",
|
|
"## Sources\n",
|
|
"\n",
|
|
"[HugginFace](https://huggingface.co/ProsusAI/finbert)\n",
|
|
"[Tutorial](https://medium.com/codex/stocks-news-sentiment-analysis-with-deep-learning-transformers-and-machine-learning-cdcdb827fc06)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 13,
|
|
"metadata": {
|
|
"ExecuteTime": {
|
|
"end_time": "2023-04-30T21:54:44.056694Z",
|
|
"start_time": "2023-04-30T21:53:45.027971Z"
|
|
},
|
|
"collapsed": false,
|
|
"jupyter": {
|
|
"outputs_hidden": false
|
|
},
|
|
"tags": []
|
|
},
|
|
"outputs": [
|
|
{
|
|
"name": "stderr",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"ERROR: To modify pip, please run the following command:\n",
|
|
"C:\\Users\\phhor\\PycharmProjects\\aki_prj23_transparenzregister\\venv\\Scripts\\python.exe -m pip install transformers tqdm pandas numpy torch torchvision torchaudio pip -Uq\n",
|
|
"\n",
|
|
"[notice] A new release of pip is available: 23.0.1 -> 23.1.2\n",
|
|
"[notice] To update, run: python.exe -m pip install --upgrade pip\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"!pip install transformers tqdm pandas numpy torch torchvision torchaudio pip -Uq"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 16,
|
|
"metadata": {
|
|
"collapsed": false,
|
|
"jupyter": {
|
|
"outputs_hidden": false
|
|
},
|
|
"tags": []
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"import pandas as pd\n",
|
|
"from transformers import AutoTokenizer, AutoModelForSequenceClassification\n",
|
|
"\n",
|
|
"# create a tokenizer object\n",
|
|
"tokenizer = AutoTokenizer.from_pretrained(\"ProsusAI/finbert\")\n",
|
|
"\n",
|
|
"# fetch the pretrained model\n",
|
|
"model = AutoModelForSequenceClassification.from_pretrained(\"ProsusAI/finbert\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 12,
|
|
"metadata": {
|
|
"collapsed": false,
|
|
"jupyter": {
|
|
"outputs_hidden": false
|
|
},
|
|
"tags": []
|
|
},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"tensor([[0.0535, 0.0279, 0.9185]], grad_fn=<SoftmaxBackward0>)"
|
|
]
|
|
},
|
|
"execution_count": 12,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"# A headline to be used as input\n",
|
|
"import torch\n",
|
|
"\n",
|
|
"headline = \"Microsoft fails to hit profit expectations\"\n",
|
|
"headline2 = (\n",
|
|
" \"Am Aktienmarkt überwieg weiter die Zuversicht, wie der Kursverlauf des DAX zeigt.\"\n",
|
|
")\n",
|
|
"\n",
|
|
"# Pre-process input phrase\n",
|
|
"input_tokens = tokenizer(headline2, padding=True, truncation=True, return_tensors=\"pt\")\n",
|
|
"# Run inference on the tokenized phrase\n",
|
|
"output = model(**input_tokens)\n",
|
|
"\n",
|
|
"# Pass model output logits through a softmax layer.\n",
|
|
"sentim_scores = torch.nn.functional.softmax(output.logits, dim=-1)\n",
|
|
"sentim_scores"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 56,
|
|
"metadata": {
|
|
"collapsed": false,
|
|
"jupyter": {
|
|
"outputs_hidden": false
|
|
}
|
|
},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Microsoft fails to hit profit expectations\n"
|
|
]
|
|
},
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"+ 0.034084\n",
|
|
"0 0.932933\n",
|
|
"- 0.032982\n",
|
|
"dtype: float32"
|
|
]
|
|
},
|
|
"execution_count": 56,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"def analyze_sentiment(text: str):\n",
|
|
" print(text)\n",
|
|
" input_tokens = tokenizer(text, padding=True, truncation=True, return_tensors=\"pt\")\n",
|
|
" output = model(**input_tokens)\n",
|
|
" return pd.Series(\n",
|
|
" torch.nn.functional.softmax(output.logits, dim=-1)[0].data,\n",
|
|
" index=[\"+\", \"0\", \"-\"],\n",
|
|
" )\n",
|
|
"\n",
|
|
"\n",
|
|
"tf = analyze_sentiment(headline)\n",
|
|
"tf"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 80,
|
|
"metadata": {
|
|
"tags": []
|
|
},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead th {\n",
|
|
" text-align: right;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>text</th>\n",
|
|
" <th>lan</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>0</th>\n",
|
|
" <td>Microsoft fails to hit profit expectations</td>\n",
|
|
" <td>en</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>1</th>\n",
|
|
" <td>Am Aktienmarkt überwieg weiter die Zuversicht,...</td>\n",
|
|
" <td>de</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>2</th>\n",
|
|
" <td>Stocks rallied and the British pound gained.</td>\n",
|
|
" <td>en</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>3</th>\n",
|
|
" <td>Meyer Burger bedient ab sofort australischen M...</td>\n",
|
|
" <td>de</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>4</th>\n",
|
|
" <td>Meyer Burger enters Australian market and exhi...</td>\n",
|
|
" <td>en</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>5</th>\n",
|
|
" <td>J&T Express Vietnam hilft lokalen Handwerksdör...</td>\n",
|
|
" <td>en</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>6</th>\n",
|
|
" <td>7 Experten empfehlen die Aktie zum Kauf, 1 Exp...</td>\n",
|
|
" <td>de</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>7</th>\n",
|
|
" <td>Microsoft aktie fällt.</td>\n",
|
|
" <td>de</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>8</th>\n",
|
|
" <td>Microsoft aktie steigt.</td>\n",
|
|
" <td>de</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" text lan\n",
|
|
"0 Microsoft fails to hit profit expectations en\n",
|
|
"1 Am Aktienmarkt überwieg weiter die Zuversicht,... de\n",
|
|
"2 Stocks rallied and the British pound gained. en\n",
|
|
"3 Meyer Burger bedient ab sofort australischen M... de\n",
|
|
"4 Meyer Burger enters Australian market and exhi... en\n",
|
|
"5 J&T Express Vietnam hilft lokalen Handwerksdör... en\n",
|
|
"6 7 Experten empfehlen die Aktie zum Kauf, 1 Exp... de\n",
|
|
"7 Microsoft aktie fällt. de\n",
|
|
"8 Microsoft aktie steigt. de"
|
|
]
|
|
},
|
|
"execution_count": 80,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"text_df = pd.DataFrame(\n",
|
|
" [\n",
|
|
" {\"text\": \"Microsoft fails to hit profit expectations\", \"lan\": \"en\"},\n",
|
|
" {\n",
|
|
" \"text\": \"Am Aktienmarkt überwieg weiter die Zuversicht, wie der Kursverlauf des DAX zeigt.\",\n",
|
|
" \"lan\": \"de\",\n",
|
|
" },\n",
|
|
" {\"text\": \"Stocks rallied and the British pound gained.\", \"lan\": \"en\"},\n",
|
|
" {\n",
|
|
" \"text\": \"Meyer Burger bedient ab sofort australischen Markt und präsentiert sich auf Smart Energy Expo in Sydney.\",\n",
|
|
" \"lan\": \"de\",\n",
|
|
" },\n",
|
|
" {\n",
|
|
" \"text\": \"Meyer Burger enters Australian market and exhibits at Smart Energy Expo in Sydney.\",\n",
|
|
" \"lan\": \"en\",\n",
|
|
" },\n",
|
|
" {\n",
|
|
" \"text\": \"J&T Express Vietnam hilft lokalen Handwerksdörfern, ihre Reichweite zu vergrößern.\",\n",
|
|
" \"lan\": \"en\",\n",
|
|
" },\n",
|
|
" {\n",
|
|
" \"text\": \"7 Experten empfehlen die Aktie zum Kauf, 1 Experte empfiehlt, die Aktie zu halten.\",\n",
|
|
" \"lan\": \"de\",\n",
|
|
" },\n",
|
|
" {\"text\": \"Microsoft aktie fällt.\", \"lan\": \"de\"},\n",
|
|
" {\"text\": \"Microsoft aktie steigt.\", \"lan\": \"de\"},\n",
|
|
" ]\n",
|
|
")\n",
|
|
"text_df"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 81,
|
|
"metadata": {
|
|
"collapsed": false,
|
|
"jupyter": {
|
|
"outputs_hidden": false
|
|
}
|
|
},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Microsoft fails to hit profit expectations\n",
|
|
"Am Aktienmarkt überwieg weiter die Zuversicht, wie der Kursverlauf des DAX zeigt.\n",
|
|
"Stocks rallied and the British pound gained.\n",
|
|
"Meyer Burger bedient ab sofort australischen Markt und präsentiert sich auf Smart Energy Expo in Sydney.\n",
|
|
"Meyer Burger enters Australian market and exhibits at Smart Energy Expo in Sydney.\n",
|
|
"J&T Express Vietnam hilft lokalen Handwerksdörfern, ihre Reichweite zu vergrößern.\n",
|
|
"7 Experten empfehlen die Aktie zum Kauf, 1 Experte empfiehlt, die Aktie zu halten.\n",
|
|
"Microsoft aktie fällt.\n",
|
|
"Microsoft aktie steigt.\n"
|
|
]
|
|
},
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead th {\n",
|
|
" text-align: right;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>text</th>\n",
|
|
" <th>lan</th>\n",
|
|
" <th>+</th>\n",
|
|
" <th>0</th>\n",
|
|
" <th>-</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>0</th>\n",
|
|
" <td>Microsoft fails to hit profit expectations</td>\n",
|
|
" <td>en</td>\n",
|
|
" <td>0.034084</td>\n",
|
|
" <td>0.932933</td>\n",
|
|
" <td>0.032982</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>1</th>\n",
|
|
" <td>Am Aktienmarkt überwieg weiter die Zuversicht,...</td>\n",
|
|
" <td>de</td>\n",
|
|
" <td>0.053528</td>\n",
|
|
" <td>0.027950</td>\n",
|
|
" <td>0.918522</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>2</th>\n",
|
|
" <td>Stocks rallied and the British pound gained.</td>\n",
|
|
" <td>en</td>\n",
|
|
" <td>0.898361</td>\n",
|
|
" <td>0.034474</td>\n",
|
|
" <td>0.067165</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>3</th>\n",
|
|
" <td>Meyer Burger bedient ab sofort australischen M...</td>\n",
|
|
" <td>de</td>\n",
|
|
" <td>0.116597</td>\n",
|
|
" <td>0.012790</td>\n",
|
|
" <td>0.870613</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>4</th>\n",
|
|
" <td>Meyer Burger enters Australian market and exhi...</td>\n",
|
|
" <td>en</td>\n",
|
|
" <td>0.187527</td>\n",
|
|
" <td>0.008846</td>\n",
|
|
" <td>0.803627</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>5</th>\n",
|
|
" <td>J&T Express Vietnam hilft lokalen Handwerksdör...</td>\n",
|
|
" <td>en</td>\n",
|
|
" <td>0.066277</td>\n",
|
|
" <td>0.020608</td>\n",
|
|
" <td>0.913115</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>6</th>\n",
|
|
" <td>7 Experten empfehlen die Aktie zum Kauf, 1 Exp...</td>\n",
|
|
" <td>de</td>\n",
|
|
" <td>0.050346</td>\n",
|
|
" <td>0.022004</td>\n",
|
|
" <td>0.927650</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>7</th>\n",
|
|
" <td>Microsoft aktie fällt.</td>\n",
|
|
" <td>de</td>\n",
|
|
" <td>0.066061</td>\n",
|
|
" <td>0.016440</td>\n",
|
|
" <td>0.917498</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>8</th>\n",
|
|
" <td>Microsoft aktie steigt.</td>\n",
|
|
" <td>de</td>\n",
|
|
" <td>0.041449</td>\n",
|
|
" <td>0.018471</td>\n",
|
|
" <td>0.940080</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" text lan + 0 \n",
|
|
"0 Microsoft fails to hit profit expectations en 0.034084 0.932933 \\\n",
|
|
"1 Am Aktienmarkt überwieg weiter die Zuversicht,... de 0.053528 0.027950 \n",
|
|
"2 Stocks rallied and the British pound gained. en 0.898361 0.034474 \n",
|
|
"3 Meyer Burger bedient ab sofort australischen M... de 0.116597 0.012790 \n",
|
|
"4 Meyer Burger enters Australian market and exhi... en 0.187527 0.008846 \n",
|
|
"5 J&T Express Vietnam hilft lokalen Handwerksdör... en 0.066277 0.020608 \n",
|
|
"6 7 Experten empfehlen die Aktie zum Kauf, 1 Exp... de 0.050346 0.022004 \n",
|
|
"7 Microsoft aktie fällt. de 0.066061 0.016440 \n",
|
|
"8 Microsoft aktie steigt. de 0.041449 0.018471 \n",
|
|
"\n",
|
|
" - \n",
|
|
"0 0.032982 \n",
|
|
"1 0.918522 \n",
|
|
"2 0.067165 \n",
|
|
"3 0.870613 \n",
|
|
"4 0.803627 \n",
|
|
"5 0.913115 \n",
|
|
"6 0.927650 \n",
|
|
"7 0.917498 \n",
|
|
"8 0.940080 "
|
|
]
|
|
},
|
|
"execution_count": 81,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"def analyse_sentiments(texts: pd.Series) -> pd.DataFrame:\n",
|
|
" values = texts[\"text\"].apply(analyze_sentiment)\n",
|
|
" # print(values)\n",
|
|
" texts[[\"+\", \"0\", \"-\"]] = values\n",
|
|
" return texts\n",
|
|
"\n",
|
|
"\n",
|
|
"analyse_sentiments(text_df)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3 (ipykernel)",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.11.0"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 4
|
|
}
|