Merge main into feature/data-extraction

This commit is contained in:
TrisNol
2023-07-10 17:15:43 +02:00
106 changed files with 27351 additions and 409 deletions

View File

@ -2,7 +2,11 @@
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"metadata": {
"slideshow": {
"slide_type": "slide"
}
},
"source": [
"# FinBert\n",
"\n",
@ -19,6 +23,11 @@
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "slide"
}
},
"source": [
"## Libraries\n",
"\n",
@ -31,23 +40,22 @@
"* torchaudio\n",
"* sentencepiece\n",
"* sacremoses"
],
"metadata": {
"collapsed": false
}
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {
"ExecuteTime": {
"start_time": "2023-05-01T13:16:08.554998Z",
"end_time": "2023-05-01T13:16:13.740927Z"
"end_time": "2023-05-01T13:16:13.740927Z",
"start_time": "2023-05-01T13:16:08.554998Z"
},
"collapsed": false,
"jupyter": {
"outputs_hidden": false
},
"slideshow": {
"slide_type": "skip"
},
"tags": []
},
"outputs": [
@ -108,26 +116,30 @@
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "slide"
}
},
"source": [
"### Importing and creation of models and tokenizer"
],
"metadata": {
"collapsed": false
}
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2023-05-01T13:16:15.121662Z",
"start_time": "2023-05-01T13:16:13.743921Z"
},
"jupyter": {
"outputs_hidden": false
},
"tags": [],
"ExecuteTime": {
"start_time": "2023-05-01T13:16:13.743921Z",
"end_time": "2023-05-01T13:16:15.121662Z"
}
"slideshow": {
"slide_type": "subslide"
},
"tags": []
},
"outputs": [],
"source": [
@ -145,30 +157,39 @@
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "slide"
}
},
"source": [
"### Analyze a single sentiment"
],
"metadata": {
"collapsed": false
}
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2023-05-01T13:16:15.194193Z",
"start_time": "2023-05-01T13:16:15.122665Z"
},
"jupyter": {
"outputs_hidden": false
},
"ExecuteTime": {
"start_time": "2023-05-01T13:16:15.122665Z",
"end_time": "2023-05-01T13:16:15.194193Z"
"slideshow": {
"slide_type": "-"
}
},
"outputs": [
{
"data": {
"text/plain": "+ 0.034084\n0 0.932933\n- 0.032982\ndtype: float32"
"text/plain": [
"+ 0.034084\n",
"0 0.932933\n",
"- 0.032982\n",
"dtype: float32"
]
},
"execution_count": 27,
"metadata": {},
@ -192,34 +213,29 @@
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "slide"
}
},
"source": [
"### Creating test data"
],
"metadata": {
"collapsed": false
}
]
},
{
"cell_type": "code",
"execution_count": 28,
"execution_count": null,
"metadata": {
"tags": [],
"ExecuteTime": {
"start_time": "2023-05-01T13:16:15.198186Z",
"end_time": "2023-05-01T13:16:15.208856Z"
}
"end_time": "2023-05-01T13:16:15.208856Z",
"start_time": "2023-05-01T13:16:15.198186Z"
},
"slideshow": {
"slide_type": "skip"
},
"tags": []
},
"outputs": [
{
"data": {
"text/plain": " text lan\n0 Microsoft fails to hit profit expectations en\n1 Am Aktienmarkt überwieg weiter die Zuversicht,... de\n2 Stocks rallied and the British pound gained. en\n3 Meyer Burger bedient ab sofort australischen M... de\n4 Meyer Burger enters Australian market and exhi... en\n5 J&T Express Vietnam hilft lokalen Handwerksdör... de\n6 7 Experten empfehlen die Aktie zum Kauf, 1 Exp... de\n7 Microsoft aktie fällt. de\n8 Microsoft aktie steigt. de",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>text</th>\n <th>lan</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>Microsoft fails to hit profit expectations</td>\n <td>en</td>\n </tr>\n <tr>\n <th>1</th>\n <td>Am Aktienmarkt überwieg weiter die Zuversicht,...</td>\n <td>de</td>\n </tr>\n <tr>\n <th>2</th>\n <td>Stocks rallied and the British pound gained.</td>\n <td>en</td>\n </tr>\n <tr>\n <th>3</th>\n <td>Meyer Burger bedient ab sofort australischen M...</td>\n <td>de</td>\n </tr>\n <tr>\n <th>4</th>\n <td>Meyer Burger enters Australian market and exhi...</td>\n <td>en</td>\n </tr>\n <tr>\n <th>5</th>\n <td>J&amp;T Express Vietnam hilft lokalen Handwerksdör...</td>\n <td>de</td>\n </tr>\n <tr>\n <th>6</th>\n <td>7 Experten empfehlen die Aktie zum Kauf, 1 Exp...</td>\n <td>de</td>\n </tr>\n <tr>\n <th>7</th>\n <td>Microsoft aktie fällt.</td>\n <td>de</td>\n </tr>\n <tr>\n <th>8</th>\n <td>Microsoft aktie steigt.</td>\n <td>de</td>\n </tr>\n </tbody>\n</table>\n</div>"
},
"execution_count": 28,
"metadata": {},
"output_type": "execute_result"
}
],
"outputs": [],
"source": [
"text_df = pd.DataFrame(\n",
" [\n",
@ -248,44 +264,270 @@
" {\"text\": \"Microsoft aktie fällt.\", \"lan\": \"de\"},\n",
" {\"text\": \"Microsoft aktie steigt.\", \"lan\": \"de\"},\n",
" ]\n",
")\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {
"ExecuteTime": {
"end_time": "2023-05-01T13:16:15.208856Z",
"start_time": "2023-05-01T13:16:15.198186Z"
},
"tags": []
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>text</th>\n",
" <th>lan</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Microsoft fails to hit profit expectations</td>\n",
" <td>en</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Am Aktienmarkt überwieg weiter die Zuversicht,...</td>\n",
" <td>de</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Stocks rallied and the British pound gained.</td>\n",
" <td>en</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Meyer Burger bedient ab sofort australischen M...</td>\n",
" <td>de</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Meyer Burger enters Australian market and exhi...</td>\n",
" <td>en</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>J&amp;T Express Vietnam hilft lokalen Handwerksdör...</td>\n",
" <td>de</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>7 Experten empfehlen die Aktie zum Kauf, 1 Exp...</td>\n",
" <td>de</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>Microsoft aktie fällt.</td>\n",
" <td>de</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>Microsoft aktie steigt.</td>\n",
" <td>de</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" text lan\n",
"0 Microsoft fails to hit profit expectations en\n",
"1 Am Aktienmarkt überwieg weiter die Zuversicht,... de\n",
"2 Stocks rallied and the British pound gained. en\n",
"3 Meyer Burger bedient ab sofort australischen M... de\n",
"4 Meyer Burger enters Australian market and exhi... en\n",
"5 J&T Express Vietnam hilft lokalen Handwerksdör... de\n",
"6 7 Experten empfehlen die Aktie zum Kauf, 1 Exp... de\n",
"7 Microsoft aktie fällt. de\n",
"8 Microsoft aktie steigt. de"
]
},
"execution_count": 28,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"text_df"
]
},
{
"cell_type": "markdown",
"source": [],
"metadata": {
"collapsed": false
}
"metadata": {},
"source": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Analyze multiple Sentiments"
],
"metadata": {
"collapsed": false
}
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2023-05-01T13:16:16.132009Z",
"start_time": "2023-05-01T13:16:15.211858Z"
},
"jupyter": {
"outputs_hidden": false
},
"ExecuteTime": {
"start_time": "2023-05-01T13:16:15.211858Z",
"end_time": "2023-05-01T13:16:16.132009Z"
}
},
"outputs": [
{
"data": {
"text/plain": " text lan + 0 \n0 Microsoft fails to hit profit expectations en 0.034084 0.932933 \\\n1 Am Aktienmarkt überwieg weiter die Zuversicht,... de 0.053528 0.027950 \n2 Stocks rallied and the British pound gained. en 0.898361 0.034474 \n3 Meyer Burger bedient ab sofort australischen M... de 0.116597 0.012790 \n4 Meyer Burger enters Australian market and exhi... en 0.187527 0.008846 \n5 J&T Express Vietnam hilft lokalen Handwerksdör... de 0.066277 0.020608 \n6 7 Experten empfehlen die Aktie zum Kauf, 1 Exp... de 0.050346 0.022004 \n7 Microsoft aktie fällt. de 0.066061 0.016440 \n8 Microsoft aktie steigt. de 0.041449 0.018471 \n\n - \n0 0.032982 \n1 0.918522 \n2 0.067165 \n3 0.870613 \n4 0.803627 \n5 0.913115 \n6 0.927650 \n7 0.917498 \n8 0.940080 ",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>text</th>\n <th>lan</th>\n <th>+</th>\n <th>0</th>\n <th>-</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>Microsoft fails to hit profit expectations</td>\n <td>en</td>\n <td>0.034084</td>\n <td>0.932933</td>\n <td>0.032982</td>\n </tr>\n <tr>\n <th>1</th>\n <td>Am Aktienmarkt überwieg weiter die Zuversicht,...</td>\n <td>de</td>\n <td>0.053528</td>\n <td>0.027950</td>\n <td>0.918522</td>\n </tr>\n <tr>\n <th>2</th>\n <td>Stocks rallied and the British pound gained.</td>\n <td>en</td>\n <td>0.898361</td>\n <td>0.034474</td>\n <td>0.067165</td>\n </tr>\n <tr>\n <th>3</th>\n <td>Meyer Burger bedient ab sofort australischen M...</td>\n <td>de</td>\n <td>0.116597</td>\n <td>0.012790</td>\n <td>0.870613</td>\n </tr>\n <tr>\n <th>4</th>\n <td>Meyer Burger enters Australian market and exhi...</td>\n <td>en</td>\n <td>0.187527</td>\n <td>0.008846</td>\n <td>0.803627</td>\n </tr>\n <tr>\n <th>5</th>\n <td>J&amp;T Express Vietnam hilft lokalen Handwerksdör...</td>\n <td>de</td>\n <td>0.066277</td>\n <td>0.020608</td>\n <td>0.913115</td>\n </tr>\n <tr>\n <th>6</th>\n <td>7 Experten empfehlen die Aktie zum Kauf, 1 Exp...</td>\n <td>de</td>\n <td>0.050346</td>\n <td>0.022004</td>\n <td>0.927650</td>\n </tr>\n <tr>\n <th>7</th>\n <td>Microsoft aktie fällt.</td>\n <td>de</td>\n <td>0.066061</td>\n <td>0.016440</td>\n <td>0.917498</td>\n </tr>\n <tr>\n <th>8</th>\n <td>Microsoft aktie steigt.</td>\n <td>de</td>\n <td>0.041449</td>\n <td>0.018471</td>\n <td>0.940080</td>\n </tr>\n </tbody>\n</table>\n</div>"
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>text</th>\n",
" <th>lan</th>\n",
" <th>+</th>\n",
" <th>0</th>\n",
" <th>-</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Microsoft fails to hit profit expectations</td>\n",
" <td>en</td>\n",
" <td>0.034084</td>\n",
" <td>0.932933</td>\n",
" <td>0.032982</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Am Aktienmarkt überwieg weiter die Zuversicht,...</td>\n",
" <td>de</td>\n",
" <td>0.053528</td>\n",
" <td>0.027950</td>\n",
" <td>0.918522</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Stocks rallied and the British pound gained.</td>\n",
" <td>en</td>\n",
" <td>0.898361</td>\n",
" <td>0.034474</td>\n",
" <td>0.067165</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Meyer Burger bedient ab sofort australischen M...</td>\n",
" <td>de</td>\n",
" <td>0.116597</td>\n",
" <td>0.012790</td>\n",
" <td>0.870613</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Meyer Burger enters Australian market and exhi...</td>\n",
" <td>en</td>\n",
" <td>0.187527</td>\n",
" <td>0.008846</td>\n",
" <td>0.803627</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>J&amp;T Express Vietnam hilft lokalen Handwerksdör...</td>\n",
" <td>de</td>\n",
" <td>0.066277</td>\n",
" <td>0.020608</td>\n",
" <td>0.913115</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>7 Experten empfehlen die Aktie zum Kauf, 1 Exp...</td>\n",
" <td>de</td>\n",
" <td>0.050346</td>\n",
" <td>0.022004</td>\n",
" <td>0.927650</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>Microsoft aktie fällt.</td>\n",
" <td>de</td>\n",
" <td>0.066061</td>\n",
" <td>0.016440</td>\n",
" <td>0.917498</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>Microsoft aktie steigt.</td>\n",
" <td>de</td>\n",
" <td>0.041449</td>\n",
" <td>0.018471</td>\n",
" <td>0.940080</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" text lan + 0 \n",
"0 Microsoft fails to hit profit expectations en 0.034084 0.932933 \\\n",
"1 Am Aktienmarkt überwieg weiter die Zuversicht,... de 0.053528 0.027950 \n",
"2 Stocks rallied and the British pound gained. en 0.898361 0.034474 \n",
"3 Meyer Burger bedient ab sofort australischen M... de 0.116597 0.012790 \n",
"4 Meyer Burger enters Australian market and exhi... en 0.187527 0.008846 \n",
"5 J&T Express Vietnam hilft lokalen Handwerksdör... de 0.066277 0.020608 \n",
"6 7 Experten empfehlen die Aktie zum Kauf, 1 Exp... de 0.050346 0.022004 \n",
"7 Microsoft aktie fällt. de 0.066061 0.016440 \n",
"8 Microsoft aktie steigt. de 0.041449 0.018471 \n",
"\n",
" - \n",
"0 0.032982 \n",
"1 0.918522 \n",
"2 0.067165 \n",
"3 0.870613 \n",
"4 0.803627 \n",
"5 0.913115 \n",
"6 0.927650 \n",
"7 0.917498 \n",
"8 0.940080 "
]
},
"execution_count": 29,
"metadata": {},
@ -304,19 +546,18 @@
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Conclusion about FinBert\n",
"\n",
"The current form of this model can't be used for the german language.\n",
"It could be used if the text is translated beforehand. But it is questionable if that will work well.\n",
"Another way would be to retrain the same model with translated text from this models' data. But I do not believe this to be feasible."
],
"metadata": {
"collapsed": false
}
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Translating sentiments before analysing them with FinBert\n",
"\n",
@ -326,14 +567,17 @@
"[Translator: Helsinki-NLP/opus-mt-de-en](https://huggingface.co/Helsinki-NLP/opus-mt-de-en)\n",
"https://huggingface.co/docs/transformers/main/en/model_doc/marian#transformers.MarianMTModel\n",
"\n"
],
"metadata": {
"collapsed": false
}
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {
"ExecuteTime": {
"end_time": "2023-05-01T13:16:19.308043Z",
"start_time": "2023-05-01T13:16:16.135009Z"
}
},
"outputs": [],
"source": [
"from transformers import AutoTokenizer, AutoModelForSeq2SeqLM\n",
@ -341,18 +585,17 @@
"translation_tokenizer = AutoTokenizer.from_pretrained(\"Helsinki-NLP/opus-mt-de-en\")\n",
"\n",
"translation_model = AutoModelForSeq2SeqLM.from_pretrained(\"Helsinki-NLP/opus-mt-de-en\")"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"start_time": "2023-05-01T13:16:16.135009Z",
"end_time": "2023-05-01T13:16:19.308043Z"
}
}
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {
"ExecuteTime": {
"end_time": "2023-05-01T13:16:19.928232Z",
"start_time": "2023-05-01T13:16:19.310046Z"
}
},
"outputs": [
{
"name": "stderr",
@ -364,7 +607,9 @@
},
{
"data": {
"text/plain": "'J&T Express Vietnam helps local craft villages increase their reach.'"
"text/plain": [
"'J&T Express Vietnam helps local craft villages increase their reach.'"
]
},
"execution_count": 31,
"metadata": {},
@ -385,18 +630,17 @@
")\n",
"tf = translate_sentiment(headline)\n",
"tf"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"start_time": "2023-05-01T13:16:19.310046Z",
"end_time": "2023-05-01T13:16:19.928232Z"
}
}
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {
"ExecuteTime": {
"end_time": "2023-05-01T13:16:23.381261Z",
"start_time": "2023-05-01T13:16:19.933234Z"
}
},
"outputs": [
{
"name": "stdout",
@ -412,8 +656,112 @@
},
{
"data": {
"text/plain": " lan orig \n0 en NaN \\\n1 de_translated Am Aktienmarkt überwieg weiter die Zuversicht,... \n2 en NaN \n3 de_translated Meyer Burger bedient ab sofort australischen M... \n4 en NaN \n5 de_translated J&T Express Vietnam hilft lokalen Handwerksdör... \n6 de_translated 7 Experten empfehlen die Aktie zum Kauf, 1 Exp... \n7 de_translated Microsoft aktie fällt. \n8 de_translated Microsoft aktie steigt. \n\n text \n0 Microsoft fails to hit profit expectations \n1 On the stock market, confidence continued to p... \n2 Stocks rallied and the British pound gained. \n3 Meyer Burger is now serving the Australian mar... \n4 Meyer Burger enters Australian market and exhi... \n5 J&T Express Vietnam helps local craft villages... \n6 7 experts recommend the stock for purchase, 1 ... \n7 Microsoft Aktie falls. \n8 Microsoft share is rising. ",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>lan</th>\n <th>orig</th>\n <th>text</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>en</td>\n <td>NaN</td>\n <td>Microsoft fails to hit profit expectations</td>\n </tr>\n <tr>\n <th>1</th>\n <td>de_translated</td>\n <td>Am Aktienmarkt überwieg weiter die Zuversicht,...</td>\n <td>On the stock market, confidence continued to p...</td>\n </tr>\n <tr>\n <th>2</th>\n <td>en</td>\n <td>NaN</td>\n <td>Stocks rallied and the British pound gained.</td>\n </tr>\n <tr>\n <th>3</th>\n <td>de_translated</td>\n <td>Meyer Burger bedient ab sofort australischen M...</td>\n <td>Meyer Burger is now serving the Australian mar...</td>\n </tr>\n <tr>\n <th>4</th>\n <td>en</td>\n <td>NaN</td>\n <td>Meyer Burger enters Australian market and exhi...</td>\n </tr>\n <tr>\n <th>5</th>\n <td>de_translated</td>\n <td>J&amp;T Express Vietnam hilft lokalen Handwerksdör...</td>\n <td>J&amp;T Express Vietnam helps local craft villages...</td>\n </tr>\n <tr>\n <th>6</th>\n <td>de_translated</td>\n <td>7 Experten empfehlen die Aktie zum Kauf, 1 Exp...</td>\n <td>7 experts recommend the stock for purchase, 1 ...</td>\n </tr>\n <tr>\n <th>7</th>\n <td>de_translated</td>\n <td>Microsoft aktie fällt.</td>\n <td>Microsoft Aktie falls.</td>\n </tr>\n <tr>\n <th>8</th>\n <td>de_translated</td>\n <td>Microsoft aktie steigt.</td>\n <td>Microsoft share is rising.</td>\n </tr>\n </tbody>\n</table>\n</div>"
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>lan</th>\n",
" <th>orig</th>\n",
" <th>text</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>en</td>\n",
" <td>NaN</td>\n",
" <td>Microsoft fails to hit profit expectations</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>de_translated</td>\n",
" <td>Am Aktienmarkt überwieg weiter die Zuversicht,...</td>\n",
" <td>On the stock market, confidence continued to p...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>en</td>\n",
" <td>NaN</td>\n",
" <td>Stocks rallied and the British pound gained.</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>de_translated</td>\n",
" <td>Meyer Burger bedient ab sofort australischen M...</td>\n",
" <td>Meyer Burger is now serving the Australian mar...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>en</td>\n",
" <td>NaN</td>\n",
" <td>Meyer Burger enters Australian market and exhi...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>de_translated</td>\n",
" <td>J&amp;T Express Vietnam hilft lokalen Handwerksdör...</td>\n",
" <td>J&amp;T Express Vietnam helps local craft villages...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>de_translated</td>\n",
" <td>7 Experten empfehlen die Aktie zum Kauf, 1 Exp...</td>\n",
" <td>7 experts recommend the stock for purchase, 1 ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>de_translated</td>\n",
" <td>Microsoft aktie fällt.</td>\n",
" <td>Microsoft Aktie falls.</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>de_translated</td>\n",
" <td>Microsoft aktie steigt.</td>\n",
" <td>Microsoft share is rising.</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" lan orig \n",
"0 en NaN \\\n",
"1 de_translated Am Aktienmarkt überwieg weiter die Zuversicht,... \n",
"2 en NaN \n",
"3 de_translated Meyer Burger bedient ab sofort australischen M... \n",
"4 en NaN \n",
"5 de_translated J&T Express Vietnam hilft lokalen Handwerksdör... \n",
"6 de_translated 7 Experten empfehlen die Aktie zum Kauf, 1 Exp... \n",
"7 de_translated Microsoft aktie fällt. \n",
"8 de_translated Microsoft aktie steigt. \n",
"\n",
" text \n",
"0 Microsoft fails to hit profit expectations \n",
"1 On the stock market, confidence continued to p... \n",
"2 Stocks rallied and the British pound gained. \n",
"3 Meyer Burger is now serving the Australian mar... \n",
"4 Meyer Burger enters Australian market and exhi... \n",
"5 J&T Express Vietnam helps local craft villages... \n",
"6 7 experts recommend the stock for purchase, 1 ... \n",
"7 Microsoft Aktie falls. \n",
"8 Microsoft share is rising. "
]
},
"execution_count": 32,
"metadata": {},
@ -443,23 +791,167 @@
"\n",
"translated_df = translate_sentiments(text_df.copy())\n",
"translated_df"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"start_time": "2023-05-01T13:16:19.933234Z",
"end_time": "2023-05-01T13:16:23.381261Z"
}
}
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {
"ExecuteTime": {
"end_time": "2023-05-01T13:16:24.076261Z",
"start_time": "2023-05-01T13:16:23.383269Z"
}
},
"outputs": [
{
"data": {
"text/plain": " lan orig \n0 en NaN \\\n1 de_translated Am Aktienmarkt überwieg weiter die Zuversicht,... \n2 en NaN \n3 de_translated Meyer Burger bedient ab sofort australischen M... \n4 en NaN \n5 de_translated J&T Express Vietnam hilft lokalen Handwerksdör... \n6 de_translated 7 Experten empfehlen die Aktie zum Kauf, 1 Exp... \n7 de_translated Microsoft aktie fällt. \n8 de_translated Microsoft aktie steigt. \n\n text + 0 \n0 Microsoft fails to hit profit expectations 0.034084 0.932933 \\\n1 On the stock market, confidence continued to p... 0.919673 0.018426 \n2 Stocks rallied and the British pound gained. 0.898361 0.034474 \n3 Meyer Burger is now serving the Australian mar... 0.221019 0.006844 \n4 Meyer Burger enters Australian market and exhi... 0.187527 0.008846 \n5 J&T Express Vietnam helps local craft villages... 0.891114 0.007633 \n6 7 experts recommend the stock for purchase, 1 ... 0.040850 0.016722 \n7 Microsoft Aktie falls. 0.027456 0.889160 \n8 Microsoft share is rising. 0.952216 0.019054 \n\n - \n0 0.032982 \n1 0.061901 \n2 0.067165 \n3 0.772137 \n4 0.803627 \n5 0.101254 \n6 0.942427 \n7 0.083384 \n8 0.028730 ",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>lan</th>\n <th>orig</th>\n <th>text</th>\n <th>+</th>\n <th>0</th>\n <th>-</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>en</td>\n <td>NaN</td>\n <td>Microsoft fails to hit profit expectations</td>\n <td>0.034084</td>\n <td>0.932933</td>\n <td>0.032982</td>\n </tr>\n <tr>\n <th>1</th>\n <td>de_translated</td>\n <td>Am Aktienmarkt überwieg weiter die Zuversicht,...</td>\n <td>On the stock market, confidence continued to p...</td>\n <td>0.919673</td>\n <td>0.018426</td>\n <td>0.061901</td>\n </tr>\n <tr>\n <th>2</th>\n <td>en</td>\n <td>NaN</td>\n <td>Stocks rallied and the British pound gained.</td>\n <td>0.898361</td>\n <td>0.034474</td>\n <td>0.067165</td>\n </tr>\n <tr>\n <th>3</th>\n <td>de_translated</td>\n <td>Meyer Burger bedient ab sofort australischen M...</td>\n <td>Meyer Burger is now serving the Australian mar...</td>\n <td>0.221019</td>\n <td>0.006844</td>\n <td>0.772137</td>\n </tr>\n <tr>\n <th>4</th>\n <td>en</td>\n <td>NaN</td>\n <td>Meyer Burger enters Australian market and exhi...</td>\n <td>0.187527</td>\n <td>0.008846</td>\n <td>0.803627</td>\n </tr>\n <tr>\n <th>5</th>\n <td>de_translated</td>\n <td>J&amp;T Express Vietnam hilft lokalen Handwerksdör...</td>\n <td>J&amp;T Express Vietnam helps local craft villages...</td>\n <td>0.891114</td>\n <td>0.007633</td>\n <td>0.101254</td>\n </tr>\n <tr>\n <th>6</th>\n <td>de_translated</td>\n <td>7 Experten empfehlen die Aktie zum Kauf, 1 Exp...</td>\n <td>7 experts recommend the stock for purchase, 1 ...</td>\n <td>0.040850</td>\n <td>0.016722</td>\n <td>0.942427</td>\n </tr>\n <tr>\n <th>7</th>\n <td>de_translated</td>\n <td>Microsoft aktie fällt.</td>\n 
<td>Microsoft Aktie falls.</td>\n <td>0.027456</td>\n <td>0.889160</td>\n <td>0.083384</td>\n </tr>\n <tr>\n <th>8</th>\n <td>de_translated</td>\n <td>Microsoft aktie steigt.</td>\n <td>Microsoft share is rising.</td>\n <td>0.952216</td>\n <td>0.019054</td>\n <td>0.028730</td>\n </tr>\n </tbody>\n</table>\n</div>"
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>lan</th>\n",
" <th>orig</th>\n",
" <th>text</th>\n",
" <th>+</th>\n",
" <th>0</th>\n",
" <th>-</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>en</td>\n",
" <td>NaN</td>\n",
" <td>Microsoft fails to hit profit expectations</td>\n",
" <td>0.034084</td>\n",
" <td>0.932933</td>\n",
" <td>0.032982</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>de_translated</td>\n",
" <td>Am Aktienmarkt überwieg weiter die Zuversicht,...</td>\n",
" <td>On the stock market, confidence continued to p...</td>\n",
" <td>0.919673</td>\n",
" <td>0.018426</td>\n",
" <td>0.061901</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>en</td>\n",
" <td>NaN</td>\n",
" <td>Stocks rallied and the British pound gained.</td>\n",
" <td>0.898361</td>\n",
" <td>0.034474</td>\n",
" <td>0.067165</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>de_translated</td>\n",
" <td>Meyer Burger bedient ab sofort australischen M...</td>\n",
" <td>Meyer Burger is now serving the Australian mar...</td>\n",
" <td>0.221019</td>\n",
" <td>0.006844</td>\n",
" <td>0.772137</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>en</td>\n",
" <td>NaN</td>\n",
" <td>Meyer Burger enters Australian market and exhi...</td>\n",
" <td>0.187527</td>\n",
" <td>0.008846</td>\n",
" <td>0.803627</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>de_translated</td>\n",
" <td>J&amp;T Express Vietnam hilft lokalen Handwerksdör...</td>\n",
" <td>J&amp;T Express Vietnam helps local craft villages...</td>\n",
" <td>0.891114</td>\n",
" <td>0.007633</td>\n",
" <td>0.101254</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>de_translated</td>\n",
" <td>7 Experten empfehlen die Aktie zum Kauf, 1 Exp...</td>\n",
" <td>7 experts recommend the stock for purchase, 1 ...</td>\n",
" <td>0.040850</td>\n",
" <td>0.016722</td>\n",
" <td>0.942427</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>de_translated</td>\n",
" <td>Microsoft aktie fällt.</td>\n",
" <td>Microsoft Aktie falls.</td>\n",
" <td>0.027456</td>\n",
" <td>0.889160</td>\n",
" <td>0.083384</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>de_translated</td>\n",
" <td>Microsoft aktie steigt.</td>\n",
" <td>Microsoft share is rising.</td>\n",
" <td>0.952216</td>\n",
" <td>0.019054</td>\n",
" <td>0.028730</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" lan orig \n",
"0 en NaN \\\n",
"1 de_translated Am Aktienmarkt überwieg weiter die Zuversicht,... \n",
"2 en NaN \n",
"3 de_translated Meyer Burger bedient ab sofort australischen M... \n",
"4 en NaN \n",
"5 de_translated J&T Express Vietnam hilft lokalen Handwerksdör... \n",
"6 de_translated 7 Experten empfehlen die Aktie zum Kauf, 1 Exp... \n",
"7 de_translated Microsoft aktie fällt. \n",
"8 de_translated Microsoft aktie steigt. \n",
"\n",
" text + 0 \n",
"0 Microsoft fails to hit profit expectations 0.034084 0.932933 \\\n",
"1 On the stock market, confidence continued to p... 0.919673 0.018426 \n",
"2 Stocks rallied and the British pound gained. 0.898361 0.034474 \n",
"3 Meyer Burger is now serving the Australian mar... 0.221019 0.006844 \n",
"4 Meyer Burger enters Australian market and exhi... 0.187527 0.008846 \n",
"5 J&T Express Vietnam helps local craft villages... 0.891114 0.007633 \n",
"6 7 experts recommend the stock for purchase, 1 ... 0.040850 0.016722 \n",
"7 Microsoft Aktie falls. 0.027456 0.889160 \n",
"8 Microsoft share is rising. 0.952216 0.019054 \n",
"\n",
" - \n",
"0 0.032982 \n",
"1 0.061901 \n",
"2 0.067165 \n",
"3 0.772137 \n",
"4 0.803627 \n",
"5 0.101254 \n",
"6 0.942427 \n",
"7 0.083384 \n",
"8 0.028730 "
]
},
"execution_count": 33,
"metadata": {},
@ -469,30 +961,22 @@
"source": [
"sentiments = analyse_sentiments(translated_df)\n",
"sentiments"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"start_time": "2023-05-01T13:16:23.383269Z",
"end_time": "2023-05-01T13:16:24.076261Z"
}
}
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Conclusion about a translated FinBert\n",
"\n",
"When translating a german text to english before using FinBert the results look much better and could be used for our project.\n",
"The big problem is that it will take even more CPU.\n",
"It should probably be combined with a language recognition and could be used to take multiple languages in since there are many variances of this translation model."
],
"metadata": {
"collapsed": false
}
]
}
],
"metadata": {
"celltoolbar": "Slideshow",
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",

View File

@ -0,0 +1,236 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 54,
"metadata": {
"collapsed": true,
"ExecuteTime": {
"end_time": "2023-06-03T01:36:32.345509400Z",
"start_time": "2023-06-03T01:36:32.332130700Z"
}
},
"outputs": [],
"source": [
"from typing import Final\n",
"\n",
"import numpy as np\n",
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 1,
"outputs": [
{
"data": {
"text/plain": " Company 1 Connection Weight Company 2\n0 21 83 58\n1 37 88 86\n2 40 6 83\n3 60 35 2\n4 11 22 10\n.. ... ... ...\n695 62 37 11\n696 10 24 27\n697 97 40 55\n698 14 87 66\n699 50 55 82\n\n[693 rows x 3 columns]",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>Company 1</th>\n <th>Connection Weight</th>\n <th>Company 2</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>21</td>\n <td>83</td>\n <td>58</td>\n </tr>\n <tr>\n <th>1</th>\n <td>37</td>\n <td>88</td>\n <td>86</td>\n </tr>\n <tr>\n <th>2</th>\n <td>40</td>\n <td>6</td>\n <td>83</td>\n </tr>\n <tr>\n <th>3</th>\n <td>60</td>\n <td>35</td>\n <td>2</td>\n </tr>\n <tr>\n <th>4</th>\n <td>11</td>\n <td>22</td>\n <td>10</td>\n </tr>\n <tr>\n <th>...</th>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n </tr>\n <tr>\n <th>695</th>\n <td>62</td>\n <td>37</td>\n <td>11</td>\n </tr>\n <tr>\n <th>696</th>\n <td>10</td>\n <td>24</td>\n <td>27</td>\n </tr>\n <tr>\n <th>697</th>\n <td>97</td>\n <td>40</td>\n <td>55</td>\n </tr>\n <tr>\n <th>698</th>\n <td>14</td>\n <td>87</td>\n <td>66</td>\n </tr>\n <tr>\n <th>699</th>\n <td>50</td>\n <td>55</td>\n <td>82</td>\n </tr>\n </tbody>\n</table>\n<p>693 rows × 3 columns</p>\n</div>"
},
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from typing import Final\n",
"\n",
"import numpy as np\n",
"import pandas as pd\n",
"\n",
"# Size of the synthetic graph: number of company ids, and how many times each id\n",
"# is repeated in the pool the edge endpoints are sampled from.\n",
"number_of_entries = 100\n",
"number_of_contacts = 10\n",
"ids: Final = list(range(number_of_entries))\n",
"# Frame with one row per company id and no columns yet.\n",
"companies = pd.DataFrame(columns=[], index=pd.Index(ids, name=\"company_id\"))\n",
"\n",
"# Seeded generator: the weights are identical on every Restart & Run All.\n",
"rng = np.random.default_rng(42)\n",
"\n",
"# Sample 70% of the repeated id pool twice with different seeds; rows with the\n",
"# same position in both samples form one (Company 1, Company 2) pair.\n",
"id1 = (\n",
"    pd.Series(ids * number_of_contacts, name=\"Company 1\")\n",
"    .sample(frac=0.7, random_state=42)\n",
"    .reset_index(drop=True)\n",
")\n",
"id2 = (\n",
"    pd.Series(ids * number_of_contacts, name=\"Company 2\")\n",
"    .sample(frac=0.7, random_state=43)\n",
"    .reset_index(drop=True)\n",
")\n",
"connections = (\n",
"    pd.concat(\n",
"        [\n",
"            id1,\n",
"            pd.Series(\n",
"                rng.integers(0, 100, size=max(len(id1), len(id2))),\n",
"                name=\"Connection Weight\",\n",
"            ),\n",
"            id2,\n",
"        ],\n",
"        axis=1,\n",
"    )\n",
"    # Rows that lack one of the three values are discarded before the int cast.\n",
"    .dropna()\n",
"    .astype(int)\n",
"    # Drop self-loops: a company is never listed as connected to itself.\n",
"    .loc[lambda edges: edges[\"Company 1\"] != edges[\"Company 2\"]]\n",
")\n",
"connections"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2023-06-03T10:15:42.647508100Z",
"start_time": "2023-06-03T10:15:40.656713900Z"
}
}
},
{
"cell_type": "code",
"execution_count": 69,
"outputs": [
{
"data": {
"text/plain": " Company 1 Connection Weight Company 2\n0 21 36 58\n1 37 59 86\n2 40 26 83\n3 60 21 2\n4 11 2 10\n.. ... ... ...\n695 62 45 11\n696 10 64 27\n697 97 24 55\n698 14 51 66\n699 50 93 82\n\n[693 rows x 3 columns]",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>Company 1</th>\n <th>Connection Weight</th>\n <th>Company 2</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>21</td>\n <td>36</td>\n <td>58</td>\n </tr>\n <tr>\n <th>1</th>\n <td>37</td>\n <td>59</td>\n <td>86</td>\n </tr>\n <tr>\n <th>2</th>\n <td>40</td>\n <td>26</td>\n <td>83</td>\n </tr>\n <tr>\n <th>3</th>\n <td>60</td>\n <td>21</td>\n <td>2</td>\n </tr>\n <tr>\n <th>4</th>\n <td>11</td>\n <td>2</td>\n <td>10</td>\n </tr>\n <tr>\n <th>...</th>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n </tr>\n <tr>\n <th>695</th>\n <td>62</td>\n <td>45</td>\n <td>11</td>\n </tr>\n <tr>\n <th>696</th>\n <td>10</td>\n <td>64</td>\n <td>27</td>\n </tr>\n <tr>\n <th>697</th>\n <td>97</td>\n <td>24</td>\n <td>55</td>\n </tr>\n <tr>\n <th>698</th>\n <td>14</td>\n <td>51</td>\n <td>66</td>\n </tr>\n <tr>\n <th>699</th>\n <td>50</td>\n <td>93</td>\n <td>82</td>\n </tr>\n </tbody>\n</table>\n<p>693 rows × 3 columns</p>\n</div>"
},
"execution_count": 69,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Seeded generator: the weights are identical on every Restart & Run All.\n",
"rng = np.random.default_rng(42)\n",
"\n",
"# Sample 70% of the repeated id pool twice with different seeds; rows with the\n",
"# same position in both samples form one (Company 1, Company 2) pair.\n",
"id1 = (\n",
"    pd.Series(ids * number_of_contacts, name=\"Company 1\")\n",
"    .sample(frac=0.7, random_state=42)\n",
"    .reset_index(drop=True)\n",
")\n",
"id2 = (\n",
"    pd.Series(ids * number_of_contacts, name=\"Company 2\")\n",
"    .sample(frac=0.7, random_state=43)\n",
"    .reset_index(drop=True)\n",
")\n",
"connections = (\n",
"    pd.concat(\n",
"        [\n",
"            id1,\n",
"            pd.Series(\n",
"                rng.integers(0, 100, size=max(len(id1), len(id2))),\n",
"                name=\"Connection Weight\",\n",
"            ),\n",
"            id2,\n",
"        ],\n",
"        axis=1,\n",
"    )\n",
"    # Rows that lack one of the three values are discarded before the int cast.\n",
"    .dropna()\n",
"    .astype(int)\n",
"    # Drop self-loops: a company is never listed as connected to itself.\n",
"    .loc[lambda edges: edges[\"Company 1\"] != edges[\"Company 2\"]]\n",
")\n",
"connections"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2023-06-03T01:40:08.441882700Z",
"start_time": "2023-06-03T01:40:08.406876900Z"
}
}
},
{
"cell_type": "code",
"execution_count": 73,
"outputs": [
{
"data": {
"text/plain": " Company 2\nCompany 1 \n0 6\n1 6\n2 5\n3 9\n4 7\n... ...\n95 7\n96 8\n97 7\n98 6\n99 8\n\n[100 rows x 1 columns]",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>Company 2</th>\n </tr>\n <tr>\n <th>Company 1</th>\n <th></th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>6</td>\n </tr>\n <tr>\n <th>1</th>\n <td>6</td>\n </tr>\n <tr>\n <th>2</th>\n <td>5</td>\n </tr>\n <tr>\n <th>3</th>\n <td>9</td>\n </tr>\n <tr>\n <th>4</th>\n <td>7</td>\n </tr>\n <tr>\n <th>...</th>\n <td>...</td>\n </tr>\n <tr>\n <th>95</th>\n <td>7</td>\n </tr>\n <tr>\n <th>96</th>\n <td>8</td>\n </tr>\n <tr>\n <th>97</th>\n <td>7</td>\n </tr>\n <tr>\n <th>98</th>\n <td>6</td>\n </tr>\n <tr>\n <th>99</th>\n <td>8</td>\n </tr>\n </tbody>\n</table>\n<p>100 rows × 1 columns</p>\n</div>"
},
"execution_count": 73,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Count the \"Company 2\" entries recorded for each \"Company 1\" value.\n",
"connections.groupby(\"Company 1\")[[\"Company 2\"]].count()"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2023-06-03T01:44:23.433333600Z",
"start_time": "2023-06-03T01:44:23.424841700Z"
}
}
},
{
"cell_type": "code",
"execution_count": 72,
"outputs": [
{
"data": {
"text/plain": " Analysis-d0 Analysis-d1\ncompany_id \n0 1 6\n1 1 6\n2 1 5\n3 1 9\n4 1 7\n... ... ...\n95 1 7\n96 1 8\n97 1 7\n98 1 6\n99 1 8\n\n[100 rows x 2 columns]",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>Analysis-d0</th>\n <th>Analysis-d1</th>\n </tr>\n <tr>\n <th>company_id</th>\n <th></th>\n <th></th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>1</td>\n <td>6</td>\n </tr>\n <tr>\n <th>1</th>\n <td>1</td>\n <td>6</td>\n </tr>\n <tr>\n <th>2</th>\n <td>1</td>\n <td>5</td>\n </tr>\n <tr>\n <th>3</th>\n <td>1</td>\n <td>9</td>\n </tr>\n <tr>\n <th>4</th>\n <td>1</td>\n <td>7</td>\n </tr>\n <tr>\n <th>...</th>\n <td>...</td>\n <td>...</td>\n </tr>\n <tr>\n <th>95</th>\n <td>1</td>\n <td>7</td>\n </tr>\n <tr>\n <th>96</th>\n <td>1</td>\n <td>8</td>\n </tr>\n <tr>\n <th>97</th>\n <td>1</td>\n <td>7</td>\n </tr>\n <tr>\n <th>98</th>\n <td>1</td>\n <td>6</td>\n </tr>\n <tr>\n <th>99</th>\n <td>1</td>\n <td>8</td>\n </tr>\n </tbody>\n</table>\n<p>100 rows × 2 columns</p>\n</div>"
},
"execution_count": 72,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Constant baseline column: every company gets the value 1\n",
"# (presumably the company itself at distance 0 - TODO confirm).\n",
"companies[\"Analysis-d0\"] = 1\n",
"# Number of \"Company 2\" entries per \"Company 1\"; companies that never appear as\n",
"# \"Company 1\" get 0 instead of NaN, which keeps the column integer-typed.\n",
"companies[\"Analysis-d1\"] = (\n",
"    connections.groupby(\"Company 1\")[\"Company 2\"]\n",
"    .count()\n",
"    .reindex(companies.index, fill_value=0)\n",
")\n",
"# NOTE(review): the original call ended in an empty `on=` (a SyntaxError).\n",
"# Joining on \"Company 1\" is the literal completion of that call - confirm the\n",
"# intended key. The suffix keeps the overlapping column names apart.\n",
"connection_sum = connections.join(\n",
"    connections.set_index(\"Company 2\"), on=\"Company 1\", rsuffix=\" (joined)\"\n",
")\n",
"# TODO: repeat the join for further tiers, e.g. `for tiers in range(5)`.\n",
"companies"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2023-06-03T01:43:25.341850700Z",
"start_time": "2023-06-03T01:43:25.318015500Z"
}
}
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"companies"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"start_time": "2023-06-03T01:36:32.382091200Z"
}
}
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"start_time": "2023-06-03T01:36:32.385093700Z"
}
}
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 0
}