Mirror of https://github.com/fhswf/aki_prj23_transparenzregister.git (synced 2025-04-20 23:12:53 +02:00)
Feature/ner (#103)

NER and sentiment pipeline with services for data extraction.

Co-authored-by: Philipp Horstenkamp <philipp@horstenkamp.de>
Co-authored-by: TrisNol <tristan.nolde@yahoo.de>

parent 99b61e7c2e
commit c680ac9759
2  .github/workflows/documentation.yaml (vendored)

@@ -26,7 +26,7 @@ jobs:
       - name: Install and configure Poetry
         uses: snok/install-poetry@v1
         with:
-          version: 1.4.2
+          version: 1.6.1
           virtualenvs-create: false
       - run: poetry install --with doc --all-extras --without test,lint
       - name: Doc-Build
4  .github/workflows/lint-actions.yaml (vendored)

@@ -28,7 +28,7 @@ jobs:
       - name: Install and configure Poetry
         uses: snok/install-poetry@v1
         with:
-          version: 1.4.2
+          version: 1.6.1
           virtualenvs-create: false
           virtualenvs-path: ~/local/share/virtualenvs
       - run: poetry install --without develop,doc --all-extras
@@ -56,7 +56,7 @@ jobs:
       - name: Install and configure Poetry
         uses: snok/install-poetry@v1
         with:
-          version: 1.4.2
+          version: 1.6.1
           virtualenvs-create: true
           virtualenvs-path: ~/local/share/virtualenvs
       - name: Check out Git repository
10  .github/workflows/test-and-build-action.yaml (vendored)

@@ -20,10 +20,14 @@ jobs:
         uses: actions/setup-python@v4
         with:
           python-version: 3.11
+      - name: Install Cuda
+        run: |
+          sudo apt update
+          # sudo apt install cuda-10-0 -y
       - name: Install and configure Poetry
         uses: snok/install-poetry@v1
         with:
-          version: 1.4.2
+          version: 1.6.1
           virtualenvs-path: ~/local/share/virtualenvs
       - id: cache-pipenv
         uses: actions/cache@v3
@@ -85,7 +89,7 @@ jobs:
       - name: Install and configure Poetry
         uses: snok/install-poetry@v1
         with:
-          version: 1.4.2
+          version: 1.6.1
           virtualenvs-path: ~/local/share/virtualenvs
       - run: |
           poetry install --only test --all-extras
@@ -113,7 +117,7 @@ jobs:
       - name: Install and configure Poetry
         uses: snok/install-poetry@v1
         with:
-          version: 1.4.2
+          version: 1.6.1
           virtualenvs-path: ~/local/share/virtualenvs
       - id: cache-pipenv
         uses: actions/cache@v3
1  Jupyter/NER/.$Flow_Chart_NER_Function.drawio.bkp (Normal file)

@@ -0,0 +1 @@
<mxfile host="Electron" modified="2023-08-17T13:01:42.139Z" agent="5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) draw.io/20.8.10 Chrome/106.0.5249.199 Electron/21.3.5 Safari/537.36" etag="3J0g9IE6MsjTAohlE4lt" version="20.8.10" type="device"><diagram id="C5RBs43oDa-KdzZeNtuy" name="Page-1">7VrbUuM4EP2aVO0+QNkxuT1uAszswlCzA7MMT1OKrTgiimVkOZf5+m3ZUmxHJjETgiHAC1FbN/c5p7stu+EMpotPHIXjL8zDtNG0vEXDOW00m7bda8E/aVmmlo7TSQ0+J57qlBmuyS+sjJayxsTDUaGjYIwKEhaNLgsC7IqCDXHO5sVuI0aLq4bIx4bh2kXUtN4ST4xTa7fZyeyfMfHHemW73UuvTJHurO4kGiOPzXMm56zhDDhjIv01XQwwlc7Tfrn9e3lLLyftT//8Gz2g7/2Lm6v/jtLJzp8yZHULHAfit6deTJb27GISx+7NQ8/+8c16uLg/Ul6IxFL7C3vgPtVkXIyZzwJEzzJrn7M48LCc1YJW1ueSsRCMNhjvsRBLxQUUCwamsZhSdRUviPghhx+3VOsud+V0oWZOGkvdCARf5gbJ5l3+WjYsaelx6f3Jm1qjwxZfqn4Ri7mLN/RzFKUR97HY4OiMMKA0zKYYNgnjOKZIkFlxc0hR3l/1y2CFHwrZJxBIbXKGaKxWItMQgJMsZ15MQZ/rNMhAlrjMx0Tg6xAlrphDoCgCOmKBUGjbcJd9n6IoUgBEgrPJSnqy90pHGT5V4ZhhLvAi5yzTo/pqR8UtFbYcJeJ5FgNsLexxTv8n1p4wODEwuNvodqvo4hJFlSovD0WiqMD7S0ZRaA8pcyep6ZzIzSu56KDd3Y4WRUNM+8id+MlGB4wyDpcCFsjAsD10PLP42qb4yuezSsOASR5FFk2MyupUM31lJBC5Lmw0imBj69RZLfj7bGoZbLpiO4fyN0a3msjUeRqX7FfPpbbBpUs0DRvNNgU39IccfvnyV0hj3weywHRBwzk3Y9eYTYdxtD1dFDCWDDpHU0Klvz5jOsOCuKgkqSBKfFj31AW4MS8nDyxJAh9a7ax1k5AV4u/+kk1X18QK81a7JNtYJdmmu69s0zEw/QropdA1LZrge2AZv9ddA6Fbd8rvVgnSbzoEv3jG19F0e8q3K4ZpxR7r2IG/AoFefxXQq1RTfpQB+2FY89AKAb3DHJ/6MR2alcAw5kFSCLBYfFQCmyqBdlkSetFKQAfCHKjfcIgIgGldHmIdsDraW+p23XWAjhRFCKj0adNKFXZgGKzXYh3rBTEoPX/r7pwW389B5+MHmBUOOh8hxrMfdG7aZE5oq+Slc5fLghHxf4aczYiHuZnctAU2kA3iGAlZWllIPj+x4b18VSHpDGwGp4ylkqGTB74miMqOwAV40GJ+8sCVNL6wwGencn4Zdp1+MIzCFXC1id8QdVXOPC5+q6B9u0T7qz557bf2pf3eh/Z3075+b7dN+806tW/WOYb2cQByxM8p+fSlJWFBwi8uS075jpK/Y7l36pa7fox5/Xrft2531ePaE6gCvOmsvVqz15BM44QatdNj6uN3tTXJS12yiukdRwINKYnGEVoXdTFzH7Z0V9V3fdK1DR+/hHRfado9qZh2W3WmXfO9tqHGAM+jn5BAd0u8iRKR6+IoWuXfK5j5aMAo1Xp9X7nW0ScL9Qm2+SHYNSFWEGy7TsGanw4Ygh3EPGL8ewAPyT4Hvcmz3irSfYgxJ3iDPEHS8tg4wiLRt0y2yVJa3h5z4ylgEalCW54v68lGBFM5GBZ7iFma5achCpIFtendRYBe3Snb/HqAjKBtUujYBdeLP/5MIUxRUkyyzIq98iuEfTnadoqebpUdY9glnrbt1r6Crfkd34d036x02yUfiTyXdKGZfZecPndlX3c7Z/8D</diagram></mxfile>
1  Jupyter/NER/Flow_Chart_NER_Function.drawio (Normal file)

@@ -0,0 +1 @@
<mxfile host="Electron" modified="2023-08-20T09:25:41.321Z" agent="5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) draw.io/19.0.3 Chrome/102.0.5005.63 Electron/19.0.3 Safari/537.36" etag="NgKJlrtoa61kZ5SbTZB-" version="19.0.3" type="device" pages="2"><diagram id="C5RBs43oDa-KdzZeNtuy" name="Pipeline">7VpZc6M4EP41rtp9SIojvh7XzjG7yUzNTjKbydOUDDIolhERwsf8+m2BxGEom0zs2HGSyoPV6ID+vq+7JWjZw+niiqPQ/8xcTFuW4S5a9nnLsswzy2rJf8NdppZep50aPE5c1Sk33JJfWBkNZY2Ji6NSR8EYFSQsGx0WBNgRJRvinM3L3caMllcNkYcrhlsH0ar1nrjCV09hdXP7J0w8X69sdvrplSnSndWTRD5y2bxgsi9a9pAzJtJf08UQU+k87Zf7v5f39GbSufrn3+gJfR9c33357ySd7PI5Q7JH4DgQvz31YrI0Z9eTOHbunvrmj2/G0/XjifJCJJbaX9gF96km48JnHgsQvcitA87iwMVyVgNaeZ8bxkIwmmB8xEIsFRdQLBiYfDGl6ipeEPFDDj9tq9ZD4cr5Qs2cNJa6EQi+LAySzYfitXxY0tLjGrpOuThiMXfwmn62YjDiHhZr/KooI51ZoKEC5gqzKYabhA4cUyTIrMxVpCjvZf1yWOGHQvYZBFJ3PUM0ViuRaQjASZYzN6agz1Ua5CBLXOY+Efg2RIlv5hAoyoCOWSAU2iY85cCjKIoUAJHgbJJJT/bOdPR8fGaYC7xY61F9tavClApbtkJknscAUwvbL+j/zNgRBmcVDB7Wut0ou7hGUbXKK0KRKCpw/5JRFNojypxJarok8uaVXHTQ7m1Gi6IRpgPkTLzkRoeMMg6XAhbIwLA5dGxZjZ2qGuvnaypHRRZNjMbqVDN9ZSQQhS5sPI7gxlapky34+2xqV9j0hb04lL8xuu2JTN3ncck8eC51Kly6QdOwZXUouGEw4vDLk79CGnsekAWmC1r2ZTV2+Ww6iqPN6aKEsWTQJZoSKv31CdMZFsRBNUkFUeLBuucOwI15PXlgSRJ40OrkrbuErBB/d5dsena3lGzanZpsY9Rkm96usk23gulXQC+FzjJogu+RZfx+bwWE3r5Tfq9JkH7TIfjVM76OpptTvtkwTCv2GKc2/JUIdPhVQL9RTflRBuyGYdaxFQL6Dgt8GsR0VK0ERjEPkkKAxeKjElhXCXTqktCrVgI6EBZA/YZDRABM4+YY64DsaE8fWFr7rgN0pChDQKVPLSNV2JFhsFqLdY1XxKD2QK734rR4tAed6w4wNx50Ni2ztn7Que6uC0LLkpfOXQ4LxsT7GXI2Iy7m1eSmLXAD+SCOkZCllYHk/omNHuWrCklnYDN4yZdKhk4uOJ8gKjsCF2Cjxbxkw5U0PrPAY+dyfhl27UEwisIMuGMSv1HSvlmj/axPUfvtXWm//6H9Z2lfv6bbpP2mBfDraL9a51S0jwOQI96m5NOXloQFCb+4LDnlO0r+juXe3bfc9Tbm8PW+Zd1uXY8rO1AFuGWvvFozV5BMA4ca9aJt6rrH3JDkpS5Zw/SOI4FGlER+hFZFXc7cxy3drPren3TNio9fQ7qHkXbPGqbd9kGl3ep77YoaAzyPfkICfVniTZSIHAdHUZZ/v8DMJ0NGqdbr+8q1tj5Z2J9grXcs2HZDwXYOSrDVTwcqgh3GPGL8ewCbZI+D3uRZbxPpPsWYE7xGniBpeWwcYZHoWybbZCktb5c58RTAiVShLc+X9WRjgqkcDIs9xSzN8tMQBcmC2vTuIkB/3ym7+vUAGUO7SqFTB1wv/vgzhTBFSTHJqFbsjV8h7MrRpl32dLvuGMOs8bRptncVbKvf8X1I981Kt1Pzkci2pAvN/LvkdN+Vf91tX/wP</diagram><diagram id="KGNsg-YwxBcfRzNuZPCy" name="Service">dZHBDoIwDIafZnfYFPGMiBdPHDwvrLIlg5IxA/r0QjbEBU126L7/77q2hGXNWBjeySsK0IRGYiTsRCiNd5SS+UTi6Uia7B2ojRLetIJSvcDDyNOHEtAHRouorepCWGHbQmUDxo3BIbTdUYdVO17DBpQV11t6U8JKT+PkuAoXULX0pVN6cELDF7PvpJdc4PCFWE5YZhCti5oxAz0Pb5mLyzv/UT8fM9DaHwlTsL49XYINsfwN</diagram></mxfile>
38  Jupyter/NER/NER-Pipeline.md (Normal file)

@@ -0,0 +1,38 @@
```mermaid
flowchart LR
DBConnect["`**Mongo Connect**
- create connection string
- establish connection`"]

DBRead["`**Mongo Read**
- read database
- get fields without attribute 'companies'`"]

NER["`**NERService**
- process news article
- get entities`"]

DBUpdate["`**Mongo Update Documents**
- update processed documents
- add an attribute 'companies'`"]

id1[["`**NERSpacy**
Named Entity Recognition with spaCy`"]]

id2[["`**NERCompanyList**
Named Entity Recognition by comparing text with a list`"]]

id3[["`**NERTransformer**
Named Entity Recognition with a transformer`"]]

DBConnect-->DBRead-->NER
NER--select service-->id1
NER--select service-->id2
NER--select service-->id3

id1-->DBUpdate
id2-->DBUpdate
id3-->DBUpdate
```
1898  Jupyter/NER/NER_Pipeline.ipynb (Normal file)
File diff suppressed because one or more lines are too long

1066  Jupyter/NER/NER_from_StagingDB.ipynb (Normal file)
File diff suppressed because one or more lines are too long
32  Jupyter/NER/Sentiment-Pipeline.md (Normal file)

@@ -0,0 +1,32 @@
```mermaid
flowchart LR
DBConnect["`**Mongo Connect**
- create connection string
- establish connection`"]

DBRead["`**Mongo Read**
- read database
- get fields without attribute 'sentiment'`"]

NER["`**SentimentService**
- process news article
- get sentiment`"]

DBUpdate["`**Mongo Update Documents**
- update processed documents
- add an attribute 'sentiment'`"]

id1[["`**SentimentSpacy**
Sentiment analysis with spaCy`"]]

id3[["`**SentimentTransformer**
Sentiment analysis with a transformer`"]]

DBConnect-->DBRead-->NER
NER--select service-->id1
NER--select service-->id3

id1-->DBUpdate
id3-->DBUpdate
```
952  Jupyter/NER/Sentiment_Pipeline.ipynb (Normal file)
File diff suppressed because one or more lines are too long

2424  Jupyter/Sentiment_Company_Matching/Name_Matching.ipynb (Normal file)
File diff suppressed because it is too large

Binary file not shown. (image added, 343 KiB)
@@ -531,7 +531,7 @@
 "          python-version: 3.11\n",
 "      - uses: snok/install-poetry@v1 # setup poetry\n",
 "        with:\n",
-"          version: 1.4.2\n",
+"          version: 1.6.1\n",
 "          virtualenvs-path: ~/local/share/virtualenvs\n",
 "      - uses: actions/checkout@v3\n",
 "      - run: |\n",
1254  poetry.lock (generated)
File diff suppressed because it is too large
pyproject.toml

@@ -65,12 +65,18 @@ python = "^3.11"
 python-dotenv = "^1.0.0"
 seaborn = "^0.12.2"
 selenium = "^4.12.0"
+spacy = "^3.6.1"
+spacy-sentiws = "^3.0.0"
+torch = {version = "*", source = "torch-cpu"}
+torchaudio = {version = "*", source = "torch-cpu"}
+torchvision = {version = "*", source = "torch-cpu"}
 tqdm = "^4.66.1"
+transformers = {version = "*", extras = ["torch"]}
 xmltodict = "^0.13.0"

 [tool.poetry.extras]
 ingest = ["selenium", "deutschland", "xmltodict"]
-transformation = []
+transformation = ["torch", "torchaudio", "torchvision", "transformers", "spacy-sentiws", "spacy"]
 web-server = ["dash", "dash-auth", "dash-bootstrap-components", "matplotlib", "seaborn"]

 [tool.poetry.group.develop.dependencies]
@@ -108,6 +114,7 @@ types-cachetools = "^5.3.0.6"
 types-pyOpenSSL = "*"
 types-requests = "^2.31.0.2"
 types-setuptools = "*"
 types-tabulate = "^0.9.0.3"
+types-tqdm = "^4.66.0.2"

 [tool.poetry.group.test.dependencies]
@@ -123,6 +130,11 @@ data-transformation = "aki_prj23_transparenzregister.utils.data_transfer:transfe
 reset-sql = "aki_prj23_transparenzregister.utils.sql.connector:reset_all_tables_cli"
 webserver = "aki_prj23_transparenzregister.ui.app:main"

+[[tool.poetry.source]]
+name = "torch-cpu"
+priority = "explicit"
+url = "https://download.pytorch.org/whl/cpu"
+
 [tool.ruff]
 exclude = [
     ".bzr",
102  src/aki_prj23_transparenzregister/utils/mongo/CompEntities.json (Normal file)

@@ -0,0 +1,102 @@
[
    "Volkswagen",
    "Mercedes",
    "Benz",
    "Deutsche Telekom",
    "Bmw",
    "Deutsche Post",
    "E.On",
    "BASF",
    "Siemens",
    "Uniper",
    "Bayer",
    "Continental",
    "Fresenius",
    "Thyssen",
    "Siemens",
    "SAP",
    "Metro",
    "Hochtief",
    "Traton",
    "Ceconomy",
    "ENBW",
    "Adidas",
    "Henkel",
    "Heidelbergcement",
    "Fresenius",
    "Merck",
    "Mckesson",
    "RWE",
    "Lufthansa",
    "Hapag-Lloyd",
    "Schaeffler",
    "Evonik",
    "Aurubis",
    "Brenntag",
    "Covestro",
    "Infineon",
    "Tui",
    "Kion ",
    "Zalando",
    "Telefonica",
    "Salzgitter",
    "Beiersdorf",
    "Suedzucker",
    "Hella",
    "Lanxess",
    "Knorr",
    "Rheinmetall",
    "Hornbach",
    "United",
    "Puma",
    "Baywa",
    "Kloeckner",
    "Hornbach",
    "Bechtle",
    "Nordex",
    "Wacker",
    "Gea",
    "Vonovia",
    "Prosiebensat1",
    "Leoni",
    "MTU",
    "1&1",
    "Jungheinrich",
    "K+S",
    "Hellofresh",
    "Symrise",
    "Aurelius",
    "Mvv",
    "Bilfinger",
    "Draegerwerk",
    "Krones",
    "Duerr",
    "Osram",
    "Auto1",
    "Deutsche Wohnen",
    "Kabel Deutschland",
    "Freenet",
    "Kuka",
    "Delivery Hero",
    "Paul Hartmann",
    "Fuchs Petrolub",
    "Sartorius",
    "Gelsenwasser",
    "Mainova",
    "Ksb",
    "Heidelberger Druckmaschinen",
    "Sixt",
    "Hugo Boss",
    "Dmg Mori",
    "Mutares",
    "Zooplus",
    "Grammer",
    "Fraport",
    "Wacker Neuson",
    "Indus Holding",
    "Leg Immobilien",
    "Elringklinger",
    "Stroeer",
    "Fielmann",
    "Gerresheimer"
]
@@ -0,0 +1,77 @@
SentiWS
~~~~~~~

SentimentWortschatz, or SentiWS for short, is a publicly available German-language resource for sentiment analysis, opinion mining, etc. It lists positive and negative polarity-bearing words weighted within the interval [-1; 1], plus their part-of-speech tag and, if applicable, their inflections. The current version of SentiWS (v2.0) contains around 1,650 positive and 1,800 negative words, which sum up to around 16,000 positive and around 18,000 negative word forms including their inflections. It contains not only adjectives and adverbs that explicitly express a sentiment, but also nouns and verbs that implicitly contain one.


License
~~~~~~~

SentiWS is licensed under a Creative Commons Attribution-Noncommercial-Share Alike 3.0 Unported License (http://creativecommons.org/licenses/by-nc-sa/3.0/).


Obtain a Copy
~~~~~~~~~~~~~

The latest version of SentiWS can be found at https://wortschatz.uni-leipzig.de/download/.


Data Format
~~~~~~~~~~~

SentiWS is organised in two utf8-encoded text files structured the following way:

<Word>|<POS tag> \t <Polarity weight> \t <Infl_1>,...,<Infl_k> \n

where \t denotes a tab and \n denotes a new line.
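For illustration, a minimal Python sketch of how one such entry could be parsed; the helper name and the example values are ours, not part of SentiWS:

```python
def parse_sentiws_line(line: str) -> tuple[str, str, float, list[str]]:
    """Parse one SentiWS entry: '<Word>|<POS tag>\t<weight>\t<infl_1>,...,<infl_k>'."""
    fields = line.rstrip("\n").split("\t")
    word, pos_tag = fields[0].split("|")
    weight = float(fields[1])
    # Entries without inflected forms have no third column.
    inflections = fields[2].split(",") if len(fields) > 2 else []
    return word, pos_tag, weight, inflections


# Illustrative entry (values invented for the example):
# parse_sentiws_line("schön|ADJX\t0.5\tschöne,schönem,schönen")
# -> ("schön", "ADJX", 0.5, ["schöne", "schönem", "schönen"])
```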
Citation
~~~~~~~~

If you use SentiWS in your work we kindly ask you to cite

R. Remus, U. Quasthoff & G. Heyer: SentiWS - a Publicly Available German-language Resource for Sentiment Analysis.
In: Proceedings of the 7th International Language Resources and Evaluation (LREC'10), 2010

or use the following BibTeX code snippet:

@INPROCEEDINGS{remquahey2010,
  title = {SentiWS -- a Publicly Available German-language Resource for Sentiment Analysis},
  booktitle = {Proceedings of the 7th International Language Resources and Evaluation (LREC'10)},
  author = {Remus, R. and Quasthoff, U. and Heyer, G.},
  year = {2010}
}


Version History
~~~~~~~~~~~~~~~

SentiWS is "work in progress" and hence far from being fully fledged and error-free. It will be continuously refined by adding missing words and word forms and removing ambiguous ones.

v1.8b, 2010-05-19: First publicly available version as described in Remus et al. (2010).
v1.8c, 2012-03-21: Second publicly available version in which some POS tags were corrected.
v2.0,  2018-10-19: Third publicly available version in which the inflected forms were extended.


Statistics
~~~~~~~~~~

                         Positive    Negative
Adjectives  Baseforms         792         712
            Inflections    10,936      10,471
Adverbs     Baseforms           7           4
            Inflections         5           0
Nouns       Baseforms         548         688
            Inflections       736       1,158
Verbs       Baseforms         297         423
            Inflections     3,246       4,580

All         Baseforms       1,644       1,827
            Inflections    14,923      16,209

Total                      16,567      18,036

Table: Overview of the dictionary's content


SentiWS.txt was last updated on 2019-09-12.
File diff suppressed because it is too large
File diff suppressed because it is too large
110  src/aki_prj23_transparenzregister/utils/mongo/ner_pipeline.py (Normal file)

@@ -0,0 +1,110 @@
"""Pipeline to get entities from the staging DB."""

import json
import sys

from loguru import logger
from tqdm import tqdm

import aki_prj23_transparenzregister.utils.mongo.connector as conn
import aki_prj23_transparenzregister.utils.mongo.news_mongo_service as news
from aki_prj23_transparenzregister.config.config_providers import (
    JsonFileConfigProvider,
)
from aki_prj23_transparenzregister.utils.mongo import ner_service

logger.add(sys.stdout, colorize=True)


class EntityPipeline:
    """Class to initialize the NER pipeline."""

    def __init__(self, conn_string: conn.MongoConnection) -> None:
        """Connect to the staging DB."""
        self.connect_string = conn_string
        self.connect_string.database = "transparenzregister_ner"
        self.connector = conn.MongoConnector(self.connect_string)
        self.news_obj = news.MongoNewsService(self.connector)

    def process_documents(
        self, entity: str, doc_attrib: str, ner_selection: str
    ) -> None:
        """Fetch unprocessed documents, extract entities and write them back."""
        cursor_unprocessed = self.news_obj.collection.find(
            {"companies": {"$exists": False}}
        )
        documents = list(cursor_unprocessed)
        logger.info(f"Found {len(documents)} unprocessed documents.")

        # Determine the NER service based on the config
        # spaCy
        if ner_selection == "use_spacy_ner":
            ner_service_instance = ner_service.NerAnalysisService(
                use_spacy=True, use_transformer=False, use_companylist=False
            )
            ner_service_func = ner_service_instance.ner_spacy

        # company list
        elif ner_selection == "use_companylist_ner":
            ner_service_instance = ner_service.NerAnalysisService(
                use_spacy=False, use_transformer=False, use_companylist=True
            )
            ner_service_func = ner_service_instance.ner_company_list

        # transformer
        elif ner_selection == "use_transformer_ner":
            ner_service_instance = ner_service.NerAnalysisService(
                use_spacy=False, use_transformer=True, use_companylist=False
            )
            ner_service_func = ner_service_instance.ner_transformer

        else:
            raise ValueError(f"Unknown NER selection: {ner_selection}")

        if len(documents) > 0:
            for document in tqdm(documents):
                ents = ner_service_func(document, entity, doc_attrib)
                self.news_obj.collection.update_one(
                    {"_id": document["_id"]},
                    {"$set": {"companies": ents}},
                )
        else:
            logger.info("No documents found.")


if __name__ == "__main__":
    # Establish the MongoDB connection using secrets
    config_provider = JsonFileConfigProvider("./secrets.json")
    connect_string = config_provider.get_mongo_connection_string()

    # path of the config JSON
    config_file_path = (
        "src/aki_prj23_transparenzregister/utils/mongo/ner_sentiment_config.json"
    )

    # Load the NER service configuration from JSON
    with open(config_file_path) as config_file:
        ner_config = json.load(config_file)

    # read the configuration
    entity = ner_config["ner_service"]["entity"]
    logger.info(f"NER Pipeline: searching for entities of type {entity}")
    doc_attrib = ner_config["ner_service"]["doc_attrib"]
    logger.info(f"NER Pipeline: searching in document attribute {doc_attrib}")

    # read the selected service
    if ner_config["ner_service"]["use_companylist_ner"] is True:
        ner_selection = "use_companylist_ner"
        logger.info("NER Pipeline: searching entities with the company list")

    elif ner_config["ner_service"]["use_spacy_ner"] is True:
        ner_selection = "use_spacy_ner"
        logger.info("NER Pipeline: searching entities with spaCy")

    elif ner_config["ner_service"]["use_transformer_ner"] is True:
        ner_selection = "use_transformer_ner"
        logger.info("NER Pipeline: searching entities with the transformer")

    else:
        logger.error(
            "NER Pipeline: no NER service selected or error in the configuration file."
        )
        sys.exit(1)

    entity_pipeline = EntityPipeline(connect_string)
    entity_pipeline.process_documents(entity, doc_attrib, ner_selection)
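A minimal usage sketch mirroring the `__main__` block above; it assumes a valid `secrets.json` in the working directory:

```python
from aki_prj23_transparenzregister.config.config_providers import JsonFileConfigProvider
from aki_prj23_transparenzregister.utils.mongo.ner_pipeline import EntityPipeline

# Tag every article that lacks a "companies" attribute with the ORG entities
# found in its "text" attribute, using the spaCy service.
config_provider = JsonFileConfigProvider("./secrets.json")
pipeline = EntityPipeline(config_provider.get_mongo_connection_string())
pipeline.process_documents(entity="ORG", doc_attrib="text", ner_selection="use_spacy_ner")
```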
17  src/aki_prj23_transparenzregister/utils/mongo/ner_sentiment_config.json (Normal file)

@@ -0,0 +1,17 @@
{
    "sentiment_service": {
        "comment": "Select exactly one service by setting it to true and the others to false. Valid doc_attrib values: text, title",
        "use_spacy": false,
        "use_transformer": true,
        "doc_attrib": "text"
    },

    "ner_service": {
        "comment": "Select exactly one service by setting it to true and the others to false. Valid doc_attrib values: text, title",
        "use_spacy_ner": false,
        "use_transformer_ner": true,
        "use_companylist_ner": false,
        "doc_attrib": "text",
        "entity": "ORG"
    }
}
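The pipelines read these flags in an if/elif chain, so exactly one flag per service block should be true. A hypothetical validation helper (not part of this commit) could enforce that:

```python
import json


def selected_ner_service(config_path: str) -> str:
    """Return the single enabled NER flag, or raise if zero or several are set."""
    with open(config_path) as f:
        ner_config = json.load(f)["ner_service"]
    flags = ["use_spacy_ner", "use_transformer_ner", "use_companylist_ner"]
    enabled = [flag for flag in flags if ner_config.get(flag) is True]
    if len(enabled) != 1:
        raise ValueError(f"Exactly one NER service must be enabled, got: {enabled}")
    return enabled[0]
```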
150  src/aki_prj23_transparenzregister/utils/mongo/ner_service.py (Normal file)

@@ -0,0 +1,150 @@
"""NER service module."""

import json
from collections import Counter
from typing import Final

import spacy
from transformers import pipeline


class NerAnalysisService:
    """Class to initialize the NER models."""

    def __init__(
        self,
        use_spacy: bool = False,
        use_transformer: bool = False,
        use_companylist: bool = False,
    ) -> None:
        """Initialize the chosen NER backend."""
        if use_spacy:
            self.init_spacy()
        if use_transformer:
            self.init_transformer()
        if use_companylist:
            self.init_companylist()

    def init_spacy(self) -> None:
        """Initialize spaCy.

        Optimized by ChatGPT.
        """
        # check if the model is available and load it
        SPACY_MODEL_NAME: Final[str] = "de_core_news_lg"  # noqa: N806
        if not spacy.util.is_package(SPACY_MODEL_NAME):
            from spacy.cli.download import download as spacy_download

            spacy_download(SPACY_MODEL_NAME)  # type: ignore
        self.nlp = spacy.load(SPACY_MODEL_NAME)

    def init_transformer(self) -> None:
        """Initialize the transformer."""
        # init the NER transformer pipeline
        self.classifier = pipeline(
            "ner",
            model="fhswf/bert_de_ner",
            grouped_entities=True,
            tokenizer="dbmdz/bert-base-german-cased",
        )

    def init_companylist(self) -> None:
        """Load the company list."""
        with open(
            "src/aki_prj23_transparenzregister/utils/mongo/CompEntities.json", "rb"
        ) as complist:
            self.complist = json.load(complist)

    def ner_spacy(
        self, doc: dict, ent_type: str = "ORG", doc_attrib: str = "title"
    ) -> dict:
        """Named Entity Recognition with spaCy.

        Args:
            doc: a document which is processed with spaCy
            ent_type: entity type to extract (PERSON - people; NORP - nationalities or religious or political groups;
                FAC - buildings, airports, highways, bridges, etc.; ORG - companies, agencies, institutions, etc.;
                GPE - countries, cities, states; LOC - non-GPE locations, mountain ranges, bodies of water)
            doc_attrib: which attribute of the document is processed: text or title

        Returns:
            dict mapping each found entity to its count.
        """
        # collect matching entities
        entities = []

        text = doc[doc_attrib]

        # run the spaCy pipeline
        doc_nlp = self.nlp(text)

        # keep only entities of the requested type
        for ent in doc_nlp.ents:
            if ent.label_ == ent_type:
                entities.append(ent.text)
        return dict(Counter(entities))

    def ner_company_list(
        self, doc: dict, ent_type: str = "ORG", doc_attrib: str = "title"
    ) -> dict:
        """Named Entity Recognition by string comparison.

        Args:
            doc: a dict in which entities are searched
            ent_type: type of the searched entity (unused; the list contains companies only)
            doc_attrib: which attribute of the dict is searched

        Returns:
            dict mapping each found entity to its count.
        """
        # Convert all entries in the company list to lowercase
        self.complist = [company_name.lower() for company_name in self.complist]

        # Create an empty list to store the entities
        entities = []

        # Search the text for company names
        text = doc[doc_attrib]
        # Convert the text to lowercase
        text = text.lower()

        for company_name in self.complist:
            if text.find(company_name) != -1:  # word found
                if company_name not in entities:
                    entities.append(company_name)

        return dict(Counter(entities))

    def ner_transformer(
        self, doc: dict, ent_type: str = "ORG", doc_attrib: str = "title"
    ) -> dict:
        """Named Entity Recognition with a transformer.

        Args:
            doc: a document which is processed with a transformer model
            ent_type: entity type to extract (PERSON - people; NORP - nationalities or religious or political groups;
                FAC - buildings, airports, highways, bridges, etc.; ORG - companies, agencies, institutions, etc.;
                GPE - countries, cities, states; LOC - non-GPE locations, mountain ranges, bodies of water)
            doc_attrib: attribute of the document (title or text)

        Returns:
            dict mapping each found entity to its count.
        """
        # collect matching entities
        entities = []
        text = doc[doc_attrib]
        sentences = text.split(". ")  # Split the text into sentences based on '. '

        # Process each sentence separately
        for sentence in sentences:
            res = self.classifier(sentence)

            for item in res:
                if item["entity_group"] == ent_type:
                    entities.append(item["word"])
        return dict(Counter(entities))
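A usage sketch for the company-list variant; run it from the repository root so the relative path to CompEntities.json resolves (the document dict is illustrative):

```python
from aki_prj23_transparenzregister.utils.mongo.ner_service import NerAnalysisService

service = NerAnalysisService(use_companylist=True)
doc = {"title": "Siemens und BASF kooperieren."}
# Text and list entries are both lowercased, so matches come back in
# lowercase, each counted once per document.
print(service.ner_company_list(doc, ent_type="ORG", doc_attrib="title"))
# expected: {'siemens': 1, 'basf': 1}
```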
91  src/aki_prj23_transparenzregister/utils/mongo/sentiment_pipeline.py (Normal file)

@@ -0,0 +1,91 @@
"""Pipeline to get sentiments for news articles from the staging DB."""

import json
import os

from loguru import logger
from tqdm import tqdm

import aki_prj23_transparenzregister.utils.mongo.connector as conn
import aki_prj23_transparenzregister.utils.mongo.news_mongo_service as news
from aki_prj23_transparenzregister.config.config_providers import JsonFileConfigProvider
from aki_prj23_transparenzregister.config.config_template import MongoConnection
from aki_prj23_transparenzregister.utils.mongo import sentiment_service


class SentimentPipeline:
    """Class to initialize the sentiment pipeline."""

    def __init__(self, conn_string: MongoConnection) -> None:
        """Connect to the staging DB."""
        self.connect_string = conn_string
        self.connect_string.database = "transparenzregister_ner"
        self.connector = conn.MongoConnector(self.connect_string)
        self.news_obj = news.MongoNewsService(self.connector)

    def process_documents(self, doc_attrib: str, sentiment_selection: str) -> None:
        """Fetch unprocessed documents, score them and write the sentiment back."""
        cursor_unprocessed = self.news_obj.collection.find(
            {"sentiment": {"$exists": False}}
        )
        documents = list(cursor_unprocessed)

        if len(documents) > 0:
            for document in tqdm(documents):
                text = document[doc_attrib]

                # Determine the sentiment analysis service based on the config
                if sentiment_selection == "use_spacy":
                    selected_service = sentiment_service.SentimentAnalysisService(
                        use_spacy=True, use_transformer=False
                    )
                    sentiment_service_func = selected_service.sentiment_spacy

                elif sentiment_selection == "use_transformer":
                    selected_service = sentiment_service.SentimentAnalysisService(
                        use_spacy=False, use_transformer=True
                    )
                    sentiment_service_func = selected_service.sentiment_transformer

                sents = sentiment_service_func(text)
                sentiment = {"label": sents[0], "score": sents[1]}
                self.news_obj.collection.update_one(
                    {"_id": document["_id"]},
                    {"$set": {"sentiment": sentiment}},
                )
        else:
            logger.info("No documents found.")


if __name__ == "__main__":
    # Establish the MongoDB connection using secrets
    config_provider = JsonFileConfigProvider("./secrets.json")
    connect_string = config_provider.get_mongo_connection_string()

    # path of the config JSON
    script_dir = os.path.dirname(__file__)
    config_file_path = os.path.join(script_dir, "ner_sentiment_config.json")
    # Load the sentiment service configuration from JSON
    with open(config_file_path) as config_file:
        sentiment_config = json.load(config_file)
    # Which attribute to score
    doc_attrib = sentiment_config["sentiment_service"]["doc_attrib"]
    logger.info(f"Sentiment Pipeline: searching in document attribute {doc_attrib}")

    # read the selected service
    if sentiment_config["sentiment_service"]["use_spacy"] is True:
        sentiment_selection = "use_spacy"
        logger.info("Sentiment Pipeline: scoring sentiments with spaCy")

    elif sentiment_config["sentiment_service"]["use_transformer"] is True:
        sentiment_selection = "use_transformer"
        logger.info("Sentiment Pipeline: scoring sentiments with the transformer")

    else:
        logger.info(
            "Sentiment Pipeline: no sentiment service selected or error in the configuration file."
        )

    sentiment_pipeline = SentimentPipeline(connect_string)
    sentiment_pipeline.process_documents(doc_attrib, sentiment_selection)
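As with the NER pipeline, a minimal run sketch assuming a valid `secrets.json`:

```python
from aki_prj23_transparenzregister.config.config_providers import JsonFileConfigProvider
from aki_prj23_transparenzregister.utils.mongo.sentiment_pipeline import SentimentPipeline

# Score the "text" attribute of every article that has no "sentiment" field yet.
config_provider = JsonFileConfigProvider("./secrets.json")
pipeline = SentimentPipeline(config_provider.get_mongo_connection_string())
pipeline.process_documents(doc_attrib="text", sentiment_selection="use_transformer")
```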
170  src/aki_prj23_transparenzregister/utils/mongo/sentiment_service.py (Normal file)

@@ -0,0 +1,170 @@
"""Service for sentiment analysis."""

import os
import zipfile
from typing import Final

import requests
import spacy
from loguru import logger
from spacy_sentiws import spaCySentiWS  # noqa: F401
from transformers import pipeline


class SentimentAnalysisService:
    """Class to initialize the spaCy or transformer model."""

    def __init__(self, use_spacy: bool = False, use_transformer: bool = False) -> None:
        """Initialize the chosen sentiment backend."""
        if use_spacy:
            self.init_spacy()
        if use_transformer:
            self.init_transformer()

    def init_spacy(self) -> None:
        """Initialize spaCy."""
        # check if the model is available and load it
        SPACY_MODEL_NAME: Final[str] = "de_core_news_lg"  # noqa: N806
        if not spacy.util.is_package(SPACY_MODEL_NAME):
            cli = spacy.cli  # type: ignore
            cli.download(SPACY_MODEL_NAME)  # type: ignore
        self.nlp = spacy.load(SPACY_MODEL_NAME)

        # path to the SentiWS vocabulary
        PATH: Final[str] = "src/aki_prj23_transparenzregister/utils/mongo/SentiWS/"  # noqa: N806
        # check if the vocabulary exists, otherwise download it
        if not os.path.exists(PATH):
            URL: Final[str] = "https://downloads.wortschatz-leipzig.de/etc/SentiWS/SentiWS_v2.0.zip"  # noqa: N806
            logger.info("SentiWS vocabulary not found. Starting download...")
            # Create the data directory if it doesn't exist
            os.makedirs(PATH, exist_ok=True)

            # File path for the downloaded ZIP file
            zip_file_path = os.path.join(PATH, "SentiWS_v2.0.zip")

            # Download the ZIP file
            response = requests.get(URL, timeout=60)
            with open(zip_file_path, "wb") as zip_file:
                zip_file.write(response.content)

            # Extract the ZIP file
            with zipfile.ZipFile(zip_file_path, "r") as zip_ref:
                zip_ref.extractall(PATH)

            # Remove the downloaded ZIP file, it is no longer needed
            os.remove(zip_file_path)

            logger.info("SentiWS data downloaded and extracted successfully.")
        else:
            logger.info("SentiWS data directory already exists.")

        # add SentiWS to the spaCy pipeline
        self.nlp.add_pipe("sentiws", config={"sentiws_path": PATH})

    def init_transformer(self) -> None:
        """Initialize the transformer."""
        # loading the sentiment model (~436 MB) for the transformer
        self.sentiment_analyzer = pipeline(
            "sentiment-analysis", model="oliverguhr/german-sentiment-bert"
        )

    def sentiment_spacy(self, doc: str) -> tuple:
        """Sentiment analysis with spaCy.

        Args:
            doc: a document which is processed with spaCy

        Returns:
            label (positive, negative or neutral) and normalized score.
        """
        # thresholds separating positive/neutral/negative
        _upperlimit = 0.1
        _lowerlimit = -0.1

        _doc = self.nlp(doc)
        _sent = None
        # sentiment counters
        _pos = 0
        _neg = 0
        # sum of absolute sentiment scores, used to normalize values
        _max_score = 0
        for token in _doc:
            token_score = token._.sentiws  # noqa: SLF001
            if token_score is not None:
                _max_score += abs(token_score)
                if token_score < 0:
                    _neg += token_score
                if token_score > 0:
                    _pos += token_score

        # Normalize the score to the range -1..1
        _normalized_score = (_pos - abs(_neg)) / _max_score if _max_score > 0 else 0

        if _normalized_score > _upperlimit:
            _sent = "positive"
        elif _normalized_score < _lowerlimit:
            _sent = "negative"
        else:
            _sent = "neutral"
        return _sent, abs(_normalized_score)

    def sentiment_transformer(self, doc: str) -> tuple:
        """Sentiment analysis with a transformer.

        Args:
            doc: a string which is processed with a transformer model

        Returns:
            sentiment and score.
        """
        sentences = doc.split(". ")  # Split the text into sentences based on '. '
        # total sentiment and score counters
        total_score = 0
        total_positive_score = 0
        total_negative_score = 0
        total_neutral_score = 0

        _score = None
        _sent = None

        # Process each sentence separately
        for sentence in sentences:
            # get the sentiment of the sentence
            results = self.sentiment_analyzer(sentence)
            _score = results[0]["score"]
            _sent = results[0]["label"]

            # sum up the label-specific score
            if _sent == "positive":
                total_positive_score += _score
            elif _sent == "negative":
                total_negative_score += _score
            else:
                total_neutral_score += _score

            # sum up the total score
            total_score += _score

        # normalize the label-specific scores
        total_positive_score_normalized = total_positive_score / total_score
        total_negative_score_normalized = total_negative_score / total_score
        total_neutral_score_normalized = total_neutral_score / total_score

        if total_positive_score_normalized > total_negative_score_normalized:
            final_sentiment = "positive"
            out_score = total_positive_score_normalized
        elif total_positive_score_normalized < total_negative_score_normalized:
            final_sentiment = "negative"
            out_score = total_negative_score_normalized
        else:
            final_sentiment = "neutral"
            out_score = total_neutral_score_normalized

        return final_sentiment, out_score
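A usage sketch for the transformer variant; the first call downloads the ~436 MB model (the input string is illustrative):

```python
from aki_prj23_transparenzregister.utils.mongo.sentiment_service import SentimentAnalysisService

service = SentimentAnalysisService(use_transformer=True)
label, score = service.sentiment_transformer("Das ist ein großartiges Ergebnis.")
# label is "positive", "negative" or "neutral"; score is that label's share
# of the summed per-sentence scores, so it lies in (0, 1].
print(label, score)
```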
291  tests/utils/mongo/ner_pipeline_test.py (Normal file)

@@ -0,0 +1,291 @@
"""Tests for checking the NER pipeline."""

from unittest.mock import Mock, patch

import pytest

from aki_prj23_transparenzregister.config.config_template import MongoConnection
from aki_prj23_transparenzregister.utils.mongo.ner_pipeline import EntityPipeline


@pytest.fixture()
def mock_mongo_connection() -> MongoConnection:
    """Provide a MongoConnection with empty credentials.

    Returns:
        MongoConnection: connection object for the mocked connector
    """
    return MongoConnection("", "", None, "", "")


@pytest.fixture()
def mock_mongo_connector(mocker: Mock) -> Mock:
    """Mock the MongoConnector class.

    Args:
        mocker (any): Library mocker

    Returns:
        Mock: Mocked MongoConnector
    """
    mock = Mock()
    mocker.patch(
        "aki_prj23_transparenzregister.utils.mongo.connector.MongoConnector",
        return_value=mock,
    )
    mock.database = {"news": Mock()}
    return mock


@pytest.fixture()
def mock_spacy(mocker: Mock) -> Mock:
    """Mock the spaCy initialization of the NerAnalysisService.

    Args:
        mocker (any): Library mocker

    Returns:
        Mock: Mocked init_spacy
    """
    mock = Mock()
    mocker.patch(
        "aki_prj23_transparenzregister.utils.mongo.ner_service.NerAnalysisService.init_spacy",
        return_value=mock,
    )
    return mock


# Mocking the NerAnalysisService methods
@patch(
    "aki_prj23_transparenzregister.utils.mongo.ner_service.NerAnalysisService.ner_spacy"
)
def test_entity_pipeline_with_spacy(
    mock_ner_spacy: Mock,
    mock_mongo_connector: Mock,
    mock_mongo_connection: MongoConnection,
    mock_spacy: Mock,
) -> None:
    # Configure the mock to return a specific NER result
    mock_ner_spacy.return_value = {"ORG": 2, "PERSON": 1}

    # Create an instance of the EntityPipeline
    entity_pipeline = EntityPipeline(mock_mongo_connection)

    # Mock the news collection and documents for testing
    mock_collection = Mock()
    mock_documents = [{"_id": "document1", "title": "Apple Inc is a tech company."}]

    # Set the collection to the mock_collection
    entity_pipeline.news_obj.collection = mock_collection

    # Mock the find method of the collection to return the mock documents
    mock_collection.find.return_value = mock_documents

    # Call the process_documents method with spaCy NER
    entity_pipeline.process_documents(
        entity="ORG", doc_attrib="title", ner_selection="use_spacy_ner"
    )

    # Ensure that ner_spacy was called with the correct parameters
    mock_ner_spacy.assert_called_once_with(mock_documents[0], "ORG", "title")

    # Ensure that the document in the collection was updated with the NER results
    mock_collection.update_one.assert_called_once_with(
        {"_id": "document1"},
        {"$set": {"companies": {"ORG": 2, "PERSON": 1}}},
    )


@patch(
    "aki_prj23_transparenzregister.utils.mongo.ner_service.NerAnalysisService.ner_spacy"
)
def test_entity_pipeline_with_spacy_no_docs(
    mock_ner_spacy: Mock,
    mock_mongo_connector: Mock,
    mock_mongo_connection: MongoConnection,
    mock_spacy: Mock,
) -> None:
    # Configure the mock to return a specific NER result
    mock_ner_spacy.return_value = {"ORG": 2, "PERSON": 1}

    # Create an instance of the EntityPipeline
    entity_pipeline = EntityPipeline(mock_mongo_connection)

    # Mock the news collection and documents for testing
    mock_collection = Mock()
    mock_documents: list[dict] = []

    # Set the collection to the mock_collection
    entity_pipeline.news_obj.collection = mock_collection

    # Mock the find method of the collection to return the mock documents
    mock_collection.find.return_value = mock_documents

    # Call the process_documents method with spaCy NER
    entity_pipeline.process_documents(
        entity="ORG", doc_attrib="title", ner_selection="use_spacy_ner"
    )

    # Ensure that ner_spacy was not called
    mock_ner_spacy.assert_not_called()

    # Ensure that the document in the collection was not updated
    mock_collection.update_one.assert_not_called()


@patch(
    "aki_prj23_transparenzregister.utils.mongo.ner_service.NerAnalysisService.ner_company_list"
)
def test_entity_pipeline_with_companylist_ner(
    mock_ner_companylist: Mock,
    mock_mongo_connector: Mock,
    mock_mongo_connection: MongoConnection,
    mock_spacy: Mock,
) -> None:
    # Configure the mock to return a specific NER result
    mock_ner_companylist.return_value = {"ORG": 3, "LOCATION": 2}

    # Create an instance of the EntityPipeline
    entity_pipeline = EntityPipeline(mock_mongo_connection)

    # Mock the news collection and documents for testing
    mock_collection = Mock()
    mock_documents = [
        {"_id": "document2", "title": "Siemens ist ein deutsches Unternehmen."}
    ]

    # Set the collection to the mock_collection
    entity_pipeline.news_obj.collection = mock_collection
    # Mock the find method of the collection to return the mock documents
    mock_collection.find.return_value = mock_documents

    # Call the process_documents method with company-list NER
    entity_pipeline.process_documents(
        entity="ORG", doc_attrib="title", ner_selection="use_companylist_ner"
    )

    # Ensure that ner_company_list was called with the correct parameters
    mock_ner_companylist.assert_called_once_with(mock_documents[0], "ORG", "title")

    # Ensure that the document in the collection was updated with the NER results
    mock_collection.update_one.assert_called_once_with(
        {"_id": "document2"},
        {"$set": {"companies": {"ORG": 3, "LOCATION": 2}}},
    )


@patch(
    "aki_prj23_transparenzregister.utils.mongo.ner_service.NerAnalysisService.ner_company_list"
)
def test_entity_pipeline_with_companylist_ner_no_docs(
    mock_ner_companylist: Mock,
    mock_mongo_connector: Mock,
    mock_mongo_connection: MongoConnection,
    mock_spacy: Mock,
) -> None:
    # Configure the mock to return a specific NER result
    mock_ner_companylist.return_value = {"ORG": 3, "LOCATION": 2}

    # Create an instance of the EntityPipeline
    entity_pipeline = EntityPipeline(mock_mongo_connection)

    # Mock the news collection and documents for testing
    mock_collection = Mock()
    mock_documents: list[dict] = []

    # Set the collection to the mock_collection
    entity_pipeline.news_obj.collection = mock_collection
    # Mock the find method of the collection to return the mock documents
    mock_collection.find.return_value = mock_documents

    # Call the process_documents method with company-list NER
    entity_pipeline.process_documents(
        entity="ORG", doc_attrib="title", ner_selection="use_companylist_ner"
    )

    # Ensure that ner_company_list is not called
    mock_ner_companylist.assert_not_called()

    # Ensure that the document in the collection was not updated
    mock_collection.update_one.assert_not_called()


# Add more test cases for other NER methods (e.g., use_companylist_ner, use_transformer_ner) following a similar pattern.
@patch(
    "aki_prj23_transparenzregister.utils.mongo.ner_service.NerAnalysisService.ner_spacy"
)
def test_entity_pipeline_with_transformer(
    mock_ner_transformer: Mock,
    mock_mongo_connector: Mock,
    mock_mongo_connection: MongoConnection,
    mock_spacy: Mock,
) -> None:
    # Configure the mock to return a specific NER result
    mock_ner_transformer.return_value = {"ORG": 2, "PERSON": 1}

    # Create an instance of the EntityPipeline
    entity_pipeline = EntityPipeline(mock_mongo_connection)

    # Mock the news collection and documents for testing
    mock_collection = Mock()
    mock_documents = [{"_id": "document1", "title": "Apple Inc is a tech company."}]

    # Set the collection to the mock_collection
    entity_pipeline.news_obj.collection = mock_collection

    # Mock the find method of the collection to return the mock documents
    mock_collection.find.return_value = mock_documents

    # Call the process_documents method with spaCy NER
    entity_pipeline.process_documents(
        entity="ORG", doc_attrib="title", ner_selection="use_spacy_ner"
    )

    # Ensure that ner_spacy was called with the correct parameters
    mock_ner_transformer.assert_called_once_with(mock_documents[0], "ORG", "title")

    # Ensure that the document in the collection was updated with the NER results
    mock_collection.update_one.assert_called_once_with(
        {"_id": "document1"},
        {"$set": {"companies": {"ORG": 2, "PERSON": 1}}},
    )


@patch(
    "aki_prj23_transparenzregister.utils.mongo.ner_service.NerAnalysisService.ner_spacy"
)
def test_entity_pipeline_with_transformer_no_docs(
    mock_ner_transformer: Mock,
    mock_mongo_connector: Mock,
    mock_mongo_connection: MongoConnection,
    mock_spacy: Mock,
) -> None:
    # Configure the mock to return a specific NER result
    mock_ner_transformer.return_value = {"ORG": 2, "PERSON": 1}

    # Create an instance of the EntityPipeline
    entity_pipeline = EntityPipeline(mock_mongo_connection)

    # Mock the news collection and documents for testing
    mock_collection = Mock()
    mock_documents: list[dict] = []

    # Set the collection to the mock_collection
    entity_pipeline.news_obj.collection = mock_collection

    # Mock the find method of the collection to return the mock documents
    mock_collection.find.return_value = mock_documents

    # Call the process_documents method with spaCy NER
    entity_pipeline.process_documents(
        entity="ORG", doc_attrib="title", ner_selection="use_spacy_ner"
    )

    # Ensure that ner_transformer is not called
    mock_ner_transformer.assert_not_called()

    # Ensure that the document in the collection was not updated
    mock_collection.update_one.assert_not_called()
54  tests/utils/mongo/ner_service_test.py (Normal file)

@@ -0,0 +1,54 @@
"""Tests for checking the NER services."""

from aki_prj23_transparenzregister.utils.mongo.ner_service import NerAnalysisService


def test_ner_spacy() -> None:
    """Test the spaCy NER service."""
    # Create an instance of NerAnalysisService with use_spacy=True
    ner_service = NerAnalysisService(
        use_spacy=True, use_transformer=False, use_companylist=False
    )
    # 1st test
    doc = {"title": "Siemens ist ein Unternehmen."}
    result = ner_service.ner_spacy(doc, ent_type="ORG", doc_attrib="title")
    assert result == {"Siemens": 1}

    # 2nd test
    doc = {"text": "BASF ist ein großes Unternehmen."}
    result = ner_service.ner_spacy(doc, ent_type="ORG", doc_attrib="text")
    assert result == {"BASF": 1}


def test_ner_company_list() -> None:
    """Test the company-list NER service."""
    # Create an instance of NerAnalysisService with use_companylist=True
    ner_service = NerAnalysisService(
        use_spacy=False, use_transformer=False, use_companylist=True
    )

    doc = {"title": "Siemens ist ein Unternehmen."}
    result = ner_service.ner_company_list(doc, ent_type="ORG", doc_attrib="title")
    assert result == {"siemens": 1}

    # 2nd test
    doc = {"text": "BASF ist ein großes Unternehmen."}
    result = ner_service.ner_company_list(doc, ent_type="ORG", doc_attrib="text")
    assert result == {"basf": 1}


def test_ner_transformer() -> None:
    """Test the transformer NER service."""
    # Create an instance of NerAnalysisService with use_transformer=True
    ner_service = NerAnalysisService(
        use_spacy=False, use_transformer=True, use_companylist=False
    )

    doc = {"title": "Siemens ist ein Unternehmen."}
    result = ner_service.ner_transformer(doc, ent_type="ORG", doc_attrib="title")
    assert result == {"Siemens": 1}

    # 2nd test
    doc = {"text": "BASF ist ein großes Unternehmen."}
    result = ner_service.ner_transformer(doc, ent_type="ORG", doc_attrib="text")
    assert result == {"BASF": 1}
210  tests/utils/mongo/sentiment_pipeline_test.py (Normal file)

@@ -0,0 +1,210 @@
"""Unit tests for the sentiment pipeline."""

from unittest.mock import Mock, patch

import pytest

from aki_prj23_transparenzregister.config.config_template import MongoConnection
from aki_prj23_transparenzregister.utils.mongo.sentiment_pipeline import (
    SentimentPipeline,
)


@pytest.fixture()
def mock_mongo_connection() -> MongoConnection:
    """Provide a MongoConnection with empty credentials.

    Returns:
        MongoConnection: connection object for the mocked connector
    """
    return MongoConnection("", "", None, "", "")


@pytest.fixture()
def mock_mongo_connector(mocker: Mock) -> Mock:
    """Mock the MongoConnector class.

    Args:
        mocker (any): Library mocker

    Returns:
        Mock: Mocked MongoConnector
    """
    mock = Mock()
    mocker.patch(
        "aki_prj23_transparenzregister.utils.mongo.connector.MongoConnector",
        return_value=mock,
    )
    mock.database = {"news": Mock()}
    return mock


@pytest.fixture()
def mock_spacy(mocker: Mock) -> Mock:
    """Mock the spaCy initialization of the SentimentAnalysisService.

    Args:
        mocker (any): Library mocker

    Returns:
        Mock: Mocked init_spacy
    """
    mock = Mock()
    mocker.patch(
        "aki_prj23_transparenzregister.utils.mongo.sentiment_service.SentimentAnalysisService.init_spacy",
        return_value=mock,
    )
    return mock


@patch(
    "aki_prj23_transparenzregister.utils.mongo.sentiment_service.SentimentAnalysisService.sentiment_spacy"
)
def test_sentiment_pipeline_existing_sentiment(
    mock_sentiment_spacy: Mock,
    mock_mongo_connector: Mock,
    mock_mongo_connection: MongoConnection,
    mock_spacy: Mock,
) -> None:
    # Configure the mock to return a specific sentiment result
    mock_sentiment_spacy.return_value = ("positive", 0.8)

    # Create an instance of the SentimentPipeline
    sentiment_pipeline = SentimentPipeline(mock_mongo_connection)

    # Mock the news collection and documents for testing
    mock_collection = Mock()
    mock_documents = [
        {
            "_id": "document1",
            "text": "This is a positive text.",
            "sentiment": {"label": "neutral", "score": 0.5},
        }
    ]

    # Set the collection to the mock_collection
    sentiment_pipeline.news_obj.collection = mock_collection

    # Mock the find method of the collection to return the mock documents
    mock_collection.find.return_value = mock_documents

    # Call the process_documents method
    sentiment_pipeline.process_documents("text", "use_spacy")

    # Ensure that sentiment_spacy was called with the correct text
    mock_sentiment_spacy.assert_called_once_with("This is a positive text.")

    # No update assertion here: the "$exists" filter lives inside the mocked
    # find call, so the document is re-scored despite its existing sentiment.


@patch(
    "aki_prj23_transparenzregister.utils.mongo.sentiment_service.SentimentAnalysisService.sentiment_spacy"
)
def test_sentiment_pipeline_no_documents(
    mock_sentiment_spacy: Mock,
    mock_mongo_connector: Mock,
    mock_mongo_connection: MongoConnection,
    mock_spacy: Mock,
) -> None:
    # Configure the mock to return a specific sentiment result
    mock_sentiment_spacy.return_value = ("positive", 0.8)

    # Create an instance of the SentimentPipeline
    sentiment_pipeline = SentimentPipeline(mock_mongo_connection)

    # Mock the news collection to return an empty result
    mock_collection = Mock()
    mock_collection.find.return_value = []

    # Set the collection to the mock_collection
    sentiment_pipeline.news_obj.collection = mock_collection

    # Call the process_documents method
    sentiment_pipeline.process_documents("text", "use_spacy")

    # Ensure that sentiment_spacy was not called
    mock_sentiment_spacy.assert_not_called()

    # Ensure that the document in the collection was not updated with sentiment
    mock_collection.update_one.assert_not_called()


@patch(
    "aki_prj23_transparenzregister.utils.mongo.sentiment_service.SentimentAnalysisService.sentiment_spacy"
)
def test_sentiment_pipeline_with_spacy(
    mock_sentiment_spacy: Mock,
    mock_mongo_connector: Mock,
    mock_mongo_connection: MongoConnection,
    mock_spacy: Mock,
) -> None:
    # Configure the mock to return a specific sentiment result
    mock_sentiment_spacy.return_value = ("positive", 0.8)

    # Create an instance of the SentimentPipeline
    sentiment_pipeline = SentimentPipeline(mock_mongo_connection)

    # Mock the news collection and documents for testing
    mock_collection = Mock()
    mock_documents = [{"_id": "document1", "text": "This is a positive text."}]

    # Set the collection to the mock_collection
    sentiment_pipeline.news_obj.collection = mock_collection

    # Mock the find method of the collection to return the mock documents
    mock_collection.find.return_value = mock_documents

    # Call the process_documents method
    sentiment_pipeline.process_documents("text", "use_spacy")

    # Ensure that sentiment_spacy was called with the correct text
    mock_sentiment_spacy.assert_called_once_with("This is a positive text.")

    # Ensure that the document in the collection was updated with the sentiment result
    mock_collection.update_one.assert_called_once_with(
        {"_id": "document1"},
        {"$set": {"sentiment": {"label": "positive", "score": 0.8}}},
    )


# Mocking the SentimentAnalysisService methods
@patch(
    "aki_prj23_transparenzregister.utils.mongo.sentiment_service.SentimentAnalysisService.sentiment_transformer"
)
def test_sentiment_pipeline_with_transformer(
    mock_sentiment_transformer: Mock,
    mock_mongo_connector: Mock,
    mock_mongo_connection: MongoConnection,
    mock_spacy: Mock,
) -> None:
    # Configure the mock to return a specific sentiment result
    mock_sentiment_transformer.return_value = ("negative", 0.6)

    # Create an instance of the SentimentPipeline
    sentiment_pipeline = SentimentPipeline(mock_mongo_connection)

    # Mock the news collection and documents for testing
    mock_collection = Mock()
    mock_documents = [{"_id": "document2", "text": "This is a negative text."}]

    # Set the collection to the mock_collection
    sentiment_pipeline.news_obj.collection = mock_collection

    # Mock the find method of the collection to return the mock documents
    mock_collection.find.return_value = mock_documents

    # Call the process_documents method
    sentiment_pipeline.process_documents("text", "use_transformer")

    # Ensure that sentiment_transformer was called with the correct text
    mock_sentiment_transformer.assert_called_once_with("This is a negative text.")

    # Ensure that the document in the collection was updated with the sentiment result
    mock_collection.update_one.assert_called_once_with(
        {"_id": "document2"},
        {"$set": {"sentiment": {"label": "negative", "score": 0.6}}},
    )
78  tests/utils/mongo/sentiment_service_test.py (Normal file)

@@ -0,0 +1,78 @@
"""Tests for checking the sentiment services."""


from aki_prj23_transparenzregister.utils.mongo.sentiment_service import (
    SentimentAnalysisService,
)


def test_sentiment_service_with_spacy_pos() -> None:
    """Test the spaCy sentiment service on a positive text."""
    # Init the SentimentAnalysisService with spaCy
    sentiment_service = SentimentAnalysisService(use_spacy=True)

    # run the test
    text = "Dies ist ein großartiger Test. Ich liebe es!"
    sentiment, score = sentiment_service.sentiment_spacy(text)
    assert sentiment == "positive"
    assert score > 0


def test_sentiment_service_with_spacy_neg() -> None:
    """Test the spaCy sentiment service on a negative text."""
    # Init the SentimentAnalysisService with spaCy
    sentiment_service = SentimentAnalysisService(use_spacy=True)

    # run the test
    text = "Dies ist ein wirklich schrecklicher Test. Ich hasse ihn!"
    sentiment, score = sentiment_service.sentiment_spacy(text)
    assert sentiment == "negative"
    assert score > 0


def test_sentiment_service_with_spacy_neut() -> None:
    """Test the spaCy sentiment service on a neutral text."""
    # Init the SentimentAnalysisService with spaCy
    sentiment_service = SentimentAnalysisService(use_spacy=True)

    # run the test
    text = "Dies ist ein Test."
    sentiment, score = sentiment_service.sentiment_spacy(text)
    assert sentiment == "neutral"
    assert score >= 0


def test_sentiment_service_with_transformer_pos() -> None:
    """Test the transformer sentiment service on a positive text."""
    # Init the SentimentAnalysisService with the transformer
    sentiment_service = SentimentAnalysisService(use_transformer=True)

    # run the test
    text = "Dies ist ein großartiger Test. Ich liebe es!"
    sentiment, score = sentiment_service.sentiment_transformer(text)
    assert sentiment == "positive"
    assert score > 0


def test_sentiment_service_with_transformer_neg() -> None:
    """Test the transformer sentiment service on a negative text."""
    # Init the SentimentAnalysisService with the transformer
    sentiment_service = SentimentAnalysisService(use_transformer=True)

    # run the test
    text = "Dies ist ein wirklich schrecklicher Test. Ich hasse ihn!"
    sentiment, score = sentiment_service.sentiment_transformer(text)
    assert sentiment == "negative"
    assert score > 0


def test_sentiment_service_with_transformer_neut() -> None:
    """Test the transformer sentiment service on a neutral text."""
    # Init the SentimentAnalysisService with the transformer
    sentiment_service = SentimentAnalysisService(use_transformer=True)

    # run the test
    text = "Das ist ein Text, ohne besondere Stimmung."
    sentiment, score = sentiment_service.sentiment_transformer(text)
    assert sentiment == "neutral"
    assert score >= 0