mirror of
https://github.com/fhswf/aki_prj23_transparenzregister.git
synced 2025-06-21 15:13:55 +02:00
Feature/ner (#103)
NER und Sentiment-Pipeline mit Services zur Datenextraktion. --------- Co-authored-by: Philipp Horstenkamp <philipp@horstenkamp.de> Co-authored-by: TrisNol <tristan.nolde@yahoo.de>
This commit is contained in:
1
Jupyter/NER/.$Flow_Chart_NER_Function.drawio.bkp
Normal file
1
Jupyter/NER/.$Flow_Chart_NER_Function.drawio.bkp
Normal file
@ -0,0 +1 @@
|
||||
<mxfile host="Electron" modified="2023-08-17T13:01:42.139Z" agent="5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) draw.io/20.8.10 Chrome/106.0.5249.199 Electron/21.3.5 Safari/537.36" etag="3J0g9IE6MsjTAohlE4lt" version="20.8.10" type="device"><diagram id="C5RBs43oDa-KdzZeNtuy" name="Page-1">7VrbUuM4EP2aVO0+QNkxuT1uAszswlCzA7MMT1OKrTgiimVkOZf5+m3ZUmxHJjETgiHAC1FbN/c5p7stu+EMpotPHIXjL8zDtNG0vEXDOW00m7bda8E/aVmmlo7TSQ0+J57qlBmuyS+sjJayxsTDUaGjYIwKEhaNLgsC7IqCDXHO5sVuI0aLq4bIx4bh2kXUtN4ST4xTa7fZyeyfMfHHemW73UuvTJHurO4kGiOPzXMm56zhDDhjIv01XQwwlc7Tfrn9e3lLLyftT//8Gz2g7/2Lm6v/jtLJzp8yZHULHAfit6deTJb27GISx+7NQ8/+8c16uLg/Ul6IxFL7C3vgPtVkXIyZzwJEzzJrn7M48LCc1YJW1ueSsRCMNhjvsRBLxQUUCwamsZhSdRUviPghhx+3VOsud+V0oWZOGkvdCARf5gbJ5l3+WjYsaelx6f3Jm1qjwxZfqn4Ri7mLN/RzFKUR97HY4OiMMKA0zKYYNgnjOKZIkFlxc0hR3l/1y2CFHwrZJxBIbXKGaKxWItMQgJMsZ15MQZ/rNMhAlrjMx0Tg6xAlrphDoCgCOmKBUGjbcJd9n6IoUgBEgrPJSnqy90pHGT5V4ZhhLvAi5yzTo/pqR8UtFbYcJeJ5FgNsLexxTv8n1p4wODEwuNvodqvo4hJFlSovD0WiqMD7S0ZRaA8pcyep6ZzIzSu56KDd3Y4WRUNM+8id+MlGB4wyDpcCFsjAsD10PLP42qb4yuezSsOASR5FFk2MyupUM31lJBC5Lmw0imBj69RZLfj7bGoZbLpiO4fyN0a3msjUeRqX7FfPpbbBpUs0DRvNNgU39IccfvnyV0hj3weywHRBwzk3Y9eYTYdxtD1dFDCWDDpHU0Klvz5jOsOCuKgkqSBKfFj31AW4MS8nDyxJAh9a7ax1k5AV4u/+kk1X18QK81a7JNtYJdmmu69s0zEw/QropdA1LZrge2AZv9ddA6Fbd8rvVgnSbzoEv3jG19F0e8q3K4ZpxR7r2IG/AoFefxXQq1RTfpQB+2FY89AKAb3DHJ/6MR2alcAw5kFSCLBYfFQCmyqBdlkSetFKQAfCHKjfcIgIgGldHmIdsDraW+p23XWAjhRFCKj0adNKFXZgGKzXYh3rBTEoPX/r7pwW389B5+MHmBUOOh8hxrMfdG7aZE5oq+Slc5fLghHxf4aczYiHuZnctAU2kA3iGAlZWllIPj+x4b18VSHpDGwGp4ylkqGTB74miMqOwAV40GJ+8sCVNL6wwGencn4Zdp1+MIzCFXC1id8QdVXOPC5+q6B9u0T7qz557bf2pf3eh/Z3075+b7dN+806tW/WOYb2cQByxM8p+fSlJWFBwi8uS075jpK/Y7l36pa7fox5/Xrft2531ePaE6gCvOmsvVqz15BM44QatdNj6uN3tTXJS12yiukdRwINKYnGEVoXdTFzH7Z0V9V3fdK1DR+/hHRfado9qZh2W3WmXfO9tqHGAM+jn5BAd0u8iRKR6+IoWuXfK5j5aMAo1Xp9X7nW0ScL9Qm2+SHYNSFWEGy7TsGanw4Ygh3EPGL8ewAPyT4Hvcmz3irSfYgxJ3iDPEHS8tg4wiLRt0y2yVJa3h5z4ylgEalCW54v68lGBFM5GBZ7iFma5achCpIFtendRYBe3Snb/HqAjKBtUujYBdeLP/5MIUxRUk
yyzIq98iuEfTnadoqebpUdY9glnrbt1r6Crfkd34d036x02yUfiTyXdKGZfZecPndlX3c7Z/8D</diagram></mxfile>
|
1
Jupyter/NER/Flow_Chart_NER_Function.drawio
Normal file
1
Jupyter/NER/Flow_Chart_NER_Function.drawio
Normal file
@ -0,0 +1 @@
|
||||
<mxfile host="Electron" modified="2023-08-20T09:25:41.321Z" agent="5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) draw.io/19.0.3 Chrome/102.0.5005.63 Electron/19.0.3 Safari/537.36" etag="NgKJlrtoa61kZ5SbTZB-" version="19.0.3" type="device" pages="2"><diagram id="C5RBs43oDa-KdzZeNtuy" name="Pipeline">7VpZc6M4EP41rtp9SIojvh7XzjG7yUzNTjKbydOUDDIolhERwsf8+m2BxGEom0zs2HGSyoPV6ID+vq+7JWjZw+niiqPQ/8xcTFuW4S5a9nnLsswzy2rJf8NdppZep50aPE5c1Sk33JJfWBkNZY2Ji6NSR8EYFSQsGx0WBNgRJRvinM3L3caMllcNkYcrhlsH0ar1nrjCV09hdXP7J0w8X69sdvrplSnSndWTRD5y2bxgsi9a9pAzJtJf08UQU+k87Zf7v5f39GbSufrn3+gJfR9c33357ySd7PI5Q7JH4DgQvz31YrI0Z9eTOHbunvrmj2/G0/XjifJCJJbaX9gF96km48JnHgsQvcitA87iwMVyVgNaeZ8bxkIwmmB8xEIsFRdQLBiYfDGl6ipeEPFDDj9tq9ZD4cr5Qs2cNJa6EQi+LAySzYfitXxY0tLjGrpOuThiMXfwmn62YjDiHhZr/KooI51ZoKEC5gqzKYabhA4cUyTIrMxVpCjvZf1yWOGHQvYZBFJ3PUM0ViuRaQjASZYzN6agz1Ua5CBLXOY+Efg2RIlv5hAoyoCOWSAU2iY85cCjKIoUAJHgbJJJT/bOdPR8fGaYC7xY61F9tavClApbtkJknscAUwvbL+j/zNgRBmcVDB7Wut0ou7hGUbXKK0KRKCpw/5JRFNojypxJarok8uaVXHTQ7m1Gi6IRpgPkTLzkRoeMMg6XAhbIwLA5dGxZjZ2qGuvnaypHRRZNjMbqVDN9ZSQQhS5sPI7gxlapky34+2xqV9j0hb04lL8xuu2JTN3ncck8eC51Kly6QdOwZXUouGEw4vDLk79CGnsekAWmC1r2ZTV2+Ww6iqPN6aKEsWTQJZoSKv31CdMZFsRBNUkFUeLBuucOwI15PXlgSRJ40OrkrbuErBB/d5dsena3lGzanZpsY9Rkm96usk23gulXQC+FzjJogu+RZfx+bwWE3r5Tfq9JkH7TIfjVM76OpptTvtkwTCv2GKc2/JUIdPhVQL9RTflRBuyGYdaxFQL6Dgt8GsR0VK0ERjEPkkKAxeKjElhXCXTqktCrVgI6EBZA/YZDRABM4+YY64DsaE8fWFr7rgN0pChDQKVPLSNV2JFhsFqLdY1XxKD2QK734rR4tAed6w4wNx50Ni2ztn7Que6uC0LLkpfOXQ4LxsT7GXI2Iy7m1eSmLXAD+SCOkZCllYHk/omNHuWrCklnYDN4yZdKhk4uOJ8gKjsCF2Cjxbxkw5U0PrPAY+dyfhl27UEwisIMuGMSv1HSvlmj/axPUfvtXWm//6H9Z2lfv6bbpP2mBfDraL9a51S0jwOQI96m5NOXloQFCb+4LDnlO0r+juXe3bfc9Tbm8PW+Zd1uXY8rO1AFuGWvvFozV5BMA4ca9aJt6rrH3JDkpS5Zw/SOI4FGlER+hFZFXc7cxy3drPren3TNio9fQ7qHkXbPGqbd9kGl3ep77YoaAzyPfkICfVniTZSIHAdHUZZ/v8DMJ0NGqdbr+8q1tj5Z2J9grXcs2HZDwXYOSrDVTwcqgh3GPGL8ewCbZI+D3uRZbxPpPsWYE7xGniBpeWwcYZHoWybbZCktb5c58RTAiVShLc+X9WRjgqkcDIs9xSzN8tMQBcmC2vTuIkB/3ym7+vUAGUO7SqFTB1wv/vgzhTBFSTHJqFbsj
V8h7MrRpl32dLvuGMOs8bRptncVbKvf8X1I981Kt1Pzkci2pAvN/LvkdN+Vf91tX/wP</diagram><diagram id="KGNsg-YwxBcfRzNuZPCy" name="Service">dZHBDoIwDIafZnfYFPGMiBdPHDwvrLIlg5IxA/r0QjbEBU126L7/77q2hGXNWBjeySsK0IRGYiTsRCiNd5SS+UTi6Uia7B2ojRLetIJSvcDDyNOHEtAHRouorepCWGHbQmUDxo3BIbTdUYdVO17DBpQV11t6U8JKT+PkuAoXULX0pVN6cELDF7PvpJdc4PCFWE5YZhCti5oxAz0Pb5mLyzv/UT8fM9DaHwlTsL49XYINsfwN</diagram></mxfile>
|
38
Jupyter/NER/NER-Pipeline.md
Normal file
38
Jupyter/NER/NER-Pipeline.md
Normal file
@ -0,0 +1,38 @@
|
||||
```mermaid
|
||||
flowchart LR
|
||||
DBConnect["`**Mongo Connect**
|
||||
- create connection string
|
||||
- establish connection`"]
|
||||
|
||||
DBRead["`**Mongo Read**
|
||||
- read database
|
||||
- get fields without attribute 'companies'`"]
|
||||
|
||||
NER["`**NERService**
|
||||
- process news article
|
||||
- get entities`"]
|
||||
|
||||
DBUpdate["`**Mongo Update Documents**
|
||||
- update processed documents
|
||||
- add an attribute 'companies'`"]
|
||||
|
||||
id1[["`**NERSpacy**
|
||||
Named Entity Recognition with spaCy`"]]
|
||||
|
||||
id2[["`**NERCompanyList**
|
||||
Named Entity Recognition by comparing text with list`"]]
|
||||
|
||||
id3[["`**NERTransformer**
|
||||
Named Entity Recognition with transformer`"]]
|
||||
|
||||
DBConnect-->DBRead-->NER
|
||||
NER--select service-->id1
|
||||
NER--select service-->id2
|
||||
NER--select service-->id3
|
||||
|
||||
id1-->DBUpdate
|
||||
id2-->DBUpdate
|
||||
id3-->DBUpdate
|
||||
|
||||
|
||||
```
|
1898
Jupyter/NER/NER_Pipeline.ipynb
Normal file
1898
Jupyter/NER/NER_Pipeline.ipynb
Normal file
File diff suppressed because one or more lines are too long
1066
Jupyter/NER/NER_from_StagingDB.ipynb
Normal file
1066
Jupyter/NER/NER_from_StagingDB.ipynb
Normal file
File diff suppressed because one or more lines are too long
32
Jupyter/NER/Sentiment-Pipeline.md
Normal file
32
Jupyter/NER/Sentiment-Pipeline.md
Normal file
@ -0,0 +1,32 @@
|
||||
```mermaid
|
||||
flowchart LR
|
||||
DBConnect["`**Mongo Connect**
|
||||
- create connection string
|
||||
- establish connection`"]
|
||||
|
||||
DBRead["`**Mongo Read**
|
||||
- read database
|
||||
- get fields without attribute 'sentiment'`"]
|
||||
|
||||
NER["`**SentimentService**
|
||||
- process news article
|
||||
- get sentiment`"]
|
||||
|
||||
DBUpdate["`**Mongo Update Documents**
|
||||
- update processed documents
|
||||
- add an attribute 'sentiment'`"]
|
||||
|
||||
id1[["`**SentimentSpacy**
|
||||
Sentiment analysis with spaCy`"]]
|
||||
|
||||
|
||||
id3[["`**SentimentTransformer**
|
||||
Sentiment analysis with transformer`"]]
|
||||
|
||||
DBConnect-->DBRead-->NER
|
||||
NER--select service-->id1
|
||||
NER--select service-->id3
|
||||
|
||||
id1-->DBUpdate
|
||||
id3-->DBUpdate
|
||||
```
|
952
Jupyter/NER/Sentiment_Pipeline.ipynb
Normal file
952
Jupyter/NER/Sentiment_Pipeline.ipynb
Normal file
File diff suppressed because one or more lines are too long
2424
Jupyter/Sentiment_Company_Matching/Name_Matching.ipynb
Normal file
2424
Jupyter/Sentiment_Company_Matching/Name_Matching.ipynb
Normal file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user