Feature/ner (#103)

NER und Sentiment-Pipeline mit Services zur Datenextraktion.

---------

Co-authored-by: Philipp Horstenkamp <philipp@horstenkamp.de>
Co-authored-by: TrisNol <tristan.nolde@yahoo.de>
This commit is contained in:
Sebastian
2023-10-16 19:54:24 +02:00
committed by GitHub
parent 99b61e7c2e
commit c680ac9759
28 changed files with 12509 additions and 10 deletions

View File

@@ -0,0 +1,54 @@
"""Tests for checking NER Services."""
from aki_prj23_transparenzregister.utils.mongo.ner_service import NerAnalysisService
def test_ner_spacy() -> None:
    """Check ``ner_spacy`` on one title document and one text document.

    A ``NerAnalysisService`` is built with only ``use_spacy`` enabled. For each
    sample document the test asserts that ``ner_spacy`` (called with
    ``ent_type="ORG"``) returns exactly the expected name-to-count mapping.
    """
    service = NerAnalysisService(
        use_spacy=True, use_transformer=False, use_companylist=False
    )

    # (document attribute to analyse, sentence stored there, expected result)
    samples = (
        ("title", "Siemens ist ein Unternehmen.", {"Siemens": 1}),
        ("text", "BASF ist ein großes Unternehmen.", {"BASF": 1}),
    )

    for attribute, sentence, expected in samples:
        document = {attribute: sentence}
        found = service.ner_spacy(document, ent_type="ORG", doc_attrib=attribute)
        assert found == expected
def test_ner_company_list() -> None:
    """Check ``ner_company_list`` on one title document and one text document.

    A ``NerAnalysisService`` is built with only ``use_companylist`` enabled.
    Note that the expected keys are lower-case (``"siemens"``, ``"basf"``),
    unlike the capitalised spelling used in the input sentences.
    """
    service = NerAnalysisService(
        use_spacy=False, use_transformer=False, use_companylist=True
    )

    # (document attribute to analyse, sentence stored there, expected result)
    samples = (
        ("title", "Siemens ist ein Unternehmen.", {"siemens": 1}),
        ("text", "BASF ist ein großes Unternehmen.", {"basf": 1}),
    )

    for attribute, sentence, expected in samples:
        document = {attribute: sentence}
        found = service.ner_company_list(
            document, ent_type="ORG", doc_attrib=attribute
        )
        assert found == expected
def test_ner_transformer() -> None:
    """Check ``ner_transformer`` on one title document and one text document.

    A ``NerAnalysisService`` is built with only ``use_transformer`` enabled.
    For each sample document the test asserts that ``ner_transformer`` (called
    with ``ent_type="ORG"``) returns exactly the expected name-to-count mapping.
    """
    service = NerAnalysisService(
        use_spacy=False, use_transformer=True, use_companylist=False
    )

    # (document attribute to analyse, sentence stored there, expected result)
    samples = (
        ("title", "Siemens ist ein Unternehmen.", {"Siemens": 1}),
        ("text", "BASF ist ein großes Unternehmen.", {"BASF": 1}),
    )

    for attribute, sentence, expected in samples:
        document = {attribute: sentence}
        found = service.ner_transformer(
            document, ent_type="ORG", doc_attrib=attribute
        )
        assert found == expected