aki_prj23_transparenzregister/tests/ai/ner_pipeline_test.py
Philipp Horstenkamp 066800123d
Created pipeline to run ner sentiment and sql ingest (#314)
Created a data processing pipeline that enhances the raw mined data with
organisation extraction and sentiment analysis prior to moving the data
to the SQL db.
The transfer of matched data is done afterwards.

---------

Co-authored-by: SeZett <zeleny.sebastian@fh-swf.de>
2023-11-11 13:28:12 +00:00

271 lines
9.1 KiB
Python

"""Tests for checking NER Pipeline."""
from unittest.mock import Mock, patch
import pytest
from aki_prj23_transparenzregister.ai.ner_pipeline import EntityPipeline
from aki_prj23_transparenzregister.config.config_template import MongoConnection
@pytest.fixture()
def mock_mongo_connection() -> MongoConnection:
    """Provide a placeholder MongoConnection for building the pipeline.

    Returns:
        MongoConnection: Instance created from five positional arguments:
            two empty strings, ``None``, and two more empty strings.
    """
    # The fourth argument used to be written as `"" ""`, which Python joins
    # into a single empty string. It is spelled out as one literal here so the
    # call no longer looks like it is missing a comma.
    return MongoConnection("", "", None, "", "")
@pytest.fixture()
def mock_mongo_connector(mocker: Mock) -> Mock:
    """Swap the MongoConnector class for a mock while a test runs.

    Args:
        mocker (any): Mocker fixture used to install the patch.

    Returns:
        Mock: Object handed back whenever the patched MongoConnector is
            called; its ``database`` attribute maps ``"news"`` to a Mock.
    """
    connector = Mock()
    # Expose a dict-style database that contains a "news" entry.
    connector.database = {"news": Mock()}
    mocker.patch(
        "aki_prj23_transparenzregister.utils.mongo.connector.MongoConnector",
        return_value=connector,
    )
    return connector
@pytest.fixture()
def mock_spacy(mocker: Mock) -> Mock:
    """Patch NerAnalysisService.init_spacy so that it returns a mock.

    Args:
        mocker (any): Mocker fixture used to install the patch.

    Returns:
        Mock: Object that the patched ``init_spacy`` returns.
    """
    init_result = Mock()
    mocker.patch(
        "aki_prj23_transparenzregister.ai.ner_service.NerAnalysisService.init_spacy",
        return_value=init_result,
    )
    return init_result
# Mocking the NerAnalysisService methods
@patch("aki_prj23_transparenzregister.ai.ner_service.NerAnalysisService.ner_spacy")
def test_entity_pipeline_with_spacy(
    mock_ner_spacy: Mock,
    mock_mongo_connector: Mock,
    mock_mongo_connection: MongoConnection,
    mock_spacy: Mock,
) -> None:
    """One document is passed to ner_spacy and the result is written back."""
    # Arrange: fixed NER result and a collection that yields one document.
    mock_ner_spacy.return_value = {"ORG": 2, "PERSON": 1}
    document = {"_id": "document1", "title": "Apple Inc is a tech company."}
    news_collection = Mock()
    news_collection.find.return_value = [document]

    pipeline = EntityPipeline(mock_mongo_connection)
    pipeline.news_obj.collection = news_collection

    # Act: run the pipeline on the title attribute using spaCy NER.
    pipeline.process_documents(doc_attrib="title", ner_method="spacy")

    # Assert: NER saw the document, and the update stores the NER result.
    mock_ner_spacy.assert_called_once_with(document, "ORG", "title")
    news_collection.update_one.assert_called_once_with(
        {"_id": "document1"},
        {"$set": {"companies": {"ORG": 2, "PERSON": 1}}},
    )
@patch("aki_prj23_transparenzregister.ai.ner_service.NerAnalysisService.ner_spacy")
def test_entity_pipeline_with_spacy_no_docs(
    mock_ner_spacy: Mock,
    mock_mongo_connector: Mock,
    mock_mongo_connection: MongoConnection,
    mock_spacy: Mock,
) -> None:
    """With no documents, ner_spacy is not called and nothing is updated."""
    # Configure the mock to return a specific NER result
    mock_ner_spacy.return_value = {"ORG": 2, "PERSON": 1}
    # Create an instance of the EntityPipeline
    entity_pipeline = EntityPipeline(mock_mongo_connection)
    # Mock the news collection; it yields no documents
    mock_collection = Mock()
    mock_documents: list[dict] = []
    # Set the collection to the mock_collection
    entity_pipeline.news_obj.collection = mock_collection
    # Mock the find method of the collection to return the empty list
    mock_collection.find.return_value = mock_documents
    # Call the process_documents method with spaCy NER
    entity_pipeline.process_documents(doc_attrib="title", ner_method="spacy")
    # Ensure that ner_spacy was not called
    mock_ner_spacy.assert_not_called()
    # Ensure that no document in the collection was updated. Asserting on the
    # collection mock itself would only check that the mock object was never
    # called like a function, which passes regardless of update_one calls.
    mock_collection.update_one.assert_not_called()
@patch(
    "aki_prj23_transparenzregister.ai.ner_service.NerAnalysisService.ner_company_list"
)
def test_entity_pipeline_with_company_list_ner(
    mock_ner_company_list: Mock,
    mock_mongo_connector: Mock,
    mock_mongo_connection: MongoConnection,
    mock_spacy: Mock,
) -> None:
    """One document is passed to ner_company_list and the result is stored."""
    # Arrange: fixed NER result and a collection that yields one document.
    mock_ner_company_list.return_value = {"ORG": 3, "LOCATION": 2}
    document = {"_id": "document2", "title": "Siemens ist ein deutsches Unternehmen."}
    news_collection = Mock()
    news_collection.find.return_value = [document]

    pipeline = EntityPipeline(mock_mongo_connection)
    pipeline.news_obj.collection = news_collection

    # Act: run the pipeline on the title attribute using company-list NER.
    pipeline.process_documents(doc_attrib="title", ner_method="company_list")

    # Assert: ner_company_list was called with the expected parameters.
    mock_ner_company_list.assert_called_once_with(document, "ORG", "title")
    # Assert: the document in the collection was updated with the NER result.
    news_collection.update_one.assert_called_once_with(
        {"_id": "document2"},
        {"$set": {"companies": {"ORG": 3, "LOCATION": 2}}},
    )
@patch(
    "aki_prj23_transparenzregister.ai.ner_service.NerAnalysisService.ner_company_list"
)
def test_entity_pipeline_with_company_list_ner_no_docs(
    mock_ner_company_list: Mock,
    mock_mongo_connector: Mock,
    mock_mongo_connection: MongoConnection,
    mock_spacy: Mock,
) -> None:
    """With no documents, ner_company_list is not called and nothing is updated."""
    # Arrange: fixed NER result and a collection that yields no documents.
    mock_ner_company_list.return_value = {"ORG": 3, "LOCATION": 2}
    documents: list[dict] = []
    news_collection = Mock()
    news_collection.find.return_value = documents

    pipeline = EntityPipeline(mock_mongo_connection)
    pipeline.news_obj.collection = news_collection

    # Act: run the pipeline on the title attribute using company-list NER.
    pipeline.process_documents(doc_attrib="title", ner_method="company_list")

    # Assert: no NER call and no update happened.
    mock_ner_company_list.assert_not_called()
    news_collection.update_one.assert_not_called()
@patch("aki_prj23_transparenzregister.ai.ner_service.NerAnalysisService.ner_spacy")
def test_entity_pipeline_with_transformer(
    mock_ner_transformer: Mock,
    mock_mongo_connector: Mock,
    mock_mongo_connection: MongoConnection,
    mock_spacy: Mock,
) -> None:
    """One document is passed to the patched NER method and the result is stored.

    NOTE(review): despite its name, this test patches ``ner_spacy`` and runs
    the pipeline with ``ner_method="spacy"``, so it exercises the same path as
    the spaCy test. Presumably a transformer variant was intended - confirm
    against NerAnalysisService and EntityPipeline before changing it.
    """
    # Arrange: fixed NER result and a collection that yields one document.
    mock_ner_transformer.return_value = {"ORG": 2, "PERSON": 1}
    document = {"_id": "document1", "title": "Apple Inc is a tech company."}
    news_collection = Mock()
    news_collection.find.return_value = [document]

    pipeline = EntityPipeline(mock_mongo_connection)
    pipeline.news_obj.collection = news_collection

    # Act: run the pipeline on the title attribute.
    pipeline.process_documents(doc_attrib="title", ner_method="spacy")

    # Assert: the patched method saw the document, and the result is stored.
    mock_ner_transformer.assert_called_once_with(document, "ORG", "title")
    news_collection.update_one.assert_called_once_with(
        {"_id": "document1"},
        {"$set": {"companies": {"ORG": 2, "PERSON": 1}}},
    )
@patch("aki_prj23_transparenzregister.ai.ner_service.NerAnalysisService.ner_spacy")
def test_entity_pipeline_with_transformer_no_docs(
    mock_ner_transformer: Mock,
    mock_mongo_connector: Mock,
    mock_mongo_connection: MongoConnection,
    mock_spacy: Mock,
) -> None:
    """With no documents, the patched NER method is not called and nothing is updated.

    NOTE(review): despite its name, this test patches ``ner_spacy`` and runs
    the pipeline with ``ner_method="spacy"``. Presumably a transformer variant
    was intended - confirm against NerAnalysisService and EntityPipeline
    before changing it.
    """
    # Arrange: fixed NER result and a collection that yields no documents.
    mock_ner_transformer.return_value = {"ORG": 2, "PERSON": 1}
    documents: list[dict] = []
    news_collection = Mock()
    news_collection.find.return_value = documents

    pipeline = EntityPipeline(mock_mongo_connection)
    pipeline.news_obj.collection = news_collection

    # Act: run the pipeline on the title attribute.
    pipeline.process_documents(doc_attrib="title", ner_method="spacy")

    # Assert: no NER call and no update happened.
    mock_ner_transformer.assert_not_called()
    news_collection.update_one.assert_not_called()