aki_prj23_transparenzregister/tests/ai/ner_pipeline_test.py

284 lines
9.5 KiB
Python

"""Tests for checking NER Pipeline."""
from unittest.mock import Mock, patch
import pytest
from aki_prj23_transparenzregister.ai.ner_pipeline import EntityPipeline
from aki_prj23_transparenzregister.config.config_template import MongoConnection
@pytest.fixture()
def mock_mongo_connection() -> MongoConnection:
    """Provide a placeholder MongoConnection for constructing the pipeline.

    Returns:
        MongoConnection: Connection settings built from empty strings and a
            ``None`` third argument.
    """
    # The original spelled the fourth argument as `"" ""` (implicit string
    # concatenation, i.e. a single empty string); written explicitly here so
    # the number of positional arguments is obvious.
    return MongoConnection("", "", None, "", "")
@pytest.fixture()
def mock_mongo_connector(mocker: Mock) -> Mock:
    """Replace the MongoConnector class with a stub.

    Args:
        mocker (any): Library mocker

    Returns:
        Mock: Stub connector whose ``database`` mapping holds a mocked
            ``"news"`` entry.
    """
    connector_target = (
        "aki_prj23_transparenzregister.utils.mongo.connector.MongoConnector"
    )
    connector_stub = Mock(database={"news": Mock()})
    # Instantiating the patched class yields the stub.
    mocker.patch(connector_target, return_value=connector_stub)
    return connector_stub
@pytest.fixture()
def mock_spacy(mocker: Mock) -> Mock:
    """Patch ``NerAnalysisService.init_spacy`` so it returns a stub.

    Args:
        mocker (any): Library mocker

    Returns:
        Mock: The object returned by the patched ``init_spacy``.
    """
    init_spacy_target = (
        "aki_prj23_transparenzregister.ai.ner_service.NerAnalysisService.init_spacy"
    )
    spacy_stub = Mock()
    mocker.patch(init_spacy_target, return_value=spacy_stub)
    return spacy_stub
# Mocking the NerAnalysisService methods
@patch("aki_prj23_transparenzregister.ai.ner_service.NerAnalysisService.ner_spacy")
def test_entity_pipeline_with_spacy(
    mock_ner_spacy: Mock,
    mock_mongo_connector: Mock,
    mock_mongo_connection: MongoConnection,
    mock_spacy: Mock,
) -> None:
    """Check that a spaCy NER result is written back for a single document.

    ``mock_mongo_connector`` and ``mock_spacy`` are requested only for the
    patches they install; the test body does not use them directly.
    """
    # Canned NER result handed out by the patched ner_spacy.
    mock_ner_spacy.return_value = {"ORG": 2, "PERSON": 1}

    # Stub news collection whose find() yields exactly one document.
    document = {"_id": "document1", "title": "Apple Inc is a tech company."}
    news_collection = Mock()
    news_collection.find.return_value = [document]

    pipeline = EntityPipeline(mock_mongo_connection)
    pipeline.news_obj.collection = news_collection

    pipeline.process_documents(
        entity="ORG", doc_attrib="title", ner_selection="use_spacy_ner"
    )

    # The document is analysed once and its NER result stored via update_one.
    mock_ner_spacy.assert_called_once_with(document, "ORG", "title")
    news_collection.update_one.assert_called_once_with(
        {"_id": "document1"},
        {"$set": {"companies": {"ORG": 2, "PERSON": 1}}},
    )
@patch("aki_prj23_transparenzregister.ai.ner_service.NerAnalysisService.ner_spacy")
def test_entity_pipeline_with_spacy_no_docs(
    mock_ner_spacy: Mock,
    mock_mongo_connector: Mock,
    mock_mongo_connection: MongoConnection,
    mock_spacy: Mock,
) -> None:
    """Check that nothing is analysed or updated when ``find`` yields no documents.

    ``mock_mongo_connector`` and ``mock_spacy`` are requested only for the
    patches they install; the test body does not use them directly.
    """
    # Configure the mock to return a specific NER result
    mock_ner_spacy.return_value = {"ORG": 2, "PERSON": 1}
    # Create an instance of the EntityPipeline
    entity_pipeline = EntityPipeline(mock_mongo_connection)
    # Mock the news collection; there are no documents to process
    mock_collection = Mock()
    mock_documents: list[dict] = []
    # Set the collection to the mock_collection
    entity_pipeline.news_obj.collection = mock_collection
    # Mock the find method of the collection to return the (empty) documents
    mock_collection.find.return_value = mock_documents
    # Call the process_documents method with spaCy NER
    entity_pipeline.process_documents(
        entity="ORG", doc_attrib="title", ner_selection="use_spacy_ner"
    )
    # Ensure that ner_spacy was not called
    mock_ner_spacy.assert_not_called()
    # Ensure that no document in the collection was updated. Asserting on the
    # collection mock itself would only check that it was never invoked as a
    # callable, which is always true; update_one is the call that matters.
    mock_collection.update_one.assert_not_called()
@patch(
    "aki_prj23_transparenzregister.ai.ner_service.NerAnalysisService.ner_company_list"
)
def test_entity_pipeline_with_companylist_ner(
    mock_ner_companylist: Mock,
    mock_mongo_connector: Mock,
    mock_mongo_connection: MongoConnection,
    mock_spacy: Mock,
) -> None:
    """Check that a company-list NER result is written back for a single document.

    ``mock_mongo_connector`` and ``mock_spacy`` are requested only for the
    patches they install; the test body does not use them directly.
    """
    # Canned NER result handed out by the patched ner_company_list.
    mock_ner_companylist.return_value = {"ORG": 3, "LOCATION": 2}

    # Stub news collection whose find() yields exactly one document.
    document = {"_id": "document2", "title": "Siemens ist ein deutsches Unternehmen."}
    news_collection = Mock()
    news_collection.find.return_value = [document]

    pipeline = EntityPipeline(mock_mongo_connection)
    pipeline.news_obj.collection = news_collection

    pipeline.process_documents(
        entity="ORG", doc_attrib="title", ner_selection="use_companylist_ner"
    )

    # ner_company_list must have been called with the expected parameters.
    mock_ner_companylist.assert_called_once_with(document, "ORG", "title")
    # The document in the collection must have been updated with the NER result.
    news_collection.update_one.assert_called_once_with(
        {"_id": "document2"},
        {"$set": {"companies": {"ORG": 3, "LOCATION": 2}}},
    )
@patch(
    "aki_prj23_transparenzregister.ai.ner_service.NerAnalysisService.ner_company_list"
)
def test_entity_pipeline_with_companylist_ner_no_docs(
    mock_ner_companylist: Mock,
    mock_mongo_connector: Mock,
    mock_mongo_connection: MongoConnection,
    mock_spacy: Mock,
) -> None:
    """Check that company-list NER is skipped when ``find`` yields no documents.

    ``mock_mongo_connector`` and ``mock_spacy`` are requested only for the
    patches they install; the test body does not use them directly.
    """
    # A result is configured, but it must never be requested.
    mock_ner_companylist.return_value = {"ORG": 3, "LOCATION": 2}

    # Stub news collection whose find() yields nothing.
    no_documents: list[dict] = []
    news_collection = Mock()
    news_collection.find.return_value = no_documents

    pipeline = EntityPipeline(mock_mongo_connection)
    pipeline.news_obj.collection = news_collection

    pipeline.process_documents(
        entity="ORG", doc_attrib="title", ner_selection="use_companylist_ner"
    )

    # Neither the NER method nor update_one may have been called.
    mock_ner_companylist.assert_not_called()
    news_collection.update_one.assert_not_called()
# Add more test cases for other NER methods (e.g., use_companylist_ner, use_transformer_ner) following a similar pattern.
# NOTE(review): this test previously patched ``ner_spacy`` and selected
# "use_spacy_ner", duplicating the spaCy test. The target below assumes that
# ``NerAnalysisService.ner_transformer`` exists and that process_documents
# accepts "use_transformer_ner" -- confirm against ner_service / ner_pipeline.
@patch(
    "aki_prj23_transparenzregister.ai.ner_service.NerAnalysisService.ner_transformer"
)
def test_entity_pipeline_with_transformer(
    mock_ner_transformer: Mock,
    mock_mongo_connector: Mock,
    mock_mongo_connection: MongoConnection,
    mock_spacy: Mock,
) -> None:
    """Check that a transformer NER result is written back for a single document.

    ``mock_mongo_connector`` and ``mock_spacy`` are requested only for the
    patches they install; the test body does not use them directly.
    """
    # Configure the mock to return a specific NER result
    mock_ner_transformer.return_value = {"ORG": 2, "PERSON": 1}
    # Create an instance of the EntityPipeline
    entity_pipeline = EntityPipeline(mock_mongo_connection)
    # Mock the news collection and documents for testing
    mock_collection = Mock()
    mock_documents = [{"_id": "document1", "title": "Apple Inc is a tech company."}]
    # Set the collection to the mock_collection
    entity_pipeline.news_obj.collection = mock_collection
    # Mock the find method of the collection to return the mock documents
    mock_collection.find.return_value = mock_documents
    # Call the process_documents method with transformer NER
    entity_pipeline.process_documents(
        entity="ORG", doc_attrib="title", ner_selection="use_transformer_ner"
    )
    # Ensure that ner_transformer was called with the correct parameters
    mock_ner_transformer.assert_called_once_with(mock_documents[0], "ORG", "title")
    # Ensure that the document in the collection was updated with the NER results
    mock_collection.update_one.assert_called_once_with(
        {"_id": "document1"},
        {"$set": {"companies": {"ORG": 2, "PERSON": 1}}},
    )
# NOTE(review): this test previously patched ``ner_spacy`` and selected
# "use_spacy_ner", duplicating the spaCy no-docs test. The target below assumes
# that ``NerAnalysisService.ner_transformer`` exists and that process_documents
# accepts "use_transformer_ner" -- confirm against ner_service / ner_pipeline.
@patch(
    "aki_prj23_transparenzregister.ai.ner_service.NerAnalysisService.ner_transformer"
)
def test_entity_pipeline_with_transformer_no_docs(
    mock_ner_transformer: Mock,
    mock_mongo_connector: Mock,
    mock_mongo_connection: MongoConnection,
    mock_spacy: Mock,
) -> None:
    """Check that transformer NER is skipped when ``find`` yields no documents.

    ``mock_mongo_connector`` and ``mock_spacy`` are requested only for the
    patches they install; the test body does not use them directly.
    """
    # Configure the mock to return a specific NER result
    mock_ner_transformer.return_value = {"ORG": 2, "PERSON": 1}
    # Create an instance of the EntityPipeline
    entity_pipeline = EntityPipeline(mock_mongo_connection)
    # Mock the news collection; there are no documents to process
    mock_collection = Mock()
    mock_documents: list[dict] = []
    # Set the collection to the mock_collection
    entity_pipeline.news_obj.collection = mock_collection
    # Mock the find method of the collection to return the (empty) documents
    mock_collection.find.return_value = mock_documents
    # Call the process_documents method with transformer NER
    entity_pipeline.process_documents(
        entity="ORG", doc_attrib="title", ner_selection="use_transformer_ner"
    )
    # Ensure that ner_transformer is not called
    mock_ner_transformer.assert_not_called()
    # Ensure that the document in the collection was not updated
    mock_collection.update_one.assert_not_called()