mirror of
https://github.com/fhswf/aki_prj23_transparenzregister.git
synced 2025-04-22 16:12:55 +02:00
Created a data processing pipeline that enhances the raw mined data with organisation extractions and sentiment analysis prior to moving the data to the SQL DB. The transfer of matched data is done afterwards. --------- Co-authored-by: SeZett <zeleny.sebastian@fh-swf.de>
271 lines
9.1 KiB
Python
271 lines
9.1 KiB
Python
"""Tests for checking NER Pipeline."""
|
|
|
|
from unittest.mock import Mock, patch
|
|
|
|
import pytest
|
|
|
|
from aki_prj23_transparenzregister.ai.ner_pipeline import EntityPipeline
|
|
from aki_prj23_transparenzregister.config.config_template import MongoConnection
|
|
|
|
|
|
@pytest.fixture()
def mock_mongo_connection() -> MongoConnection:
    """Provide a placeholder MongoConnection for building the pipeline in tests.

    Returns:
        MongoConnection: Connection settings made of empty strings, with
            ``None`` as the third positional argument.
    """
    # Five positional arguments. The fourth used to be spelled `"" ""`, an
    # implicit string concatenation that evaluates to "" but reads like a
    # missing comma; a single literal keeps the value and removes the trap.
    return MongoConnection("", "", None, "", "")
|
|
|
|
|
|
@pytest.fixture()
def mock_mongo_connector(mocker: Mock) -> Mock:
    """Patch the MongoConnector class so that instantiating it yields a stub.

    Args:
        mocker (any): Library mocker

    Returns:
        Mock: The stub returned by the patched MongoConnector; its
            ``database`` attribute is a dict with a mocked "news" entry.
    """
    connector_stub = Mock()
    # Give the stub a dict-like database exposing a mocked "news" entry.
    connector_stub.database = {"news": Mock()}
    mocker.patch(
        "aki_prj23_transparenzregister.utils.mongo.connector.MongoConnector",
        return_value=connector_stub,
    )
    return connector_stub
|
|
|
|
|
|
@pytest.fixture()
def mock_spacy(mocker: Mock) -> Mock:
    """Patch NerAnalysisService.init_spacy so that it returns a stub.

    Args:
        mocker (any): Library mocker

    Returns:
        Mock: The stub handed back by the patched ``init_spacy``.
    """
    spacy_stub = Mock()
    init_spacy_target = (
        "aki_prj23_transparenzregister.ai.ner_service.NerAnalysisService.init_spacy"
    )
    mocker.patch(init_spacy_target, return_value=spacy_stub)
    return spacy_stub
|
|
|
|
|
|
# Mocking the NerAnalysisService methods
|
|
@patch("aki_prj23_transparenzregister.ai.ner_service.NerAnalysisService.ner_spacy")
def test_entity_pipeline_with_spacy(
    mock_ner_spacy: Mock,
    mock_mongo_connector: Mock,
    mock_mongo_connection: MongoConnection,
    mock_spacy: Mock,
) -> None:
    """Check that the spaCy path writes the NER result back to the document."""
    # The patched ner_spacy answers with a fixed NER result.
    mock_ner_spacy.return_value = {"ORG": 2, "PERSON": 1}

    # A stand-in news collection whose find() yields a single article.
    article = {"_id": "document1", "title": "Apple Inc is a tech company."}
    news_collection = Mock()
    news_collection.find.return_value = [article]

    # Build the pipeline and point it at the stand-in collection.
    pipeline = EntityPipeline(mock_mongo_connection)
    pipeline.news_obj.collection = news_collection

    # Run the pipeline on the title attribute using spaCy NER.
    pipeline.process_documents(doc_attrib="title", ner_method="spacy")

    # ner_spacy must have been invoked exactly once for the article.
    mock_ner_spacy.assert_called_once_with(article, "ORG", "title")

    # The NER result must have been stored on the article.
    expected_filter = {"_id": "document1"}
    expected_update = {"$set": {"companies": {"ORG": 2, "PERSON": 1}}}
    news_collection.update_one.assert_called_once_with(
        expected_filter,
        expected_update,
    )
|
|
|
|
|
|
@patch("aki_prj23_transparenzregister.ai.ner_service.NerAnalysisService.ner_spacy")
def test_entity_pipeline_with_spacy_no_docs(
    mock_ner_spacy: Mock,
    mock_mongo_connector: Mock,
    mock_mongo_connection: MongoConnection,
    mock_spacy: Mock,
) -> None:
    """Check that the spaCy path does nothing when find() yields no documents."""
    # Configure the mock to return a specific NER result
    mock_ner_spacy.return_value = {"ORG": 2, "PERSON": 1}

    # Create an instance of the EntityPipeline
    entity_pipeline = EntityPipeline(mock_mongo_connection)

    # Mock the news collection with an empty result set
    mock_collection = Mock()
    mock_documents: list[dict] = []

    # Set the collection to the mock_collection
    entity_pipeline.news_obj.collection = mock_collection

    # Mock the find method of the collection to return the mock documents
    mock_collection.find.return_value = mock_documents

    # Call the process_documents method with spaCy NER
    entity_pipeline.process_documents(doc_attrib="title", ner_method="spacy")

    # Ensure that ner_spacy was not called
    mock_ner_spacy.assert_not_called()

    # Ensure that no document in the collection was updated. The assertion
    # has to target update_one: asserting on the collection mock itself only
    # checks that the mock object was never called like a function, which
    # holds even if update_one had been invoked.
    mock_collection.update_one.assert_not_called()
|
|
|
|
|
|
@patch(
    "aki_prj23_transparenzregister.ai.ner_service.NerAnalysisService.ner_company_list"
)
def test_entity_pipeline_with_company_list_ner(
    mock_ner_company_list: Mock,
    mock_mongo_connector: Mock,
    mock_mongo_connection: MongoConnection,
    mock_spacy: Mock,
) -> None:
    """Check that the company-list path writes the NER result back to the document."""
    # The patched ner_company_list answers with a fixed NER result.
    mock_ner_company_list.return_value = {"ORG": 3, "LOCATION": 2}

    # A stand-in news collection whose find() yields a single article.
    article = {"_id": "document2", "title": "Siemens ist ein deutsches Unternehmen."}
    news_collection = Mock()
    news_collection.find.return_value = [article]

    # Build the pipeline and point it at the stand-in collection.
    pipeline = EntityPipeline(mock_mongo_connection)
    pipeline.news_obj.collection = news_collection

    # Run the pipeline on the title attribute using company-list NER.
    pipeline.process_documents(doc_attrib="title", ner_method="company_list")

    # ner_company_list must have been invoked exactly once for the article.
    mock_ner_company_list.assert_called_once_with(article, "ORG", "title")

    # The NER result must have been stored on the article.
    expected_filter = {"_id": "document2"}
    expected_update = {"$set": {"companies": {"ORG": 3, "LOCATION": 2}}}
    news_collection.update_one.assert_called_once_with(
        expected_filter,
        expected_update,
    )
|
|
|
|
|
|
@patch(
    "aki_prj23_transparenzregister.ai.ner_service.NerAnalysisService.ner_company_list"
)
def test_entity_pipeline_with_company_list_ner_no_docs(
    mock_ner_company_list: Mock,
    mock_mongo_connector: Mock,
    mock_mongo_connection: MongoConnection,
    mock_spacy: Mock,
) -> None:
    """Check that the company-list path does nothing when find() yields no documents."""
    # The patched ner_company_list would answer with a fixed NER result.
    mock_ner_company_list.return_value = {"ORG": 3, "LOCATION": 2}

    # A stand-in news collection whose find() yields nothing.
    no_documents: list[dict] = []
    news_collection = Mock()
    news_collection.find.return_value = no_documents

    # Build the pipeline and point it at the stand-in collection.
    pipeline = EntityPipeline(mock_mongo_connection)
    pipeline.news_obj.collection = news_collection

    # Run the pipeline on the title attribute using company-list NER.
    pipeline.process_documents(doc_attrib="title", ner_method="company_list")

    # Without documents, neither the NER call nor an update may happen.
    mock_ner_company_list.assert_not_called()
    news_collection.update_one.assert_not_called()
|
|
|
|
|
|
@patch("aki_prj23_transparenzregister.ai.ner_service.NerAnalysisService.ner_spacy")
def test_entity_pipeline_with_transformer(
    mock_ner_transformer: Mock,
    mock_mongo_connector: Mock,
    mock_mongo_connection: MongoConnection,
    mock_spacy: Mock,
) -> None:
    """Check that a processed document receives the mocked NER result.

    NOTE(review): despite the test name, this patches ``ner_spacy`` and calls
    ``process_documents`` with ``ner_method="spacy"``, so it exercises the
    same path as the spaCy test. Presumably a transformer-specific target was
    intended; confirm against the pipeline before changing it.
    """
    # The patched NER method answers with a fixed result.
    mock_ner_transformer.return_value = {"ORG": 2, "PERSON": 1}

    # A stand-in news collection whose find() yields a single article.
    article = {"_id": "document1", "title": "Apple Inc is a tech company."}
    news_collection = Mock()
    news_collection.find.return_value = [article]

    # Build the pipeline and point it at the stand-in collection.
    pipeline = EntityPipeline(mock_mongo_connection)
    pipeline.news_obj.collection = news_collection

    # Run the pipeline on the title attribute.
    pipeline.process_documents(doc_attrib="title", ner_method="spacy")

    # The patched NER method must have been invoked exactly once for the article.
    mock_ner_transformer.assert_called_once_with(article, "ORG", "title")

    # The NER result must have been stored on the article.
    expected_filter = {"_id": "document1"}
    expected_update = {"$set": {"companies": {"ORG": 2, "PERSON": 1}}}
    news_collection.update_one.assert_called_once_with(
        expected_filter,
        expected_update,
    )
|
|
|
|
|
|
@patch("aki_prj23_transparenzregister.ai.ner_service.NerAnalysisService.ner_spacy")
def test_entity_pipeline_with_transformer_no_docs(
    mock_ner_transformer: Mock,
    mock_mongo_connector: Mock,
    mock_mongo_connection: MongoConnection,
    mock_spacy: Mock,
) -> None:
    """Check that nothing is analysed or updated when find() yields no documents.

    NOTE(review): despite the test name, this patches ``ner_spacy`` and calls
    ``process_documents`` with ``ner_method="spacy"``. Presumably a
    transformer-specific target was intended; confirm against the pipeline
    before changing it.
    """
    # The patched NER method would answer with a fixed result.
    mock_ner_transformer.return_value = {"ORG": 2, "PERSON": 1}

    # A stand-in news collection whose find() yields nothing.
    no_documents: list[dict] = []
    news_collection = Mock()
    news_collection.find.return_value = no_documents

    # Build the pipeline and point it at the stand-in collection.
    pipeline = EntityPipeline(mock_mongo_connection)
    pipeline.news_obj.collection = news_collection

    # Run the pipeline on the title attribute.
    pipeline.process_documents(doc_attrib="title", ner_method="spacy")

    # Without documents, neither the NER call nor an update may happen.
    mock_ner_transformer.assert_not_called()
    news_collection.update_one.assert_not_called()
|