mirror of
https://github.com/fhswf/aki_prj23_transparenzregister.git
synced 2025-06-21 23:33:54 +02:00
Created pipeline to run ner sentiment and sql ingest (#314)
Created a data processing pipeline that enhances the raw mined data with organisation extractions and sentiment analysis prior to moving the data to the SQL DB. The transfer of matched data is done afterward. --------- Co-authored-by: SeZett <zeleny.sebastian@fh-swf.de>
This commit is contained in:
@ -83,9 +83,7 @@ def test_entity_pipeline_with_spacy(
|
||||
mock_collection.find.return_value = mock_documents
|
||||
|
||||
# Call the process_documents method with spaCy NER
|
||||
entity_pipeline.process_documents(
|
||||
entity="ORG", doc_attrib="title", ner_selection="use_spacy_ner"
|
||||
)
|
||||
entity_pipeline.process_documents(doc_attrib="title", ner_method="spacy")
|
||||
|
||||
# Ensure that ner_spacy was called with the correct parameters
|
||||
mock_ner_spacy.assert_called_once_with(mock_documents[0], "ORG", "title")
|
||||
@ -121,9 +119,7 @@ def test_entity_pipeline_with_spacy_no_docs(
|
||||
mock_collection.find.return_value = mock_documents
|
||||
|
||||
# Call the process_documents method with spaCy NER
|
||||
entity_pipeline.process_documents(
|
||||
entity="ORG", doc_attrib="title", ner_selection="use_spacy_ner"
|
||||
)
|
||||
entity_pipeline.process_documents(doc_attrib="title", ner_method="spacy")
|
||||
|
||||
# Ensure that ner_spacy was not called
|
||||
mock_ner_spacy.assert_not_called()
|
||||
@ -135,14 +131,14 @@ def test_entity_pipeline_with_spacy_no_docs(
|
||||
@patch(
|
||||
"aki_prj23_transparenzregister.ai.ner_service.NerAnalysisService.ner_company_list"
|
||||
)
|
||||
def test_entity_pipeline_with_companylist_ner(
|
||||
mock_ner_companylist: Mock,
|
||||
def test_entity_pipeline_with_company_list_ner(
|
||||
mock_ner_company_list: Mock,
|
||||
mock_mongo_connector: Mock,
|
||||
mock_mongo_connection: MongoConnection,
|
||||
mock_spacy: Mock,
|
||||
) -> None:
|
||||
# Konfigurieren Sie das Mock-Objekt, um ein spezifisches NER-Ergebnis zurückzugeben
|
||||
mock_ner_companylist.return_value = {"ORG": 3, "LOCATION": 2}
|
||||
mock_ner_company_list.return_value = {"ORG": 3, "LOCATION": 2}
|
||||
|
||||
# Create an instance of the EntityPipeline
|
||||
entity_pipeline = EntityPipeline(mock_mongo_connection)
|
||||
@ -159,12 +155,10 @@ def test_entity_pipeline_with_companylist_ner(
|
||||
mock_collection.find.return_value = mock_documents
|
||||
|
||||
# Call the process_documents method with Company List NER
|
||||
entity_pipeline.process_documents(
|
||||
entity="ORG", doc_attrib="title", ner_selection="use_companylist_ner"
|
||||
)
|
||||
entity_pipeline.process_documents(doc_attrib="title", ner_method="company_list")
|
||||
|
||||
# Überprüfen Sie, ob ner_company_list mit den richtigen Parametern aufgerufen wurde
|
||||
mock_ner_companylist.assert_called_once_with(mock_documents[0], "ORG", "title")
|
||||
mock_ner_company_list.assert_called_once_with(mock_documents[0], "ORG", "title")
|
||||
|
||||
# Überprüfen Sie, ob das Dokument in der Sammlung mit den NER-Ergebnissen aktualisiert wurde
|
||||
mock_collection.update_one.assert_called_once_with(
|
||||
@ -176,14 +170,14 @@ def test_entity_pipeline_with_companylist_ner(
|
||||
@patch(
|
||||
"aki_prj23_transparenzregister.ai.ner_service.NerAnalysisService.ner_company_list"
|
||||
)
|
||||
def test_entity_pipeline_with_companylist_ner_no_docs(
|
||||
mock_ner_companylist: Mock,
|
||||
def test_entity_pipeline_with_company_list_ner_no_docs(
|
||||
mock_ner_company_list: Mock,
|
||||
mock_mongo_connector: Mock,
|
||||
mock_mongo_connection: MongoConnection,
|
||||
mock_spacy: Mock,
|
||||
) -> None:
|
||||
# Configure the mock to return a specific NER result
|
||||
mock_ner_companylist.return_value = {"ORG": 3, "LOCATION": 2}
|
||||
mock_ner_company_list.return_value = {"ORG": 3, "LOCATION": 2}
|
||||
|
||||
# Create an instance of the EntityPipeline
|
||||
entity_pipeline = EntityPipeline(mock_mongo_connection)
|
||||
@ -198,18 +192,15 @@ def test_entity_pipeline_with_companylist_ner_no_docs(
|
||||
mock_collection.find.return_value = mock_documents
|
||||
|
||||
# Call the process_documents method with Company List NER
|
||||
entity_pipeline.process_documents(
|
||||
entity="ORG", doc_attrib="title", ner_selection="use_companylist_ner"
|
||||
)
|
||||
entity_pipeline.process_documents(doc_attrib="title", ner_method="company_list")
|
||||
|
||||
# Ensure that ner_company_list is not called
|
||||
mock_ner_companylist.assert_not_called()
|
||||
mock_ner_company_list.assert_not_called()
|
||||
|
||||
# Ensure that the document in the collection was not updated
|
||||
mock_collection.update_one.assert_not_called()
|
||||
|
||||
|
||||
# Add more test cases for other NER methods (e.g., use_companylist_ner, use_transformer_ner) following a similar pattern.
|
||||
@patch("aki_prj23_transparenzregister.ai.ner_service.NerAnalysisService.ner_spacy")
|
||||
def test_entity_pipeline_with_transformer(
|
||||
mock_ner_transformer: Mock,
|
||||
@ -234,9 +225,7 @@ def test_entity_pipeline_with_transformer(
|
||||
mock_collection.find.return_value = mock_documents
|
||||
|
||||
# Call the process_documents method with spaCy NER
|
||||
entity_pipeline.process_documents(
|
||||
entity="ORG", doc_attrib="title", ner_selection="use_spacy_ner"
|
||||
)
|
||||
entity_pipeline.process_documents(doc_attrib="title", ner_method="spacy")
|
||||
|
||||
# Ensure that ner_spacy was called with the correct parameters
|
||||
mock_ner_transformer.assert_called_once_with(mock_documents[0], "ORG", "title")
|
||||
@ -272,9 +261,7 @@ def test_entity_pipeline_with_transformer_no_docs(
|
||||
mock_collection.find.return_value = mock_documents
|
||||
|
||||
# Call the process_documents method with spaCy NER
|
||||
entity_pipeline.process_documents(
|
||||
entity="ORG", doc_attrib="title", ner_selection="use_spacy_ner"
|
||||
)
|
||||
entity_pipeline.process_documents(doc_attrib="title", ner_method="spacy")
|
||||
|
||||
# Ensure that ner_transformer is not called
|
||||
mock_ner_transformer.assert_not_called()
|
||||
|
Reference in New Issue
Block a user