Simplified some functions. (#414)

This commit is contained in:
Philipp Horstenkamp 2023-11-26 15:01:12 +01:00 committed by GitHub
parent 6a584f5c10
commit 6890562a18
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 32 additions and 34 deletions

View File

@@ -67,15 +67,15 @@ class EntityPipeline:
ner_service_func = ner_service_instance.ner_transformer
else:
raise ValueError
if len(documents) > 0:
for document in tqdm(documents):
ents = ner_service_func(document, "ORG", doc_attrib)
self.news_obj.collection.update_one(
{"_id": document["_id"]},
{"$set": {"companies": ents}},
)
else:
if not documents:
logger.info("No documents found.")
return
for document in tqdm(documents):
ents = ner_service_func(document, "ORG", doc_attrib)
self.news_obj.collection.update_one(
{"_id": document["_id"]},
{"$set": {"companies": ents}},
)
def execute_ner(config_provider: ConfigProvider) -> None:

View File

@@ -43,33 +43,31 @@ class SentimentPipeline:
{"sentiment": {"$exists": False}}
)
documents = list(cursor_unprocessed)
logger.info(f"Documents to be processed: {cursor_unprocessed}")
if len(documents) > 0:
for document in tqdm(documents):
text = document[doc_attrib]
# Determine sentiment analysis service based on config
if sentiment_method == "spacy":
selected_service = sentiment_service.SentimentAnalysisService(
use_spacy=True, use_transformer=False
)
sentiment_service_func = selected_service.sentiment_spacy
elif sentiment_method == "transformer":
selected_service = sentiment_service.SentimentAnalysisService(
use_spacy=False, use_transformer=True
)
sentiment_service_func = selected_service.sentiment_transformer
sents = sentiment_service_func(text)
sentiment = {"label": sents[0], "score": sents[1]}
self.news_obj.collection.update_one(
{"_id": document["_id"]},
{"$set": {"sentiment": sentiment}},
)
else:
if not documents:
logger.info("No documents found.")
return
for document in tqdm(documents):
text = document[doc_attrib]
# Determine sentiment analysis service based on config
if sentiment_method == "spacy":
selected_service = sentiment_service.SentimentAnalysisService(
use_spacy=True, use_transformer=False
)
sentiment_service_func = selected_service.sentiment_spacy
elif sentiment_method == "transformer":
selected_service = sentiment_service.SentimentAnalysisService(
use_spacy=False, use_transformer=True
)
sentiment_service_func = selected_service.sentiment_transformer
sents = sentiment_service_func(text)
sentiment = {"label": sents[0], "score": sents[1]}
self.news_obj.collection.update_one(
{"_id": document["_id"]},
{"$set": {"sentiment": sentiment}},
)
def execute_sentiment(config_provider: ConfigProvider) -> None: