diff --git a/src/aki_prj23_transparenzregister/ai/ner_pipeline.py b/src/aki_prj23_transparenzregister/ai/ner_pipeline.py index c90f7c5..f98ef6a 100644 --- a/src/aki_prj23_transparenzregister/ai/ner_pipeline.py +++ b/src/aki_prj23_transparenzregister/ai/ner_pipeline.py @@ -67,15 +67,15 @@ class EntityPipeline: ner_service_func = ner_service_instance.ner_transformer else: raise ValueError - if len(documents) > 0: - for document in tqdm(documents): - ents = ner_service_func(document, "ORG", doc_attrib) - self.news_obj.collection.update_one( - {"_id": document["_id"]}, - {"$set": {"companies": ents}}, - ) - else: + if not documents: logger.info("No documents found.") + return + for document in tqdm(documents): + ents = ner_service_func(document, "ORG", doc_attrib) + self.news_obj.collection.update_one( + {"_id": document["_id"]}, + {"$set": {"companies": ents}}, + ) def execute_ner(config_provider: ConfigProvider) -> None: diff --git a/src/aki_prj23_transparenzregister/ai/sentiment_pipeline.py b/src/aki_prj23_transparenzregister/ai/sentiment_pipeline.py index 88c9a91..31d5f56 100644 --- a/src/aki_prj23_transparenzregister/ai/sentiment_pipeline.py +++ b/src/aki_prj23_transparenzregister/ai/sentiment_pipeline.py @@ -43,33 +43,31 @@ class SentimentPipeline: {"sentiment": {"$exists": False}} ) documents = list(cursor_unprocessed) - logger.info(f"Documents to be processed: {cursor_unprocessed}") - - if len(documents) > 0: - for document in tqdm(documents): - text = document[doc_attrib] - - # Determine sentiment analysis service based on config - if sentiment_method == "spacy": - selected_service = sentiment_service.SentimentAnalysisService( - use_spacy=True, use_transformer=False - ) - sentiment_service_func = selected_service.sentiment_spacy - - elif sentiment_method == "transformer": - selected_service = sentiment_service.SentimentAnalysisService( - use_spacy=False, use_transformer=True - ) - sentiment_service_func = selected_service.sentiment_transformer - - sents = sentiment_service_func(text) - sentiment = {"label": sents[0], "score": sents[1]} - self.news_obj.collection.update_one( - {"_id": document["_id"]}, - {"$set": {"sentiment": sentiment}}, - ) - else: + if not documents: logger.info("No documents found.") + return + for document in tqdm(documents): + text = document[doc_attrib] + + # Determine sentiment analysis service based on config + if sentiment_method == "spacy": + selected_service = sentiment_service.SentimentAnalysisService( + use_spacy=True, use_transformer=False + ) + sentiment_service_func = selected_service.sentiment_spacy + + elif sentiment_method == "transformer": + selected_service = sentiment_service.SentimentAnalysisService( + use_spacy=False, use_transformer=True + ) + sentiment_service_func = selected_service.sentiment_transformer + + sents = sentiment_service_func(text) + sentiment = {"label": sents[0], "score": sents[1]} + self.news_obj.collection.update_one( + {"_id": document["_id"]}, + {"$set": {"sentiment": sentiment}}, + ) def execute_sentiment(config_provider: ConfigProvider) -> None: