import re
from typing import Any, Dict, List, Optional

import torch
from transformers import pipeline


class NewsAnalyzer:
    """Label news text with a sentiment, a theme and an impact category.

    Sentiment comes from a text-classification pipeline; theme and impact
    come from a zero-shot classification pipeline that ranks the candidate
    labels held in ``theme_labels`` and ``impact_labels``.

    If the models cannot be loaded, both pipeline attributes stay ``None``
    and every analysis call returns a neutral fallback result instead of
    raising.
    """

    # Number of leading characters of the input handed to each pipeline.
    MAX_CHARS = 512

    def __init__(self, model_name: Optional[str] = None) -> None:
        """Load the sentiment and zero-shot pipelines.

        Args:
            model_name: Accepted for backward compatibility with existing
                callers; it is currently not used, the model identifiers
                are fixed below.
        """
        print("Initializing Zero-Shot News Analyzer...")

        # Transformers pipelines take a CUDA device index, or -1 for CPU.
        self.device = 0 if torch.cuda.is_available() else -1
        print(f"Using device: {'cuda' if self.device == 0 else 'cpu'}")

        # Set up before the models are loaded so that every attribute exists
        # even when loading fails part-way through.
        self.theme_labels = [
            "Earnings/Finance", "Product/Service", "Legal/Regulation",
            "Management/M&A", "Market/Economy", "Other",
        ]
        self.impact_labels = ["Opportunity", "Risk", "Neutral"]
        self.sentiment_pipeline = None
        self.classifier_pipeline = None

        try:
            print("Loading Sentiment model...")
            sentiment_pipeline = pipeline(
                "sentiment-analysis",
                model="distilbert-base-uncased-finetuned-sst-2-english",
                device=self.device,
            )

            print("Loading Zero-Shot model...")
            classifier_pipeline = pipeline(
                "zero-shot-classification",
                model="facebook/bart-large-mnli",
                device=self.device,
            )
        except Exception as e:
            # Deliberate best-effort: report the failure and leave both
            # pipelines as None so analysis calls degrade to the fallback.
            print(f"Fatal error loading models: {e}")
        else:
            # Publish the pipelines only once BOTH have loaded.
            self.sentiment_pipeline = sentiment_pipeline
            self.classifier_pipeline = classifier_pipeline
            print("Models loaded successfully!")

    @staticmethod
    def _fallback_result(explanation: str) -> Dict[str, Any]:
        """Build the neutral result returned when no analysis is possible."""
        return {
            "sentiment": "Neutral", "score": 0.5, "theme": "N/A",
            "impact": "N/A", "explanation": explanation,
        }

    def analyze_news_item(self, text: Any) -> Dict[str, Any]:
        """Analyze one news text (sentiment, theme, impact) using zero-shot.

        Args:
            text: The news text. Only the first ``MAX_CHARS`` characters are
                passed to the models.

        Returns:
            A dict with the keys ``sentiment``, ``score``, ``theme``,
            ``impact`` and ``explanation``. ``sentiment`` is "Positive" or
            "Negative" after a successful analysis. When the pipelines are
            not loaded, the text is empty / not a string, or a pipeline call
            raises, a neutral fallback is returned whose ``explanation``
            names the reason.
        """
        if self.classifier_pipeline is None or self.sentiment_pipeline is None:
            print("Error: Pipelines are not loaded.")
            return self._fallback_result("Model loading failed")

        # Non-string input is treated like missing text rather than crashing.
        if not isinstance(text, str) or not text.strip():
            return self._fallback_result("No text")

        snippet = text[:self.MAX_CHARS]

        try:
            # The slice limits characters, not tokens; ask the tokenizer to
            # truncate too so dense scripts cannot exceed the model's limit.
            sentiment_result = self.sentiment_pipeline(
                snippet, truncation=True
            )[0]
            score = sentiment_result['score']
            # Any label other than "positive" (case-insensitive) is reported
            # as "Negative".
            is_positive = sentiment_result['label'].lower() == "positive"
            final_sentiment = "Positive" if is_positive else "Negative"

            # The first entry of 'labels' is taken as the top-ranked label.
            theme_result = self.classifier_pipeline(
                snippet,
                candidate_labels=self.theme_labels,
            )
            theme = theme_result['labels'][0]

            impact_result = self.classifier_pipeline(
                snippet,
                candidate_labels=self.impact_labels,
            )
            impact = impact_result['labels'][0]

            explanation = (
                f"Classified as '{theme}' (Impact: {impact}) "
                "via zero-shot analysis."
            )
            return {
                "sentiment": final_sentiment,
                "score": score,
                "theme": theme,
                "impact": impact,
                "explanation": explanation,
            }
        except Exception as e:
            # Best-effort boundary: one bad item must not abort a batch.
            print(f"Error in analysis: {e}")
            return self._fallback_result("Analysis failed")

    def analyze_batch(self, news_list: Optional[List[Dict[str, Any]]]) -> List[Dict[str, Any]]:
        """Analyze several news items in one call.

        Args:
            news_list: Dicts that may carry ``title`` and ``summary`` keys.
                ``None`` is treated as an empty list.

        Returns:
            One dict per input item: the original item's keys merged with
            the keys returned by ``analyze_news_item`` (analysis keys win on
            a name clash).
        """
        results = []
        for news in news_list or []:
            # Skip missing or None fields so the literal text "None" is
            # never fed to the models.
            parts = [
                str(value)
                for value in (news.get('title'), news.get('summary'))
                if value
            ]
            analysis = self.analyze_news_item(" ".join(parts))
            results.append({**news, **analysis})
        return results