# mai / sentiment_analyzer.py
# Sooteemon's picture
# Update sentiment_analyzer.py
# 7b263f0 verified
from transformers import pipeline
import torch
import re
class NewsAnalyzer:
    """Classify news text by sentiment, theme, and impact.

    Sentiment is produced by a ``sentiment-analysis`` pipeline and theme /
    impact by a ``zero-shot-classification`` pipeline. When the models cannot
    be loaded, both pipeline attributes are ``None`` and every analysis call
    returns a neutral placeholder result instead of raising.
    """

    # Character budget applied to the text before it is handed to the models.
    _MAX_CHARS = 512

    def __init__(self, model_name=None):
        """Load the sentiment and zero-shot pipelines.

        Args:
            model_name: Accepted for backward compatibility but not used;
                the model identifiers are hard-coded below.

        Loading errors are reported on stdout and leave
        ``sentiment_pipeline`` and ``classifier_pipeline`` set to ``None``.
        """
        print("Initializing Zero-Shot News Analyzer...")

        self.device = 0 if torch.cuda.is_available() else -1
        print(f"Using device: {'cuda' if self.device == 0 else 'cpu'}")

        # Candidate labels are plain data, so define them before the risky
        # model loading: the attributes then exist even if loading fails.
        self.theme_labels = [
            "Earnings/Finance", "Product/Service", "Legal/Regulation",
            "Management/M&A", "Market/Economy", "Other"
        ]
        self.impact_labels = ["Opportunity", "Risk", "Neutral"]

        self.sentiment_pipeline = None
        self.classifier_pipeline = None

        try:
            # Pipeline 1: sentiment analysis.
            print("Loading Sentiment model...")
            sentiment_pipeline = pipeline(
                "sentiment-analysis",
                model="distilbert-base-uncased-finetuned-sst-2-english",
                device=self.device
            )

            # Pipeline 2: zero-shot classification (theme and impact).
            print("Loading Zero-Shot model...")
            classifier_pipeline = pipeline(
                "zero-shot-classification",
                model="facebook/bart-large-mnli",  # the correct standard model name
                device=self.device
            )
        except Exception as e:
            # Best-effort construction: keep the instance usable (it will
            # return placeholder results) rather than propagating the error.
            print(f"Fatal error loading models: {e}")
        else:
            # Publish the pipelines only once both have loaded, so the
            # instance is never left half-initialised.
            self.sentiment_pipeline = sentiment_pipeline
            self.classifier_pipeline = classifier_pipeline
            print("Models loaded successfully!")

    @staticmethod
    def _fallback_result(explanation):
        """Return the neutral placeholder result carrying *explanation*."""
        return {
            "sentiment": "Neutral", "score": 0.5, "theme": "N/A",
            "impact": "N/A", "explanation": explanation
        }

    def analyze_news_item(self, text):
        """Analyze one news text (sentiment, theme, impact) using zero-shot.

        Args:
            text: The news text. Only the first 512 characters are analyzed.

        Returns:
            A dict with the keys ``sentiment`` ("Positive" / "Negative", or
            "Neutral" for placeholder results), ``score``, ``theme``,
            ``impact`` and ``explanation``. A placeholder result is returned
            when the models are not loaded, when ``text`` is empty,
            whitespace-only or not a string, or when the analysis raises.
        """
        # Check that both pipelines were loaded successfully.
        if self.classifier_pipeline is None or self.sentiment_pipeline is None:
            print("Error: Pipelines are not loaded.")
            return self._fallback_result("Model loading failed")

        # Non-string input is treated like missing text instead of crashing
        # on ``.strip()``.
        if not isinstance(text, str) or not text.strip():
            return self._fallback_result("No text")

        snippet = text[:self._MAX_CHARS]

        try:
            # 1. Sentiment. The slice above bounds characters, not tokens, so
            # additionally request tokenizer-level truncation as a guard
            # against over-long token sequences.
            sentiment_result = self.sentiment_pipeline(
                snippet, truncation=True
            )[0]
            sentiment = sentiment_result['label'].capitalize()
            score = sentiment_result['score']

            # 2. Theme: take the first label the classifier returns
            # (presumably the highest-scoring one; confirm against the
            # pipeline documentation).
            theme_result = self.classifier_pipeline(
                snippet,
                candidate_labels=self.theme_labels
            )
            theme = theme_result['labels'][0]

            # 3. Impact: same approach with the impact label set.
            impact_result = self.classifier_pipeline(
                snippet,
                candidate_labels=self.impact_labels
            )
            impact = impact_result['labels'][0]

            # 4. Human-readable explanation.
            explanation = f"Classified as '{theme}' (Impact: {impact}) via zero-shot analysis."

            # The sentiment model only yields positive / negative labels
            # (it has no neutral class), so anything that is not "Positive"
            # is reported as "Negative".
            final_sentiment = "Positive" if sentiment == "Positive" else "Negative"

            return {
                "sentiment": final_sentiment,
                "score": score,
                "theme": theme,
                "impact": impact,
                "explanation": explanation
            }
        except Exception as e:
            print(f"Error in analysis: {e}")
            return self._fallback_result("Analysis failed")

    def analyze_batch(self, news_list):
        """Analyze several news items, one after another.

        Args:
            news_list: Iterable of dicts; the ``title`` and ``summary``
                entries (when present) are joined with a space and analyzed.

        Returns:
            A list with one dict per input item: the original item's keys
            merged with the analysis result (analysis keys win on conflict).
        """
        results = []
        for news in news_list:
            # ``or ''`` also covers keys that are present but hold None,
            # which would otherwise be rendered as the literal text "None".
            title = news.get('title') or ''
            summary = news.get('summary') or ''
            combined_text = f"{title} {summary}"
            sentiment_result = self.analyze_news_item(combined_text)
            results.append({
                **news,
                **sentiment_result
            })
        return results