try_topic / sentiment_analyzer.py
Supitn's picture
Create sentiment_analyzer.py
abaee64 verified
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import torch
import re
class SentimentAnalyzer:
    """Classify the sentiment of financial news text.

    The primary path prompts an instruction-tuned causal language model
    (Gemma by default) and parses its structured answer.  Whenever that model
    cannot be loaded, or a generation call fails, a DistilBERT
    sentiment-analysis pipeline is used instead.

    Every analysis method returns a dict with the keys ``sentiment``
    ("Positive", "Negative" or "Neutral"), ``score`` (float in [0, 1]) and
    ``explanation`` (str).
    """

    _VALID_SENTIMENTS = ("Positive", "Negative", "Neutral")
    _FALLBACK_MODEL = "distilbert-base-uncased-finetuned-sst-2-english"

    # ``[\s*]*`` skips whitespace and markdown bold markers, so both
    # "Sentiment: Positive" and "**Sentiment:** Positive" are recognised.
    _SENTIMENT_RE = re.compile(r"Sentiment:[\s*]*(\w+)", re.IGNORECASE)
    # Capture only a well-formed number: "0.8." yields "0.8" rather than a
    # token that float() would reject.
    _SCORE_RE = re.compile(r"Score:[\s*]*(\d+(?:\.\d+)?|\.\d+)", re.IGNORECASE)
    _REASON_RE = re.compile(r"Reason:[\s*]*(.+?)(?:\n|$)", re.IGNORECASE)

    def __init__(self, model_name="google/gemma-2-2b-it"):
        """Load the language model, falling back to DistilBERT on failure.

        Args:
            model_name: Hugging Face model identifier of the causal LM to use.
                The default is gemma-2-2b-it (used in place of a 3-4b variant
                that was not available when this was written).

        Raises:
            Exception: Only if the primary model fails to load AND the
                fallback pipeline cannot be created either.
        """
        print(f"Loading model: {model_name}")
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        print(f"Using device: {self.device}")

        # Define every attribute up front so later code can test for None
        # instead of tripping over a missing attribute.
        self.model = None
        self.tokenizer = None
        self.sentiment_pipeline = None

        on_gpu = self.device == "cuda"
        try:
            self.tokenizer = AutoTokenizer.from_pretrained(model_name)
            self.model = AutoModelForCausalLM.from_pretrained(
                model_name,
                torch_dtype=torch.float16 if on_gpu else torch.float32,
                device_map="auto" if on_gpu else None,
                low_cpu_mem_usage=True,
            )
            if not on_gpu:
                self.model = self.model.to(self.device)
            print("Model loaded successfully!")
        except Exception as e:
            print(f"Error loading model: {e}")
            # Without the LLM every request goes through the fallback, so
            # load it eagerly here.
            self.model = None
            self._get_fallback_pipeline()

    def _get_fallback_pipeline(self):
        """Return the DistilBERT pipeline, creating it on first use."""
        if getattr(self, "sentiment_pipeline", None) is None:
            self.sentiment_pipeline = pipeline(
                "sentiment-analysis",
                model=self._FALLBACK_MODEL,
            )
        return self.sentiment_pipeline

    def analyze_sentiment(self, text):
        """Analyze the sentiment of a single piece of text.

        Args:
            text: The text to analyze.  Only the first 500 characters are
                placed in the prompt.

        Returns:
            dict: ``{"sentiment", "score", "explanation"}``.  Empty or
            whitespace-only input yields a Neutral result with score 0.5.
        """
        if not text or not text.strip():
            return {
                "sentiment": "Neutral",
                "score": 0.5,
                "explanation": "No text to analyze",
            }

        # The primary model failed to load: use the fallback pipeline.
        if self.model is None:
            return self._fallback_sentiment(text)

        try:
            # Build the prompt for Gemma.
            prompt = (
                "Analyze the sentiment of this financial news. Rate it as Positive, "
                "Negative, or Neutral with a confidence score (0-1).\n"
                f"News: {text[:500]}\n"
                "Provide your analysis in this exact format:\n"
                "Sentiment: [Positive/Negative/Neutral]\n"
                "Score: [0.0-1.0]\n"
                "Reason: [Brief explanation]"
            )

            # Tokenize and generate.
            inputs = self.tokenizer(
                prompt, return_tensors="pt", truncation=True, max_length=512
            )
            inputs = inputs.to(self.device)
            with torch.no_grad():
                outputs = self.model.generate(
                    **inputs,
                    max_new_tokens=150,
                    temperature=0.3,
                    do_sample=True,
                    pad_token_id=self.tokenizer.eos_token_id,
                )

            # generate() returns the prompt tokens followed by the new ones.
            # Decode only the continuation; otherwise the template lines in
            # the prompt ("Reason: [Brief explanation]", the word "Positive")
            # would be parsed as if they were the model's answer.
            prompt_length = inputs["input_ids"].shape[1]
            response = self.tokenizer.decode(
                outputs[0][prompt_length:], skip_special_tokens=True
            )
            return self._parse_llm_response(response)
        except Exception as e:
            print(f"Error in analysis: {e}")
            return self._fallback_sentiment(text)

    def _parse_llm_response(self, response):
        """Extract sentiment, score and explanation from the LLM response.

        Fields that cannot be found keep their defaults: "Neutral", 0.5 and
        "Unable to analyze".

        Args:
            response: Text generated by the model.

        Returns:
            dict: ``{"sentiment", "score", "explanation"}``.
        """
        sentiment = "Neutral"
        score = 0.5
        explanation = "Unable to analyze"

        if not isinstance(response, str):
            return {
                "sentiment": sentiment,
                "score": score,
                "explanation": explanation,
            }

        sentiment_match = self._SENTIMENT_RE.search(response)
        if sentiment_match:
            sentiment = sentiment_match.group(1).capitalize()

        score_match = self._SCORE_RE.search(response)
        if score_match:
            # Clamp to [0, 1] in case the model answers on another scale.
            score = max(0.0, min(1.0, float(score_match.group(1))))

        reason_match = self._REASON_RE.search(response)
        if reason_match:
            explanation = reason_match.group(1).strip()

        # The labelled value was not one of the expected words: fall back to
        # a keyword scan of the whole response.
        if sentiment not in self._VALID_SENTIMENTS:
            lowered = response.lower()
            if "positive" in lowered:
                sentiment = "Positive"
            elif "negative" in lowered:
                sentiment = "Negative"
            else:
                sentiment = "Neutral"

        return {
            "sentiment": sentiment,
            "score": score,
            "explanation": explanation,
        }

    def _fallback_sentiment(self, text):
        """Classify ``text`` with the DistilBERT fallback pipeline.

        The pipeline is created on demand, so this also works when the
        primary model loaded fine but a generation call failed.

        Args:
            text: The text to analyze; only the first 512 characters are used.

        Returns:
            dict: ``{"sentiment", "score", "explanation"}``.  Any label other
            than ``POSITIVE`` is reported as "Negative".  If the pipeline
            itself fails, a Neutral result with score 0.5 is returned.
        """
        try:
            result = self._get_fallback_pipeline()(text[:512])[0]
            # Convert to our format.
            sentiment = "Positive" if result["label"] == "POSITIVE" else "Negative"
            score = result["score"]
        except Exception as e:
            # Best effort: report the failure and degrade to a neutral answer.
            print(f"Fallback analysis failed: {e}")
            return {
                "sentiment": "Neutral",
                "score": 0.5,
                "explanation": "Analysis unavailable",
            }
        return {
            "sentiment": sentiment,
            "score": score,
            "explanation": f"Analyzed using fallback model with {score:.2%} confidence",
        }

    def analyze_batch(self, news_list):
        """Analyze the sentiment of several news items.

        Args:
            news_list: Iterable of dicts that may carry ``title`` and
                ``summary`` entries.

        Returns:
            list: One dict per input item holding the item's own keys plus
            ``sentiment``, ``score`` and ``explanation`` (the analysis keys
            win on a name clash).
        """
        results = []
        for news in news_list:
            # Join title and summary, skipping missing or empty parts so a
            # None value is not fed to the model as the word "None".
            parts = (news.get("title"), news.get("summary"))
            combined_text = " ".join(str(part) for part in parts if part)
            results.append({**news, **self.analyze_sentiment(combined_text)})
        return results