File size: 6,634 Bytes
abaee64 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 |
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import torch
import re
class SentimentAnalyzer:
    """Financial-news sentiment analyzer backed by a local Gemma LLM.

    Falls back to a DistilBERT sentiment pipeline when the LLM cannot be
    loaded, or when a single inference call fails.
    """

    def __init__(self, model_name="google/gemma-2-2b-it"):
        """
        Initialize sentiment analyzer with Gemma model

        Args:
            model_name: Hugging Face model name (gemma-2-2b-it is used
                because a 3-4b variant is not yet available)
        """
        print(f"Loading model: {model_name}")
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        print(f"Using device: {self.device}")
        # Always define the attribute so _fallback_sentiment can never hit
        # an AttributeError; the pipeline is created lazily when needed.
        self.sentiment_pipeline = None
        try:
            self.tokenizer = AutoTokenizer.from_pretrained(model_name)
            self.model = AutoModelForCausalLM.from_pretrained(
                model_name,
                # fp16 only on GPU; CPU inference stays in fp32
                torch_dtype=torch.float16 if self.device == "cuda" else torch.float32,
                device_map="auto" if self.device == "cuda" else None,
                low_cpu_mem_usage=True,
            )
            if self.device == "cpu":
                self.model = self.model.to(self.device)
            print("Model loaded successfully!")
        except Exception as e:
            print(f"Error loading model: {e}")
            # Fallback to sentiment pipeline
            self.model = None
            self.sentiment_pipeline = pipeline(
                "sentiment-analysis",
                model="distilbert-base-uncased-finetuned-sst-2-english"
            )

    def analyze_sentiment(self, text):
        """
        Analyze the sentiment of a single text.

        Args:
            text: the text to analyze

        Returns:
            dict: {sentiment, score, explanation}
        """
        if not text or not text.strip():
            return {
                "sentiment": "Neutral",
                "score": 0.5,
                "explanation": "No text to analyze"
            }
        # If the LLM failed to load, use the fallback pipeline
        if self.model is None:
            return self._fallback_sentiment(text)
        try:
            # Build the prompt for Gemma
            prompt = f"""Analyze the sentiment of this financial news. Rate it as Positive, Negative, or Neutral with a confidence score (0-1).
News: {text[:500]}
Provide your analysis in this exact format:
Sentiment: [Positive/Negative/Neutral]
Score: [0.0-1.0]
Reason: [Brief explanation]"""
            # Tokenize and generate
            inputs = self.tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512)
            inputs = inputs.to(self.device)
            with torch.no_grad():
                outputs = self.model.generate(
                    **inputs,
                    max_new_tokens=150,
                    temperature=0.3,
                    do_sample=True,
                    pad_token_id=self.tokenizer.eos_token_id
                )
            # Decode ONLY the newly generated tokens. Decoding the full
            # sequence would echo the prompt, whose template text
            # ("Sentiment: [Positive/...]") the parser could scan first.
            prompt_len = inputs["input_ids"].shape[1]
            response = self.tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True)
            # Parse response
            return self._parse_llm_response(response)
        except Exception as e:
            print(f"Error in analysis: {e}")
            return self._fallback_sentiment(text)

    def _parse_llm_response(self, response):
        """Extract sentiment, score, and explanation from the LLM response.

        Defaults (Neutral / 0.5 / "Unable to analyze") are kept for any
        field that cannot be parsed.
        """
        sentiment = "Neutral"
        score = 0.5
        explanation = "Unable to analyze"
        try:
            # Extract sentiment
            if "Sentiment:" in response:
                sentiment_line = re.search(r'Sentiment:\s*(\w+)', response, re.IGNORECASE)
                if sentiment_line:
                    sentiment = sentiment_line.group(1).capitalize()
            # Extract score
            if "Score:" in response:
                score_line = re.search(r'Score:\s*([\d.]+)', response)
                if score_line:
                    score = float(score_line.group(1))
                    score = max(0.0, min(1.0, score))  # Clamp between 0-1
            # Extract reason/explanation
            if "Reason:" in response:
                reason_match = re.search(r'Reason:\s*(.+?)(?:\n|$)', response, re.IGNORECASE)
                if reason_match:
                    explanation = reason_match.group(1).strip()
            # Validate sentiment: fall back to a keyword scan of the whole
            # response if the structured field produced an unexpected label.
            if sentiment not in ["Positive", "Negative", "Neutral"]:
                if "positive" in response.lower():
                    sentiment = "Positive"
                elif "negative" in response.lower():
                    sentiment = "Negative"
                else:
                    sentiment = "Neutral"
        except Exception as e:
            print(f"Parse error: {e}")
        return {
            "sentiment": sentiment,
            "score": score,
            "explanation": explanation
        }

    def _fallback_sentiment(self, text):
        """Fallback method using DistilBERT.

        Reachable both when the LLM never loaded and when a single LLM
        inference failed, so the pipeline is created lazily here.
        """
        try:
            if self.sentiment_pipeline is None:
                self.sentiment_pipeline = pipeline(
                    "sentiment-analysis",
                    model="distilbert-base-uncased-finetuned-sst-2-english"
                )
            result = self.sentiment_pipeline(text[:512])[0]
            # Convert to our format (SST-2 model emits POSITIVE/NEGATIVE only)
            sentiment = "Positive" if result['label'] == 'POSITIVE' else "Negative"
            score = result['score']
            return {
                "sentiment": sentiment,
                "score": score,
                "explanation": f"Analyzed using fallback model with {score:.2%} confidence"
            }
        except Exception:  # was a bare except; keep best-effort semantics
            return {
                "sentiment": "Neutral",
                "score": 0.5,
                "explanation": "Analysis unavailable"
            }

    def analyze_batch(self, news_list):
        """
        Analyze the sentiment of several news items.

        Args:
            news_list: list of dicts each carrying 'title' and 'summary'

        Returns:
            list: one result dict per item — the original keys merged with
                the sentiment fields (sentiment fields win on collision)
        """
        results = []
        for news in news_list:
            # Combine title and summary into one text for analysis
            combined_text = f"{news.get('title', '')} {news.get('summary', '')}"
            sentiment_result = self.analyze_sentiment(combined_text)
            results.append({
                **news,
                **sentiment_result
            })
        return results