|
|
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline |
|
|
import torch |
|
|
import re |
|
|
|
|
|
class SentimentAnalyzer:
    """Financial-news sentiment analyzer backed by a causal LLM.

    Prompts an instruction-tuned Hugging Face model (Gemma by default) to
    rate text as Positive/Negative/Neutral with a confidence score, and
    falls back to a DistilBERT sentiment pipeline whenever the primary
    model is unavailable or generation fails.
    """

    def __init__(self, model_name="google/gemma-2-2b-it"):
        """
        Initialize the sentiment analyzer with a Gemma model.

        Args:
            model_name: Hugging Face model name (gemma-2-2b-it is used
                instead of a 3-4b variant, which is not available yet).
        """
        print(f"Loading model: {model_name}")

        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        print(f"Using device: {self.device}")

        # Always define the fallback-pipeline slot; it is built lazily in
        # _fallback_sentiment. (Previously it was only created on the
        # load-failure path, so a generation error after a successful load
        # raised AttributeError inside the fallback.)
        self.sentiment_pipeline = None

        try:
            self.tokenizer = AutoTokenizer.from_pretrained(model_name)
            self.model = AutoModelForCausalLM.from_pretrained(
                model_name,
                # fp16 only on GPU; CPU inference stays in fp32.
                torch_dtype=torch.float16 if self.device == "cuda" else torch.float32,
                device_map="auto" if self.device == "cuda" else None,
                low_cpu_mem_usage=True,
            )

            # device_map="auto" already places the model on GPU; only a
            # CPU load needs an explicit move.
            if self.device == "cpu":
                self.model = self.model.to(self.device)

            print("Model loaded successfully!")

        except Exception as e:
            print(f"Error loading model: {e}")
            # Leave both handles unset so analyze_sentiment routes every
            # request through the fallback path.
            self.tokenizer = None
            self.model = None

    def analyze_sentiment(self, text):
        """
        Analyze the sentiment of a piece of text.

        Args:
            text: The text to analyze.

        Returns:
            dict: {"sentiment": str, "score": float, "explanation": str}
        """
        # Guard: nothing to analyze.
        if not text or len(text.strip()) == 0:
            return {
                "sentiment": "Neutral",
                "score": 0.5,
                "explanation": "No text to analyze"
            }

        # Primary model never loaded — go straight to the fallback.
        if self.model is None:
            return self._fallback_sentiment(text)

        try:
            prompt = f"""Analyze the sentiment of this financial news. Rate it as Positive, Negative, or Neutral with a confidence score (0-1).

News: {text[:500]}

Provide your analysis in this exact format:
Sentiment: [Positive/Negative/Neutral]
Score: [0.0-1.0]
Reason: [Brief explanation]"""

            inputs = self.tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512)
            inputs = inputs.to(self.device)

            with torch.no_grad():
                outputs = self.model.generate(
                    **inputs,
                    max_new_tokens=150,
                    temperature=0.3,
                    do_sample=True,
                    pad_token_id=self.tokenizer.eos_token_id
                )

            # Decode only the newly generated tokens. Decoding the full
            # sequence echoed the prompt's own template lines
            # ("Sentiment: [Positive/Negative/Neutral]") into the text
            # handed to the parser.
            new_tokens = outputs[0][inputs["input_ids"].shape[-1]:]
            response = self.tokenizer.decode(new_tokens, skip_special_tokens=True)

            return self._parse_llm_response(response)

        except Exception as e:
            print(f"Error in analysis: {e}")
            return self._fallback_sentiment(text)

    def _parse_llm_response(self, response):
        """Extract sentiment, score and explanation from an LLM response.

        Defaults to Neutral / 0.5 / "Unable to analyze" for any field the
        response does not supply in the expected format.
        """
        sentiment = "Neutral"
        score = 0.5
        explanation = "Unable to analyze"

        try:
            if "Sentiment:" in response:
                sentiment_line = re.search(r'Sentiment:\s*(\w+)', response, re.IGNORECASE)
                if sentiment_line:
                    sentiment = sentiment_line.group(1).capitalize()

            if "Score:" in response:
                score_line = re.search(r'Score:\s*([\d.]+)', response)
                if score_line:
                    score = float(score_line.group(1))
                    # Clamp out-of-range model output into [0, 1].
                    score = max(0.0, min(1.0, score))

            if "Reason:" in response:
                reason_match = re.search(r'Reason:\s*(.+?)(?:\n|$)', response, re.IGNORECASE)
                if reason_match:
                    explanation = reason_match.group(1).strip()

            # If the captured word is not a valid label (e.g. "Sentiment:
            # very positive"), fall back to keyword spotting over the
            # whole response.
            if sentiment not in ["Positive", "Negative", "Neutral"]:
                if "positive" in response.lower():
                    sentiment = "Positive"
                elif "negative" in response.lower():
                    sentiment = "Negative"
                else:
                    sentiment = "Neutral"

        except Exception as e:
            print(f"Parse error: {e}")

        return {
            "sentiment": sentiment,
            "score": score,
            "explanation": explanation
        }

    def _fallback_sentiment(self, text):
        """Fallback analysis using a DistilBERT sentiment pipeline.

        The pipeline is created on first use and cached on the instance,
        so a successful primary-model setup never pays its download cost.
        """
        try:
            if self.sentiment_pipeline is None:
                self.sentiment_pipeline = pipeline(
                    "sentiment-analysis",
                    model="distilbert-base-uncased-finetuned-sst-2-english"
                )

            result = self.sentiment_pipeline(text[:512])[0]

            # SST-2 fine-tune emits only POSITIVE/NEGATIVE labels.
            sentiment = "Positive" if result['label'] == 'POSITIVE' else "Negative"
            score = result['score']

            return {
                "sentiment": sentiment,
                "score": score,
                "explanation": f"Analyzed using fallback model with {score:.2%} confidence"
            }
        except Exception as e:
            # Narrowed from a bare except (which also swallowed
            # SystemExit/KeyboardInterrupt) and now logs the cause.
            print(f"Fallback analysis failed: {e}")
            return {
                "sentiment": "Neutral",
                "score": 0.5,
                "explanation": "Analysis unavailable"
            }

    def analyze_batch(self, news_list):
        """
        Analyze the sentiment of several news items at once.

        Args:
            news_list: list of dicts, each with "title" and "summary" keys.

        Returns:
            list: one dict per item — the original fields merged with the
                sentiment result (sentiment, score, explanation).
        """
        results = []

        for news in news_list:
            combined_text = f"{news.get('title', '')} {news.get('summary', '')}"

            sentiment_result = self.analyze_sentiment(combined_text)

            results.append({
                **news,
                **sentiment_result
            })

        return results