Supitn commited on
Commit
abaee64
·
verified ·
1 Parent(s): d08f4c4

Create sentiment_analyzer.py

Browse files
Files changed (1) hide show
  1. sentiment_analyzer.py +181 -0
sentiment_analyzer.py ADDED
@@ -0,0 +1,181 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
2
+ import torch
3
+ import re
4
+
5
class SentimentAnalyzer:
    """Classify the sentiment of financial news with a Gemma causal LM.

    The main path prompts an instruction-tuned causal language model and
    parses its structured answer. When that model cannot be loaded, or
    generation fails, a DistilBERT SST-2 ``sentiment-analysis`` pipeline is
    used instead.

    Every analysis result is a dict with the keys ``sentiment``
    ("Positive", "Negative" or "Neutral"), ``score`` (float) and
    ``explanation`` (str).
    """

    def __init__(self, model_name="google/gemma-2-2b-it"):
        """Load the tokenizer and model, falling back to DistilBERT on error.

        Args:
            model_name: Hugging Face model name (defaults to gemma-2-2b-it,
                used instead of a 3-4b variant that is not available yet).
        """
        print(f"Loading model: {model_name}")

        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        print(f"Using device: {self.device}")

        # Define every attribute up front so later code can test for None
        # instead of tripping over a missing attribute.
        self.tokenizer = None
        self.model = None
        self.sentiment_pipeline = None

        on_gpu = self.device == "cuda"
        try:
            self.tokenizer = AutoTokenizer.from_pretrained(model_name)
            self.model = AutoModelForCausalLM.from_pretrained(
                model_name,
                torch_dtype=torch.float16 if on_gpu else torch.float32,
                device_map="auto" if on_gpu else None,
                low_cpu_mem_usage=True,
            )

            if not on_gpu:
                self.model = self.model.to(self.device)

            print("Model loaded successfully!")

        except Exception as e:
            print(f"Error loading model: {e}")
            # Fall back to the sentiment pipeline.
            self.model = None
            self.sentiment_pipeline = self._load_fallback_pipeline()

    def _load_fallback_pipeline(self):
        """Create the DistilBERT sentiment pipeline used as a fallback."""
        return pipeline(
            "sentiment-analysis",
            model="distilbert-base-uncased-finetuned-sst-2-english",
        )

    def analyze_sentiment(self, text):
        """Analyze the sentiment of a piece of text.

        Args:
            text: The text to analyze.

        Returns:
            dict: {sentiment, score, explanation}
        """
        if not text or not text.strip():
            return {
                "sentiment": "Neutral",
                "score": 0.5,
                "explanation": "No text to analyze",
            }

        # If the main model failed to load, use the fallback pipeline.
        if self.model is None:
            return self._fallback_sentiment(text)

        try:
            # Build the prompt for Gemma.
            prompt = (
                "Analyze the sentiment of this financial news. Rate it as "
                "Positive, Negative, or Neutral with a confidence score (0-1).\n"
                f"News: {text[:500]}\n"
                "Provide your analysis in this exact format:\n"
                "Sentiment: [Positive/Negative/Neutral]\n"
                "Score: [0.0-1.0]\n"
                "Reason: [Brief explanation]"
            )

            # Tokenize and generate.
            inputs = self.tokenizer(
                prompt, return_tensors="pt", truncation=True, max_length=512
            )
            inputs = inputs.to(self.device)

            with torch.no_grad():
                outputs = self.model.generate(
                    **inputs,
                    max_new_tokens=150,
                    temperature=0.3,
                    do_sample=True,
                    pad_token_id=self.tokenizer.eos_token_id,
                )

            # A causal LM returns prompt + continuation. Decode only the new
            # tokens; otherwise the prompt's own "Reason: [Brief explanation]"
            # template line is what the parser finds first.
            prompt_length = inputs["input_ids"].shape[-1]
            response = self.tokenizer.decode(
                outputs[0][prompt_length:], skip_special_tokens=True
            )

            return self._parse_llm_response(response)

        except Exception as e:
            print(f"Error in analysis: {e}")
            return self._fallback_sentiment(text)

    def _parse_llm_response(self, response):
        """Extract sentiment, score and explanation from the LLM response.

        Field names are matched case-insensitively and may be wrapped in
        markdown emphasis (e.g. ``**Sentiment:** Positive``). Missing fields
        keep their defaults: "Neutral", 0.5 and "Unable to analyze".

        Args:
            response: Text generated by the model.

        Returns:
            dict: {sentiment, score, explanation}
        """
        sentiment = "Neutral"
        score = 0.5
        explanation = "Unable to analyze"

        if not isinstance(response, str):
            response = ""

        # Extract sentiment.
        label_match = re.search(
            r"Sentiment:[\s*_]*(\w+)", response, re.IGNORECASE
        )
        if label_match:
            sentiment = label_match.group(1).capitalize()

        # Extract score. The pattern accepts only a well-formed number, so a
        # trailing sentence period ("0.8.") can no longer make float() raise.
        score_match = re.search(
            r"Score:[\s*_]*(\d+(?:\.\d+)?|\.\d+)", response, re.IGNORECASE
        )
        if score_match:
            # Clamp between 0 and 1.
            score = max(0.0, min(1.0, float(score_match.group(1))))

        # Extract reason/explanation (first line after the label).
        reason_match = re.search(
            r"Reason:[\s*_]*(.+?)(?:\n|$)", response, re.IGNORECASE
        )
        if reason_match:
            reason = reason_match.group(1).strip()
            if reason:
                explanation = reason

        # Validate sentiment; rescue unknown labels via keyword search.
        if sentiment not in ("Positive", "Negative", "Neutral"):
            lowered = response.lower()
            if "positive" in lowered:
                sentiment = "Positive"
            elif "negative" in lowered:
                sentiment = "Negative"
            else:
                sentiment = "Neutral"

        return {
            "sentiment": sentiment,
            "score": score,
            "explanation": explanation,
        }

    def _fallback_sentiment(self, text):
        """Classify ``text`` with the DistilBERT fallback pipeline.

        The pipeline is created on first use when it does not exist yet,
        which is the case when the main model loaded fine but generation
        failed later.

        Args:
            text: The text to analyze (only the first 512 characters are used).

        Returns:
            dict: {sentiment, score, explanation}; a neutral placeholder when
            the fallback itself fails.
        """
        try:
            classifier = getattr(self, "sentiment_pipeline", None)
            if classifier is None:
                classifier = self._load_fallback_pipeline()
                self.sentiment_pipeline = classifier

            result = classifier(text[:512])[0]

            # Convert to our format.
            sentiment = "Positive" if result["label"] == "POSITIVE" else "Negative"
            score = result["score"]

            return {
                "sentiment": sentiment,
                "score": score,
                "explanation": f"Analyzed using fallback model with {score:.2%} confidence",
            }
        except Exception as e:
            # Best effort: report the problem and return a neutral result.
            print(f"Fallback analysis error: {e}")
            return {
                "sentiment": "Neutral",
                "score": 0.5,
                "explanation": "Analysis unavailable",
            }

    def analyze_batch(self, news_list):
        """Analyze the sentiment of several news items.

        Args:
            news_list: list of dicts with ``title`` and ``summary`` keys.

        Returns:
            list: one dict per news item, containing the original fields
            plus the sentiment result fields.
        """
        results = []

        for news in news_list or []:
            # Combine title and summary, skipping missing or None values so
            # the literal text "None" is never sent to the model.
            parts = (news.get("title"), news.get("summary"))
            combined_text = " ".join(str(part) for part in parts if part)

            sentiment_result = self.analyze_sentiment(combined_text)

            results.append({**news, **sentiment_result})

        return results