Warisamm748 committed on
Commit
488f611
·
verified ·
1 Parent(s): c8dffe6

Create sentiment_analyzer.py

Browse files
Files changed (1) hide show
  1. sentiment_analyzer.py +183 -0
sentiment_analyzer.py ADDED
@@ -0,0 +1,183 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
2
+ import torch
3
+ import re
4
+
5
+ class SentimentAnalyzer:
6
+ def __init__(self, model_name="google/gemma-2-2b-it"):
7
+ """
8
+ Initialize sentiment analyzer with Gemma model
9
+
10
+ Args:
11
+ model_name: Hugging Face model name (ใช้ gemma-2-2b-it แทน 3-4b ที่ยังไม่มี)
12
+ """
13
+ print(f"Loading model: {model_name}")
14
+
15
+ self.device = "cuda" if torch.cuda.is_available() else "cpu"
16
+ print(f"Using device: {self.device}")
17
+
18
+ try:
19
+ self.tokenizer = AutoTokenizer.from_pretrained(model_name)
20
+ self.model = AutoModelForCausalLM.from_pretrained(
21
+ model_name,
22
+ torch_dtype=torch.float16 if self.device == "cuda" else torch.float32,
23
+ device_map="auto" if self.device == "cuda" else None,
24
+ low_cpu_mem_usage=True
25
+ )
26
+
27
+ if self.device == "cpu":
28
+ self.model = self.model.to(self.device)
29
+
30
+ print("Model loaded successfully!")
31
+
32
+ except Exception as e:
33
+ print(f"Error loading model: {e}")
34
+ # Fallback to sentiment pipeline
35
+ self.model = None
36
+ self.sentiment_pipeline = pipeline(
37
+ "sentiment-analysis",
38
+ model="distilbert-base-uncased-finetuned-sst-2-english"
39
+ )
40
+
41
+ def analyze_sentiment(self, text):
42
+ """
43
+ วิเคราะห์ sentiment ของข้อความ
44
+
45
+ Args:
46
+ text: ข้อความที่ต้องการวิเคราะห์
47
+
48
+ Returns:
49
+ dict: {sentiment, score, explanation}
50
+ """
51
+ if not text or len(text.strip()) == 0:
52
+ return {
53
+ "sentiment": "Neutral",
54
+ "score": 0.5,
55
+ "explanation": "No text to analyze"
56
+ }
57
+
58
+ # ถ้า model โหลดไม่สำเร็จ ใช้ fallback pipeline
59
+ if self.model is None:
60
+ return self._fallback_sentiment(text)
61
+
62
+ try:
63
+ # สร้าง prompt สำหรับ Gemma
64
+ prompt = f"""Analyze the sentiment of this financial news. Rate it as Positive, Negative, or Neutral with a confidence score (0-1).
65
+
66
+ News: {text[:500]}
67
+
68
+ Provide your analysis in this exact format:
69
+ Sentiment: [Positive/Negative/Neutral]
70
+ Score: [0.0-1.0]
71
+ Reason: [Brief explanation]"""
72
+
73
+ # Tokenize และ generate
74
+ inputs = self.tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512)
75
+ inputs = inputs.to(self.device)
76
+
77
+ with torch.no_grad():
78
+ outputs = self.model.generate(
79
+ **inputs,
80
+ max_new_tokens=150,
81
+ temperature=0.3,
82
+ do_sample=True,
83
+ pad_token_id=self.tokenizer.eos_token_id
84
+ )
85
+
86
+ response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
87
+
88
+ # Parse response
89
+ return self._parse_llm_response(response)
90
+
91
+ except Exception as e:
92
+ print(f"Error in analysis: {e}")
93
+ return self._fallback_sentiment(text)
94
+
95
+ def _parse_llm_response(self, response):
96
+ """แยก sentiment, score และ explanation จาก LLM response"""
97
+ sentiment = "Neutral"
98
+ score = 0.5
99
+ explanation = "Unable to analyze"
100
+
101
+ try:
102
+ # Extract sentiment
103
+ if "Sentiment:" in response:
104
+ sentiment_line = re.search(r'Sentiment:\s*(\w+)', response, re.IGNORECASE)
105
+ if sentiment_line:
106
+ sentiment = sentiment_line.group(1).capitalize()
107
+
108
+ # Extract score
109
+ if "Score:" in response:
110
+ score_line = re.search(r'Score:\s*([\d.]+)', response)
111
+ if score_line:
112
+ score = float(score_line.group(1))
113
+ score = max(0.0, min(1.0, score)) # Clamp between 0-1
114
+
115
+ # Extract reason/explanation
116
+ if "Reason:" in response:
117
+ reason_match = re.search(r'Reason:\s*(.+?)(?:\n|$)', response, re.IGNORECASE)
118
+ if reason_match:
119
+ explanation = reason_match.group(1).strip()
120
+
121
+ # Validate sentiment
122
+ if sentiment not in ["Positive", "Negative", "Neutral"]:
123
+ if "positive" in response.lower():
124
+ sentiment = "Positive"
125
+ elif "negative" in response.lower():
126
+ sentiment = "Negative"
127
+ else:
128
+ sentiment = "Neutral"
129
+
130
+ except Exception as e:
131
+ print(f"Parse error: {e}")
132
+
133
+ return {
134
+ "sentiment": sentiment,
135
+ "score": score,
136
+ "explanation": explanation
137
+ }
138
+
139
+ def _fallback_sentiment(self, text):
140
+ """Fallback method ใช้ DistilBERT"""
141
+ try:
142
+ result = self.sentiment_pipeline(text[:512])[0]
143
+
144
+ # Convert to our format
145
+ sentiment = "Positive" if result['label'] == 'POSITIVE' else "Negative"
146
+ score = result['score']
147
+
148
+ return {
149
+ "sentiment": sentiment,
150
+ "score": score,
151
+ "explanation": f"Analyzed using fallback model with {score:.2%} confidence"
152
+ }
153
+ except:
154
+ return {
155
+ "sentiment": "Neutral",
156
+ "score": 0.5,
157
+ "explanation": "Analysis unavailable"
158
+ }
159
+
160
+ def analyze_batch(self, news_list):
161
+ """
162
+ วิเคราะห์ sentiment หลายข่าวพร้อมกัน
163
+
164
+ Args:
165
+ news_list: list ของ dict ที่มี title และ summary
166
+
167
+ Returns:
168
+ list: รายการผลการวิเคราะห์
169
+ """
170
+ results = []
171
+
172
+ for news in news_list:
173
+ # รวม title และ summary
174
+ combined_text = f"{news.get('title', '')} {news.get('summary', '')}"
175
+
176
+ sentiment_result = self.analyze_sentiment(combined_text)
177
+
178
+ results.append({
179
+ **news,
180
+ **sentiment_result
181
+ })
182
+
183
+ return results