File size: 6,634 Bytes
abaee64
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import torch
import re

class SentimentAnalyzer:
    """Classify the sentiment of financial news text.

    The primary path prompts a causal language model (Gemma by default) and
    parses its free-text answer. A DistilBERT sentiment pipeline is used as a
    fallback when the language model cannot be loaded or when generation
    fails for a given input.

    Every analysis result is a dict with three keys:
        sentiment: "Positive", "Negative" or "Neutral".
        score: float clamped to the range 0.0-1.0.
        explanation: short human-readable justification.
    """

    # Checkpoint used by the fallback sentiment pipeline.
    FALLBACK_MODEL = "distilbert-base-uncased-finetuned-sst-2-english"

    # The only labels that are ever returned to callers.
    VALID_SENTIMENTS = ("Positive", "Negative", "Neutral")

    def __init__(self, model_name="google/gemma-2-2b-it"):
        """
        Initialize the sentiment analyzer with a Gemma model.

        If the model or tokenizer cannot be loaded, the error is printed and
        the DistilBERT fallback pipeline is loaded instead.

        Args:
            model_name: Hugging Face model name (gemma-2-2b-it is the default
                because the 3-4b variant was not available yet).
        """
        print(f"Loading model: {model_name}")

        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        print(f"Using device: {self.device}")

        # Define every attribute up front so later code never has to guess
        # which load path was taken.
        self.model = None
        self.tokenizer = None
        self.sentiment_pipeline = None

        try:
            self.tokenizer = AutoTokenizer.from_pretrained(model_name)
            self.model = AutoModelForCausalLM.from_pretrained(
                model_name,
                torch_dtype=torch.float16 if self.device == "cuda" else torch.float32,
                device_map="auto" if self.device == "cuda" else None,
                low_cpu_mem_usage=True
            )

            if self.device == "cpu":
                self.model = self.model.to(self.device)

            print("Model loaded successfully!")

        except Exception as e:
            print(f"Error loading model: {e}")
            # Fall back to the sentiment pipeline; load it eagerly so a
            # broken environment is reported at construction time.
            self.model = None
            self.tokenizer = None
            self._get_fallback_pipeline()

    def _get_fallback_pipeline(self):
        """Return the fallback sentiment pipeline, creating it on first use."""
        # getattr keeps this safe even on instances built without __init__.
        if getattr(self, "sentiment_pipeline", None) is None:
            self.sentiment_pipeline = pipeline(
                "sentiment-analysis",
                model=self.FALLBACK_MODEL
            )
        return self.sentiment_pipeline

    def analyze_sentiment(self, text):
        """
        Analyze the sentiment of a piece of text.

        Args:
            text: The text to analyze. Only the first 500 characters are
                placed in the prompt.

        Returns:
            dict: {sentiment, score, explanation}. Empty or whitespace-only
            input yields a Neutral result with score 0.5.
        """
        if not text or not text.strip():
            return {
                "sentiment": "Neutral",
                "score": 0.5,
                "explanation": "No text to analyze"
            }

        # If the language model failed to load, use the fallback pipeline.
        if self.model is None:
            return self._fallback_sentiment(text)

        try:
            # Build the prompt for Gemma.
            prompt = f"""Analyze the sentiment of this financial news. Rate it as Positive, Negative, or Neutral with a confidence score (0-1).
News: {text[:500]}
Provide your analysis in this exact format:
Sentiment: [Positive/Negative/Neutral]
Score: [0.0-1.0]
Reason: [Brief explanation]"""

            # Tokenize and generate.
            inputs = self.tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512)
            inputs = inputs.to(self.device)

            with torch.no_grad():
                outputs = self.model.generate(
                    **inputs,
                    max_new_tokens=150,
                    temperature=0.3,
                    do_sample=True,
                    pad_token_id=self.tokenizer.eos_token_id
                )

            # The generated sequence starts with the prompt tokens. Decode
            # only what follows them; otherwise the template lines of the
            # prompt itself ("Reason: [Brief explanation]") get parsed as if
            # they were the model's answer.
            prompt_length = inputs["input_ids"].shape[-1]
            response = self.tokenizer.decode(
                outputs[0][prompt_length:],
                skip_special_tokens=True
            )

            return self._parse_llm_response(response)

        except Exception as e:
            print(f"Error in analysis: {e}")
            return self._fallback_sentiment(text)

    def _parse_llm_response(self, response):
        """
        Extract sentiment, score and explanation from the LLM response.

        Field names are matched case-insensitively and may be wrapped in
        markdown emphasis (e.g. "**Sentiment:** Positive"). Fields that are
        missing keep their defaults: Neutral, 0.5, "Unable to analyze".

        Args:
            response: Text produced by the language model.

        Returns:
            dict: {sentiment, score, explanation}
        """
        sentiment = "Neutral"
        score = 0.5
        explanation = "Unable to analyze"

        if not isinstance(response, str):
            response = ""

        # "[\s*]*" skips whitespace and markdown asterisks after the colon.
        sentiment_match = re.search(r'Sentiment:[\s*]*(\w+)', response, re.IGNORECASE)
        if sentiment_match:
            sentiment = sentiment_match.group(1).capitalize()

        # Match one well-formed number so trailing punctuation ("0.8.") can
        # never reach float() and abort the rest of the parsing.
        score_match = re.search(
            r'Score:[\s*]*(\d+(?:\.\d+)?|\.\d+)', response, re.IGNORECASE
        )
        if score_match:
            score = max(0.0, min(1.0, float(score_match.group(1))))  # Clamp to 0-1

        reason_match = re.search(r'Reason:[\s*]*(.+?)(?:\n|$)', response, re.IGNORECASE)
        if reason_match:
            explanation = reason_match.group(1).strip()

        # Unknown labels are resolved by looking for a keyword anywhere in
        # the response, defaulting to Neutral.
        if sentiment not in self.VALID_SENTIMENTS:
            lowered = response.lower()
            if "positive" in lowered:
                sentiment = "Positive"
            elif "negative" in lowered:
                sentiment = "Negative"
            else:
                sentiment = "Neutral"

        return {
            "sentiment": sentiment,
            "score": score,
            "explanation": explanation
        }

    def _fallback_sentiment(self, text):
        """
        Fallback analysis using the DistilBERT sentiment pipeline.

        Args:
            text: The text to analyze. Only the first 512 characters are sent
                to the pipeline.

        Returns:
            dict: {sentiment, score, explanation}. If the pipeline cannot be
            created or fails, a Neutral result with score 0.5 is returned.
        """
        try:
            classifier = self._get_fallback_pipeline()
            # truncation=True additionally caps the token count, since 512
            # characters can still tokenize to more tokens than the model
            # accepts.
            result = classifier(text[:512], truncation=True)[0]

            # Convert to our format.
            sentiment = "Positive" if result['label'] == 'POSITIVE' else "Negative"
            score = result['score']

            return {
                "sentiment": sentiment,
                "score": score,
                "explanation": f"Analyzed using fallback model with {score:.2%} confidence"
            }
        except Exception:
            # Best-effort path: never let the fallback crash the caller.
            return {
                "sentiment": "Neutral",
                "score": 0.5,
                "explanation": "Analysis unavailable"
            }

    def analyze_batch(self, news_list):
        """
        Analyze the sentiment of several news items.

        Args:
            news_list: list of dicts that have "title" and "summary" entries.
                Missing, empty or None values are ignored.

        Returns:
            list: one dict per input item, containing the item's own keys
            merged with the sentiment result (sentiment keys take precedence).
        """
        results = []

        for news in news_list or []:
            # Combine title and summary, skipping empty/None parts so a
            # missing field is not rendered as the literal text "None".
            parts = (news.get('title'), news.get('summary'))
            combined_text = " ".join(str(part) for part in parts if part)

            sentiment_result = self.analyze_sentiment(combined_text)

            results.append({
                **news,
                **sentiment_result
            })

        return results