Spaces:

Supitn
/

try_topic

No application file

App Files Files Community

Supitn committed on Nov 3

Commit

abaee64

verified ·

1 Parent(s): d08f4c4

Create sentiment_analyzer.py

Browse files

Files changed (1) hide show

sentiment_analyzer.py +181 -0

sentiment_analyzer.py ADDED Viewed

	@@ -0,0 +1,181 @@

+from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
+import torch
+import re
class SentimentAnalyzer:
    """Classify the sentiment of financial-news text.

    The primary path prompts a causal language model (Gemma by default) and
    parses its structured reply.  When that model cannot be loaded, or a
    generation call fails, a DistilBERT SST-2 classification pipeline is used
    instead.

    Every analysis method returns a dict with the keys ``sentiment``
    ("Positive", "Negative" or "Neutral"), ``score`` (float) and
    ``explanation`` (str).
    """

    FALLBACK_MODEL_NAME = "distilbert-base-uncased-finetuned-sst-2-english"
    VALID_SENTIMENTS = ("Positive", "Negative", "Neutral")

    # The field patterns tolerate Markdown emphasis around the label
    # ("**Sentiment:** Positive") and any letter case.  A bracketed template
    # placeholder such as "Sentiment: [Positive/...]" is deliberately not
    # matched because "[" is not a word character.
    _SENTIMENT_RE = re.compile(r'Sentiment\**\s*:\s*\**\s*(\w+)', re.IGNORECASE)
    # A well-formed decimal only, so a sentence-ending period ("0.8.") is not
    # captured and float() cannot fail.
    _SCORE_RE = re.compile(
        r'Score\**\s*:\s*\**\s*(\d+(?:\.\d+)?|\.\d+)', re.IGNORECASE
    )
    _REASON_RE = re.compile(
        r'Reason\**\s*:\s*\**\s*(.+?)(?:\n|$)', re.IGNORECASE
    )

    def __init__(self, model_name="google/gemma-2-2b-it"):
        """
        Initialize the sentiment analyzer with a Gemma model.

        Args:
            model_name: Hugging Face model name (gemma-2-2b-it is used in
                place of a 3-4b variant that was not available yet).
        """
        print(f"Loading model: {model_name}")
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        print(f"Using device: {self.device}")

        # Define every attribute up front so later code never hits an
        # AttributeError, whichever loading path succeeds.
        self.tokenizer = None
        self.model = None
        self.sentiment_pipeline = None

        on_gpu = self.device == "cuda"
        try:
            tokenizer = AutoTokenizer.from_pretrained(model_name)
            model = AutoModelForCausalLM.from_pretrained(
                model_name,
                torch_dtype=torch.float16 if on_gpu else torch.float32,
                device_map="auto" if on_gpu else None,
                low_cpu_mem_usage=True,
            )
            if not on_gpu:
                model = model.to(self.device)
            self.tokenizer = tokenizer
            self.model = model
            print("Model loaded successfully!")
        except Exception as e:
            print(f"Error loading model: {e}")
            # No LLM available: load the fallback classifier right away.
            self._get_fallback_pipeline()

    def _get_fallback_pipeline(self):
        """Return the fallback classifier, creating and caching it on first use."""
        classifier = getattr(self, "sentiment_pipeline", None)
        if classifier is None:
            classifier = pipeline(
                "sentiment-analysis",
                model=self.FALLBACK_MODEL_NAME,
            )
            self.sentiment_pipeline = classifier
        return classifier

    def analyze_sentiment(self, text):
        """
        Analyze the sentiment of a piece of text.

        Args:
            text: The text to analyze.

        Returns:
            dict: {sentiment, score, explanation}
        """
        if not isinstance(text, str) or not text.strip():
            return {
                "sentiment": "Neutral",
                "score": 0.5,
                "explanation": "No text to analyze"
            }

        # If the LLM failed to load, use the fallback pipeline.
        if getattr(self, "model", None) is None:
            return self._fallback_sentiment(text)

        try:
            # Build the prompt for Gemma.
            prompt = f"""Analyze the sentiment of this financial news. Rate it as Positive, Negative, or Neutral with a confidence score (0-1).
News: {text[:500]}
Provide your analysis in this exact format:
Sentiment: [Positive/Negative/Neutral]
Score: [0.0-1.0]
Reason: [Brief explanation]"""

            # Tokenize and generate.
            inputs = self.tokenizer(
                prompt, return_tensors="pt", truncation=True, max_length=512
            )
            inputs = inputs.to(self.device)
            with torch.no_grad():
                outputs = self.model.generate(
                    **inputs,
                    max_new_tokens=150,
                    temperature=0.3,
                    do_sample=True,
                    pad_token_id=self.tokenizer.eos_token_id
                )

            # A causal LM returns prompt + completion.  Decode only the new
            # tokens; otherwise the parser would read the prompt's own format
            # template (e.g. "Reason: [Brief explanation]") as the answer.
            prompt_length = inputs["input_ids"].shape[-1]
            completion = outputs[0][prompt_length:]
            response = self.tokenizer.decode(completion, skip_special_tokens=True)

            return self._parse_llm_response(response)
        except Exception as e:
            print(f"Error in analysis: {e}")
            return self._fallback_sentiment(text)

    def _parse_llm_response(self, response):
        """Extract sentiment, score and explanation from an LLM response.

        Fields that cannot be found keep their defaults (score 0.5,
        explanation "Unable to analyze").  If no valid sentiment label is
        parsed, the response is scanned for the words "positive" /
        "negative", defaulting to "Neutral".
        """
        sentiment = None
        score = 0.5
        explanation = "Unable to analyze"

        if not isinstance(response, str):
            print(f"Parse error: expected str, got {type(response).__name__}")
            return {
                "sentiment": "Neutral",
                "score": score,
                "explanation": explanation
            }

        sentiment_match = self._SENTIMENT_RE.search(response)
        if sentiment_match:
            sentiment = sentiment_match.group(1).capitalize()

        score_match = self._SCORE_RE.search(response)
        if score_match:
            # Clamp between 0 and 1.
            score = max(0.0, min(1.0, float(score_match.group(1))))

        reason_match = self._REASON_RE.search(response)
        if reason_match:
            explanation = reason_match.group(1).strip()

        # Missing or unrecognised label: fall back to a keyword scan.
        if sentiment not in self.VALID_SENTIMENTS:
            lowered = response.lower()
            if "positive" in lowered:
                sentiment = "Positive"
            elif "negative" in lowered:
                sentiment = "Negative"
            else:
                sentiment = "Neutral"

        return {
            "sentiment": sentiment,
            "score": score,
            "explanation": explanation
        }

    def _fallback_sentiment(self, text):
        """Fallback analysis using the DistilBERT sentiment pipeline.

        Returns a neutral "Analysis unavailable" result if the pipeline
        cannot be created or fails on this input.
        """
        try:
            classifier = self._get_fallback_pipeline()
            # The character slice bounds the input size; truncation=True also
            # enforces the model's token limit, which 512 characters can
            # still exceed.
            result = classifier(text[:512], truncation=True)[0]
            # Convert to our format.
            sentiment = "Positive" if result['label'] == 'POSITIVE' else "Negative"
            score = result['score']
            return {
                "sentiment": sentiment,
                "score": score,
                "explanation": f"Analyzed using fallback model with {score:.2%} confidence"
            }
        except Exception as e:
            print(f"Fallback analysis error: {e}")
            return {
                "sentiment": "Neutral",
                "score": 0.5,
                "explanation": "Analysis unavailable"
            }

    def analyze_batch(self, news_list):
        """
        Analyze the sentiment of several news items.

        Args:
            news_list: list of dicts that have ``title`` and ``summary`` keys.

        Returns:
            list: one dict per input item, containing the item's own keys
            plus the sentiment result keys (result keys win on a clash).
        """
        results = []
        for news in news_list or []:
            # Combine title and summary, skipping missing/None/empty fields
            # so the literal text "None" is never sent to the model.
            parts = (news.get('title'), news.get('summary'))
            combined_text = " ".join(str(part) for part in parts if part)
            results.append({
                **news,
                **self.analyze_sentiment(combined_text)
            })
        return results