File size: 6,634 Bytes
abaee64 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 |
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import torch
import re
class SentimentAnalyzer:
    """Financial-news sentiment analyzer backed by a local Gemma LLM.

    Falls back to a DistilBERT sentiment pipeline when the LLM cannot be
    loaded, or when a single inference call fails.
    """

    def __init__(self, model_name="google/gemma-2-2b-it"):
        """
        Initialize sentiment analyzer with Gemma model

        Args:
            model_name: Hugging Face model name (gemma-2-2b-it is used
                because a 3-4b variant is not yet available)
        """
        print(f"Loading model: {model_name}")
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        print(f"Using device: {self.device}")
        # Always define the attribute so _fallback_sentiment can never hit
        # an AttributeError; the pipeline is created lazily when needed.
        self.sentiment_pipeline = None
        try:
            self.tokenizer = AutoTokenizer.from_pretrained(model_name)
            self.model = AutoModelForCausalLM.from_pretrained(
                model_name,
                # fp16 only on GPU; CPU inference stays in fp32
                torch_dtype=torch.float16 if self.device == "cuda" else torch.float32,
                device_map="auto" if self.device == "cuda" else None,
                low_cpu_mem_usage=True,
            )
            if self.device == "cpu":
                self.model = self.model.to(self.device)
            print("Model loaded successfully!")
        except Exception as e:
            print(f"Error loading model: {e}")
            # Fallback to sentiment pipeline
            self.model = None
            self.sentiment_pipeline = pipeline(
                "sentiment-analysis",
                model="distilbert-base-uncased-finetuned-sst-2-english"
            )

    def analyze_sentiment(self, text):
        """
        Analyze the sentiment of a single text.

        Args:
            text: the text to analyze

        Returns:
            dict: {sentiment, score, explanation}
        """
        if not text or not text.strip():
            return {
                "sentiment": "Neutral",
                "score": 0.5,
                "explanation": "No text to analyze"
            }
        # If the LLM failed to load, use the fallback pipeline
        if self.model is None:
            return self._fallback_sentiment(text)
        try:
            # Build the prompt for Gemma
            prompt = f"""Analyze the sentiment of this financial news. Rate it as Positive, Negative, or Neutral with a confidence score (0-1).
News: {text[:500]}
Provide your analysis in this exact format:
Sentiment: [Positive/Negative/Neutral]
Score: [0.0-1.0]
Reason: [Brief explanation]"""
            # Tokenize and generate
            inputs = self.tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512)
            inputs = inputs.to(self.device)
            with torch.no_grad():
                outputs = self.model.generate(
                    **inputs,
                    max_new_tokens=150,
                    temperature=0.3,
                    do_sample=True,
                    pad_token_id=self.tokenizer.eos_token_id
                )
            # Decode ONLY the newly generated tokens. Decoding the full
            # sequence would echo the prompt, whose template text
            # ("Sentiment: [Positive/...]") the parser could scan first.
            prompt_len = inputs["input_ids"].shape[1]
            response = self.tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True)
            # Parse response
            return self._parse_llm_response(response)
        except Exception as e:
            print(f"Error in analysis: {e}")
            return self._fallback_sentiment(text)

    def _parse_llm_response(self, response):
        """Extract sentiment, score, and explanation from the LLM response.

        Defaults (Neutral / 0.5 / "Unable to analyze") are kept for any
        field that cannot be parsed.
        """
        sentiment = "Neutral"
        score = 0.5
        explanation = "Unable to analyze"
        try:
            # Extract sentiment
            if "Sentiment:" in response:
                sentiment_line = re.search(r'Sentiment:\s*(\w+)', response, re.IGNORECASE)
                if sentiment_line:
                    sentiment = sentiment_line.group(1).capitalize()
            # Extract score
            if "Score:" in response:
                score_line = re.search(r'Score:\s*([\d.]+)', response)
                if score_line:
                    score = float(score_line.group(1))
                    score = max(0.0, min(1.0, score))  # Clamp between 0-1
            # Extract reason/explanation
            if "Reason:" in response:
                reason_match = re.search(r'Reason:\s*(.+?)(?:\n|$)', response, re.IGNORECASE)
                if reason_match:
                    explanation = reason_match.group(1).strip()
            # Validate sentiment: fall back to a keyword scan of the whole
            # response if the structured field produced an unexpected label.
            if sentiment not in ["Positive", "Negative", "Neutral"]:
                if "positive" in response.lower():
                    sentiment = "Positive"
                elif "negative" in response.lower():
                    sentiment = "Negative"
                else:
                    sentiment = "Neutral"
        except Exception as e:
            print(f"Parse error: {e}")
        return {
            "sentiment": sentiment,
            "score": score,
            "explanation": explanation
        }

    def _fallback_sentiment(self, text):
        """Fallback method using DistilBERT.

        Reachable both when the LLM never loaded and when a single LLM
        inference failed, so the pipeline is created lazily here.
        """
        try:
            if self.sentiment_pipeline is None:
                self.sentiment_pipeline = pipeline(
                    "sentiment-analysis",
                    model="distilbert-base-uncased-finetuned-sst-2-english"
                )
            result = self.sentiment_pipeline(text[:512])[0]
            # Convert to our format (SST-2 model emits POSITIVE/NEGATIVE only)
            sentiment = "Positive" if result['label'] == 'POSITIVE' else "Negative"
            score = result['score']
            return {
                "sentiment": sentiment,
                "score": score,
                "explanation": f"Analyzed using fallback model with {score:.2%} confidence"
            }
        except Exception:  # was a bare except; keep best-effort semantics
            return {
                "sentiment": "Neutral",
                "score": 0.5,
                "explanation": "Analysis unavailable"
            }

    def analyze_batch(self, news_list):
        """
        Analyze the sentiment of several news items.

        Args:
            news_list: list of dicts each carrying 'title' and 'summary'

        Returns:
            list: one result dict per item — the original keys merged with
                the sentiment fields (sentiment fields win on collision)
        """
        results = []
        for news in news_list:
            # Combine title and summary into one text for analysis
            combined_text = f"{news.get('title', '')} {news.get('summary', '')}"
            sentiment_result = self.analyze_sentiment(combined_text)
            results.append({
                **news,
                **sentiment_result
            })
        return results