from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import torch
import re


class SentimentAnalyzer:
    """Sentiment analysis for financial news using a local Gemma LLM,
    with a DistilBERT sentiment pipeline as a fallback."""

    def __init__(self, model_name="google/gemma-2-2b-it"):
        """
        Initialize the sentiment analyzer with a Gemma model.

        Args:
            model_name: Hugging Face model name (gemma-2-2b-it is used
                because a 3-4b variant is not available).
        """
        print(f"Loading model: {model_name}")
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        print(f"Using device: {self.device}")

        # Fallback pipeline is created lazily on first use so it is
        # available even when the primary model loads successfully.
        # (Previously it only existed if model loading failed, causing
        # an AttributeError on the runtime-error fallback path.)
        self.sentiment_pipeline = None

        try:
            self.tokenizer = AutoTokenizer.from_pretrained(model_name)
            self.model = AutoModelForCausalLM.from_pretrained(
                model_name,
                # fp16 only makes sense on GPU; keep fp32 on CPU.
                torch_dtype=torch.float16 if self.device == "cuda" else torch.float32,
                device_map="auto" if self.device == "cuda" else None,
                low_cpu_mem_usage=True,
            )
            if self.device == "cpu":
                self.model = self.model.to(self.device)
            print("Model loaded successfully!")
        except Exception as e:
            print(f"Error loading model: {e}")
            # Mark the LLM as unavailable; analysis will use the fallback.
            self.model = None

    def _ensure_fallback_pipeline(self):
        """Create the DistilBERT fallback pipeline on first use and return it."""
        if self.sentiment_pipeline is None:
            self.sentiment_pipeline = pipeline(
                "sentiment-analysis",
                model="distilbert-base-uncased-finetuned-sst-2-english"
            )
        return self.sentiment_pipeline

    def analyze_sentiment(self, text):
        """
        Analyze the sentiment of a piece of text.

        Args:
            text: The text to analyze.

        Returns:
            dict: {sentiment, score, explanation}
        """
        if not text or len(text.strip()) == 0:
            return {
                "sentiment": "Neutral",
                "score": 0.5,
                "explanation": "No text to analyze"
            }

        # If the LLM failed to load, use the fallback pipeline.
        if self.model is None:
            return self._fallback_sentiment(text)

        try:
            # Build the prompt for Gemma.  Input is truncated to 500 chars
            # to keep the tokenized prompt within the 512-token limit below.
            prompt = f"""Analyze the sentiment of this financial news. Rate it as Positive, Negative, or Neutral with a confidence score (0-1). 
News: {text[:500]}

Provide your analysis in this exact format:
Sentiment: [Positive/Negative/Neutral]
Score: [0.0-1.0]
Reason: [Brief explanation]"""

            # Tokenize and generate.
            inputs = self.tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512)
            inputs = inputs.to(self.device)

            with torch.no_grad():
                outputs = self.model.generate(
                    **inputs,
                    max_new_tokens=150,
                    temperature=0.3,
                    do_sample=True,
                    pad_token_id=self.tokenizer.eos_token_id
                )

            # Decode only the newly generated tokens.  Decoding the full
            # sequence would echo the prompt, whose template lines
            # ("Sentiment: [Positive/Negative/Neutral]") can confuse the
            # regex-based parser.
            prompt_len = inputs["input_ids"].shape[1]
            response = self.tokenizer.decode(
                outputs[0][prompt_len:], skip_special_tokens=True
            )

            # Parse the model's response into our result format.
            return self._parse_llm_response(response)

        except Exception as e:
            print(f"Error in analysis: {e}")
            return self._fallback_sentiment(text)

    def _parse_llm_response(self, response):
        """Extract sentiment, score, and explanation from the LLM response.

        Falls back to Neutral/0.5 defaults for any field it cannot parse.
        """
        sentiment = "Neutral"
        score = 0.5
        explanation = "Unable to analyze"

        try:
            # Extract sentiment.
            if "Sentiment:" in response:
                sentiment_line = re.search(r'Sentiment:\s*(\w+)', response, re.IGNORECASE)
                if sentiment_line:
                    sentiment = sentiment_line.group(1).capitalize()

            # Extract score, clamped to [0, 1].
            if "Score:" in response:
                score_line = re.search(r'Score:\s*([\d.]+)', response)
                if score_line:
                    score = float(score_line.group(1))
                    score = max(0.0, min(1.0, score))

            # Extract reason/explanation (first line after "Reason:").
            if "Reason:" in response:
                reason_match = re.search(r'Reason:\s*(.+?)(?:\n|$)', response, re.IGNORECASE)
                if reason_match:
                    explanation = reason_match.group(1).strip()

            # Validate sentiment; fall back to a keyword scan of the whole
            # response if the structured field did not yield a valid label.
            if sentiment not in ["Positive", "Negative", "Neutral"]:
                if "positive" in response.lower():
                    sentiment = "Positive"
                elif "negative" in response.lower():
                    sentiment = "Negative"
                else:
                    sentiment = "Neutral"

        except Exception as e:
            print(f"Parse error: {e}")

        return {
            "sentiment": sentiment,
            "score": score,
            "explanation": explanation
        }

    def _fallback_sentiment(self, text):
        """Fallback analysis using the DistilBERT sentiment pipeline."""
        try:
            classifier = self._ensure_fallback_pipeline()
            # DistilBERT SST-2 emits binary POSITIVE/NEGATIVE labels only.
            result = classifier(text[:512])[0]
            sentiment = "Positive" if result['label'] == 'POSITIVE' else "Negative"
            score = result['score']
            return {
                "sentiment": sentiment,
                "score": score,
                "explanation": f"Analyzed using fallback model with {score:.2%} confidence"
            }
        except Exception as e:
            # Best-effort: log and return a neutral result rather than crash.
            print(f"Fallback analysis error: {e}")
            return {
                "sentiment": "Neutral",
                "score": 0.5,
                "explanation": "Analysis unavailable"
            }

    def analyze_batch(self, news_list):
        """
        Analyze the sentiment of multiple news items.

        Args:
            news_list: list of dicts, each with 'title' and 'summary' keys.

        Returns:
            list: the input items, each merged with its analysis result.
        """
        results = []
        for news in news_list:
            # Combine title and summary into a single text for analysis.
            combined_text = f"{news.get('title', '')} {news.get('summary', '')}"
            sentiment_result = self.analyze_sentiment(combined_text)
            results.append({
                **news,
                **sentiment_result
            })
        return results