mai

Sleeping

App Files Community

Sooteemon committed on Nov 4, 2025

Commit

b5400ea

verified ·

1 Parent(s): 1798b32

Update sentiment_analyzer.py

Browse files

Files changed (1) hide show

sentiment_analyzer.py +28 -66

sentiment_analyzer.py CHANGED Viewed

@@ -2,7 +2,7 @@ from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 import torch
 import re
-class NewsAnalyzer:  # --- MODIFIED: Renamed class ---
     def __init__(self, model_name="google/gemma-2-2b-it"):
         """
         Initialize news analyzer with Gemma model
@@ -28,38 +28,26 @@ class NewsAnalyzer:  # --- MODIFIED: Renamed class ---
         except Exception as e:
             print(f"Error loading model: {e}")
-            # Fallback to sentiment pipeline
             self.model = None
             self.sentiment_pipeline = pipeline(
                 "sentiment-analysis",
                 model="distilbert-base-uncased-finetuned-sst-2-english"
             )
-    def analyze_news_item(self, text): # --- MODIFIED: Renamed function ---
         """
         วิเคราะห์ข่าว (Sentiment, Theme, Impact)
-        Args:
-            text: ข้อความที่ต้องการวิเคราะห์
-        Returns:
-            dict: {sentiment, score, theme, impact, explanation}
         """
         if not text or len(text.strip()) == 0:
             return {
-                "sentiment": "Neutral",
-                "score": 0.5,
-                "theme": "Other",
-                "impact": "Neutral",
-                "explanation": "No text to analyze"
             }
-        # ถ้า model โหลดไม่สำเร็จ ใช้ fallback pipeline
         if self.model is None:
             return self._fallback_sentiment(text)
         try:
-            # --- MODIFIED: New comprehensive prompt ---
             prompt = f"""Analyze this financial news article. Provide your analysis in the *exact* format specified below.
 **Categories to use:**
@@ -77,128 +65,102 @@ Theme: [Selected Theme]
 Impact: [Selected Impact]
 Reason: [Brief explanation of your analysis]"""
-            # Tokenize และ generate
             inputs = self.tokenizer(prompt, return_tensors="pt", truncation=True, max_length=1024)
             inputs = inputs.to(self.device)
             with torch.no_grad():
                 outputs = self.model.generate(
                     **inputs,
-                    max_new_tokens=200, # Increased tokens for longer response
                     temperature=0.3,
                     do_sample=True,
                     pad_token_id=self.tokenizer.eos_token_id
                 )
-            response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
-            # Parse response
-            return self._parse_llm_analysis(response) # --- MODIFIED ---
         except Exception as e:
             print(f"Error in analysis: {e}")
             return self._fallback_sentiment(text)
-    def _parse_llm_analysis(self, response): # --- MODIFIED: Renamed and updated parser ---
         """แยก sentiment, score, theme, impact และ explanation จาก LLM response"""
         sentiment = "Neutral"
         score = 0.5
-        theme = "Other"       # --- ADDED ---
-        impact = "Neutral"    # --- ADDED ---
-        explanation = "Unable to analyze"
         try:
-            # Extract sentiment
             sentiment_line = re.search(r'Sentiment:\s*(\w+)', response, re.IGNORECASE)
             if sentiment_line:
                 sentiment = sentiment_line.group(1).capitalize()
-            # Extract score
             score_line = re.search(r'Score:\s*([\d.]+)', response)
             if score_line:
                 score = float(score_line.group(1))
-                score = max(0.0, min(1.0, score)) # Clamp between 0-1
-            # --- ADDED: Extract Theme ---
             theme_line = re.search(r'Theme:\s*([\w\/ -]+)', response, re.IGNORECASE)
             if theme_line:
                 theme = theme_line.group(1).strip()
-            # --- ADDED: Extract Impact ---
             impact_line = re.search(r'Impact:\s*(\w+)', response, re.IGNORECASE)
             if impact_line:
                 impact = impact_line.group(1).capitalize().strip()
-            # Extract reason/explanation
-            reason_match = re.search(r'Reason:\s*(.+?)(?:\n|$)', response, re.DOTALL | re.IGNORECASE)
             if reason_match:
                 explanation = reason_match.group(1).strip()
-            # Validate sentiment
             if sentiment not in ["Positive", "Negative", "Neutral"]:
                 sentiment = "Neutral"
-            # Validate impact
             if impact not in ["Opportunity", "Risk", "Neutral"]:
                 impact = "Neutral"
         except Exception as e:
-            print(f"Parse error: {e}")
         return {
-            "sentiment": sentiment,
-            "score": score,
-            "theme": theme,
-            "impact": impact,
-            "explanation": explanation
         }
     def _fallback_sentiment(self, text):
         """Fallback method ใช้ DistilBERT"""
         try:
             result = self.sentiment_pipeline(text[:512])[0]
-            # Convert to our format
             sentiment = "Positive" if result['label'] == 'POSITIVE' else "Negative"
             score = result['score']
             return {
-                "sentiment": sentiment,
-                "score": score,
-                "theme": "N/A",       # --- ADDED ---
-                "impact": "N/A",      # --- ADDED ---
-                "explanation": f"Analyzed using fallback model with {score:.2%} confidence"
             }
         except:
             return {
-                "sentiment": "Neutral",
-                "score": 0.5,
-                "theme": "N/A",       # --- ADDED ---
-                "impact": "N/A",      # --- ADDED ---
-                "explanation": "Analysis unavailable"
             }
     def analyze_batch(self, news_list):
         """
         วิเคราะห์ sentiment หลายข่าวพร้อมกัน
-        Args:
-            news_list: list ของ dict ที่มี title และ summary
-        Returns:
-            list: รายการผลการวิเคราะห์
         """
         results = []
         for news in news_list:
-            # รวม title และ summary
             combined_text = f"{news.get('title', '')} {news.get('summary', '')}"
-            sentiment_result = self.analyze_news_item(combined_text) # --- MODIFIED ---
             results.append({
                 **news,
                 **sentiment_result
             })
         return results

 import torch
 import re
+class NewsAnalyzer:
     def __init__(self, model_name="google/gemma-2-2b-it"):
         """
         Initialize news analyzer with Gemma model
         except Exception as e:
             print(f"Error loading model: {e}")
             self.model = None
             self.sentiment_pipeline = pipeline(
                 "sentiment-analysis",
                 model="distilbert-base-uncased-finetuned-sst-2-english"
             )
+    def analyze_news_item(self, text):
         """
         วิเคราะห์ข่าว (Sentiment, Theme, Impact)
         """
         if not text or len(text.strip()) == 0:
             return {
+                "sentiment": "Neutral", "score": 0.5, "theme": "Other",
+                "impact": "Neutral", "explanation": "No text to analyze"
             }
         if self.model is None:
             return self._fallback_sentiment(text)
         try:
             prompt = f"""Analyze this financial news article. Provide your analysis in the *exact* format specified below.
 **Categories to use:**
 Impact: [Selected Impact]
 Reason: [Brief explanation of your analysis]"""
             inputs = self.tokenizer(prompt, return_tensors="pt", truncation=True, max_length=1024)
             inputs = inputs.to(self.device)
+            # --- MODIFIED: Get prompt length to slice output correctly ---
+            prompt_length = inputs['input_ids'].shape[1]
             with torch.no_grad():
                 outputs = self.model.generate(
                     **inputs,
+                    max_new_tokens=200,
                     temperature=0.3,
                     do_sample=True,
                     pad_token_id=self.tokenizer.eos_token_id
                 )
+            # --- MODIFIED: Decode *only* the new tokens, not the prompt ---
+            new_tokens = outputs[0][prompt_length:]
+            response = self.tokenizer.decode(new_tokens, skip_special_tokens=True)
+            return self._parse_llm_analysis(response)
         except Exception as e:
             print(f"Error in analysis: {e}")
             return self._fallback_sentiment(text)
+    def _parse_llm_analysis(self, response):
         """แยก sentiment, score, theme, impact และ explanation จาก LLM response"""
         sentiment = "Neutral"
         score = 0.5
+        theme = "Other"
+        impact = "Neutral"
+        explanation = "Unable to parse" # Default explanation if parse fails
         try:
             sentiment_line = re.search(r'Sentiment:\s*(\w+)', response, re.IGNORECASE)
             if sentiment_line:
                 sentiment = sentiment_line.group(1).capitalize()
             score_line = re.search(r'Score:\s*([\d.]+)', response)
             if score_line:
                 score = float(score_line.group(1))
+                score = max(0.0, min(1.0, score))
             theme_line = re.search(r'Theme:\s*([\w\/ -]+)', response, re.IGNORECASE)
             if theme_line:
                 theme = theme_line.group(1).strip()
             impact_line = re.search(r'Impact:\s*(\w+)', response, re.IGNORECASE)
             if impact_line:
                 impact = impact_line.group(1).capitalize().strip()
+            # --- MODIFIED: More robust regex for Reason (captures multi-line) ---
+            reason_match = re.search(r'Reason:\s*(.*)', response, re.DOTALL | re.IGNORECASE)
             if reason_match:
                 explanation = reason_match.group(1).strip()
+            # If parsing fails, explanation will remain "Unable to parse" or the last good value
             if sentiment not in ["Positive", "Negative", "Neutral"]:
                 sentiment = "Neutral"
             if impact not in ["Opportunity", "Risk", "Neutral"]:
                 impact = "Neutral"
         except Exception as e:
+            print(f"Parse error: {e}. Response was: {response}")
         return {
+            "sentiment": sentiment, "score": score, "theme": theme,
+            "impact": impact, "explanation": explanation
         }
     def _fallback_sentiment(self, text):
         """Fallback method ใช้ DistilBERT"""
         try:
             result = self.sentiment_pipeline(text[:512])[0]
             sentiment = "Positive" if result['label'] == 'POSITIVE' else "Negative"
             score = result['score']
             return {
+                "sentiment": sentiment, "score": score, "theme": "N/A",
+                "impact": "N/A", "explanation": f"Analyzed using fallback model"
             }
         except:
             return {
+                "sentiment": "Neutral", "score": 0.5, "theme": "N/A",
+                "impact": "N/A", "explanation": "Analysis unavailable"
             }
     def analyze_batch(self, news_list):
         """
         วิเคราะห์ sentiment หลายข่าวพร้อมกัน
         """
         results = []
         for news in news_list:
             combined_text = f"{news.get('title', '')} {news.get('summary', '')}"
+            sentiment_result = self.analyze_news_item(combined_text)
             results.append({
                 **news,
                 **sentiment_result
             })
         return results