mai

Sleeping

App Files Files Community

Sooteemon committed on Nov 4, 2025

Commit

57b6068

verified ·

1 Parent(s): 34c2f86

Update sentiment_analyzer.py

Browse files

Files changed (1) hide show

sentiment_analyzer.py +64 -130

sentiment_analyzer.py CHANGED Viewed

@@ -1,162 +1,94 @@
-from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 import torch
 import re
-import os # --- ADDED ---
 class NewsAnalyzer:
-    def __init__(self, model_name="google/gemma-2-2b-it"):
         """
-        Initialize news analyzer with Gemma model
         """
-        print(f"Loading model: {model_name}")
-        self.device = "cuda" if torch.cuda.is_available() else "cpu"
-        print(f"Using device: {self.device}")
-        # --- ADDED: Get token from Space Secrets ---
-        hf_token = os.getenv("HF_TOKEN")
-        if not hf_token:
-            print("Warning: HF_TOKEN secret not found. May fail to load gated models.")
         try:
-            self.tokenizer = AutoTokenizer.from_pretrained(
-                model_name,
-                token=hf_token # --- ADDED ---
-            )
-            self.model = AutoModelForCausalLM.from_pretrained(
-                model_name,
-                token=hf_token, # --- ADDED ---
-                torch_dtype=torch.float16 if self.device == "cuda" else torch.float32,
-                device_map="auto" if self.device == "cuda" else None,
-                low_cpu_mem_usage=True
-            )
-            if self.device == "cpu":
-                self.model = self.model.to(self.device)
-            print("Model loaded successfully!")
-        except Exception as e:
-            print(f"Error loading model: {e}")
-            self.model = None
             self.sentiment_pipeline = pipeline(
                 "sentiment-analysis",
-                model="distilbert-base-uncased-finetuned-sst-2-english"
             )
-    def analyze_news_item(self, text):
-        """
-        วิเคราะห์ข่าว (Sentiment, Theme, Impact)
-        """
-        if not text or len(text.strip()) == 0:
-            return {
-                "sentiment": "Neutral", "score": 0.5, "theme": "Other",
-                "impact": "Neutral", "explanation": "No text to analyze"
-            }
-        if self.model is None:
-            return self._fallback_sentiment(text)
-        try:
-            prompt = f"""Analyze this financial news article. Provide your analysis in the *exact* format specified below.
-**Categories to use:**
-- **Theme:** [Choose one: Earnings/Finance, Product/Service, Legal/Regulation, Management/M&A, Market/Economy, Other]
-- **Impact:** [Choose one: Opportunity, Risk, Neutral]
-- **Sentiment:** [Choose one: Positive, Negative, Neutral]
-**News Article:**
-{text[:500]}
-**Your Analysis (Use this *exact* format):**
-Sentiment: [Positive/Negative/Neutral]
-Score: [0.0-1.0 confidence score]
-Theme: [Selected Theme]
-Impact: [Selected Impact]
-Reason: [Brief explanation of your analysis]"""
-            inputs = self.tokenizer(prompt, return_tensors="pt", truncation=True, max_length=1024)
-            inputs = inputs.to(self.device)
-            prompt_length = inputs['input_ids'].shape[1]
-            with torch.no_grad():
-                outputs = self.model.generate(
-                    **inputs,
-                    max_new_tokens=200,
-                    temperature=0.3,
-                    do_sample=True,
-                    pad_token_id=self.tokenizer.eos_token_id
-                )
-            new_tokens = outputs[0][prompt_length:]
-            response = self.tokenizer.decode(new_tokens, skip_special_tokens=True)
-            return self._parse_llm_analysis(response)
         except Exception as e:
-            print(f"Error in analysis: {e}")
-            return self._fallback_sentiment(text)
-    def _parse_llm_analysis(self, response):
         """
-        แยก sentiment, score, theme, impact และ explanation จาก LLM response
         """
-        sentiment = "Neutral"
-        score = 0.5
-        theme = "Other"
-        impact = "Neutral"
-        explanation = "Unable to parse"
         try:
-            sentiment_line = re.search(r'\**Sentiment:?\**\s*(\w+)', response, re.IGNORECASE)
-            if sentiment_line:
-                sentiment = sentiment_line.group(1).capitalize()
-            score_line = re.search(r'\**Score:?\**\s*([\d.]+)', response)
-            if score_line:
-                score = float(score_line.group(1))
-                score = max(0.0, min(1.0, score))
-            theme_line = re.search(r'\**Theme:?\**\s*([\w\/ -]+)', response, re.IGNORECASE)
-            if theme_line:
-                theme = theme_line.group(1).strip()
-            impact_line = re.search(r'\**Impact:?\**\s*(\w+)', response, re.IGNORECASE)
-            if impact_line:
-                impact = impact_line.group(1).capitalize().strip()
-            reason_match = re.search(r'\**Reason:?\**\s*(.*)', response, re.DOTALL | re.IGNORECASE)
-            if reason_match:
-                explanation = reason_match.group(1).strip()
-            if sentiment not in ["Positive", "Negative", "Neutral"]:
-                sentiment = "Neutral"
-            if impact not in ["Opportunity", "Risk", "Neutral"]:
-                impact = "Neutral"
-        except Exception as e:
-            print(f"Parse error: {e}. Response was: {response}")
-        return {
-            "sentiment": sentiment, "score": score, "theme": theme,
-            "impact": impact, "explanation": explanation
-        }
-    def _fallback_sentiment(self, text):
-        """Fallback method ใช้ DistilBERT"""
-        try:
-            result = self.sentiment_pipeline(text[:512])[0]
-            sentiment = "Positive" if result['label'] == 'POSITIVE' else "Negative"
-            score = result['score']
             return {
-                "sentiment": sentiment, "score": score, "theme": "N/A",
-                "impact": "N/A", "explanation": f"Analyzed using fallback model"
             }
-        except:
             return {
                 "sentiment": "Neutral", "score": 0.5, "theme": "N/A",
-                "impact": "N/A", "explanation": "Analysis unavailable"
             }
     def analyze_batch(self, news_list):
@@ -171,4 +103,6 @@ Reason: [Brief explanation of your analysis]"""
                 **news,
                 **sentiment_result
             })
-        return results

+from transformers import pipeline
 import torch
 import re
 class NewsAnalyzer:
+    def __init__(self, model_name=None): # Model_name is no longer needed
         """
+        Initialize news analyzer with fast, CPU-friendly Zero-Shot pipelines
         """
+        print("Initializing Zero-Shot News Analyzer...")
+        self.device = 0 if torch.cuda.is_available() else -1 # Use 0 for GPU, -1 for CPU
+        print(f"Using device: {'cuda' if self.device == 0 else 'cpu'}")
         try:
+            # Pipeline 1: For Sentiment Analysis
+            print("Loading Sentiment model...")
             self.sentiment_pipeline = pipeline(
                 "sentiment-analysis",
+                model="distilbert-base-uncased-finetuned-sst-2-english",
+                device=self.device
             )
+            # Pipeline 2: For Zero-Shot Classification (Theme & Impact)
+            print("Loading Zero-Shot model...")
+            self.classifier_pipeline = pipeline(
+                "zero-shot-classification",
+                model="Moritz/bart-large-mnli-fever-anli-ling-wanli",
+                device=self.device
+            )
+            print("Models loaded successfully!")
+            # Define the labels for classification
+            self.theme_labels = [
+                "Earnings/Finance", "Product/Service", "Legal/Regulation",
+                "Management/M&A", "Market/Economy", "Other"
+            ]
+            self.impact_labels = ["Opportunity", "Risk", "Neutral"]
         except Exception as e:
+            print(f"Fatal error loading models: {e}")
+            self.sentiment_pipeline = None
+            self.classifier_pipeline = None
+    def analyze_news_item(self, text):
         """
+        วิเคราะห์ข่าว (Sentiment, Theme, Impact) โดยใช้ Zero-Shot
         """
+        if not text or len(text.strip()) == 0 or not self.classifier_pipeline:
+            return {
+                "sentiment": "Neutral", "score": 0.5, "theme": "N/A",
+                "impact": "N/A", "explanation": "No text or model"
+            }
         try:
+            # 1. Analyze Sentiment
+            sentiment_result = self.sentiment_pipeline(text[:512])[0] # Truncate for speed
+            sentiment = sentiment_result['label'].capitalize()
+            score = sentiment_result['score']
+            # 2. Analyze Theme
+            theme_result = self.classifier_pipeline(
+                text[:512],
+                candidate_labels=self.theme_labels
+            )
+            theme = theme_result['labels'][0]
+            # 3. Analyze Impact
+            impact_result = self.classifier_pipeline(
+                text[:512],
+                candidate_labels=self.impact_labels
+            )
+            impact = impact_result['labels'][0]
+            # 4. Create an explanation
+            explanation = f"Classified as '{theme}' (Impact: {impact}) via zero-shot analysis."
             return {
+                "sentiment": "Positive" if sentiment == "Positive" else "Negative", # Simple conversion
+                "score": score,
+                "theme": theme,
+                "impact": impact,
+                "explanation": explanation
             }
+        except Exception as e:
+            print(f"Error in analysis: {e}")
             return {
                 "sentiment": "Neutral", "score": 0.5, "theme": "N/A",
+                "impact": "N/A", "explanation": "Analysis failed"
             }
     def analyze_batch(self, news_list):
                 **news,
                 **sentiment_result
             })
+        return results
+# --- The _parse and _fallback functions are no longer needed ---