Spaces:

khjhs60199
/

pyCrawing

Sleeping

App Files Files Community

khjhs60199 committed on Sep 17, 2025

Commit

62fddb6

verified ·

1 Parent(s): 99f90dc

Update sentiment_analyzer.py

Browse files

Files changed (1) hide show

sentiment_analyzer.py +36 -14

sentiment_analyzer.py CHANGED Viewed

@@ -5,11 +5,12 @@ import re
 from typing import Dict, Tuple, Optional
 import jieba
 import emoji
 logger = logging.getLogger(__name__)
 class SentimentAnalyzer:
-    """中文新聞情緒分析器"""
     def __init__(self, model_name: str = "uer/roberta-base-finetuned-jd-binary-chinese"):
         self.model_name = model_name
@@ -18,6 +19,8 @@ class SentimentAnalyzer:
         self.classifier = None
         self.device = "cuda" if torch.cuda.is_available() else "cpu"
         # 初始化模型
         self._load_model()
@@ -35,26 +38,47 @@ class SentimentAnalyzer:
         }
     def _load_model(self):
-        """載入預訓練模型"""
         try:
-            logger.info(f"載入情緒分析模型: {self.model_name}")
-            self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
-            self.model = AutoModelForSequenceClassification.from_pretrained(self.model_name)
-            # 創建分類器管道
             self.classifier = pipeline(
                 "text-classification",
                 model=self.model,
                 tokenizer=self.tokenizer,
                 device=0 if self.device == "cuda" else -1,
-                return_all_scores=True
             )
-            logger.info("情緒分析模型載入成功")
         except Exception as e:
-            logger.error(f"載入模型時發生錯誤: {e}")
             self.classifier = None
     def _preprocess_text(self, text: str) -> str:
@@ -120,10 +144,8 @@ class SentimentAnalyzer:
                     # 處理模型結果
                     if results and len(results) > 0:
-                        scores = results[0]
                         # 找到最高分數的標籤
-                        best_result = max(scores, key=lambda x: x['score'])
                         # 標籤映射
                         label_mapping = {
@@ -186,7 +208,7 @@ class SentimentAnalyzer:
             results.append(result)
             # 避免GPU記憶體問題
-            if i % 10 == 0:
-                torch.cuda.empty_cache() if torch.cuda.is_available() else None
         return results

 from typing import Dict, Tuple, Optional
 import jieba
 import emoji
+import os
 logger = logging.getLogger(__name__)
 class SentimentAnalyzer:
+    """中文新聞情緒分析器 - 改進版"""
     def __init__(self, model_name: str = "uer/roberta-base-finetuned-jd-binary-chinese"):
         self.model_name = model_name
         self.classifier = None
         self.device = "cuda" if torch.cuda.is_available() else "cpu"
+        logger.info(f"Device set to use {self.device}")
         # 初始化模型
         self._load_model()
         }
     def _load_model(self):
+        """載入預訓練模型 - 改進版"""
         try:
+            logger.info(f"開始載入情緒分析模型: {self.model_name}")
+            # 檢查模型是否已快取
+            cache_dir = os.path.expanduser("~/.cache/huggingface/transformers")
+            logger.info(f"模型快取目錄: {cache_dir}")
+            # 載入 tokenizer
+            logger.info("載入 tokenizer...")
+            self.tokenizer = AutoTokenizer.from_pretrained(
+                self.model_name,
+                trust_remote_code=True
+            )
+            # 載入模型
+            logger.info("載入模型...")
+            self.model = AutoModelForSequenceClassification.from_pretrained(
+                self.model_name,
+                trust_remote_code=True
+            )
+            # 移動到適當的設備
+            if self.device == "cuda":
+                self.model = self.model.cuda()
+            # 創建分類器管道 - 修正過時的參數
+            logger.info("創建分類器管道...")
             self.classifier = pipeline(
                 "text-classification",
                 model=self.model,
                 tokenizer=self.tokenizer,
                 device=0 if self.device == "cuda" else -1,
+                top_k=None  # 替代 return_all_scores=True
             )
+            logger.info("✅ 情緒分析模型載入成功")
         except Exception as e:
+            logger.error(f"❌ 載入模型時發生錯誤: {e}")
+            logger.info("將使用關鍵字分析作為備用方案")
             self.classifier = None
     def _preprocess_text(self, text: str) -> str:
                     # 處理模型結果
                     if results and len(results) > 0:
                         # 找到最高分數的標籤
+                        best_result = max(results, key=lambda x: x['score'])
                         # 標籤映射
                         label_mapping = {
             results.append(result)
             # 避免GPU記憶體問題
+            if i % 10 == 0 and torch.cuda.is_available():
+                torch.cuda.empty_cache()
         return results