Spaces:

kobezorro
/

text-sentiment-analyzer

Runtime error

App Files Files Community

kobezorro committed on Aug 28, 2025

Commit

791ccfc

verified ·

1 Parent(s): d35333e

提升中文情感分析准确性：使用专门的中英文模型和智能语言检测

Browse files

Files changed (2) hide show

__pycache__/app.cpython-312.pyc +0 -0
app.py +56 -14

__pycache__/app.cpython-312.pyc ADDED Viewed

Binary file (5.62 kB). View file

app.py CHANGED Viewed

@@ -1,16 +1,40 @@
 import gradio as gr
 from transformers import pipeline
-# 初始化情感分析模型
-sentiment_pipeline = pipeline(
     "sentiment-analysis",
     model="cardiffnlp/twitter-roberta-base-sentiment-latest",
     return_all_scores=True
 )
 def analyze_sentiment(text):
     """
-    分析文本的情感倾向
     Args:
         text (str): 输入的文本
@@ -22,16 +46,30 @@ def analyze_sentiment(text):
         return {"错误": "请输入有效的文本"}
     try:
-        # 执行情感分析
-        results = sentiment_pipeline(text)[0]
         # 处理结果
         sentiment_scores = {}
-        label_mapping = {
-            "LABEL_0": "负面 😔",
-            "LABEL_1": "中性 😐",
-            "LABEL_2": "正面 😊"
-        }
         for result in results:
             label = result['label']
@@ -43,8 +81,10 @@ def analyze_sentiment(text):
         max_result = max(results, key=lambda x: x['score'])
         max_label = label_mapping.get(max_result['label'], max_result['label'])
-        # 添加总结
         sentiment_scores["主要情感"] = f"{max_label} (置信度: {max_result['score']:.4f})"
         return sentiment_scores
@@ -54,11 +94,13 @@ def analyze_sentiment(text):
 # 创建Gradio界面
 with gr.Blocks(title="文本情感分析器", theme=gr.themes.Soft()) as demo:
     gr.Markdown("""
-    # 🎭 文本情感分析器
-    输入任何文本，AI将分析其情感倾向（正面、负面或中性）
-    **模型**: Cardiff NLP Twitter RoBERTa Base Sentiment
     """)
     with gr.Row():

 import gradio as gr
 from transformers import pipeline
+import re
+# Initialize the multilingual sentiment-analysis models
+# English model - the existing high-quality English sentiment analysis
+english_sentiment_pipeline = pipeline(
     "sentiment-analysis",
     model="cardiffnlp/twitter-roberta-base-sentiment-latest",
     return_all_scores=True
 )
+# Chinese model - sentiment analysis optimized specifically for Chinese
+chinese_sentiment_pipeline = pipeline(
+    "sentiment-analysis",
+    model="uer/roberta-base-finetuned-dianping-chinese",
+    return_all_scores=True
+)
+def detect_language(text):
+    """
+    Simple language detection: decide whether the text is mainly Chinese.
+
+    Args:
+        text (str): The text to classify.
+
+    Returns:
+        str: 'chinese' if characters in the range U+4E00-U+9FFF make up
+        more than 30% of the non-whitespace characters, otherwise
+        'english' (also returned when the text has no non-whitespace
+        characters).
+    """
+    # Count the Chinese characters
+    chinese_chars = len(re.findall(r'[\u4e00-\u9fff]', text))
+    total_chars = len(re.sub(r'\s', '', text))  # total character count after removing whitespace
+    if total_chars == 0:
+        return 'english'  # default to English
+    # If Chinese characters make up more than 30%, treat the text as Chinese
+    chinese_ratio = chinese_chars / total_chars
+    return 'chinese' if chinese_ratio > 0.3 else 'english'
 def analyze_sentiment(text):
     """
+    智能多语言情感分析：自动检测语言并使用对应的最佳模型
     Args:
         text (str): 输入的文本
         return {"错误": "请输入有效的文本"}
     try:
+        # 检测语言
+        language = detect_language(text)
+        # 根据语言选择合适的模型
+        if language == 'chinese':
+            results = chinese_sentiment_pipeline(text)[0]
+            model_info = "中文专用模型 (UER RoBERTa-Dianping)"
+            # 中文模型的标签映射
+            label_mapping = {
+                "LABEL_0": "负面 😔",
+                "LABEL_1": "正面 😊"
+            }
+        else:
+            results = english_sentiment_pipeline(text)[0]
+            model_info = "英文专用模型 (Cardiff NLP Twitter RoBERTa)"
+            # 英文模型的标签映射
+            label_mapping = {
+                "LABEL_0": "负面 😔",
+                "LABEL_1": "中性 😐",
+                "LABEL_2": "正面 😊"
+            }
         # 处理结果
         sentiment_scores = {}
         for result in results:
             label = result['label']
         max_result = max(results, key=lambda x: x['score'])
         max_label = label_mapping.get(max_result['label'], max_result['label'])
+        # 添加详细信息
         sentiment_scores["主要情感"] = f"{max_label} (置信度: {max_result['score']:.4f})"
+        sentiment_scores["使用模型"] = model_info
+        sentiment_scores["检测语言"] = "中文" if language == 'chinese' else "英文"
         return sentiment_scores
 # 创建Gradio界面
 with gr.Blocks(title="文本情感分析器", theme=gr.themes.Soft()) as demo:
     gr.Markdown("""
+    # 🎭 智能多语言文本情感分析器
+    输入任何文本，AI将自动检测语言并使用最适合的模型分析情感倾向
+    **中文模型**: UER RoBERTa (大众点评数据微调)
+    **英文模型**: Cardiff NLP Twitter RoBERTa
+    **支持语言**: 中文、英文自动检测
     """)
     with gr.Row():