Spaces:
Runtime error
Runtime error
提升中文情感分析准确性:使用专门的中英文模型和智能语言检测
Browse files- __pycache__/app.cpython-312.pyc +0 -0
- app.py +56 -14
__pycache__/app.cpython-312.pyc
ADDED
|
Binary file (5.62 kB). View file
|
|
|
app.py
CHANGED
|
@@ -1,16 +1,40 @@
|
|
| 1 |
import gradio as gr
|
| 2 |
from transformers import pipeline
|
|
|
|
| 3 |
|
| 4 |
-
# 初始化情感分析模型
|
| 5 |
-
|
|
|
|
| 6 |
"sentiment-analysis",
|
| 7 |
model="cardiffnlp/twitter-roberta-base-sentiment-latest",
|
| 8 |
return_all_scores=True
|
| 9 |
)
|
| 10 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
def analyze_sentiment(text):
|
| 12 |
"""
|
| 13 |
-
分析
|
| 14 |
|
| 15 |
Args:
|
| 16 |
text (str): 输入的文本
|
|
@@ -22,16 +46,30 @@ def analyze_sentiment(text):
|
|
| 22 |
return {"错误": "请输入有效的文本"}
|
| 23 |
|
| 24 |
try:
|
| 25 |
-
#
|
| 26 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
|
| 28 |
# 处理结果
|
| 29 |
sentiment_scores = {}
|
| 30 |
-
label_mapping = {
|
| 31 |
-
"LABEL_0": "负面 😔",
|
| 32 |
-
"LABEL_1": "中性 😐",
|
| 33 |
-
"LABEL_2": "正面 😊"
|
| 34 |
-
}
|
| 35 |
|
| 36 |
for result in results:
|
| 37 |
label = result['label']
|
|
@@ -43,8 +81,10 @@ def analyze_sentiment(text):
|
|
| 43 |
max_result = max(results, key=lambda x: x['score'])
|
| 44 |
max_label = label_mapping.get(max_result['label'], max_result['label'])
|
| 45 |
|
| 46 |
-
# 添加
|
| 47 |
sentiment_scores["主要情感"] = f"{max_label} (置信度: {max_result['score']:.4f})"
|
|
|
|
|
|
|
| 48 |
|
| 49 |
return sentiment_scores
|
| 50 |
|
|
@@ -54,11 +94,13 @@ def analyze_sentiment(text):
|
|
| 54 |
# 创建Gradio界面
|
| 55 |
with gr.Blocks(title="文本情感分析器", theme=gr.themes.Soft()) as demo:
|
| 56 |
gr.Markdown("""
|
| 57 |
-
# 🎭 文本情感分析器
|
| 58 |
|
| 59 |
-
输入任何文本,AI将分析
|
| 60 |
|
| 61 |
-
**模型**:
|
|
|
|
|
|
|
| 62 |
""")
|
| 63 |
|
| 64 |
with gr.Row():
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
from transformers import pipeline
|
| 3 |
+
import re
|
| 4 |
|
| 5 |
+
# 初始化多语言情感分析模型
|
| 6 |
+
# 英文模型 - 原有的高质量英文情感分析
|
| 7 |
+
english_sentiment_pipeline = pipeline(
|
| 8 |
"sentiment-analysis",
|
| 9 |
model="cardiffnlp/twitter-roberta-base-sentiment-latest",
|
| 10 |
return_all_scores=True
|
| 11 |
)
|
| 12 |
|
| 13 |
+
# 中文模型 - 专门针对中文优化的情感分析
|
| 14 |
+
chinese_sentiment_pipeline = pipeline(
|
| 15 |
+
"sentiment-analysis",
|
| 16 |
+
model="uer/roberta-base-finetuned-dianping-chinese",
|
| 17 |
+
return_all_scores=True
|
| 18 |
+
)
|
| 19 |
+
|
| 20 |
+
def detect_language(text, chinese_threshold=0.3):
    """Classify *text* as ``'chinese'`` or ``'english'``.

    The decision is based on the share of Han (CJK ideograph) characters
    among the *letters* of the text. Whitespace, digits, punctuation, emoji
    and other symbols are ignored, so a short Chinese phrase followed by
    numbers or exclamation marks is not misrouted to the English model.

    Args:
        text (str): Text to inspect.
        chinese_threshold (float): The text is considered Chinese when the
            Han share of its letters is strictly greater than this value.
            Defaults to 0.3.

    Returns:
        str: ``'chinese'`` or ``'english'``. Text that contains no letters
        at all (empty, whitespace, digits/punctuation only) yields
        ``'english'``.
    """
    han_count = 0
    other_letter_count = 0
    for ch in text:
        # CJK Unified Ideographs plus Extension A. Checked by code point so
        # the result does not depend on the interpreter's Unicode database.
        if '\u4e00' <= ch <= '\u9fff' or '\u3400' <= ch <= '\u4dbf':
            han_count += 1
        elif ch.isalpha():
            other_letter_count += 1

    letter_count = han_count + other_letter_count
    if letter_count == 0:
        # Nothing to judge by: fall back to English.
        return 'english'

    chinese_ratio = han_count / letter_count
    return 'chinese' if chinese_ratio > chinese_threshold else 'english'
|
| 34 |
+
|
| 35 |
def analyze_sentiment(text):
|
| 36 |
"""
|
| 37 |
+
智能多语言情感分析:自动检测语言并使用对应的最佳模型
|
| 38 |
|
| 39 |
Args:
|
| 40 |
text (str): 输入的文本
|
|
|
|
| 46 |
return {"错误": "请输入有效的文本"}
|
| 47 |
|
| 48 |
try:
|
| 49 |
+
# 检测语言
|
| 50 |
+
language = detect_language(text)
|
| 51 |
+
|
| 52 |
+
# 根据语言选择合适的模型
|
| 53 |
+
if language == 'chinese':
|
| 54 |
+
results = chinese_sentiment_pipeline(text)[0]
|
| 55 |
+
model_info = "中文专用模型 (UER RoBERTa-Dianping)"
|
| 56 |
+
# 中文模型的标签映射
|
| 57 |
+
label_mapping = {
|
| 58 |
+
"LABEL_0": "负面 😔",
|
| 59 |
+
"LABEL_1": "正面 😊"
|
| 60 |
+
}
|
| 61 |
+
else:
|
| 62 |
+
results = english_sentiment_pipeline(text)[0]
|
| 63 |
+
model_info = "英文专用模型 (Cardiff NLP Twitter RoBERTa)"
|
| 64 |
+
# 英文模型的标签映射
|
| 65 |
+
label_mapping = {
|
| 66 |
+
"LABEL_0": "负面 😔",
|
| 67 |
+
"LABEL_1": "中性 😐",
|
| 68 |
+
"LABEL_2": "正面 😊"
|
| 69 |
+
}
|
| 70 |
|
| 71 |
# 处理结果
|
| 72 |
sentiment_scores = {}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 73 |
|
| 74 |
for result in results:
|
| 75 |
label = result['label']
|
|
|
|
| 81 |
max_result = max(results, key=lambda x: x['score'])
|
| 82 |
max_label = label_mapping.get(max_result['label'], max_result['label'])
|
| 83 |
|
| 84 |
+
# 添加详细信息
|
| 85 |
sentiment_scores["主要情感"] = f"{max_label} (置信度: {max_result['score']:.4f})"
|
| 86 |
+
sentiment_scores["使用模型"] = model_info
|
| 87 |
+
sentiment_scores["检测语言"] = "中文" if language == 'chinese' else "英文"
|
| 88 |
|
| 89 |
return sentiment_scores
|
| 90 |
|
|
|
|
| 94 |
# 创建Gradio界面
|
| 95 |
with gr.Blocks(title="文本情感分析器", theme=gr.themes.Soft()) as demo:
|
| 96 |
gr.Markdown("""
|
| 97 |
+
# 🎭 智能多语言文本情感分析器
|
| 98 |
|
| 99 |
+
输入任何文本,AI将自动检测语言并使用最适合的模型分析情感倾向
|
| 100 |
|
| 101 |
+
**中文模型**: UER RoBERTa (大众点评数据微调)
|
| 102 |
+
**英文模型**: Cardiff NLP Twitter RoBERTa
|
| 103 |
+
**支持语言**: 中文、英文自动检测
|
| 104 |
""")
|
| 105 |
|
| 106 |
with gr.Row():
|