Spaces:
Sleeping
Sleeping
File size: 6,525 Bytes
c8b1f17 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 |
"""
快速测试情绪分析模型效果
对比规则引擎和微调模型
"""
import sys
import os
from pathlib import Path
# Make the project root (the parent of this file's directory) importable.
project_root = Path(__file__).parent.parent
sys.path.insert(0, str(project_root))
# Also export PYTHONPATH, intended to avoid module-import problems.
# NOTE(review): PYTHONPATH is normally read at interpreter start-up, so this
# presumably only affects child processes - confirm it is still needed.
os.environ['PYTHONPATH'] = str(project_root)
# Import the required modules.
import paddle
from paddlenlp.transformers import AutoModelForSequenceClassification, AutoTokenizer
class SentimentModelTester:
    """Thin wrapper that loads a fine-tuned classifier and scores single texts."""

    def __init__(self, model_path: str):
        """Load tokenizer and classifier from ``model_path`` and switch to eval mode.

        Args:
            model_path: Location passed to ``from_pretrained`` for both the
                tokenizer and the sequence-classification model.
        """
        print(f"加载模型: {model_path}")
        self.tokenizer = AutoTokenizer.from_pretrained(model_path)
        classifier = AutoModelForSequenceClassification.from_pretrained(model_path)
        classifier.eval()
        self.model = classifier
        # Class index -> emotion label used in the returned dictionaries.
        self.label_map = {0: 'positive', 1: 'neutral', 2: 'negative'}

    def predict(self, text: str) -> dict:
        """Classify one text and return the label together with its probabilities.

        Args:
            text: The sentence to classify.

        Returns:
            A dict with the keys ``text``, ``emotion`` (label of the most
            probable class), ``confidence`` (probability of that class),
            ``probabilities`` (label -> probability) and ``method``.
        """
        tokenizer_options = {
            'max_length': 128,
            'padding': 'max_length',
            'truncation': True,
            'return_tensors': 'pd',
        }
        features = self.tokenizer(text, **tokenizer_options)

        # Inference only: no gradient bookkeeping is needed for the forward pass.
        with paddle.no_grad():
            logits = self.model(
                features['input_ids'],
                token_type_ids=features['token_type_ids'],
            )

        # Softmax over the class axis; take the first (only) row of the batch.
        class_probs = paddle.nn.functional.softmax(logits, axis=1).numpy()[0]
        best_id = int(class_probs.argmax())

        per_label = {}
        for class_id, value in enumerate(class_probs):
            per_label[self.label_map[class_id]] = float(value)

        return {
            'text': text,
            'emotion': self.label_map[best_id],
            'confidence': float(class_probs[best_id]),
            'probabilities': per_label,
            'method': 'finetuned_model',
        }
def rule_based_sentiment(text: str) -> dict:
    """Classify ``text`` with a simplified keyword-counting rule engine.

    Each keyword that occurs as a substring of ``text`` counts once for its
    polarity. The polarity with more matching keywords wins; a tie (including
    no matches at all) yields ``neutral``.

    Args:
        text: The sentence to classify.

    Returns:
        A dict with ``emotion`` ('positive' / 'negative' / 'neutral'),
        ``confidence`` (0.5 for neutral, otherwise 0.6 plus 0.1 per matching
        keyword of the winning polarity, capped at 0.9) and ``method``.
    """
    positive_keywords = ['好', '满意', '喜欢', '谢谢', '感谢', '可以', '行', '没问题',
                         '支持', '理解', '配合', '接受', '认可', '专业', '高效']
    negative_keywords = ['不', '没', '不行', '不好', '讨厌', '烦', '生气', '愤怒',
                         '不满', '投诉', '举报', '错误', '失败', '拒绝', '反对']

    pos_hits = len([word for word in positive_keywords if word in text])
    neg_hits = len([word for word in negative_keywords if word in text])

    # Tie (or nothing matched): fixed neutral verdict.
    if pos_hits == neg_hits:
        return {'emotion': 'neutral', 'confidence': 0.5, 'method': 'rule_engine'}

    if pos_hits > neg_hits:
        label, winning_hits = 'positive', pos_hits
    else:
        label, winning_hits = 'negative', neg_hits

    return {
        'emotion': label,
        'confidence': min(0.9, 0.6 + winning_hits * 0.1),
        'method': 'rule_engine',
    }
def _run_evaluation(title, classify, test_cases):
    """Score every test case with ``classify``, print a report, return accuracy.

    Args:
        title: Heading printed above the per-case lines.
        classify: Callable taking a text and returning a dict that has at
            least an ``emotion`` key (``confidence`` / ``method`` optional).
        test_cases: Sequence of ``(text, expected_emotion)`` pairs.

    Returns:
        Accuracy in percent (0.0 when ``test_cases`` is empty).
    """
    print(title)
    print("-" * 80)

    total = len(test_cases)
    correct = 0
    for text, expected in test_cases:
        result = classify(text)
        predicted = result['emotion']
        confidence = result.get('confidence', 0)
        method = result.get('method', 'unknown')

        is_hit = predicted == expected
        if is_hit:
            correct += 1
        status = "✓" if is_hit else "✗"
        print(f"{status} {text[:35]:35} -> {predicted:8} (期望: {expected:8}) 置信度: {confidence:.2f} 方法: {method}")

    # Guard the division so an empty case list reports 0% instead of crashing.
    accuracy = correct / total * 100 if total else 0.0
    print(f"\n准确率: {correct}/{total} = {accuracy:.1f}%")
    return accuracy


def main():
    """Compare the rule engine with the fine-tuned model on HR test sentences.

    The rule engine is always evaluated. The fine-tuned model is evaluated
    only when its directory exists under the project root and loading
    succeeds; otherwise a message is printed and only the rule-engine
    results are reported.
    """
    # HR-scenario test cases: (text, expected emotion).
    test_cases = [
        # Positive
        ("好的,谢谢你的帮助,非常满意!", "positive"),
        ("没问题,我理解公司的规定", "positive"),
        ("太好了,感谢你的解答", "positive"),
        ("流程很规范,我很认可", "positive"),
        ("专业高效的回复,感谢支持", "positive"),
        # Neutral
        ("您好,请问有什么可以帮您?", "neutral"),
        ("请问申请年假需要什么材料", "neutral"),
        ("我需要了解培训的具体时间", "neutral"),
        ("好的,我知道了", "neutral"),
        ("请问还有什么需要补充的吗", "neutral"),
        # Negative
        ("我对这个处理结果很不满意", "negative"),
        ("这个制度太不合理了,我很生气", "negative"),
        ("为什么要强制执行这个规定", "negative"),
        ("你们的做法让我很失望", "negative"),
        ("我要投诉这个处理方式", "negative"),
    ]

    print("=" * 80)
    print("情绪分析模型对比测试")
    print("=" * 80)

    # Try to load the fine-tuned model; fall back to rule-engine-only testing.
    model_path = project_root / "models" / "sentiment-hr" / "final_model"
    model_tester = None
    if not model_path.exists():
        print(f"\n错误: 微调模型不存在于 {model_path}")
        print("仅测试规则引擎\n")
    else:
        print(f"\n加载微调模型: {model_path}")
        try:
            model_tester = SentimentModelTester(str(model_path))
        except Exception as e:
            # Deliberate best-effort: any load failure downgrades to the
            # rule-engine-only run instead of aborting the script.
            print(f"加载模型失败: {e}")
            print("仅测试规则引擎\n")

    # Method 1: rule engine.
    accuracy_rule = _run_evaluation("\n【方法1: 规则引擎】", rule_based_sentiment, test_cases)

    # Method 2: fine-tuned model (only when it could be loaded).
    accuracy_finetuned = None
    improvement = None
    if model_tester is not None:
        accuracy_finetuned = _run_evaluation("\n【方法2: 微调模型】", model_tester.predict, test_cases)
        improvement = accuracy_finetuned - accuracy_rule
        print(f"\n相比规则引擎提升: {improvement:+.1f}%")

    # Summary.
    print("\n" + "=" * 80)
    print("总结")
    print("=" * 80)
    print(f"规则引擎准确率: {accuracy_rule:.1f}%")
    if model_tester is not None:
        print(f"微调模型准确率: {accuracy_finetuned:.1f}%")
        print(f"性能提升: {improvement:+.1f}%")
    print("=" * 80)


if __name__ == "__main__":
    main()
|