Spaces:
Sleeping
Sleeping
File size: 6,381 Bytes
c8b1f17 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 |
"""
测试情绪分析模型效果
对比规则引擎、官方模型和微调模型
"""
import sys
import os
from pathlib import Path
# 添加项目路径
sys.path.insert(0, str(Path(__file__).parent.parent))
# 直接导入模块
from models.sentiment import SentimentAnalyzer
def _evaluate_analyzer(analyzer, test_cases):
    """Run every labelled case through ``analyzer`` and print a report.

    One line is printed per case (match marker, truncated text, predicted and
    expected labels, confidence, method), followed by an accuracy line.

    Args:
        analyzer: Object exposing ``analyze_turn(text)``; the result is indexed
            with ``'emotion'`` and queried via ``.get`` for ``'confidence'``
            and ``'method'``.
        test_cases: Sequence of ``(text, expected_label)`` pairs.

    Returns:
        Tuple ``(correct, accuracy)``: the number of predictions equal to the
        expected label, and the accuracy as a percentage (``0.0`` when
        ``test_cases`` is empty).
    """
    correct = 0
    for text, expected in test_cases:
        result = analyzer.analyze_turn(text)
        predicted = result['emotion']
        confidence = result.get('confidence', 0)
        method = result.get('method', 'unknown')
        matched = predicted == expected
        if matched:
            correct += 1
        status = "✓" if matched else "✗"
        print(f"{status} {text[:35]:35} -> {predicted:8} (期望: {expected:8}) 置信度: {confidence:.2f} 方法: {method}")
    # Guard the division so an empty case list reports 0% instead of raising.
    accuracy = correct / len(test_cases) * 100 if test_cases else 0.0
    print(f"\n准确率: {correct}/{len(test_cases)} = {accuracy:.1f}%")
    return correct, accuracy


def test_sentiment_analyzer():
    """Compare the rule engine with a fine-tuned model on fixed HR test cases.

    The rule engine (``SentimentAnalyzer(model_path=None)``) is always
    evaluated. The candidate fine-tuned model directories are then probed in
    order, relative to the parent of this file's directory; the first one that
    exists and yields an analyzer with a truthy ``use_model`` attribute is
    evaluated and compared against the rule engine. All results are printed;
    nothing is returned.
    """
    # HR-scenario test cases: (text, expected emotion label).
    test_cases = [
        # Positive
        ("好的,谢谢你的帮助,非常满意!", "positive"),
        ("没问题,我理解公司的规定", "positive"),
        ("太好了,感谢你的解答", "positive"),
        ("流程很规范,我很认可", "positive"),
        ("专业高效的回复,感谢支持", "positive"),
        # Neutral
        ("您好,请问有什么可以帮您?", "neutral"),
        ("请问申请年假需要什么材料", "neutral"),
        ("我需要了解培训的具体时间", "neutral"),
        ("好的,我知道了", "neutral"),
        ("请问还有什么需要补充的吗", "neutral"),
        # Negative
        ("我对这个处理结果很不满意", "negative"),
        ("这个制度太不合理了,我很生气", "negative"),
        ("为什么要强制执行这个规定", "negative"),
        ("你们的做法让我很失望", "negative"),
        ("我要投诉这个处理方式", "negative"),
    ]

    print("=" * 80)
    print("情绪分析模型测试")
    print("=" * 80)

    # Method 1: rule engine (model_path=None so that no model is loaded).
    print("\n【方法1: 规则引擎】")
    print("-" * 80)
    analyzer_rule = SentimentAnalyzer(model_path=None)
    _, accuracy_rule = _evaluate_analyzer(analyzer_rule, test_cases)

    # Method 2: fine-tuned model, if one exists. Candidates are tried in
    # order; the final model comes first, then checkpoints from newest to
    # oldest epoch.
    finetuned_model_paths = [
        "./models/sentiment-hr/final_model",
        "./models/sentiment-hr/checkpoint-epoch-3",
        "./models/sentiment-hr/checkpoint-epoch-2",
        "./models/sentiment-hr/checkpoint-epoch-1",
    ]
    model_loaded = False
    for model_path in finetuned_model_paths:
        full_path = Path(__file__).parent.parent / model_path
        if not full_path.exists():
            continue

        print(f"\n【方法2: 微调模型】({model_path})")
        print("-" * 80)
        # The try block deliberately covers loading AND evaluation: any
        # failure is reported and the next candidate path is attempted.
        try:
            analyzer_finetuned = SentimentAnalyzer(model_path=str(full_path))
            if analyzer_finetuned.use_model:
                _, accuracy_finetuned = _evaluate_analyzer(
                    analyzer_finetuned, test_cases
                )
                # Improvement in percentage points over the rule engine.
                improvement = accuracy_finetuned - accuracy_rule
                print(f"\n相比规则引擎提升: {improvement:+.1f}%")
                model_loaded = True
                break
        except Exception as e:
            print(f"加载微调模型失败: {e}")

    if not model_loaded:
        print("\n【方法2: 微调模型】未找到微调模型")
        print("提示: 运行以下命令训练模型")
        print(" python scripts/prepare_sentiment_data.py")
        print(" python scripts/train_sentiment.py --train_data ./data/processed/sentiment/train.json")

    # Summary
    print("\n" + "=" * 80)
    print("总结")
    print("=" * 80)
    print(f"规则引擎准确率: {accuracy_rule:.1f}%")
    if model_loaded:
        # accuracy_finetuned / improvement are only bound when a model ran.
        print(f"微调模型准确率: {accuracy_finetuned:.1f}%")
        print(f"性能提升: {improvement:+.1f}%")
    print("=" * 80)
def interactive_test():
    """Analyze text typed on stdin in a loop until the user quits.

    If ``models/sentiment-hr/final_model`` exists under the parent of this
    file's directory, the analyzer is built from that path; otherwise it is
    built with ``model_path=None`` (reported to the user as the rule engine).

    The loop ends when the user enters ``quit``, ``exit`` or ``q``
    (case-insensitive), or when stdin reaches end-of-file or the user presses
    Ctrl-C. Empty input is ignored. Results are printed; nothing is returned.
    """
    print("\n" + "=" * 80)
    print("交互式情绪分析测试")
    print("=" * 80)
    print("输入文本进行分析,输入 'quit' 退出")

    # Try to use the fine-tuned model; fall back when it is not on disk.
    model_path = "./models/sentiment-hr/final_model"
    full_path = Path(__file__).parent.parent / model_path
    if full_path.exists():
        print(f"使用微调模型: {model_path}")
        analyzer = SentimentAnalyzer(model_path=str(full_path))
    else:
        print("未找到微调模型,使用规则引擎")
        analyzer = SentimentAnalyzer(model_path=None)

    while True:
        try:
            text = input("\n请输入文本: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Ctrl-D / closed stdin / Ctrl-C: leave the loop without a
            # traceback, finishing the pending prompt line first.
            print()
            break
        if text.lower() in ['quit', 'exit', 'q']:
            break
        if not text:
            continue

        result = analyzer.analyze_turn(text)
        print("\n分析结果:")
        print(f" 情绪: {result['emotion']}")
        print(f" 置信度: {result.get('confidence', 0):.4f}")
        print(f" 方法: {result.get('method', 'unknown')}")
        # The per-label distribution is optional in the result.
        if 'probabilities' in result:
            print(" 概率分布:")
            for label, prob in result['probabilities'].items():
                print(f" {label}: {prob:.4f}")
if __name__ == "__main__":
    import argparse

    # Command-line entry point: batch comparison by default, interactive
    # prompt when --interactive / -i is given.
    cli = argparse.ArgumentParser(description="测试情绪分析模型")
    cli.add_argument(
        "--interactive", "-i",
        action="store_true",
        help="启用交互式测试模式",
    )
    options = cli.parse_args()

    entry_point = interactive_test if options.interactive else test_sentiment_analyzer
    entry_point()
|