# hr-eval-api-v2 / scripts / quick_test_sentiment.py
# KarenYYH
# Initial commit - HR Evaluation API v2
# c8b1f17
"""
快速测试情绪分析模型效果
对比规则引擎和微调模型
"""
import sys
import os
from pathlib import Path
# Add the project path: project_root is the parent of the directory that
# contains this script, and it is placed at the front of sys.path.
project_root = Path(__file__).parent.parent
sys.path.insert(0, str(project_root))
# Set the environment variable to avoid module import problems.
# NOTE(review): the interpreter reads PYTHONPATH at startup, so this assignment
# presumably only matters for child processes; the sys.path.insert above is
# what affects imports in this process -- confirm it is still needed.
os.environ['PYTHONPATH'] = str(project_root)
# 导入必要的模块
import paddle
from paddlenlp.transformers import AutoModelForSequenceClassification, AutoTokenizer
class SentimentModelTester:
    """Wrap a fine-tuned sequence-classification checkpoint for quick tests.

    The tokenizer and model are both loaded from the same ``model_path`` and
    the model is switched to evaluation mode on construction.
    """

    def __init__(self, model_path: str):
        """Load the tokenizer and the classifier stored at ``model_path``."""
        print(f"加载模型: {model_path}")
        self.tokenizer = AutoTokenizer.from_pretrained(model_path)
        classifier = AutoModelForSequenceClassification.from_pretrained(model_path)
        self.model = classifier
        self.model.eval()
        # Hard-coded class-id -> label mapping.
        # NOTE(review): assumes the checkpoint was trained with this id order
        # -- confirm against the training script.
        self.label_map = {0: 'positive', 1: 'neutral', 2: 'negative'}

    def predict(self, text: str) -> dict:
        """Classify a single piece of text.

        Returns:
            A dict with the input ``text``, the predicted ``emotion`` label,
            its ``confidence`` (softmax probability of the predicted class),
            the per-label ``probabilities`` and a constant ``method`` tag.
        """
        # Inputs are padded/truncated to a fixed length of 128 tokens.
        tokenizer_options = {
            'max_length': 128,
            'padding': 'max_length',
            'truncation': True,
            'return_tensors': 'pd',
        }
        features = self.tokenizer(text, **tokenizer_options)

        # Inference only: run the forward pass without gradient tracking.
        with paddle.no_grad():
            raw_scores = self.model(
                features['input_ids'],
                token_type_ids=features['token_type_ids'],
            )

        # Only one text is encoded, so take the first row of the softmax output.
        normalized = paddle.nn.functional.softmax(raw_scores, axis=1)
        distribution = normalized.numpy()[0]
        best_index = int(distribution.argmax())

        emotion = self.label_map[best_index]
        confidence = float(distribution[best_index])
        per_label = {}
        for class_id, probability in enumerate(distribution):
            per_label[self.label_map[class_id]] = float(probability)

        return {
            'text': text,
            'emotion': emotion,
            'confidence': confidence,
            'probabilities': per_label,
            'method': 'finetuned_model',
        }
def rule_based_sentiment(text: str) -> dict:
    """Classify ``text`` with a simplified keyword-counting rule engine.

    Each keyword contributes at most one hit (substring presence, not number
    of occurrences). The side with more hits wins; a tie, including zero hits
    on both sides, is reported as neutral.

    Returns:
        A dict with ``emotion`` ('positive', 'negative' or 'neutral'),
        ``confidence`` (0.5 for neutral, otherwise 0.6 plus 0.1 per winning
        hit, capped at 0.9) and ``method`` set to 'rule_engine'.
    """
    positive_keywords = (
        '好', '满意', '喜欢', '谢谢', '感谢', '可以', '行', '没问题',
        '支持', '理解', '配合', '接受', '认可', '专业', '高效',
    )
    negative_keywords = (
        '不', '没', '不行', '不好', '讨厌', '烦', '生气', '愤怒',
        '不满', '投诉', '举报', '错误', '失败', '拒绝', '反对',
    )

    def _hits(keywords):
        # Number of distinct keywords that occur somewhere in the text.
        return len([keyword for keyword in keywords if keyword in text])

    positive_hits = _hits(positive_keywords)
    negative_hits = _hits(negative_keywords)

    if positive_hits == negative_hits:
        label, score = 'neutral', 0.5
    else:
        label = 'positive' if positive_hits > negative_hits else 'negative'
        winning_hits = max(positive_hits, negative_hits)
        score = min(0.9, 0.6 + winning_hits * 0.1)

    outcome = {}
    outcome['emotion'] = label
    outcome['confidence'] = score
    outcome['method'] = 'rule_engine'
    return outcome
def _evaluate(title, predict_fn, test_cases):
    """Run ``predict_fn`` over ``test_cases``, print a report, return accuracy.

    Args:
        title: Heading line printed before the per-case results.
        predict_fn: Callable taking a text and returning a dict with an
            'emotion' key and optionally 'confidence' and 'method'.
        test_cases: Sequence of ``(text, expected_label)`` pairs.

    Returns:
        The accuracy as a percentage (0.0 when ``test_cases`` is empty).
    """
    print(title)
    print("-" * 80)
    total = len(test_cases)
    correct = 0
    for text, expected in test_cases:
        result = predict_fn(text)
        predicted = result['emotion']
        confidence = result.get('confidence', 0)
        method = result.get('method', 'unknown')
        matched = predicted == expected
        if matched:
            correct += 1
        status = "✓" if matched else "✗"
        print(f"{status} {text[:35]:35} -> {predicted:8} (期望: {expected:8}) 置信度: {confidence:.2f} 方法: {method}")
    # Guard against an empty case list so the helper never divides by zero.
    accuracy = correct / total * 100 if total else 0.0
    print(f"\n准确率: {correct}/{total} = {accuracy:.1f}%")
    return accuracy


def main():
    """Compare the rule engine with the fine-tuned model on fixed HR samples.

    The rule engine is always evaluated. The fine-tuned model is evaluated
    only when its directory exists under ``project_root`` and it loads
    successfully; otherwise a message is printed and the model part is
    skipped. All results are written to stdout.
    """
    # HR-scenario test cases as (text, expected label) pairs.
    test_cases = [
        # Positive sentiment
        ("好的,谢谢你的帮助,非常满意!", "positive"),
        ("没问题,我理解公司的规定", "positive"),
        ("太好了,感谢你的解答", "positive"),
        ("流程很规范,我很认可", "positive"),
        ("专业高效的回复,感谢支持", "positive"),
        # Neutral sentiment
        ("您好,请问有什么可以帮您?", "neutral"),
        ("请问申请年假需要什么材料", "neutral"),
        ("我需要了解培训的具体时间", "neutral"),
        ("好的,我知道了", "neutral"),
        ("请问还有什么需要补充的吗", "neutral"),
        # Negative sentiment
        ("我对这个处理结果很不满意", "negative"),
        ("这个制度太不合理了,我很生气", "negative"),
        ("为什么要强制执行这个规定", "negative"),
        ("你们的做法让我很失望", "negative"),
        ("我要投诉这个处理方式", "negative"),
    ]

    print("=" * 80)
    print("情绪分析模型对比测试")
    print("=" * 80)

    # Check whether the fine-tuned model is available; fall back to the rule
    # engine alone when it is missing or cannot be loaded.
    model_path = project_root / "models" / "sentiment-hr" / "final_model"
    model_tester = None
    if not model_path.exists():
        print(f"\n错误: 微调模型不存在于 {model_path}")
        print("仅测试规则引擎\n")
    else:
        print(f"\n加载微调模型: {model_path}")
        try:
            model_tester = SentimentModelTester(str(model_path))
        except Exception as e:
            # Script-level boundary: report the failure and keep going so the
            # rule-engine results are still produced.
            print(f"加载模型失败: {e}")
            print("仅测试规则引擎\n")

    # Evaluate the rule engine.
    accuracy_rule = _evaluate("\n【方法1: 规则引擎】", rule_based_sentiment, test_cases)

    # Evaluate the fine-tuned model, if it was loaded.
    accuracy_finetuned = None
    improvement = None
    if model_tester is not None:
        accuracy_finetuned = _evaluate("\n【方法2: 微调模型】", model_tester.predict, test_cases)
        # Difference in percentage points relative to the rule engine.
        improvement = accuracy_finetuned - accuracy_rule
        print(f"\n相比规则引擎提升: {improvement:+.1f}%")

    # Summary
    print("\n" + "=" * 80)
    print("总结")
    print("=" * 80)
    print(f"规则引擎准确率: {accuracy_rule:.1f}%")
    if model_tester is not None:
        print(f"微调模型准确率: {accuracy_finetuned:.1f}%")
        print(f"性能提升: {improvement:+.1f}%")
    print("=" * 80)
# Run the comparison only when this file is executed as a script.
if __name__ == "__main__":
    main()