Spaces:

KarenYYH
/

hr-eval-api-v2

Sleeping

hr-eval-api-v2 / scripts /quick_test_sentiment.py

KarenYYH

Initial commit - HR Evaluation API v2

c8b1f17 8 days ago

6.53 kB

	"""
	快速测试情绪分析模型效果
	对比规则引擎和微调模型
	"""
	import sys
	import os
	from pathlib import Path

	# Make the project importable: the project root is taken to be the parent of
	# the directory that contains this script, and is put first on sys.path.
	project_root = Path(__file__).parent.parent
	sys.path.insert(0, str(project_root))

	# Export the project root as PYTHONPATH to avoid module import problems.
	# NOTE(review): assigning PYTHONPATH at runtime presumably only matters for
	# child processes; imports in this process rely on the sys.path.insert above.
	os.environ['PYTHONPATH'] = str(project_root)

	# Import the required third-party modules
	import paddle
	from paddlenlp.transformers import AutoModelForSequenceClassification, AutoTokenizer


	class SentimentModelTester:
	    """Sentiment model tester.

	    Wraps a tokenizer and a sequence-classification model loaded from one
	    directory and exposes single-text prediction.
	    """

	    def __init__(self, model_path: str):
	        """Load the tokenizer and model from ``model_path``.

	        Args:
	            model_path: Location handed to both ``from_pretrained`` calls.
	        """
	        print(f"加载模型: {model_path}")
	        self.tokenizer = AutoTokenizer.from_pretrained(model_path)
	        classifier = AutoModelForSequenceClassification.from_pretrained(model_path)
	        # Switch to evaluation mode before the model is used for prediction.
	        classifier.eval()
	        self.model = classifier
	        # Class id -> label name.
	        # NOTE(review): assumes ids 0/1/2 match the label order used when the
	        # model was fine-tuned -- confirm against the training script.
	        self.label_map = dict(enumerate(('positive', 'neutral', 'negative')))

	    def predict(self, text: str) -> dict:
	        """Predict the sentiment of a single text.

	        Args:
	            text: The sentence to classify.

	        Returns:
	            A dict with the keys ``text``, ``emotion`` (label of the highest
	            scoring class), ``confidence`` (that class's softmax score),
	            ``probabilities`` (label -> score for every class) and ``method``
	            (always ``'finetuned_model'``).
	        """
	        features = self.tokenizer(
	            text,
	            max_length=128,
	            padding='max_length',
	            truncation=True,
	            return_tensors='pd',
	        )

	        # Inference only: run the forward pass without gradient tracking.
	        with paddle.no_grad():
	            logits = self.model(
	                features['input_ids'],
	                token_type_ids=features['token_type_ids'],
	            )

	        # Softmax over axis 1, then keep the first (only) row of the batch.
	        scores = paddle.nn.functional.softmax(logits, axis=1).numpy()[0]
	        best = int(scores.argmax())

	        label = self.label_map[best]
	        confidence = float(scores[best])

	        per_label = {}
	        for idx, score in enumerate(scores):
	            per_label[self.label_map[idx]] = float(score)

	        return {
	            'text': text,
	            'emotion': label,
	            'confidence': confidence,
	            'probabilities': per_label,
	            'method': 'finetuned_model',
	        }


	def rule_based_sentiment(text: str) -> dict:
	    """Rule-based sentiment analysis (simplified version).

	    Counts how many positive and how many negative keywords occur in ``text``
	    as substrings (each keyword counts at most once) and compares the totals.

	    Args:
	        text: The sentence to classify.

	    Returns:
	        A dict with ``emotion`` (``'positive'``, ``'negative'`` or
	        ``'neutral'``), ``confidence`` and ``method`` (always
	        ``'rule_engine'``). A tie yields ``'neutral'`` with confidence 0.5;
	        otherwise confidence is ``0.6 + 0.1 * hits`` of the winning side,
	        capped at 0.9.
	    """
	    positive_keywords = (
	        '好', '满意', '喜欢', '谢谢', '感谢', '可以', '行', '没问题',
	        '支持', '理解', '配合', '接受', '认可', '专业', '高效',
	    )
	    negative_keywords = (
	        '不', '没', '不行', '不好', '讨厌', '烦', '生气', '愤怒',
	        '不满', '投诉', '举报', '错误', '失败', '拒绝', '反对',
	    )

	    def count_hits(keywords):
	        # Number of keywords that appear somewhere in the text.
	        hits = 0
	        for keyword in keywords:
	            if keyword in text:
	                hits += 1
	        return hits

	    pos_hits = count_hits(positive_keywords)
	    neg_hits = count_hits(negative_keywords)

	    # Tie (including no hits at all): neutral with a fixed confidence.
	    if pos_hits == neg_hits:
	        return {
	            'emotion': 'neutral',
	            'confidence': 0.5,
	            'method': 'rule_engine',
	        }

	    if pos_hits > neg_hits:
	        verdict, winning_hits = 'positive', pos_hits
	    else:
	        verdict, winning_hits = 'negative', neg_hits

	    return {
	        'emotion': verdict,
	        'confidence': min(0.9, 0.6 + winning_hits * 0.1),
	        'method': 'rule_engine',
	    }


	def main():
	    """Main test routine.

	    Runs a fixed set of HR-scenario sentences through the rule engine and,
	    if a fine-tuned model directory exists under the module-level
	    ``project_root`` and loads successfully, through that model as well.
	    Prints one line per case, the accuracy of each method, and a summary.
	    """
	    # HR-scenario test cases as (text, expected label) pairs.
	    test_cases = [
	        # Positive sentiment
	        ("好的，谢谢你的帮助，非常满意！", "positive"),
	        ("没问题，我理解公司的规定", "positive"),
	        ("太好了，感谢你的解答", "positive"),
	        ("流程很规范，我很认可", "positive"),
	        ("专业高效的回复，感谢支持", "positive"),

	        # Neutral sentiment
	        ("您好，请问有什么可以帮您？", "neutral"),
	        ("请问申请年假需要什么材料", "neutral"),
	        ("我需要了解培训的具体时间", "neutral"),
	        ("好的，我知道了", "neutral"),
	        ("请问还有什么需要补充的吗", "neutral"),

	        # Negative sentiment
	        ("我对这个处理结果很不满意", "negative"),
	        ("这个制度太不合理了，我很生气", "negative"),
	        ("为什么要强制执行这个规定", "negative"),
	        ("你们的做法让我很失望", "negative"),
	        ("我要投诉这个处理方式", "negative"),
	    ]

	    heavy_rule = "=" * 80
	    light_rule = "-" * 80

	    def evaluate(heading, classify):
	        # Run every test case through `classify`, print a line per case and
	        # the accuracy, and return the accuracy as a percentage.
	        print(heading)
	        print(light_rule)
	        hits = 0
	        for text, expected in test_cases:
	            outcome = classify(text)
	            predicted = outcome['emotion']
	            confidence = outcome.get('confidence', 0)
	            method = outcome.get('method', 'unknown')
	            matched = predicted == expected
	            status = "✓" if matched else "✗"
	            if matched:
	                hits += 1
	            print(f"{status} {text[:35]:35} -> {predicted:8} (期望: {expected:8}) 置信度: {confidence:.2f} 方法: {method}")

	        accuracy = hits / len(test_cases) * 100
	        print(f"\n准确率: {hits}/{len(test_cases)} = {accuracy:.1f}%")
	        return accuracy

	    print(heavy_rule)
	    print("情绪分析模型对比测试")
	    print(heavy_rule)

	    # Check whether the fine-tuned model exists; fall back to rules only.
	    model_path = project_root / "models" / "sentiment-hr" / "final_model"
	    model_tester = None

	    if model_path.exists():
	        print(f"\n加载微调模型: {model_path}")
	        try:
	            model_tester = SentimentModelTester(str(model_path))
	        except Exception as exc:
	            # Any load failure is reported and the model run is skipped.
	            print(f"加载模型失败: {exc}")
	            print("仅测试规则引擎\n")
	    else:
	        print(f"\n错误: 微调模型不存在于 {model_path}")
	        print("仅测试规则引擎\n")

	    # Method 1: rule engine
	    accuracy_rule = evaluate("\n【方法1: 规则引擎】", rule_based_sentiment)

	    # Method 2: fine-tuned model (only when it was loaded)
	    if model_tester:
	        accuracy_finetuned = evaluate("\n【方法2: 微调模型】", model_tester.predict)

	        # Difference in percentage points relative to the rule engine.
	        improvement = accuracy_finetuned - accuracy_rule
	        print(f"\n相比规则引擎提升: {improvement:+.1f}%")

	    # Summary
	    print("\n" + heavy_rule)
	    print("总结")
	    print(heavy_rule)
	    print(f"规则引擎准确率: {accuracy_rule:.1f}%")
	    if model_tester:
	        print(f"微调模型准确率: {accuracy_finetuned:.1f}%")
	        print(f"性能提升: {improvement:+.1f}%")
	    print(heavy_rule)


	# Run the comparison only when this file is executed as a script.
	if __name__ == "__main__":
	main()