liumaolin committed on
Commit
6556ced
·
1 Parent(s): 469433f

Refactor `test_llm_dialogue.py` to enhance multi-dataset testing for LLM dialogue

Browse files

- Introduce multiple test datasets covering diverse topics in both Chinese and English.
- Replace `user_questions` with structured `test_datasets`.
- Add `_get_prompt_by_language` for language-specific prompt handling.
- Integrate `create_langchain_pipeline` for improved pipeline creation and execution logic.

Files changed (1) hide show
  1. tests/test_llm_dialogue.py +109 -28
tests/test_llm_dialogue.py CHANGED
@@ -16,6 +16,7 @@ if lib_path.exists() and lib_path.as_posix() not in sys.path:
16
 
17
  from voice_dialogue.config import paths
18
  from voice_dialogue.config.llm_config import get_llm_model_params
 
19
 
20
  CHINESE_SYSTEM_PROMPT = (
21
  "你是善于模拟真实的思考过程的AI助手。"
@@ -41,7 +42,7 @@ class TestLLMDialogue(unittest.TestCase):
41
  self.history_store = {}
42
 
43
  model_path = paths.LLM_MODELS_PATH / 'qwen' / 'Qwen3-8B-Q6_K.gguf'
44
- langchain_instance = ChatLlamaCpp(model_path=model_path.as_posix(), **model_params)
45
 
46
  system_message = SystemMessage(content=CHINESE_SYSTEM_PROMPT)
47
  human_message = HumanMessagePromptTemplate.from_template("{input}")
@@ -51,27 +52,96 @@ class TestLLMDialogue(unittest.TestCase):
51
  human_message
52
  ])
53
 
54
- chain = prompt | langchain_instance
55
  self.chain_with_history = RunnableWithMessageHistory(chain, self.get_session_history,
56
  history_messages_key='history')
57
  self.warmup()
58
 
59
- # 连续对话测试问题集
60
- self.user_questions = [
61
- # 第1轮:开放性话题引入
62
- "最近人工智能技术发展很快,你觉得AI对我们日常生活带来了哪些改变?",
63
-
64
- # 第2轮:基于前一个回答的深入探讨
65
- "你刚才提到的这些改变中,哪一个你认为是最重要的?为什么?",
66
-
67
- # 第3轮:转向具体场景和个人观点
68
- "如果让你选择一个AI应用来帮助解决教育领域的问题,你会选择什么?具体怎么实现?",
69
-
70
- # 第4轮:挑战性问题,测试逻辑思维
71
- "但是也有人担心AI在教育中会让学生过度依赖技术,失去独立思考能力。你怎么看待这个担忧?",
72
-
73
- # 第5轮:总结性问题,测试整合能力
74
- "综合我们刚才讨论的内容,你认为在AI快速发展的时代,普通人应该如何适应和准备?"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
75
  ]
76
 
77
  def get_session_history(self, session_id: str) -> InMemoryChatMessageHistory:
@@ -97,14 +167,25 @@ class TestLLMDialogue(unittest.TestCase):
97
  for chunk in self.chain_with_history.stream(input={'input': 'This is a warmup step.'}, config=config):
98
  pass
99
 
 
 
 
 
 
 
 
100
  def test_dialogue(self):
101
- session_id = 'test_dialogue'
102
- for user_question in self.user_questions:
103
- print('User question:', user_question)
104
- config = {"configurable": {"session_id": session_id}}
105
- print(f'LLM answer: ', end='')
106
- for chunk in self.chain_with_history.stream(input={'input': user_question}, config=config):
107
- print(chunk.content, end='')
108
- print()
109
- print('-' * 80)
110
- print()
 
 
 
 
 
16
 
17
  from voice_dialogue.config import paths
18
  from voice_dialogue.config.llm_config import get_llm_model_params
19
+ from voice_dialogue.services.text.processor import create_langchain_pipeline
20
 
21
  CHINESE_SYSTEM_PROMPT = (
22
  "你是善于模拟真实的思考过程的AI助手。"
 
42
  self.history_store = {}
43
 
44
  model_path = paths.LLM_MODELS_PATH / 'qwen' / 'Qwen3-8B-Q6_K.gguf'
45
+ self.langchain_instance = ChatLlamaCpp(model_path=model_path.as_posix(), **model_params)
46
 
47
  system_message = SystemMessage(content=CHINESE_SYSTEM_PROMPT)
48
  human_message = HumanMessagePromptTemplate.from_template("{input}")
 
52
  human_message
53
  ])
54
 
55
+ chain = prompt | self.langchain_instance
56
  self.chain_with_history = RunnableWithMessageHistory(chain, self.get_session_history,
57
  history_messages_key='history')
58
  self.warmup()
59
 
60
+ self.test_datasets = [
61
+ {
62
+ 'session_id': 'test_dataset_1',
63
+ 'language': 'zh',
64
+ 'questions': [
65
+ # 第1轮:开放性话题引入
66
+ "最近人工智能技术发展很快,你觉得AI对我们日常生活带来了哪些改变?",
67
+
68
+ # 第2轮:基于前一个回答的深入探讨
69
+ "你刚才提到的这些改变中,哪一个你认为是最重要的?为什么?",
70
+
71
+ # 第3轮:转向具体场景和个人观点
72
+ "如果让你选择一个AI应用来帮助解决教育领域的问题,你会选择什么?具体怎么实现?",
73
+
74
+ # 第4轮:挑战性问题,测试逻辑思维
75
+ "但是也有人担心AI在教育中会让学生过度依赖技术,失去独立思考能力。你怎么看待这个担忧?",
76
+
77
+ # 第5轮:总结性问题,测试整合能力
78
+ "综合我们刚才讨论的内容,你认为在AI快速发展的时代,普通人应该如何适应和准备?"
79
+ ]
80
+ },
81
+ {
82
+ 'session_id': 'test_dataset_2',
83
+ 'language': 'zh',
84
+ 'questions': [
85
+ # 第1轮:开放性话题引入
86
+ "近年来环境问题越来越受到关注,你认为我们个人在日常生活中可以为环保做些什么?",
87
+ # 第2轮:基于前一个回答的深入探讨
88
+ "在这些环保行为中,你觉得哪一种最容易被大家接受和实践?原因是什么?",
89
+ # 第3轮:转向具体场景和个人观点
90
+ "如果让你设计一个推广垃圾分类的社区活动,你会怎么做?",
91
+ # 第4轮:挑战性问题,测试逻辑思维
92
+ "有些人认为,个人的环保努力相比工业污染只是杯水车薪,这种看法你怎么看?",
93
+ # 第5轮:总结性问题,测试整合能力
94
+ "总的来说,为了实现可持续发展,你认为政府、企业和个人应该分别扮演什么样的角色?"
95
+ ]
96
+ },
97
+ {
98
+ 'session_id': 'test_dataset_3',
99
+ 'language': 'zh',
100
+ 'questions': [
101
+ # 第1轮:开放性话题引入
102
+ "随着科技的发展,未来的工作模式可能会发生很大变化,你想象中未来的工作是什么样的?",
103
+ # 第2轮:基于前一个回答的深入探讨
104
+ "你提到的远程办公和灵活工作时间,对员工和公司来说,各自最大的好处和挑战是什么?",
105
+ # 第3轮:转向具体场景和个人观点
106
+ "假设你是一名公司经理,你会如何利用技术工具来提高远程团队的协作效率?",
107
+ # 第4轮:挑战性问题,测试逻辑思维
108
+ "自动化和人工智能可能会取代一部分人的工作,这引起了很多人的焦虑。你认为我们应该如何应对这种“失业恐慌”?",
109
+ # 第5轮:总结性问题,测试整合能力
110
+ "面对未来工作的种种不确定性,你认为现在的年轻人最需要培养哪些核心能力?"
111
+ ]
112
+ },
113
+ {
114
+ 'session_id': 'test_dataset_4',
115
+ 'language': 'en',
116
+ 'questions': [
117
+ # Round 1: Open-ended topic introduction
118
+ "Mental health has become a more prominent topic recently. What are some common stressors you think people face in modern society?",
119
+ # Round 2: In-depth discussion based on the previous answer
120
+ "Of the stressors you mentioned, which one do you believe has the most significant impact on people's well-being, and why?",
121
+ # Round 3: Shift to specific scenarios and personal opinions
122
+ "If you were to design a mobile app to help people manage stress, what key features would you include?",
123
+ # Round 4: Challenging question, testing logical thinking
124
+ "Some argue that the increased focus on mental health can sometimes lead to over-diagnosis or the medicalization of normal emotions. What are your thoughts on this concern?",
125
+ # Round 5: Summarizing question, testing integration ability
126
+ "To sum up, what kind of societal changes do you think would be most effective in promoting better mental health for everyone?"
127
+ ]
128
+ },
129
+ {
130
+ 'session_id': 'test_dataset_5',
131
+ 'language': 'en',
132
+ 'questions': [
133
+ # Round 1: Open-ended topic introduction
134
+ "Humanity has always been fascinated by space. What do you see as the most exciting developments in space exploration right now?",
135
+ # Round 2: In-depth discussion based on the previous answer
136
+ "You mentioned the push towards colonizing Mars. What do you think are the biggest scientific and ethical challenges we need to overcome for that to become a reality?",
137
+ # Round 3: Shift to specific scenarios and personal opinions
138
+ "If you were given the chance to send a single message to an extraterrestrial civilization, what would it say?",
139
+ # Round 4: Challenging question, testing logical thinking
140
+ "There's a debate about whether the vast amounts of money spent on space exploration could be better used to solve problems here on Earth. How would you justify the continued investment in space programs?",
141
+ # Round 5: Summarizing question, testing integration ability
142
+ "Considering everything we've discussed, what long-term benefits do you believe humanity will gain from its ventures into space?"
143
+ ]
144
+ }
145
  ]
146
 
147
  def get_session_history(self, session_id: str) -> InMemoryChatMessageHistory:
 
167
  for chunk in self.chain_with_history.stream(input={'input': 'This is a warmup step.'}, config=config):
168
  pass
169
 
170
+ def _get_prompt_by_language(self, language: str) -> str:
171
+ """根据语言获取对应的 prompt"""
172
+ if language == "zh":
173
+ return CHINESE_SYSTEM_PROMPT
174
+ else:
175
+ return ENGLISH_SYSTEM_PROMPT
176
+
177
  def test_dialogue(self):
178
+ for test_dataset in self.test_datasets:
179
+ session_id = test_dataset.get('session_id')
180
+ print(f'Test dataset: {session_id}')
181
+ print('=' * 80)
182
+ for question in test_dataset.get('questions'):
183
+ print('Test question:', question)
184
+ config = {"configurable": {"session_id": session_id}}
185
+ prompt = self._get_prompt_by_language(test_dataset.get('language'))
186
+ pipeline = create_langchain_pipeline(self.langchain_instance, prompt, self.get_session_history)
187
+ print(f'LLM answer: ', end='')
188
+ for chunk in pipeline.stream(input={'input': question}, config=config):
189
+ print(chunk.content, end='')
190
+ print()
191
+ print('-' * 80)