SarahXia0405 committed on
Commit
337c831
·
verified ·
1 Parent(s): 6a691fc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +96 -1
app.py CHANGED
@@ -1,4 +1,5 @@
1
  import os
 
2
  from typing import List, Dict, Tuple, Optional
3
 
4
  import gradio as gr
@@ -192,6 +193,72 @@ def describe_cognitive_state(state: Optional[Dict[str, int]]) -> str:
192
  return "mixed or uncertain cognitive state."
193
 
194
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
195
  # ---------- 构建 messages ----------
196
  def build_messages(
197
  user_message: str,
@@ -617,7 +684,7 @@ with gr.Blocks(title="Clare – Hanbridge AI Teaching Assistant") as demo:
617
  lines=8,
618
  )
619
 
620
- # 主对话逻辑:更新弱项 + 认知状态 + 调用 Clare
621
  def respond(
622
  message,
623
  chat_history,
@@ -629,9 +696,37 @@ with gr.Blocks(title="Clare – Hanbridge AI Teaching Assistant") as demo:
629
  learning_mode_val,
630
  doc_type_val,
631
  ):
 
632
  weaknesses = update_weaknesses_from_message(message, weaknesses or [])
633
  cognitive_state = update_cognitive_state_from_message(message, cognitive_state)
634
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
635
  answer, new_history = chat_with_clare(
636
  message=message,
637
  history=chat_history,
 
1
  import os
2
+ import re
3
  from typing import List, Dict, Tuple, Optional
4
 
5
  import gradio as gr
 
193
  return "mixed or uncertain cognitive state."
194
 
195
 
196
+ # ---------- Same Question Check helpers ----------
197
+ def _normalize_text(text: str) -> str:
198
+ """
199
+ 将文本转为小写、去除标点和多余空格,用于简单相似度计算。
200
+ """
201
+ text = text.lower().strip()
202
+ # 去掉标点符号,只保留字母数字和空格
203
+ text = re.sub(r"[^\w\s]", " ", text)
204
+ # 合并多余空格
205
+ text = re.sub(r"\s+", " ", text)
206
+ return text
207
+
208
+
209
+ def _jaccard_similarity(a: str, b: str) -> float:
210
+ tokens_a = set(a.split())
211
+ tokens_b = set(b.split())
212
+ if not tokens_a or not tokens_b:
213
+ return 0.0
214
+ return len(tokens_a & tokens_b) / len(tokens_a | tokens_b)
215
+
216
+
217
def find_similar_past_question(
    message: str,
    history: List[Tuple[str, str]],
    similarity_threshold: float = 0.8,
    max_turns_to_check: int = 6,
) -> Optional[Tuple[str, str, float]]:
    """Look for an earlier question in *history* that resembles *message*.

    Only the most recent ``max_turns_to_check`` turns are examined, newest
    first. A turn whose normalized question has exactly the same tokens as
    the normalized message is returned immediately with similarity ``1.0``;
    otherwise the turn with the highest Jaccard similarity is returned,
    provided that similarity reaches ``similarity_threshold``.

    Args:
        message: The question the user just asked.
        history: Past ``(user_question, assistant_answer)`` pairs, oldest
            first. ``None`` or an empty list means "no history".
        similarity_threshold: Minimum Jaccard similarity for a non-identical
            question to count as a repeat.
        max_turns_to_check: How many of the latest turns to inspect. Turns
            that are skipped (see below) still count toward this limit.

    Returns:
        ``(past_question, past_answer, similarity)`` for the matching turn,
        or ``None`` when nothing similar enough was found.
    """
    if not isinstance(message, str) or not history:
        return None

    norm_msg = _normalize_text(message)
    msg_tokens = norm_msg.split()
    if not msg_tokens:
        # Nothing but punctuation/whitespace: there is nothing to compare.
        return None

    best_sim = 0.0
    best_pair: Optional[Tuple[str, str]] = None

    # Walk backwards from the most recent turn.
    for turn_index, (user_q, assistant_a) in enumerate(reversed(history)):
        if turn_index >= max_turns_to_check:
            break

        # A turn without a textual question or answer cannot be compared or
        # reused (callers concatenate the returned answer with a string).
        if not isinstance(user_q, str) or not isinstance(assistant_a, str):
            continue

        norm_hist_q = _normalize_text(user_q)
        hist_tokens = norm_hist_q.split()
        if not hist_tokens:
            continue

        # Identical after normalization: return right away. Comparing token
        # lists keeps this insensitive to stray leading/trailing whitespace.
        if msg_tokens == hist_tokens:
            return user_q, assistant_a, 1.0

        sim = _jaccard_similarity(norm_msg, norm_hist_q)
        if sim > best_sim:
            best_sim = sim
            best_pair = (user_q, assistant_a)

    if best_pair is not None and best_sim >= similarity_threshold:
        return best_pair[0], best_pair[1], best_sim

    return None
260
+
261
+
262
  # ---------- 构建 messages ----------
263
  def build_messages(
264
  user_message: str,
 
684
  lines=8,
685
  )
686
 
687
+ # 主对话逻辑:Same Question Check + 更新弱项 + 认知状态 + 调用 Clare
688
  def respond(
689
  message,
690
  chat_history,
 
696
  learning_mode_val,
697
  doc_type_val,
698
  ):
699
+ # 先更新弱项和认知状态
700
  weaknesses = update_weaknesses_from_message(message, weaknesses or [])
701
  cognitive_state = update_cognitive_state_from_message(message, cognitive_state)
702
 
703
+ # ---------- Same Question Check ----------
704
+ dup = find_similar_past_question(message, chat_history)
705
+ if dup is not None:
706
+ past_q, past_a, sim = dup
707
+ # 直接复用之前回答,并给一个简短提示
708
+ prefix_en = (
709
+ "I noticed this question is very similar to one you asked earlier, "
710
+ "so I'm showing the previous explanation again. "
711
+ "If there's a specific part that's still unclear, tell me and I can "
712
+ "re-explain it in a different way.\n\n"
713
+ "**Earlier answer:**\n"
714
+ )
715
+ prefix_zh = (
716
+ "我注意到你现在的问题和之前问过的非常相似,"
717
+ "所以我先把当时的回答再展示一次。"
718
+ "如果还有具体不清楚的地方,可以告诉我,我会换一种方式解释。\n\n"
719
+ "**之前的回答:**\n"
720
+ )
721
+ if language_pref_val == "中文":
722
+ answer = prefix_zh + past_a
723
+ else:
724
+ answer = prefix_en + past_a
725
+
726
+ new_history = chat_history + [(message, answer)]
727
+ return "", new_history, weaknesses, cognitive_state
728
+
729
+ # ---------- 正常调用 Clare ----------
730
  answer, new_history = chat_with_clare(
731
  message=message,
732
  history=chat_history,