SarahXia0405 commited on
Commit
d4f2575
·
verified ·
1 Parent(s): 6e941de

Update api/clare_core.py

Browse files
Files changed (1) hide show
  1. api/clare_core.py +98 -276
api/clare_core.py CHANGED
@@ -1,4 +1,5 @@
1
- # clare_core.py
 
2
  import re
3
  import math
4
  from typing import List, Dict, Tuple, Optional
@@ -13,17 +14,23 @@ from .config import (
13
  CLARE_SYSTEM_PROMPT,
14
  LEARNING_MODE_INSTRUCTIONS,
15
  )
 
16
  from langsmith import traceable
17
  from langsmith.run_helpers import set_run_metadata
18
 
19
 
 
 
 
 
 
 
 
 
 
20
 
21
  # ---------- syllabus 解析 ----------
22
  def parse_syllabus_docx(file_path: str, max_lines: int = 15) -> List[str]:
23
- """
24
- 非常简单的 syllabus 解析:取前若干个非空段落当作主题行。
25
- 只是为了给 Clare 一些课程上下文,不追求超精确结构。
26
- """
27
  topics: List[str] = []
28
  try:
29
  doc = Document(file_path)
@@ -36,7 +43,6 @@ def parse_syllabus_docx(file_path: str, max_lines: int = 15) -> List[str]:
36
  break
37
  except Exception as e:
38
  topics = [f"[Error parsing syllabus: {e}]"]
39
-
40
  return topics
41
 
42
 
@@ -75,7 +81,7 @@ MASTERY_KEYWORDS = [
75
 
76
 
77
  def update_weaknesses_from_message(message: str, weaknesses: List[str]) -> List[str]:
78
- lower_msg = message.lower()
79
  if any(k in lower_msg for k in WEAKNESS_KEYWORDS):
80
  weaknesses = weaknesses or []
81
  weaknesses.append(message)
@@ -86,15 +92,10 @@ def update_cognitive_state_from_message(
86
  message: str,
87
  state: Optional[Dict[str, int]],
88
  ) -> Dict[str, int]:
89
- """
90
- 简单认知状态统计:
91
- - 遇到困惑类关键词 → confusion +1
92
- - 遇到掌握类关键词 → mastery +1
93
- """
94
  if state is None:
95
  state = {"confusion": 0, "mastery": 0}
96
 
97
- lower_msg = message.lower()
98
  if any(k in lower_msg for k in WEAKNESS_KEYWORDS):
99
  state["confusion"] = state.get("confusion", 0) + 1
100
  if any(k in lower_msg for k in MASTERY_KEYWORDS):
@@ -120,76 +121,54 @@ def build_session_memory_summary(
120
  history: List[Tuple[str, str]],
121
  weaknesses: Optional[List[str]],
122
  cognitive_state: Optional[Dict[str, int]],
123
- max_questions: int = 4,
124
- max_weaknesses: int = 3,
125
  ) -> str:
126
- """
127
- 只在本次会话内使用的“记忆摘要”:
128
- - 最近几条学生提问
129
- - 最近几条学生觉得难的问题
130
- - 当前的认知状态描述
131
- """
132
  parts: List[str] = []
133
 
134
- # 最近几条提问(只取 student)
135
  if history:
136
  recent_qs = [u for (u, _a) in history[-max_questions:]]
137
  trimmed_qs = []
138
  for q in recent_qs:
139
- q = q.strip()
140
  if len(q) > 120:
141
  q = q[:117] + "..."
142
  trimmed_qs.append(q)
143
  if trimmed_qs:
144
- parts.append("Recent student questions: " + " | ".join(trimmed_qs))
145
 
146
- # 最近几条“弱项”
147
  if weaknesses:
148
  recent_weak = weaknesses[-max_weaknesses:]
149
  trimmed_weak = []
150
  for w in recent_weak:
151
- w = w.strip()
152
  if len(w) > 120:
153
  w = w[:117] + "..."
154
  trimmed_weak.append(w)
155
- parts.append("Recent difficulties mentioned by the student: " + " | ".join(trimmed_weak))
 
156
 
157
- # 当前认知状态
158
  if cognitive_state:
159
- parts.append("Current cognitive state: " + describe_cognitive_state(cognitive_state))
160
 
161
  if not parts:
162
- return (
163
- "No prior session memory. You can treat this as an early stage of the conversation; "
164
- "start with simple explanations and ask a quick check-up question."
165
- )
166
 
167
  return " | ".join(parts)
168
 
169
 
170
- # ---------- 语言检测(用于 Auto 模式) ----------
171
  def detect_language(message: str, preference: str) -> str:
172
- """
173
- preference:
174
- - 'English' → 强制英文
175
- - '中文' → 强制中文
176
- - 'Auto' → 检测文本是否包含中文字符
177
- """
178
  if preference in ("English", "中文"):
179
  return preference
180
- # Auto 模式下简单检测是否含有中文字符
181
- if re.search(r"[\u4e00-\u9fff]", message):
182
  return "中文"
183
  return "English"
184
 
185
 
186
  def get_empty_input_prompt(lang: str) -> str:
187
- """
188
- 空输入时的友好提示,根据语言返回中/英文。
189
- """
190
  if lang == "中文":
191
  return "请先输入一个问题或想法,再按回车发送,我才能帮到你哦。"
192
- # 默认英文
193
  return "Please type a question or some text before sending, then hit Enter."
194
 
195
 
@@ -198,10 +177,6 @@ def build_error_message(
198
  lang: str,
199
  op: str = "chat",
200
  ) -> str:
201
- """
202
- 针对不同操作类型(普通对话 / quiz / summary)和语言,生成友好的错误提示。
203
- 不把原始异常直接暴露给学生,只在后台打印。
204
- """
205
  if lang == "中文":
206
  prefix = {
207
  "chat": "抱歉,刚刚在和模型对话时出现了一点问题。",
@@ -210,7 +185,6 @@ def build_error_message(
210
  }.get(op, "抱歉,刚刚出现了一点问题。")
211
  return prefix + " 请稍后再试一次,或者换个问法试试。"
212
 
213
- # 默认英文
214
  prefix_en = {
215
  "chat": "Sorry, I ran into a problem while talking to the model.",
216
  "quiz": "Sorry, there was a problem while generating the quiz.",
@@ -240,12 +214,9 @@ def render_session_status(
240
  return "\n".join(lines)
241
 
242
 
243
- # ---------- Same Question Check helpers ----------
244
  def _normalize_text(text: str) -> str:
245
- """
246
- 将文本转为小写、去除标点和多余空格,用于简单相似度计算。
247
- """
248
- text = text.lower().strip()
249
  text = re.sub(r"[^\w\s]", " ", text)
250
  text = re.sub(r"\s+", " ", text)
251
  return text
@@ -256,7 +227,7 @@ def _jaccard_similarity(a: str, b: str) -> float:
256
  tokens_b = set(b.split())
257
  if not tokens_a or not tokens_b:
258
  return 0.0
259
- return len(tokens_a & tokens_b) / len(tokens_a | tokens_b)
260
 
261
 
262
  def cosine_similarity(a: List[float], b: List[float]) -> float:
@@ -269,11 +240,9 @@ def cosine_similarity(a: List[float], b: List[float]) -> float:
269
  return 0.0
270
  return dot / (norm_a * norm_b)
271
 
 
272
  @traceable(run_type="embedding", name="get_embedding")
273
  def get_embedding(text: str) -> Optional[List[float]]:
274
- """
275
- 调用 OpenAI Embedding API,将文本编码为向量。
276
- """
277
  try:
278
  resp = client.embeddings.create(
279
  model=EMBEDDING_MODEL,
@@ -281,7 +250,6 @@ def get_embedding(text: str) -> Optional[List[float]]:
281
  )
282
  return resp.data[0].embedding
283
  except Exception as e:
284
- # 打到 Space 的 log,便于排查
285
  print(f"[Embedding error] {repr(e)}")
286
  return None
287
 
@@ -293,16 +261,10 @@ def find_similar_past_question(
293
  embedding_threshold: float = 0.85,
294
  max_turns_to_check: int = 6,
295
  ) -> Optional[Tuple[str, str, float]]:
296
- """
297
- 在最近若干轮历史对话中查找与当前问题相似的既往问题。
298
- 两级检测:先 Jaccard,再 Embedding。
299
- 返回 (past_question, past_answer, similarity_score) 或 None
300
- """
301
  norm_msg = _normalize_text(message)
302
  if not norm_msg:
303
  return None
304
 
305
- # 1) Jaccard
306
  best_sim_j = 0.0
307
  best_pair_j: Optional[Tuple[str, str]] = None
308
  checked = 0
@@ -327,7 +289,6 @@ def find_similar_past_question(
327
  if best_pair_j and best_sim_j >= jaccard_threshold:
328
  return best_pair_j[0], best_pair_j[1], best_sim_j
329
 
330
- # 2) Embedding 语义相似度
331
  if not history:
332
  return None
333
 
@@ -365,31 +326,20 @@ def safe_chat_completion(
365
  messages: List[Dict[str, str]],
366
  lang: str,
367
  op: str = "chat",
368
- temperature: float = 0.5,
369
  ) -> str:
370
- """
371
- 统一安全调用 OpenAI Chat Completion:
372
- - 最多尝试 2 次
373
- - 每次请求 timeout = 20 秒
374
- - 第一次用学生选择的模型;出错后,如果不是 DEFAULT_MODEL,则自动回退到 DEFAULT_MODEL 再试一次
375
- - 所有异常都会打印到后台 log,但对学生只返回友好的中/英文错误文案
376
- """
377
  preferred_model = model_name or DEFAULT_MODEL
378
  last_error: Optional[Exception] = None
379
 
380
  for attempt in range(2):
381
- # 第一次用学生指定模型,第二次(如果需要)切到默认模型
382
- if attempt == 0:
383
- current_model = preferred_model
384
- else:
385
- current_model = DEFAULT_MODEL
386
 
387
  try:
388
  resp = client.chat.completions.create(
389
  model=current_model,
390
  messages=messages,
391
  temperature=temperature,
392
- timeout=20, # 20 秒超时
393
  )
394
  return resp.choices[0].message.content
395
  except Exception as e:
@@ -398,16 +348,13 @@ def safe_chat_completion(
398
  f"failed with model={current_model}: {repr(e)}"
399
  )
400
  last_error = e
401
-
402
- # 如果已经用的是默认模型,或者已经是第二次尝试,就跳出循环
403
  if current_model == DEFAULT_MODEL or attempt == 1:
404
  break
405
 
406
- # 两次都失败,返回友好的错误文案
407
  return build_error_message(last_error or Exception("unknown error"), lang, op)
408
 
409
 
410
- # ---------- 构建 messages ----------
411
  def build_messages(
412
  user_message: str,
413
  history: List[Tuple[str, str]],
@@ -417,155 +364,79 @@ def build_messages(
417
  course_outline: Optional[List[str]],
418
  weaknesses: Optional[List[str]],
419
  cognitive_state: Optional[Dict[str, int]],
420
- rag_context: Optional[str] = None, # 新增:RAG 检索结果
421
  ) -> List[Dict[str, str]]:
422
- messages: List[Dict[str, str]] = [
423
- {"role": "system", "content": CLARE_SYSTEM_PROMPT}
424
- ]
 
425
 
426
- # 学习模式
427
  if learning_mode in LEARNING_MODE_INSTRUCTIONS:
428
- mode_instruction = LEARNING_MODE_INSTRUCTIONS[learning_mode]
429
- messages.append(
430
- {
431
- "role": "system",
432
- "content": f"Current learning mode: {learning_mode}. {mode_instruction}",
433
- }
434
- )
435
 
436
- # 课程大纲
437
  topics = course_outline if course_outline else DEFAULT_COURSE_TOPICS
438
- topics_text = " | ".join(topics)
439
- messages.append(
440
- {
441
- "role": "system",
442
- "content": (
443
- "Here is the course syllabus context. Use this to stay aligned "
444
- "with the course topics when answering: "
445
- + topics_text
446
- ),
447
- }
448
- )
449
 
450
- # 上传文件类型提示
451
  if doc_type and doc_type != "Syllabus":
452
- messages.append(
453
- {
454
- "role": "system",
455
- "content": (
456
- f"The student also uploaded a {doc_type} document as supporting material. "
457
- "You do not see the full content directly, but you may assume it is relevant "
458
- "to the same course and topics."
459
- ),
460
- }
461
- )
462
 
463
- # 学生弱项提示
464
  if weaknesses:
465
- weak_text = " | ".join(weaknesses[-5:])
466
- messages.append(
467
- {
468
- "role": "system",
469
- "content": (
470
- "The student seems to struggle with the following questions or topics. "
471
- "Be extra gentle and clear when these appear: " + weak_text
472
- ),
473
- }
474
- )
475
 
476
- # 认知状态提示
477
  if cognitive_state:
478
- confusion = cognitive_state.get("confusion", 0)
479
- mastery = cognitive_state.get("mastery", 0)
480
- if confusion >= 2 and confusion >= mastery + 1:
481
- messages.append(
482
- {
483
- "role": "system",
484
- "content": (
485
- "The student is currently under HIGH cognitive load. "
486
- "Use simpler language, shorter steps, and more concrete examples. "
487
- "Avoid long derivations in a single answer, and check understanding "
488
- "frequently."
489
- ),
490
- }
491
- )
492
- elif mastery >= 2 and mastery >= confusion + 1:
493
- messages.append(
494
- {
495
- "role": "system",
496
- "content": (
497
- "The student seems comfortable with the material. "
498
- "You may increase difficulty slightly, introduce deeper follow-up "
499
- "questions, and connect concepts across topics."
500
- ),
501
- }
502
- )
503
- else:
504
- messages.append(
505
- {
506
- "role": "system",
507
- "content": (
508
- "The student's cognitive state is mixed or uncertain. "
509
- "Keep explanations clear and moderately paced, and probe for "
510
- "understanding with short questions."
511
- ),
512
- }
513
- )
514
 
515
- # 语言偏好控制
516
- if language_preference == "English":
517
- messages.append(
518
- {"role": "system", "content": "Please answer in English."}
519
- )
520
- elif language_preference == "中文":
521
- messages.append(
522
- {"role": "system", "content": "请用中文回答学生的问题。"}
523
- )
524
-
525
- # Session 内记忆摘要
526
  session_memory_text = build_session_memory_summary(
527
  history=history,
528
  weaknesses=weaknesses,
529
  cognitive_state=cognitive_state,
 
 
530
  )
531
- messages.append(
532
- {
533
- "role": "system",
534
- "content": (
535
- "Here is a short summary of this session's memory (only within the current chat; "
536
- "it is not persisted across sessions). Use it to stay consistent with the "
537
- "student's previous questions, difficulties, and cognitive state: "
538
- + session_memory_text
539
- ),
540
- }
541
- )
542
 
543
- # RAG 检索结果
 
 
 
544
  if rag_context:
545
  messages.append(
546
  {
547
  "role": "system",
548
  "content": (
549
- "Here are some relevant excerpts from the course materials. "
550
- "Use them as the primary factual grounding when answering the student's question. "
551
- "If there is any conflict between these excerpts and your prior knowledge, "
552
- "prefer the excerpts.\n\n"
553
  + rag_context
554
  ),
555
  }
556
  )
557
 
558
- # 历史对话
559
- for user, assistant in history:
 
560
  messages.append({"role": "user", "content": user})
561
  if assistant is not None:
562
  messages.append({"role": "assistant", "content": assistant})
563
 
564
- # 当前输入
565
  messages.append({"role": "user", "content": user_message})
566
  return messages
567
 
568
- # 装饰器
569
  @traceable(run_type="chain", name="chat_with_clare")
570
  def chat_with_clare(
571
  message: str,
@@ -588,8 +459,6 @@ def chat_with_clare(
588
  except Exception as e:
589
  print(f"[LangSmith metadata error in chat_with_clare] {repr(e)}")
590
 
591
-
592
- # 构建 messages
593
  messages = build_messages(
594
  user_message=message,
595
  history=history,
@@ -602,16 +471,15 @@ def chat_with_clare(
602
  rag_context=rag_context,
603
  )
604
 
605
- # 统一安全调用
606
  answer = safe_chat_completion(
607
  model_name=model_name,
608
  messages=messages,
609
  lang=language_preference,
610
  op="chat",
611
- temperature=0.5,
612
  )
613
 
614
- history = history + [(message, answer)]
615
  return answer, history
616
 
617
 
@@ -626,16 +494,16 @@ def export_conversation(
626
  lines: List[str] = []
627
  lines.append("# Clare – Conversation Export\n")
628
  lines.append(f"- Learning mode: **{learning_mode_val}**\n")
629
- lines.append("- Course topics (short): " + "; ".join(course_outline[:5]) + "\n")
630
  lines.append(f"- Cognitive state snapshot: {describe_cognitive_state(cognitive_state)}\n")
631
 
632
  if weaknesses:
633
  lines.append("- Observed student difficulties:\n")
634
- for w in weaknesses[-5:]:
635
  lines.append(f" - {w}\n")
636
  lines.append("\n---\n\n")
637
 
638
- for user, assistant in history:
639
  lines.append(f"**Student:** {user}\n\n")
640
  lines.append(f"**Clare:** {assistant}\n\n")
641
  lines.append("---\n\n")
@@ -643,9 +511,7 @@ def export_conversation(
643
  return "".join(lines)
644
 
645
 
646
- # ---------- 生成 3 个 quiz 题目 ----------
647
- from langsmith import traceable
648
-
649
  @traceable(run_type="chain", name="generate_quiz_from_history")
650
  def generate_quiz_from_history(
651
  history: List[Tuple[str, str]],
@@ -656,11 +522,11 @@ def generate_quiz_from_history(
656
  language_preference: str,
657
  ) -> str:
658
  conversation_text = ""
659
- for user, assistant in history[-8:]:
660
  conversation_text += f"Student: {user}\nClare: {assistant}\n"
661
 
662
- topics_text = "; ".join(course_outline[:8])
663
- weakness_text = "; ".join(weaknesses[-5:]) if weaknesses else "N/A"
664
  cog_text = describe_cognitive_state(cognitive_state)
665
 
666
  messages = [
@@ -668,43 +534,21 @@ def generate_quiz_from_history(
668
  {
669
  "role": "system",
670
  "content": (
671
- "Now your task is to create a **short concept quiz** for the student. "
672
- "Based on the conversation and course topics, generate **3 questions** "
673
- "(a mix of multiple-choice and short-answer is fine). After listing the "
674
- "questions, provide an answer key at the end under a heading 'Answer Key'. "
675
- "Number the questions Q1, Q2, Q3. Adjust the difficulty according to the "
676
- "student's cognitive state."
677
  ),
678
  },
679
- {
680
- "role": "system",
681
- "content": f"Course topics: {topics_text}",
682
- },
683
- {
684
- "role": "system",
685
- "content": f"Student known difficulties: {weakness_text}",
686
- },
687
- {
688
- "role": "system",
689
- "content": f"Student cognitive state: {cog_text}",
690
- },
691
  {
692
  "role": "user",
693
- "content": (
694
- "Here is the recent conversation between you and the student:\n\n"
695
- + conversation_text
696
- + "\n\nPlease create the quiz now."
697
- ),
698
  },
699
  ]
700
 
701
  if language_preference == "中文":
702
- messages.append(
703
- {
704
- "role": "system",
705
- "content": "请用中文给出问题和答案。",
706
- }
707
- )
708
 
709
  quiz_text = safe_chat_completion(
710
  model_name=model_name,
@@ -716,7 +560,7 @@ def generate_quiz_from_history(
716
  return quiz_text
717
 
718
 
719
- # ---------- 概念总结(知识点摘要) ----------
720
  @traceable(run_type="chain", name="summarize_conversation")
721
  def summarize_conversation(
722
  history: List[Tuple[str, str]],
@@ -727,11 +571,11 @@ def summarize_conversation(
727
  language_preference: str,
728
  ) -> str:
729
  conversation_text = ""
730
- for user, assistant in history[-10:]:
731
  conversation_text += f"Student: {user}\nClare: {assistant}\n"
732
 
733
- topics_text = "; ".join(course_outline[:8])
734
- weakness_text = "; ".join(weaknesses[-5:]) if weaknesses else "N/A"
735
  cog_text = describe_cognitive_state(cognitive_state)
736
 
737
  messages = [
@@ -739,43 +583,21 @@ def summarize_conversation(
739
  {
740
  "role": "system",
741
  "content": (
742
- "Your task now is to produce a **concept-only summary** of this tutoring "
743
- "session. Only include knowledge points, definitions, key formulas, "
744
- "examples, and main takeaways. Do **not** include any personal remarks, "
745
- "jokes, or off-topic chat. Write in clear bullet points. This summary "
746
- "should be suitable for the student to paste into their study notes. "
747
- "Take into account what the student struggled with and their cognitive state."
748
  ),
749
  },
750
- {
751
- "role": "system",
752
- "content": f"Course topics context: {topics_text}",
753
- },
754
- {
755
- "role": "system",
756
- "content": f"Student known difficulties: {weakness_text}",
757
- },
758
- {
759
- "role": "system",
760
- "content": f"Student cognitive state: {cog_text}",
761
- },
762
  {
763
  "role": "user",
764
- "content": (
765
- "Here is the recent conversation between you and the student:\n\n"
766
- + conversation_text
767
- + "\n\nPlease summarize only the concepts and key ideas learned."
768
- ),
769
  },
770
  ]
771
 
772
  if language_preference == "中文":
773
- messages.append(
774
- {
775
- "role": "system",
776
- "content": "请用中文给出要点总结,只保留知识点和结论,使用条目符号。"
777
- }
778
- )
779
 
780
  summary_text = safe_chat_completion(
781
  model_name=model_name,
 
1
+ # api/clare_core.py
2
+ import os
3
  import re
4
  import math
5
  from typing import List, Dict, Tuple, Optional
 
14
  CLARE_SYSTEM_PROMPT,
15
  LEARNING_MODE_INSTRUCTIONS,
16
  )
17
+
18
  from langsmith import traceable
19
  from langsmith.run_helpers import set_run_metadata
20
 
21
 
22
# ----------------------------
# Speed/Prompt controls
# ----------------------------
def _env_int(name: str, default: int) -> int:
    """Read an integer tuning knob from the environment.

    Falls back to ``default`` when the variable is unset, empty, or not a
    valid integer. The previous one-liner ``int(os.getenv(name, "...").strip())``
    raised ValueError at import time for an empty/garbage value, taking the
    whole module down on a bad deployment config.
    """
    raw = os.getenv(name)
    if raw is None:
        return default
    try:
        return int(raw.strip())
    except ValueError:
        # Log to the Space console and degrade gracefully.
        print(f"[config] invalid {name}={raw!r}; using default {default}")
        return default


# Limit how much context we send to the model (token reduction = speed up).
MAX_HISTORY_TURNS = _env_int("CLARE_MAX_HISTORY_TURNS", 6)  # user+assistant pairs
MAX_TOPICS = _env_int("CLARE_MAX_TOPICS", 10)
MAX_WEAKNESSES = _env_int("CLARE_MAX_WEAKNESSES", 3)
MAX_SESSION_MEMORY_QS = _env_int("CLARE_MAX_SESSION_MEMORY_QS", 3)
30
+
31
 
32
  # ---------- syllabus 解析 ----------
33
  def parse_syllabus_docx(file_path: str, max_lines: int = 15) -> List[str]:
 
 
 
 
34
  topics: List[str] = []
35
  try:
36
  doc = Document(file_path)
 
43
  break
44
  except Exception as e:
45
  topics = [f"[Error parsing syllabus: {e}]"]
 
46
  return topics
47
 
48
 
 
81
 
82
 
83
  def update_weaknesses_from_message(message: str, weaknesses: List[str]) -> List[str]:
84
+ lower_msg = (message or "").lower()
85
  if any(k in lower_msg for k in WEAKNESS_KEYWORDS):
86
  weaknesses = weaknesses or []
87
  weaknesses.append(message)
 
92
  message: str,
93
  state: Optional[Dict[str, int]],
94
  ) -> Dict[str, int]:
 
 
 
 
 
95
  if state is None:
96
  state = {"confusion": 0, "mastery": 0}
97
 
98
+ lower_msg = (message or "").lower()
99
  if any(k in lower_msg for k in WEAKNESS_KEYWORDS):
100
  state["confusion"] = state.get("confusion", 0) + 1
101
  if any(k in lower_msg for k in MASTERY_KEYWORDS):
 
121
  history: List[Tuple[str, str]],
122
  weaknesses: Optional[List[str]],
123
  cognitive_state: Optional[Dict[str, int]],
124
+ max_questions: int = 3,
125
+ max_weaknesses: int = 2,
126
  ) -> str:
 
 
 
 
 
 
127
  parts: List[str] = []
128
 
 
129
  if history:
130
  recent_qs = [u for (u, _a) in history[-max_questions:]]
131
  trimmed_qs = []
132
  for q in recent_qs:
133
+ q = (q or "").strip()
134
  if len(q) > 120:
135
  q = q[:117] + "..."
136
  trimmed_qs.append(q)
137
  if trimmed_qs:
138
+ parts.append("Recent questions: " + " | ".join(trimmed_qs))
139
 
 
140
  if weaknesses:
141
  recent_weak = weaknesses[-max_weaknesses:]
142
  trimmed_weak = []
143
  for w in recent_weak:
144
+ w = (w or "").strip()
145
  if len(w) > 120:
146
  w = w[:117] + "..."
147
  trimmed_weak.append(w)
148
+ if trimmed_weak:
149
+ parts.append("Recent difficulties: " + " | ".join(trimmed_weak))
150
 
 
151
  if cognitive_state:
152
+ parts.append("Cognitive state: " + describe_cognitive_state(cognitive_state))
153
 
154
  if not parts:
155
+ return "No prior session memory."
 
 
 
156
 
157
  return " | ".join(parts)
158
 
159
 
160
+ # ---------- 语言检测 ----------
161
def detect_language(message: str, preference: str) -> str:
    """Resolve which language Clare should answer in.

    An explicit preference ('English' or '中文') always wins; otherwise
    (Auto mode) the message is scanned for CJK characters to decide.
    """
    forced_choices = {"English", "中文"}
    if preference in forced_choices:
        return preference
    # Auto mode: any character in the CJK Unified Ideographs range → Chinese.
    contains_chinese = re.search(r"[\u4e00-\u9fff]", message or "") is not None
    return "中文" if contains_chinese else "English"
167
 
168
 
169
def get_empty_input_prompt(lang: str) -> str:
    """Return a friendly nudge for an empty submission, localized by ``lang``."""
    localized = {
        "中文": "请先输入一个问题或想法,再按回车发送,我才能帮到你哦。",
    }
    # Any language other than '中文' gets the English prompt.
    return localized.get(
        lang, "Please type a question or some text before sending, then hit Enter."
    )
173
 
174
 
 
177
  lang: str,
178
  op: str = "chat",
179
  ) -> str:
 
 
 
 
180
  if lang == "中文":
181
  prefix = {
182
  "chat": "抱歉,刚刚在和模型对话时出现了一点问题。",
 
185
  }.get(op, "抱歉,刚刚出现了一点问题。")
186
  return prefix + " 请稍后再试一次,或者换个问法试试。"
187
 
 
188
  prefix_en = {
189
  "chat": "Sorry, I ran into a problem while talking to the model.",
190
  "quiz": "Sorry, there was a problem while generating the quiz.",
 
214
  return "\n".join(lines)
215
 
216
 
217
+ # ---------- Similarity helpers (kept; not called by server currently) ----------
218
  def _normalize_text(text: str) -> str:
219
+ text = (text or "").lower().strip()
 
 
 
220
  text = re.sub(r"[^\w\s]", " ", text)
221
  text = re.sub(r"\s+", " ", text)
222
  return text
 
227
  tokens_b = set(b.split())
228
  if not tokens_a or not tokens_b:
229
  return 0.0
230
+ return len(a_set := (tokens_a & tokens_b)) / len(tokens_a | tokens_b)
231
 
232
 
233
  def cosine_similarity(a: List[float], b: List[float]) -> float:
 
240
  return 0.0
241
  return dot / (norm_a * norm_b)
242
 
243
+
244
  @traceable(run_type="embedding", name="get_embedding")
245
  def get_embedding(text: str) -> Optional[List[float]]:
 
 
 
246
  try:
247
  resp = client.embeddings.create(
248
  model=EMBEDDING_MODEL,
 
250
  )
251
  return resp.data[0].embedding
252
  except Exception as e:
 
253
  print(f"[Embedding error] {repr(e)}")
254
  return None
255
 
 
261
  embedding_threshold: float = 0.85,
262
  max_turns_to_check: int = 6,
263
  ) -> Optional[Tuple[str, str, float]]:
 
 
 
 
 
264
  norm_msg = _normalize_text(message)
265
  if not norm_msg:
266
  return None
267
 
 
268
  best_sim_j = 0.0
269
  best_pair_j: Optional[Tuple[str, str]] = None
270
  checked = 0
 
289
  if best_pair_j and best_sim_j >= jaccard_threshold:
290
  return best_pair_j[0], best_pair_j[1], best_sim_j
291
 
 
292
  if not history:
293
  return None
294
 
 
326
  messages: List[Dict[str, str]],
327
  lang: str,
328
  op: str = "chat",
329
+ temperature: float = 0.4, # ✅ slightly lower for stability/speed
330
  ) -> str:
 
 
 
 
 
 
 
331
  preferred_model = model_name or DEFAULT_MODEL
332
  last_error: Optional[Exception] = None
333
 
334
  for attempt in range(2):
335
+ current_model = preferred_model if attempt == 0 else DEFAULT_MODEL
 
 
 
 
336
 
337
  try:
338
  resp = client.chat.completions.create(
339
  model=current_model,
340
  messages=messages,
341
  temperature=temperature,
342
+ timeout=20,
343
  )
344
  return resp.choices[0].message.content
345
  except Exception as e:
 
348
  f"failed with model={current_model}: {repr(e)}"
349
  )
350
  last_error = e
 
 
351
  if current_model == DEFAULT_MODEL or attempt == 1:
352
  break
353
 
 
354
  return build_error_message(last_error or Exception("unknown error"), lang, op)
355
 
356
 
357
+ # ---------- 构建 messages (optimized) ----------
358
  def build_messages(
359
  user_message: str,
360
  history: List[Tuple[str, str]],
 
364
  course_outline: Optional[List[str]],
365
  weaknesses: Optional[List[str]],
366
  cognitive_state: Optional[Dict[str, int]],
367
+ rag_context: Optional[str] = None,
368
  ) -> List[Dict[str, str]]:
369
+ messages: List[Dict[str, str]] = [{"role": "system", "content": CLARE_SYSTEM_PROMPT}]
370
+
371
+ # ✅ consolidate most system context into ONE message to reduce overhead
372
+ sys_parts: List[str] = []
373
 
374
+ # mode
375
  if learning_mode in LEARNING_MODE_INSTRUCTIONS:
376
+ sys_parts.append(f"Learning mode: {learning_mode}. {LEARNING_MODE_INSTRUCTIONS[learning_mode]}")
 
 
 
 
 
 
377
 
378
+ # syllabus/topics (limit)
379
  topics = course_outline if course_outline else DEFAULT_COURSE_TOPICS
380
+ topics = (topics or [])[:MAX_TOPICS]
381
+ if topics:
382
+ sys_parts.append("Course topics: " + " | ".join(topics))
 
 
 
 
 
 
 
 
383
 
384
+ # doc_type hint
385
  if doc_type and doc_type != "Syllabus":
386
+ sys_parts.append(f"Supporting doc uploaded: {doc_type}.")
 
 
 
 
 
 
 
 
 
387
 
388
+ # weaknesses (limit)
389
  if weaknesses:
390
+ ww = weaknesses[-MAX_WEAKNESSES:]
391
+ sys_parts.append("Student difficulties (recent): " + " | ".join(ww))
 
 
 
 
 
 
 
 
392
 
393
+ # cognitive state (short)
394
  if cognitive_state:
395
+ sys_parts.append("Cognitive state: " + describe_cognitive_state(cognitive_state))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
396
 
397
+ # session memory (short + limited)
 
 
 
 
 
 
 
 
 
 
398
  session_memory_text = build_session_memory_summary(
399
  history=history,
400
  weaknesses=weaknesses,
401
  cognitive_state=cognitive_state,
402
+ max_questions=MAX_SESSION_MEMORY_QS,
403
+ max_weaknesses=min(2, MAX_WEAKNESSES),
404
  )
405
+ if session_memory_text:
406
+ sys_parts.append("Session memory: " + session_memory_text)
407
+
408
+ # language preference
409
+ if language_preference == "English":
410
+ sys_parts.append("Answer in English.")
411
+ elif language_preference == "中文":
412
+ sys_parts.append("请用中文回答。")
 
 
 
413
 
414
+ if sys_parts:
415
+ messages.append({"role": "system", "content": "\n".join(sys_parts)})
416
+
417
+ # rag context (keep as separate system block, but already capped in rag_engine)
418
  if rag_context:
419
  messages.append(
420
  {
421
  "role": "system",
422
  "content": (
423
+ "Relevant excerpts (use as grounding; prefer these if conflict):\n\n"
 
 
 
424
  + rag_context
425
  ),
426
  }
427
  )
428
 
429
+ # ✅ limit history turns for speed
430
+ hist = history[-MAX_HISTORY_TURNS:] if history else []
431
+ for user, assistant in hist:
432
  messages.append({"role": "user", "content": user})
433
  if assistant is not None:
434
  messages.append({"role": "assistant", "content": assistant})
435
 
 
436
  messages.append({"role": "user", "content": user_message})
437
  return messages
438
 
439
+
440
  @traceable(run_type="chain", name="chat_with_clare")
441
  def chat_with_clare(
442
  message: str,
 
459
  except Exception as e:
460
  print(f"[LangSmith metadata error in chat_with_clare] {repr(e)}")
461
 
 
 
462
  messages = build_messages(
463
  user_message=message,
464
  history=history,
 
471
  rag_context=rag_context,
472
  )
473
 
 
474
  answer = safe_chat_completion(
475
  model_name=model_name,
476
  messages=messages,
477
  lang=language_preference,
478
  op="chat",
479
+ temperature=0.4,
480
  )
481
 
482
+ history = (history or []) + [(message, answer)]
483
  return answer, history
484
 
485
 
 
494
  lines: List[str] = []
495
  lines.append("# Clare – Conversation Export\n")
496
  lines.append(f"- Learning mode: **{learning_mode_val}**\n")
497
+ lines.append("- Course topics (short): " + "; ".join((course_outline or [])[:5]) + "\n")
498
  lines.append(f"- Cognitive state snapshot: {describe_cognitive_state(cognitive_state)}\n")
499
 
500
  if weaknesses:
501
  lines.append("- Observed student difficulties:\n")
502
+ for w in (weaknesses or [])[-5:]:
503
  lines.append(f" - {w}\n")
504
  lines.append("\n---\n\n")
505
 
506
+ for user, assistant in history or []:
507
  lines.append(f"**Student:** {user}\n\n")
508
  lines.append(f"**Clare:** {assistant}\n\n")
509
  lines.append("---\n\n")
 
511
  return "".join(lines)
512
 
513
 
514
+ # ---------- 生成 quiz ----------
 
 
515
  @traceable(run_type="chain", name="generate_quiz_from_history")
516
  def generate_quiz_from_history(
517
  history: List[Tuple[str, str]],
 
522
  language_preference: str,
523
  ) -> str:
524
  conversation_text = ""
525
+ for user, assistant in (history or [])[-6:]:
526
  conversation_text += f"Student: {user}\nClare: {assistant}\n"
527
 
528
+ topics_text = "; ".join((course_outline or [])[:8])
529
+ weakness_text = "; ".join((weaknesses or [])[-5:]) if weaknesses else "N/A"
530
  cog_text = describe_cognitive_state(cognitive_state)
531
 
532
  messages = [
 
534
  {
535
  "role": "system",
536
  "content": (
537
+ "Create a short concept quiz with 3 questions (mix MCQ + short answer). "
538
+ "Add 'Answer Key' at end. Adapt difficulty to student state."
 
 
 
 
539
  ),
540
  },
541
+ {"role": "system", "content": f"Course topics: {topics_text}"},
542
+ {"role": "system", "content": f"Student difficulties: {weakness_text}"},
543
+ {"role": "system", "content": f"Cognitive state: {cog_text}"},
 
 
 
 
 
 
 
 
 
544
  {
545
  "role": "user",
546
+ "content": "Recent conversation:\n\n" + conversation_text + "\n\nCreate the quiz now.",
 
 
 
 
547
  },
548
  ]
549
 
550
  if language_preference == "中文":
551
+ messages.append({"role": "system", "content": "请用中文给出问题和答案。"})
 
 
 
 
 
552
 
553
  quiz_text = safe_chat_completion(
554
  model_name=model_name,
 
560
  return quiz_text
561
 
562
 
563
+ # ---------- 总结 ----------
564
  @traceable(run_type="chain", name="summarize_conversation")
565
  def summarize_conversation(
566
  history: List[Tuple[str, str]],
 
571
  language_preference: str,
572
  ) -> str:
573
  conversation_text = ""
574
+ for user, assistant in (history or [])[-8:]:
575
  conversation_text += f"Student: {user}\nClare: {assistant}\n"
576
 
577
+ topics_text = "; ".join((course_outline or [])[:8])
578
+ weakness_text = "; ".join((weaknesses or [])[-5:]) if weaknesses else "N/A"
579
  cog_text = describe_cognitive_state(cognitive_state)
580
 
581
  messages = [
 
583
  {
584
  "role": "system",
585
  "content": (
586
+ "Produce a concept-only summary in bullet points: definitions, key ideas, "
587
+ "formulas, examples, takeaways. No personal chat."
 
 
 
 
588
  ),
589
  },
590
+ {"role": "system", "content": f"Course topics: {topics_text}"},
591
+ {"role": "system", "content": f"Student difficulties: {weakness_text}"},
592
+ {"role": "system", "content": f"Cognitive state: {cog_text}"},
 
 
 
 
 
 
 
 
 
593
  {
594
  "role": "user",
595
+ "content": "Recent conversation:\n\n" + conversation_text + "\n\nSummarize key concepts.",
 
 
 
 
596
  },
597
  ]
598
 
599
  if language_preference == "中文":
600
+ messages.append({"role": "system", "content": "请用中文给出要点总结,只保留知识点,使用条目符号。"})
 
 
 
 
 
601
 
602
  summary_text = safe_chat_completion(
603
  model_name=model_name,