SarahXia0405 commited on
Commit
d4f2575
·
verified ·
1 Parent(s): 6e941de

Update api/clare_core.py

Browse files
Files changed (1) hide show
  1. api/clare_core.py +98 -276
api/clare_core.py CHANGED
@@ -1,4 +1,5 @@
1
- # clare_core.py
 
2
  import re
3
  import math
4
  from typing import List, Dict, Tuple, Optional
@@ -13,17 +14,23 @@ from .config import (
13
  CLARE_SYSTEM_PROMPT,
14
  LEARNING_MODE_INSTRUCTIONS,
15
  )
 
16
  from langsmith import traceable
17
  from langsmith.run_helpers import set_run_metadata
18
 
19
 
 
 
 
 
 
 
 
 
 
20
 
21
  # ---------- syllabus 解析 ----------
22
  def parse_syllabus_docx(file_path: str, max_lines: int = 15) -> List[str]:
23
- """
24
- 非常简单的 syllabus 解析:取前若干个非空段落当作主题行。
25
- 只是为了给 Clare 一些课程上下文,不追求超精确结构。
26
- """
27
  topics: List[str] = []
28
  try:
29
  doc = Document(file_path)
@@ -36,7 +43,6 @@ def parse_syllabus_docx(file_path: str, max_lines: int = 15) -> List[str]:
36
  break
37
  except Exception as e:
38
  topics = [f"[Error parsing syllabus: {e}]"]
39
-
40
  return topics
41
 
42
 
@@ -75,7 +81,7 @@ MASTERY_KEYWORDS = [
75
 
76
 
77
  def update_weaknesses_from_message(message: str, weaknesses: List[str]) -> List[str]:
78
- lower_msg = message.lower()
79
  if any(k in lower_msg for k in WEAKNESS_KEYWORDS):
80
  weaknesses = weaknesses or []
81
  weaknesses.append(message)
@@ -86,15 +92,10 @@ def update_cognitive_state_from_message(
86
  message: str,
87
  state: Optional[Dict[str, int]],
88
  ) -> Dict[str, int]:
89
- """
90
- 简单认知状态统计:
91
- - 遇到困惑类关键词 → confusion +1
92
- - 遇到掌握类关键词 → mastery +1
93
- """
94
  if state is None:
95
  state = {"confusion": 0, "mastery": 0}
96
 
97
- lower_msg = message.lower()
98
  if any(k in lower_msg for k in WEAKNESS_KEYWORDS):
99
  state["confusion"] = state.get("confusion", 0) + 1
100
  if any(k in lower_msg for k in MASTERY_KEYWORDS):
@@ -120,76 +121,54 @@ def build_session_memory_summary(
120
  history: List[Tuple[str, str]],
121
  weaknesses: Optional[List[str]],
122
  cognitive_state: Optional[Dict[str, int]],
123
- max_questions: int = 4,
124
- max_weaknesses: int = 3,
125
  ) -> str:
126
- """
127
- 只在本次会话内使用的“记忆摘要”:
128
- - 最近几条学生提问
129
- - 最近几条学生觉得难的问题
130
- - 当前的认知状态描述
131
- """
132
  parts: List[str] = []
133
 
134
- # 最近几条提问(只取 student)
135
  if history:
136
  recent_qs = [u for (u, _a) in history[-max_questions:]]
137
  trimmed_qs = []
138
  for q in recent_qs:
139
- q = q.strip()
140
  if len(q) > 120:
141
  q = q[:117] + "..."
142
  trimmed_qs.append(q)
143
  if trimmed_qs:
144
- parts.append("Recent student questions: " + " | ".join(trimmed_qs))
145
 
146
- # 最近几条“弱项”
147
  if weaknesses:
148
  recent_weak = weaknesses[-max_weaknesses:]
149
  trimmed_weak = []
150
  for w in recent_weak:
151
- w = w.strip()
152
  if len(w) > 120:
153
  w = w[:117] + "..."
154
  trimmed_weak.append(w)
155
- parts.append("Recent difficulties mentioned by the student: " + " | ".join(trimmed_weak))
 
156
 
157
- # 当前认知状态
158
  if cognitive_state:
159
- parts.append("Current cognitive state: " + describe_cognitive_state(cognitive_state))
160
 
161
  if not parts:
162
- return (
163
- "No prior session memory. You can treat this as an early stage of the conversation; "
164
- "start with simple explanations and ask a quick check-up question."
165
- )
166
 
167
  return " | ".join(parts)
168
 
169
 
170
- # ---------- 语言检测(用于 Auto 模式) ----------
171
  def detect_language(message: str, preference: str) -> str:
172
- """
173
- preference:
174
- - 'English' → 强制英文
175
- - '中文' → 强制中文
176
- - 'Auto' → 检测文本是否包含中文字符
177
- """
178
  if preference in ("English", "中文"):
179
  return preference
180
- # Auto 模式下简单检测是否含有中文字符
181
- if re.search(r"[\u4e00-\u9fff]", message):
182
  return "中文"
183
  return "English"
184
 
185
 
186
  def get_empty_input_prompt(lang: str) -> str:
187
- """
188
- 空输入时的友好提示,根据语言返回中/英文。
189
- """
190
  if lang == "中文":
191
  return "请先输入一个问题或想法,再按回车发送,我才能帮到你哦。"
192
- # 默认英文
193
  return "Please type a question or some text before sending, then hit Enter."
194
 
195
 
@@ -198,10 +177,6 @@ def build_error_message(
198
  lang: str,
199
  op: str = "chat",
200
  ) -> str:
201
- """
202
- 针对不同操作类型(普通对话 / quiz / summary)和语言,生成友好的错误提示。
203
- 不把原始异常直接暴露给学生,只在后台打印。
204
- """
205
  if lang == "中文":
206
  prefix = {
207
  "chat": "抱歉,刚刚在和模型对话时出现了一点问题。",
@@ -210,7 +185,6 @@ def build_error_message(
210
  }.get(op, "抱歉,刚刚出现了一点问题。")
211
  return prefix + " 请稍后再试一次,或者换个问法试试。"
212
 
213
- # 默认英文
214
  prefix_en = {
215
  "chat": "Sorry, I ran into a problem while talking to the model.",
216
  "quiz": "Sorry, there was a problem while generating the quiz.",
@@ -240,12 +214,9 @@ def render_session_status(
240
  return "\n".join(lines)
241
 
242
 
243
- # ---------- Same Question Check helpers ----------
244
  def _normalize_text(text: str) -> str:
245
- """
246
- 将文本转为小写、去除标点和多余空格,用于简单相似度计算。
247
- """
248
- text = text.lower().strip()
249
  text = re.sub(r"[^\w\s]", " ", text)
250
  text = re.sub(r"\s+", " ", text)
251
  return text
@@ -256,7 +227,7 @@ def _jaccard_similarity(a: str, b: str) -> float:
256
  tokens_b = set(b.split())
257
  if not tokens_a or not tokens_b:
258
  return 0.0
259
- return len(tokens_a & tokens_b) / len(tokens_a | tokens_b)
260
 
261
 
262
  def cosine_similarity(a: List[float], b: List[float]) -> float:
@@ -269,11 +240,9 @@ def cosine_similarity(a: List[float], b: List[float]) -> float:
269
  return 0.0
270
  return dot / (norm_a * norm_b)
271
 
 
272
  @traceable(run_type="embedding", name="get_embedding")
273
  def get_embedding(text: str) -> Optional[List[float]]:
274
- """
275
- 调用 OpenAI Embedding API,将文本编码为向量。
276
- """
277
  try:
278
  resp = client.embeddings.create(
279
  model=EMBEDDING_MODEL,
@@ -281,7 +250,6 @@ def get_embedding(text: str) -> Optional[List[float]]:
281
  )
282
  return resp.data[0].embedding
283
  except Exception as e:
284
- # 打到 Space 的 log,便于排查
285
  print(f"[Embedding error] {repr(e)}")
286
  return None
287
 
@@ -293,16 +261,10 @@ def find_similar_past_question(
293
  embedding_threshold: float = 0.85,
294
  max_turns_to_check: int = 6,
295
  ) -> Optional[Tuple[str, str, float]]:
296
- """
297
- 在最近若干轮历史对话中查找与当前问题相似的既往问题。
298
- 两级检测:先 Jaccard,再 Embedding。
299
- 返回 (past_question, past_answer, similarity_score) 或 None
300
- """
301
  norm_msg = _normalize_text(message)
302
  if not norm_msg:
303
  return None
304
 
305
- # 1) Jaccard
306
  best_sim_j = 0.0
307
  best_pair_j: Optional[Tuple[str, str]] = None
308
  checked = 0
@@ -327,7 +289,6 @@ def find_similar_past_question(
327
  if best_pair_j and best_sim_j >= jaccard_threshold:
328
  return best_pair_j[0], best_pair_j[1], best_sim_j
329
 
330
- # 2) Embedding 语义相似度
331
  if not history:
332
  return None
333
 
@@ -365,31 +326,20 @@ def safe_chat_completion(
365
  messages: List[Dict[str, str]],
366
  lang: str,
367
  op: str = "chat",
368
- temperature: float = 0.5,
369
  ) -> str:
370
- """
371
- 统一安全调用 OpenAI Chat Completion:
372
- - 最多尝试 2 次
373
- - 每次请求 timeout = 20 秒
374
- - 第一次用学生选择的模型;出错后,如果不是 DEFAULT_MODEL,则自动回退到 DEFAULT_MODEL 再试一次
375
- - 所有异常都会打印到后台 log,但对学生只返回友好的中/英文错误文案
376
- """
377
  preferred_model = model_name or DEFAULT_MODEL
378
  last_error: Optional[Exception] = None
379
 
380
  for attempt in range(2):
381
- # 第一次用学生指定模型,第二次(如果需要)切到默认模型
382
- if attempt == 0:
383
- current_model = preferred_model
384
- else:
385
- current_model = DEFAULT_MODEL
386
 
387
  try:
388
  resp = client.chat.completions.create(
389
  model=current_model,
390
  messages=messages,
391
  temperature=temperature,
392
- timeout=20, # 20 秒超时
393
  )
394
  return resp.choices[0].message.content
395
  except Exception as e:
@@ -398,16 +348,13 @@ def safe_chat_completion(
398
  f"failed with model={current_model}: {repr(e)}"
399
  )
400
  last_error = e
401
-
402
- # 如果已经用的是默认模型,或者已经是第二次尝试,就跳出循环
403
  if current_model == DEFAULT_MODEL or attempt == 1:
404
  break
405
 
406
- # 两次都失败,返回友好的错误文案
407
  return build_error_message(last_error or Exception("unknown error"), lang, op)
408
 
409
 
410
- # ---------- 构建 messages ----------
411
  def build_messages(
412
  user_message: str,
413
  history: List[Tuple[str, str]],
@@ -417,155 +364,79 @@ def build_messages(
417
  course_outline: Optional[List[str]],
418
  weaknesses: Optional[List[str]],
419
  cognitive_state: Optional[Dict[str, int]],
420
- rag_context: Optional[str] = None, # 新增:RAG 检索结果
421
  ) -> List[Dict[str, str]]:
422
- messages: List[Dict[str, str]] = [
423
- {"role": "system", "content": CLARE_SYSTEM_PROMPT}
424
- ]
 
425
 
426
- # 学习模式
427
  if learning_mode in LEARNING_MODE_INSTRUCTIONS:
428
- mode_instruction = LEARNING_MODE_INSTRUCTIONS[learning_mode]
429
- messages.append(
430
- {
431
- "role": "system",
432
- "content": f"Current learning mode: {learning_mode}. {mode_instruction}",
433
- }
434
- )
435
 
436
- # 课程大纲
437
  topics = course_outline if course_outline else DEFAULT_COURSE_TOPICS
438
- topics_text = " | ".join(topics)
439
- messages.append(
440
- {
441
- "role": "system",
442
- "content": (
443
- "Here is the course syllabus context. Use this to stay aligned "
444
- "with the course topics when answering: "
445
- + topics_text
446
- ),
447
- }
448
- )
449
 
450
- # 上传文件类型提示
451
  if doc_type and doc_type != "Syllabus":
452
- messages.append(
453
- {
454
- "role": "system",
455
- "content": (
456
- f"The student also uploaded a {doc_type} document as supporting material. "
457
- "You do not see the full content directly, but you may assume it is relevant "
458
- "to the same course and topics."
459
- ),
460
- }
461
- )
462
 
463
- # 学生弱项提示
464
  if weaknesses:
465
- weak_text = " | ".join(weaknesses[-5:])
466
- messages.append(
467
- {
468
- "role": "system",
469
- "content": (
470
- "The student seems to struggle with the following questions or topics. "
471
- "Be extra gentle and clear when these appear: " + weak_text
472
- ),
473
- }
474
- )
475
 
476
- # 认知状态提示
477
  if cognitive_state:
478
- confusion = cognitive_state.get("confusion", 0)
479
- mastery = cognitive_state.get("mastery", 0)
480
- if confusion >= 2 and confusion >= mastery + 1:
481
- messages.append(
482
- {
483
- "role": "system",
484
- "content": (
485
- "The student is currently under HIGH cognitive load. "
486
- "Use simpler language, shorter steps, and more concrete examples. "
487
- "Avoid long derivations in a single answer, and check understanding "
488
- "frequently."
489
- ),
490
- }
491
- )
492
- elif mastery >= 2 and mastery >= confusion + 1:
493
- messages.append(
494
- {
495
- "role": "system",
496
- "content": (
497
- "The student seems comfortable with the material. "
498
- "You may increase difficulty slightly, introduce deeper follow-up "
499
- "questions, and connect concepts across topics."
500
- ),
501
- }
502
- )
503
- else:
504
- messages.append(
505
- {
506
- "role": "system",
507
- "content": (
508
- "The student's cognitive state is mixed or uncertain. "
509
- "Keep explanations clear and moderately paced, and probe for "
510
- "understanding with short questions."
511
- ),
512
- }
513
- )
514
 
515
- # 语言偏好控制
516
- if language_preference == "English":
517
- messages.append(
518
- {"role": "system", "content": "Please answer in English."}
519
- )
520
- elif language_preference == "中文":
521
- messages.append(
522
- {"role": "system", "content": "请用中文回答学生的问题。"}
523
- )
524
-
525
- # Session 内记忆摘要
526
  session_memory_text = build_session_memory_summary(
527
  history=history,
528
  weaknesses=weaknesses,
529
  cognitive_state=cognitive_state,
 
 
530
  )
531
- messages.append(
532
- {
533
- "role": "system",
534
- "content": (
535
- "Here is a short summary of this session's memory (only within the current chat; "
536
- "it is not persisted across sessions). Use it to stay consistent with the "
537
- "student's previous questions, difficulties, and cognitive state: "
538
- + session_memory_text
539
- ),
540
- }
541
- )
542
 
543
- # RAG 检索结果
 
 
 
544
  if rag_context:
545
  messages.append(
546
  {
547
  "role": "system",
548
  "content": (
549
- "Here are some relevant excerpts from the course materials. "
550
- "Use them as the primary factual grounding when answering the student's question. "
551
- "If there is any conflict between these excerpts and your prior knowledge, "
552
- "prefer the excerpts.\n\n"
553
  + rag_context
554
  ),
555
  }
556
  )
557
 
558
- # 历史对话
559
- for user, assistant in history:
 
560
  messages.append({"role": "user", "content": user})
561
  if assistant is not None:
562
  messages.append({"role": "assistant", "content": assistant})
563
 
564
- # 当前输入
565
  messages.append({"role": "user", "content": user_message})
566
  return messages
567
 
568
- # 装饰器
569
  @traceable(run_type="chain", name="chat_with_clare")
570
  def chat_with_clare(
571
  message: str,
@@ -588,8 +459,6 @@ def chat_with_clare(
588
  except Exception as e:
589
  print(f"[LangSmith metadata error in chat_with_clare] {repr(e)}")
590
 
591
-
592
- # 构建 messages
593
  messages = build_messages(
594
  user_message=message,
595
  history=history,
@@ -602,16 +471,15 @@ def chat_with_clare(
602
  rag_context=rag_context,
603
  )
604
 
605
- # 统一安全调用
606
  answer = safe_chat_completion(
607
  model_name=model_name,
608
  messages=messages,
609
  lang=language_preference,
610
  op="chat",
611
- temperature=0.5,
612
  )
613
 
614
- history = history + [(message, answer)]
615
  return answer, history
616
 
617
 
@@ -626,16 +494,16 @@ def export_conversation(
626
  lines: List[str] = []
627
  lines.append("# Clare – Conversation Export\n")
628
  lines.append(f"- Learning mode: **{learning_mode_val}**\n")
629
- lines.append("- Course topics (short): " + "; ".join(course_outline[:5]) + "\n")
630
  lines.append(f"- Cognitive state snapshot: {describe_cognitive_state(cognitive_state)}\n")
631
 
632
  if weaknesses:
633
  lines.append("- Observed student difficulties:\n")
634
- for w in weaknesses[-5:]:
635
  lines.append(f" - {w}\n")
636
  lines.append("\n---\n\n")
637
 
638
- for user, assistant in history:
639
  lines.append(f"**Student:** {user}\n\n")
640
  lines.append(f"**Clare:** {assistant}\n\n")
641
  lines.append("---\n\n")
@@ -643,9 +511,7 @@ def export_conversation(
643
  return "".join(lines)
644
 
645
 
646
- # ---------- 生成 3 个 quiz 题目 ----------
647
- from langsmith import traceable
648
-
649
  @traceable(run_type="chain", name="generate_quiz_from_history")
650
  def generate_quiz_from_history(
651
  history: List[Tuple[str, str]],
@@ -656,11 +522,11 @@ def generate_quiz_from_history(
656
  language_preference: str,
657
  ) -> str:
658
  conversation_text = ""
659
- for user, assistant in history[-8:]:
660
  conversation_text += f"Student: {user}\nClare: {assistant}\n"
661
 
662
- topics_text = "; ".join(course_outline[:8])
663
- weakness_text = "; ".join(weaknesses[-5:]) if weaknesses else "N/A"
664
  cog_text = describe_cognitive_state(cognitive_state)
665
 
666
  messages = [
@@ -668,43 +534,21 @@ def generate_quiz_from_history(
668
  {
669
  "role": "system",
670
  "content": (
671
- "Now your task is to create a **short concept quiz** for the student. "
672
- "Based on the conversation and course topics, generate **3 questions** "
673
- "(a mix of multiple-choice and short-answer is fine). After listing the "
674
- "questions, provide an answer key at the end under a heading 'Answer Key'. "
675
- "Number the questions Q1, Q2, Q3. Adjust the difficulty according to the "
676
- "student's cognitive state."
677
  ),
678
  },
679
- {
680
- "role": "system",
681
- "content": f"Course topics: {topics_text}",
682
- },
683
- {
684
- "role": "system",
685
- "content": f"Student known difficulties: {weakness_text}",
686
- },
687
- {
688
- "role": "system",
689
- "content": f"Student cognitive state: {cog_text}",
690
- },
691
  {
692
  "role": "user",
693
- "content": (
694
- "Here is the recent conversation between you and the student:\n\n"
695
- + conversation_text
696
- + "\n\nPlease create the quiz now."
697
- ),
698
  },
699
  ]
700
 
701
  if language_preference == "中文":
702
- messages.append(
703
- {
704
- "role": "system",
705
- "content": "请用中文给出问题和答案。",
706
- }
707
- )
708
 
709
  quiz_text = safe_chat_completion(
710
  model_name=model_name,
@@ -716,7 +560,7 @@ def generate_quiz_from_history(
716
  return quiz_text
717
 
718
 
719
- # ---------- 概念总结(知识点摘要) ----------
720
  @traceable(run_type="chain", name="summarize_conversation")
721
  def summarize_conversation(
722
  history: List[Tuple[str, str]],
@@ -727,11 +571,11 @@ def summarize_conversation(
727
  language_preference: str,
728
  ) -> str:
729
  conversation_text = ""
730
- for user, assistant in history[-10:]:
731
  conversation_text += f"Student: {user}\nClare: {assistant}\n"
732
 
733
- topics_text = "; ".join(course_outline[:8])
734
- weakness_text = "; ".join(weaknesses[-5:]) if weaknesses else "N/A"
735
  cog_text = describe_cognitive_state(cognitive_state)
736
 
737
  messages = [
@@ -739,43 +583,21 @@ def summarize_conversation(
739
  {
740
  "role": "system",
741
  "content": (
742
- "Your task now is to produce a **concept-only summary** of this tutoring "
743
- "session. Only include knowledge points, definitions, key formulas, "
744
- "examples, and main takeaways. Do **not** include any personal remarks, "
745
- "jokes, or off-topic chat. Write in clear bullet points. This summary "
746
- "should be suitable for the student to paste into their study notes. "
747
- "Take into account what the student struggled with and their cognitive state."
748
  ),
749
  },
750
- {
751
- "role": "system",
752
- "content": f"Course topics context: {topics_text}",
753
- },
754
- {
755
- "role": "system",
756
- "content": f"Student known difficulties: {weakness_text}",
757
- },
758
- {
759
- "role": "system",
760
- "content": f"Student cognitive state: {cog_text}",
761
- },
762
  {
763
  "role": "user",
764
- "content": (
765
- "Here is the recent conversation between you and the student:\n\n"
766
- + conversation_text
767
- + "\n\nPlease summarize only the concepts and key ideas learned."
768
- ),
769
  },
770
  ]
771
 
772
  if language_preference == "中文":
773
- messages.append(
774
- {
775
- "role": "system",
776
- "content": "请用中文给出要点总结,只保留知识点和结论,使用条目符号。"
777
- }
778
- )
779
 
780
  summary_text = safe_chat_completion(
781
  model_name=model_name,
 
1
+ # api/clare_core.py
2
+ import os
3
  import re
4
  import math
5
  from typing import List, Dict, Tuple, Optional
 
14
  CLARE_SYSTEM_PROMPT,
15
  LEARNING_MODE_INSTRUCTIONS,
16
  )
17
+
18
  from langsmith import traceable
19
  from langsmith.run_helpers import set_run_metadata
20
 
21
 
22
# ----------------------------
# Speed/Prompt controls
# ----------------------------
def _env_int(name: str, default: int) -> int:
    """Read an integer tuning knob from the environment.

    Falls back to ``default`` when the variable is unset, empty, or not a
    valid integer. The previous one-liner ``int(os.getenv(name, "...").strip())``
    raised ValueError at import time for an empty/garbage value, taking the
    whole module down on a bad deployment config.
    """
    raw = os.getenv(name)
    if raw is None:
        return default
    try:
        return int(raw.strip())
    except ValueError:
        # Log to the Space console and degrade gracefully.
        print(f"[config] invalid {name}={raw!r}; using default {default}")
        return default


# Limit how much context we send to the model (token reduction = speed up).
MAX_HISTORY_TURNS = _env_int("CLARE_MAX_HISTORY_TURNS", 6)  # user+assistant pairs
MAX_TOPICS = _env_int("CLARE_MAX_TOPICS", 10)
MAX_WEAKNESSES = _env_int("CLARE_MAX_WEAKNESSES", 3)
MAX_SESSION_MEMORY_QS = _env_int("CLARE_MAX_SESSION_MEMORY_QS", 3)
30
+
31
 
32
  # ---------- syllabus 解析 ----------
33
  def parse_syllabus_docx(file_path: str, max_lines: int = 15) -> List[str]:
 
 
 
 
34
  topics: List[str] = []
35
  try:
36
  doc = Document(file_path)
 
43
  break
44
  except Exception as e:
45
  topics = [f"[Error parsing syllabus: {e}]"]
 
46
  return topics
47
 
48
 
 
81
 
82
 
83
  def update_weaknesses_from_message(message: str, weaknesses: List[str]) -> List[str]:
84
+ lower_msg = (message or "").lower()
85
  if any(k in lower_msg for k in WEAKNESS_KEYWORDS):
86
  weaknesses = weaknesses or []
87
  weaknesses.append(message)
 
92
  message: str,
93
  state: Optional[Dict[str, int]],
94
  ) -> Dict[str, int]:
 
 
 
 
 
95
  if state is None:
96
  state = {"confusion": 0, "mastery": 0}
97
 
98
+ lower_msg = (message or "").lower()
99
  if any(k in lower_msg for k in WEAKNESS_KEYWORDS):
100
  state["confusion"] = state.get("confusion", 0) + 1
101
  if any(k in lower_msg for k in MASTERY_KEYWORDS):
 
121
  history: List[Tuple[str, str]],
122
  weaknesses: Optional[List[str]],
123
  cognitive_state: Optional[Dict[str, int]],
124
+ max_questions: int = 3,
125
+ max_weaknesses: int = 2,
126
  ) -> str:
 
 
 
 
 
 
127
  parts: List[str] = []
128
 
 
129
  if history:
130
  recent_qs = [u for (u, _a) in history[-max_questions:]]
131
  trimmed_qs = []
132
  for q in recent_qs:
133
+ q = (q or "").strip()
134
  if len(q) > 120:
135
  q = q[:117] + "..."
136
  trimmed_qs.append(q)
137
  if trimmed_qs:
138
+ parts.append("Recent questions: " + " | ".join(trimmed_qs))
139
 
 
140
  if weaknesses:
141
  recent_weak = weaknesses[-max_weaknesses:]
142
  trimmed_weak = []
143
  for w in recent_weak:
144
+ w = (w or "").strip()
145
  if len(w) > 120:
146
  w = w[:117] + "..."
147
  trimmed_weak.append(w)
148
+ if trimmed_weak:
149
+ parts.append("Recent difficulties: " + " | ".join(trimmed_weak))
150
 
 
151
  if cognitive_state:
152
+ parts.append("Cognitive state: " + describe_cognitive_state(cognitive_state))
153
 
154
  if not parts:
155
+ return "No prior session memory."
 
 
 
156
 
157
  return " | ".join(parts)
158
 
159
 
160
+ # ---------- 语言检测 ----------
161
def detect_language(message: str, preference: str) -> str:
    """Resolve which language Clare should answer in.

    An explicit preference ('English' or '中文') always wins; otherwise
    (Auto mode) the message is scanned for CJK characters to decide.
    """
    forced_choices = {"English", "中文"}
    if preference in forced_choices:
        return preference
    # Auto mode: any character in the CJK Unified Ideographs range → Chinese.
    contains_chinese = re.search(r"[\u4e00-\u9fff]", message or "") is not None
    return "中文" if contains_chinese else "English"
167
 
168
 
169
def get_empty_input_prompt(lang: str) -> str:
    """Return a friendly nudge for an empty submission, localized by ``lang``."""
    localized = {
        "中文": "请先输入一个问题或想法,再按回车发送,我才能帮到你哦。",
    }
    # Any language other than '中文' gets the English prompt.
    return localized.get(
        lang, "Please type a question or some text before sending, then hit Enter."
    )
173
 
174
 
 
177
  lang: str,
178
  op: str = "chat",
179
  ) -> str:
 
 
 
 
180
  if lang == "中文":
181
  prefix = {
182
  "chat": "抱歉,刚刚在和模型对话时出现了一点问题。",
 
185
  }.get(op, "抱歉,刚刚出现了一点问题。")
186
  return prefix + " 请稍后再试一次,或者换个问法试试。"
187
 
 
188
  prefix_en = {
189
  "chat": "Sorry, I ran into a problem while talking to the model.",
190
  "quiz": "Sorry, there was a problem while generating the quiz.",
 
214
  return "\n".join(lines)
215
 
216
 
217
+ # ---------- Similarity helpers (kept; not called by server currently) ----------
218
  def _normalize_text(text: str) -> str:
219
+ text = (text or "").lower().strip()
 
 
 
220
  text = re.sub(r"[^\w\s]", " ", text)
221
  text = re.sub(r"\s+", " ", text)
222
  return text
 
227
  tokens_b = set(b.split())
228
  if not tokens_a or not tokens_b:
229
  return 0.0
230
+ return len(a_set := (tokens_a & tokens_b)) / len(tokens_a | tokens_b)
231
 
232
 
233
  def cosine_similarity(a: List[float], b: List[float]) -> float:
 
240
  return 0.0
241
  return dot / (norm_a * norm_b)
242
 
243
+
244
  @traceable(run_type="embedding", name="get_embedding")
245
  def get_embedding(text: str) -> Optional[List[float]]:
 
 
 
246
  try:
247
  resp = client.embeddings.create(
248
  model=EMBEDDING_MODEL,
 
250
  )
251
  return resp.data[0].embedding
252
  except Exception as e:
 
253
  print(f"[Embedding error] {repr(e)}")
254
  return None
255
 
 
261
  embedding_threshold: float = 0.85,
262
  max_turns_to_check: int = 6,
263
  ) -> Optional[Tuple[str, str, float]]:
 
 
 
 
 
264
  norm_msg = _normalize_text(message)
265
  if not norm_msg:
266
  return None
267
 
 
268
  best_sim_j = 0.0
269
  best_pair_j: Optional[Tuple[str, str]] = None
270
  checked = 0
 
289
  if best_pair_j and best_sim_j >= jaccard_threshold:
290
  return best_pair_j[0], best_pair_j[1], best_sim_j
291
 
 
292
  if not history:
293
  return None
294
 
 
326
  messages: List[Dict[str, str]],
327
  lang: str,
328
  op: str = "chat",
329
+ temperature: float = 0.4, # ✅ slightly lower for stability/speed
330
  ) -> str:
 
 
 
 
 
 
 
331
  preferred_model = model_name or DEFAULT_MODEL
332
  last_error: Optional[Exception] = None
333
 
334
  for attempt in range(2):
335
+ current_model = preferred_model if attempt == 0 else DEFAULT_MODEL
 
 
 
 
336
 
337
  try:
338
  resp = client.chat.completions.create(
339
  model=current_model,
340
  messages=messages,
341
  temperature=temperature,
342
+ timeout=20,
343
  )
344
  return resp.choices[0].message.content
345
  except Exception as e:
 
348
  f"failed with model={current_model}: {repr(e)}"
349
  )
350
  last_error = e
 
 
351
  if current_model == DEFAULT_MODEL or attempt == 1:
352
  break
353
 
 
354
  return build_error_message(last_error or Exception("unknown error"), lang, op)
355
 
356
 
357
+ # ---------- 构建 messages (optimized) ----------
358
  def build_messages(
359
  user_message: str,
360
  history: List[Tuple[str, str]],
 
364
  course_outline: Optional[List[str]],
365
  weaknesses: Optional[List[str]],
366
  cognitive_state: Optional[Dict[str, int]],
367
+ rag_context: Optional[str] = None,
368
  ) -> List[Dict[str, str]]:
369
+ messages: List[Dict[str, str]] = [{"role": "system", "content": CLARE_SYSTEM_PROMPT}]
370
+
371
+ # ✅ consolidate most system context into ONE message to reduce overhead
372
+ sys_parts: List[str] = []
373
 
374
+ # mode
375
  if learning_mode in LEARNING_MODE_INSTRUCTIONS:
376
+ sys_parts.append(f"Learning mode: {learning_mode}. {LEARNING_MODE_INSTRUCTIONS[learning_mode]}")
 
 
 
 
 
 
377
 
378
+ # syllabus/topics (limit)
379
  topics = course_outline if course_outline else DEFAULT_COURSE_TOPICS
380
+ topics = (topics or [])[:MAX_TOPICS]
381
+ if topics:
382
+ sys_parts.append("Course topics: " + " | ".join(topics))
 
 
 
 
 
 
 
 
383
 
384
+ # doc_type hint
385
  if doc_type and doc_type != "Syllabus":
386
+ sys_parts.append(f"Supporting doc uploaded: {doc_type}.")
 
 
 
 
 
 
 
 
 
387
 
388
+ # weaknesses (limit)
389
  if weaknesses:
390
+ ww = weaknesses[-MAX_WEAKNESSES:]
391
+ sys_parts.append("Student difficulties (recent): " + " | ".join(ww))
 
 
 
 
 
 
 
 
392
 
393
+ # cognitive state (short)
394
  if cognitive_state:
395
+ sys_parts.append("Cognitive state: " + describe_cognitive_state(cognitive_state))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
396
 
397
+ # session memory (short + limited)
 
 
 
 
 
 
 
 
 
 
398
  session_memory_text = build_session_memory_summary(
399
  history=history,
400
  weaknesses=weaknesses,
401
  cognitive_state=cognitive_state,
402
+ max_questions=MAX_SESSION_MEMORY_QS,
403
+ max_weaknesses=min(2, MAX_WEAKNESSES),
404
  )
405
+ if session_memory_text:
406
+ sys_parts.append("Session memory: " + session_memory_text)
407
+
408
+ # language preference
409
+ if language_preference == "English":
410
+ sys_parts.append("Answer in English.")
411
+ elif language_preference == "中文":
412
+ sys_parts.append("请用中文回答。")
 
 
 
413
 
414
+ if sys_parts:
415
+ messages.append({"role": "system", "content": "\n".join(sys_parts)})
416
+
417
+ # rag context (keep as separate system block, but already capped in rag_engine)
418
  if rag_context:
419
  messages.append(
420
  {
421
  "role": "system",
422
  "content": (
423
+ "Relevant excerpts (use as grounding; prefer these if conflict):\n\n"
 
 
 
424
  + rag_context
425
  ),
426
  }
427
  )
428
 
429
+ # ✅ limit history turns for speed
430
+ hist = history[-MAX_HISTORY_TURNS:] if history else []
431
+ for user, assistant in hist:
432
  messages.append({"role": "user", "content": user})
433
  if assistant is not None:
434
  messages.append({"role": "assistant", "content": assistant})
435
 
 
436
  messages.append({"role": "user", "content": user_message})
437
  return messages
438
 
439
+
440
  @traceable(run_type="chain", name="chat_with_clare")
441
  def chat_with_clare(
442
  message: str,
 
459
  except Exception as e:
460
  print(f"[LangSmith metadata error in chat_with_clare] {repr(e)}")
461
 
 
 
462
  messages = build_messages(
463
  user_message=message,
464
  history=history,
 
471
  rag_context=rag_context,
472
  )
473
 
 
474
  answer = safe_chat_completion(
475
  model_name=model_name,
476
  messages=messages,
477
  lang=language_preference,
478
  op="chat",
479
+ temperature=0.4,
480
  )
481
 
482
+ history = (history or []) + [(message, answer)]
483
  return answer, history
484
 
485
 
 
494
  lines: List[str] = []
495
  lines.append("# Clare – Conversation Export\n")
496
  lines.append(f"- Learning mode: **{learning_mode_val}**\n")
497
+ lines.append("- Course topics (short): " + "; ".join((course_outline or [])[:5]) + "\n")
498
  lines.append(f"- Cognitive state snapshot: {describe_cognitive_state(cognitive_state)}\n")
499
 
500
  if weaknesses:
501
  lines.append("- Observed student difficulties:\n")
502
+ for w in (weaknesses or [])[-5:]:
503
  lines.append(f" - {w}\n")
504
  lines.append("\n---\n\n")
505
 
506
+ for user, assistant in history or []:
507
  lines.append(f"**Student:** {user}\n\n")
508
  lines.append(f"**Clare:** {assistant}\n\n")
509
  lines.append("---\n\n")
 
511
  return "".join(lines)
512
 
513
 
514
+ # ---------- 生成 quiz ----------
 
 
515
  @traceable(run_type="chain", name="generate_quiz_from_history")
516
  def generate_quiz_from_history(
517
  history: List[Tuple[str, str]],
 
522
  language_preference: str,
523
  ) -> str:
524
  conversation_text = ""
525
+ for user, assistant in (history or [])[-6:]:
526
  conversation_text += f"Student: {user}\nClare: {assistant}\n"
527
 
528
+ topics_text = "; ".join((course_outline or [])[:8])
529
+ weakness_text = "; ".join((weaknesses or [])[-5:]) if weaknesses else "N/A"
530
  cog_text = describe_cognitive_state(cognitive_state)
531
 
532
  messages = [
 
534
  {
535
  "role": "system",
536
  "content": (
537
+ "Create a short concept quiz with 3 questions (mix MCQ + short answer). "
538
+ "Add 'Answer Key' at end. Adapt difficulty to student state."
 
 
 
 
539
  ),
540
  },
541
+ {"role": "system", "content": f"Course topics: {topics_text}"},
542
+ {"role": "system", "content": f"Student difficulties: {weakness_text}"},
543
+ {"role": "system", "content": f"Cognitive state: {cog_text}"},
 
 
 
 
 
 
 
 
 
544
  {
545
  "role": "user",
546
+ "content": "Recent conversation:\n\n" + conversation_text + "\n\nCreate the quiz now.",
 
 
 
 
547
  },
548
  ]
549
 
550
  if language_preference == "中文":
551
+ messages.append({"role": "system", "content": "请用中文给出问题和答案。"})
 
 
 
 
 
552
 
553
  quiz_text = safe_chat_completion(
554
  model_name=model_name,
 
560
  return quiz_text
561
 
562
 
563
+ # ---------- 总结 ----------
564
  @traceable(run_type="chain", name="summarize_conversation")
565
  def summarize_conversation(
566
  history: List[Tuple[str, str]],
 
571
  language_preference: str,
572
  ) -> str:
573
  conversation_text = ""
574
+ for user, assistant in (history or [])[-8:]:
575
  conversation_text += f"Student: {user}\nClare: {assistant}\n"
576
 
577
+ topics_text = "; ".join((course_outline or [])[:8])
578
+ weakness_text = "; ".join((weaknesses or [])[-5:]) if weaknesses else "N/A"
579
  cog_text = describe_cognitive_state(cognitive_state)
580
 
581
  messages = [
 
583
  {
584
  "role": "system",
585
  "content": (
586
+ "Produce a concept-only summary in bullet points: definitions, key ideas, "
587
+ "formulas, examples, takeaways. No personal chat."
 
 
 
 
588
  ),
589
  },
590
+ {"role": "system", "content": f"Course topics: {topics_text}"},
591
+ {"role": "system", "content": f"Student difficulties: {weakness_text}"},
592
+ {"role": "system", "content": f"Cognitive state: {cog_text}"},
 
 
 
 
 
 
 
 
 
593
  {
594
  "role": "user",
595
+ "content": "Recent conversation:\n\n" + conversation_text + "\n\nSummarize key concepts.",
 
 
 
 
596
  },
597
  ]
598
 
599
  if language_preference == "中文":
600
+ messages.append({"role": "system", "content": "请用中文给出要点总结,只保留知识点,使用条目符号。"})
 
 
 
 
 
601
 
602
  summary_text = safe_chat_completion(
603
  model_name=model_name,