AI_Agent_Final

Sleeping

App Files Files Community

SarahXia0405 committed on Dec 20, 2025

Commit

072270b

verified ·

1 Parent(s): 82b3136

Update api/clare_core.py

Browse files

Files changed (1) hide show

api/clare_core.py +116 -134

api/clare_core.py CHANGED Viewed

@@ -1,5 +1,4 @@
 # api/clare_core.py
-import os
 import re
 import math
 from typing import List, Dict, Tuple, Optional
@@ -19,14 +18,13 @@ from langsmith import traceable
 from langsmith.run_helpers import set_run_metadata
-# ----------------------------
-# Speed/Prompt controls
-# ----------------------------
-# ✅ limit how much history we send to the model (token reduction = speed up)
-MAX_HISTORY_TURNS = int(os.getenv("CLARE_MAX_HISTORY_TURNS", "6").strip())  # user+assistant pairs
-MAX_TOPICS = int(os.getenv("CLARE_MAX_TOPICS", "10").strip())
-MAX_WEAKNESSES = int(os.getenv("CLARE_MAX_WEAKNESSES", "3").strip())
-MAX_SESSION_MEMORY_QS = int(os.getenv("CLARE_MAX_SESSION_MEMORY_QS", "3").strip())
 # ---------- syllabus 解析 ----------
@@ -46,42 +44,20 @@ def parse_syllabus_docx(file_path: str, max_lines: int = 15) -> List[str]:
     return topics
-# ---------- 简单“弱项”检测 ----------
 WEAKNESS_KEYWORDS = [
-    "don't understand",
-    "do not understand",
-    "not understand",
-    "not sure",
-    "confused",
-    "hard to",
-    "difficult",
-    "struggle",
-    "不会",
-    "不懂",
-    "看不懂",
-    "搞不清",
-    "很难",
 ]
-# ---------- 简单“掌握”检测 ----------
 MASTERY_KEYWORDS = [
-    "got it",
-    "makes sense",
-    "now i see",
-    "i see",
-    "understand now",
-    "clear now",
-    "easy",
-    "no problem",
-    "没问题",
-    "懂了",
-    "明白了",
-    "清楚了",
 ]
 def update_weaknesses_from_message(message: str, weaknesses: List[str]) -> List[str]:
-    lower_msg = (message or "").lower()
     if any(k in lower_msg for k in WEAKNESS_KEYWORDS):
         weaknesses = weaknesses or []
         weaknesses.append(message)
@@ -95,7 +71,7 @@ def update_cognitive_state_from_message(
     if state is None:
         state = {"confusion": 0, "mastery": 0}
-    lower_msg = (message or "").lower()
     if any(k in lower_msg for k in WEAKNESS_KEYWORDS):
         state["confusion"] = state.get("confusion", 0) + 1
     if any(k in lower_msg for k in MASTERY_KEYWORDS):
@@ -116,13 +92,12 @@ def describe_cognitive_state(state: Optional[Dict[str, int]]) -> str:
         return "mixed or uncertain cognitive state."
-# ---------- Session Memory ----------
 def build_session_memory_summary(
     history: List[Tuple[str, str]],
     weaknesses: Optional[List[str]],
     cognitive_state: Optional[Dict[str, int]],
-    max_questions: int = 3,
-    max_weaknesses: int = 2,
 ) -> str:
     parts: List[str] = []
@@ -130,53 +105,44 @@ def build_session_memory_summary(
         recent_qs = [u for (u, _a) in history[-max_questions:]]
         trimmed_qs = []
         for q in recent_qs:
-            q = (q or "").strip()
             if len(q) > 120:
                 q = q[:117] + "..."
             trimmed_qs.append(q)
         if trimmed_qs:
-            parts.append("Recent questions: " + " | ".join(trimmed_qs))
     if weaknesses:
         recent_weak = weaknesses[-max_weaknesses:]
         trimmed_weak = []
         for w in recent_weak:
-            w = (w or "").strip()
             if len(w) > 120:
                 w = w[:117] + "..."
             trimmed_weak.append(w)
-        if trimmed_weak:
-            parts.append("Recent difficulties: " + " | ".join(trimmed_weak))
     if cognitive_state:
-        parts.append("Cognitive state: " + describe_cognitive_state(cognitive_state))
     if not parts:
-        return "No prior session memory."
     return " | ".join(parts)
-# ---------- 语言检测 ----------
 def detect_language(message: str, preference: str) -> str:
     if preference in ("English", "中文"):
         return preference
-    if re.search(r"[\u4e00-\u9fff]", message or ""):
         return "中文"
     return "English"
-def get_empty_input_prompt(lang: str) -> str:
-    if lang == "中文":
-        return "请先输入一个问题或想法，再按回车发送，我才能帮到你哦。"
-    return "Please type a question or some text before sending, then hit Enter."
-def build_error_message(
-    e: Exception,
-    lang: str,
-    op: str = "chat",
-) -> str:
     if lang == "中文":
         prefix = {
             "chat": "抱歉，刚刚在和模型对话时出现了一点问题。",
@@ -193,7 +159,6 @@ def build_error_message(
     return prefix_en + " Please try again in a moment or rephrase your request."
-# ---------- Session 状态展示 ----------
 def render_session_status(
     learning_mode: str,
     weaknesses: Optional[List[str]],
@@ -214,9 +179,11 @@ def render_session_status(
     return "\n".join(lines)
-# ---------- Similarity helpers (kept; not called by server currently) ----------
 def _normalize_text(text: str) -> str:
-    text = (text or "").lower().strip()
     text = re.sub(r"[^\w\s]", " ", text)
     text = re.sub(r"\s+", " ", text)
     return text
@@ -227,7 +194,7 @@ def _jaccard_similarity(a: str, b: str) -> float:
     tokens_b = set(b.split())
     if not tokens_a or not tokens_b:
         return 0.0
-    return len(a_set := (tokens_a & tokens_b)) / len(tokens_a | tokens_b)
 def cosine_similarity(a: List[float], b: List[float]) -> float:
@@ -326,7 +293,7 @@ def safe_chat_completion(
     messages: List[Dict[str, str]],
     lang: str,
     op: str = "chat",
-    temperature: float = 0.4,  # ✅ slightly lower for stability/speed
 ) -> str:
     preferred_model = model_name or DEFAULT_MODEL
     last_error: Optional[Exception] = None
@@ -354,7 +321,14 @@ def safe_chat_completion(
     return build_error_message(last_error or Exception("unknown error"), lang, op)
-# ---------- 构建 messages (optimized) ----------
 def build_messages(
     user_message: str,
     history: List[Tuple[str, str]],
@@ -366,72 +340,81 @@ def build_messages(
     cognitive_state: Optional[Dict[str, int]],
     rag_context: Optional[str] = None,
 ) -> List[Dict[str, str]]:
-    messages: List[Dict[str, str]] = [{"role": "system", "content": CLARE_SYSTEM_PROMPT}]
-    # ✅ consolidate most system context into ONE message to reduce overhead
-    sys_parts: List[str] = []
-    # mode
-    if learning_mode in LEARNING_MODE_INSTRUCTIONS:
-        sys_parts.append(f"Learning mode: {learning_mode}. {LEARNING_MODE_INSTRUCTIONS[learning_mode]}")
-    # syllabus/topics (limit)
     topics = course_outline if course_outline else DEFAULT_COURSE_TOPICS
     topics = (topics or [])[:MAX_TOPICS]
-    if topics:
-        sys_parts.append("Course topics: " + " | ".join(topics))
-    # doc_type hint
-    if doc_type and doc_type != "Syllabus":
-        sys_parts.append(f"Supporting doc uploaded: {doc_type}.")
-    # weaknesses (limit)
-    if weaknesses:
-        ww = weaknesses[-MAX_WEAKNESSES:]
-        sys_parts.append("Student difficulties (recent): " + " | ".join(ww))
-    # cognitive state (short)
-    if cognitive_state:
-        sys_parts.append("Cognitive state: " + describe_cognitive_state(cognitive_state))
-    # session memory (short + limited)
     session_memory_text = build_session_memory_summary(
-        history=history,
-        weaknesses=weaknesses,
         cognitive_state=cognitive_state,
-        max_questions=MAX_SESSION_MEMORY_QS,
-        max_weaknesses=min(2, MAX_WEAKNESSES),
     )
-    if session_memory_text:
-        sys_parts.append("Session memory: " + session_memory_text)
-    # language preference
     if language_preference == "English":
-        sys_parts.append("Answer in English.")
     elif language_preference == "中文":
-        sys_parts.append("请用中文回答。")
-    if sys_parts:
-        messages.append({"role": "system", "content": "\n".join(sys_parts)})
-    # rag context (keep as separate system block, but already capped in rag_engine)
-    if rag_context:
-        messages.append(
-            {
-                "role": "system",
-                "content": (
-                    "Relevant excerpts (use as grounding; prefer these if conflict):\n\n"
-                    + rag_context
-                ),
-            }
-        )
-    # ✅ limit history turns for speed
-    hist = history[-MAX_HISTORY_TURNS:] if history else []
-    for user, assistant in hist:
-        messages.append({"role": "user", "content": user})
-        if assistant is not None:
-            messages.append({"role": "assistant", "content": assistant})
     messages.append({"role": "user", "content": user_message})
     return messages
@@ -476,14 +459,13 @@ def chat_with_clare(
         messages=messages,
         lang=language_preference,
         op="chat",
-        temperature=0.4,
     )
-    history = (history or []) + [(message, answer)]
     return answer, history
-# ---------- 导出对话为 Markdown ----------
 def export_conversation(
     history: List[Tuple[str, str]],
     course_outline: List[str],
@@ -499,11 +481,11 @@ def export_conversation(
     if weaknesses:
         lines.append("- Observed student difficulties:\n")
-        for w in (weaknesses or [])[-5:]:
             lines.append(f"  - {w}\n")
     lines.append("\n---\n\n")
-    for user, assistant in history or []:
         lines.append(f"**Student:** {user}\n\n")
         lines.append(f"**Clare:** {assistant}\n\n")
         lines.append("---\n\n")
@@ -511,7 +493,6 @@ def export_conversation(
     return "".join(lines)
-# ---------- 生成 quiz ----------
 @traceable(run_type="chain", name="generate_quiz_from_history")
 def generate_quiz_from_history(
     history: List[Tuple[str, str]],
@@ -522,7 +503,7 @@ def generate_quiz_from_history(
     language_preference: str,
 ) -> str:
     conversation_text = ""
-    for user, assistant in (history or [])[-6:]:
         conversation_text += f"Student: {user}\nClare: {assistant}\n"
     topics_text = "; ".join((course_outline or [])[:8])
@@ -534,8 +515,8 @@ def generate_quiz_from_history(
         {
             "role": "system",
             "content": (
-                "Create a short concept quiz with 3 questions (mix MCQ + short answer). "
-                "Add 'Answer Key' at end. Adapt difficulty to student state."
             ),
         },
         {"role": "system", "content": f"Course topics: {topics_text}"},
@@ -550,6 +531,7 @@ def generate_quiz_from_history(
     if language_preference == "中文":
         messages.append({"role": "system", "content": "请用中文给出问题和答案。"})
     quiz_text = safe_chat_completion(
         model_name=model_name,
         messages=messages,
@@ -560,7 +542,6 @@ def generate_quiz_from_history(
     return quiz_text
-# ---------- 总结 ----------
 @traceable(run_type="chain", name="summarize_conversation")
 def summarize_conversation(
     history: List[Tuple[str, str]],
@@ -571,7 +552,7 @@ def summarize_conversation(
     language_preference: str,
 ) -> str:
     conversation_text = ""
-    for user, assistant in (history or [])[-8:]:
         conversation_text += f"Student: {user}\nClare: {assistant}\n"
     topics_text = "; ".join((course_outline or [])[:8])
@@ -583,8 +564,8 @@ def summarize_conversation(
         {
             "role": "system",
             "content": (
-                "Produce a concept-only summary in bullet points: definitions, key ideas, "
-                "formulas, examples, takeaways. No personal chat."
             ),
         },
         {"role": "system", "content": f"Course topics: {topics_text}"},
@@ -592,12 +573,13 @@ def summarize_conversation(
         {"role": "system", "content": f"Cognitive state: {cog_text}"},
         {
             "role": "user",
-            "content": "Recent conversation:\n\n" + conversation_text + "\n\nSummarize key concepts.",
         },
     ]
     if language_preference == "中文":
-        messages.append({"role": "system", "content": "请用中文给出要点总结，只保留知识点，使用条目符号。"})
     summary_text = safe_chat_completion(
         model_name=model_name,

 # api/clare_core.py
 import re
 import math
 from typing import List, Dict, Tuple, Optional
 from langsmith.run_helpers import set_run_metadata
+# -----------------------------
+# Speed controls (token budget)
+# -----------------------------
+MAX_HISTORY_TURNS = 6          # keep only the most recent N turns (one turn = 1 user + 1 assistant message)
+MAX_TOPICS = 8                 # include only the first N syllabus topics
+MAX_WEAKNESSES = 3             # include only the last N entries
+MAX_RAG_CHARS_IN_PROMPT = 1200 # truncate rag_context a second time (double safety)
 # ---------- syllabus 解析 ----------
     return topics
 WEAKNESS_KEYWORDS = [  # substrings looked up in the lowercased student message to flag confusion (English + Chinese)
+    "don't understand", "do not understand", "not understand", "not sure", "confused",
+    "hard to", "difficult", "struggle",
+    "不会", "不懂", "看不懂", "搞不清", "很难",
 ]
 MASTERY_KEYWORDS = [  # substrings looked up in the lowercased student message to flag mastery (English + Chinese)
+    "got it", "makes sense", "now i see", "i see", "understand now", "clear now", "easy", "no problem",
+    "没问题", "懂了", "明白了", "清楚了",
 ]
 def update_weaknesses_from_message(message: str, weaknesses: List[str]) -> List[str]:
+    lower_msg = message.lower()
     if any(k in lower_msg for k in WEAKNESS_KEYWORDS):
         weaknesses = weaknesses or []
         weaknesses.append(message)
     if state is None:
         state = {"confusion": 0, "mastery": 0}
+    lower_msg = message.lower()
     if any(k in lower_msg for k in WEAKNESS_KEYWORDS):
         state["confusion"] = state.get("confusion", 0) + 1
     if any(k in lower_msg for k in MASTERY_KEYWORDS):
         return "mixed or uncertain cognitive state."
 def build_session_memory_summary(
     history: List[Tuple[str, str]],
     weaknesses: Optional[List[str]],
     cognitive_state: Optional[Dict[str, int]],
+    max_questions: int = 4,
+    max_weaknesses: int = 3,
 ) -> str:
     parts: List[str] = []
         recent_qs = [u for (u, _a) in history[-max_questions:]]
         trimmed_qs = []
         for q in recent_qs:
+            q = q.strip()
             if len(q) > 120:
                 q = q[:117] + "..."
             trimmed_qs.append(q)
         if trimmed_qs:
+            parts.append("Recent student questions: " + " | ".join(trimmed_qs))
     if weaknesses:
         recent_weak = weaknesses[-max_weaknesses:]
         trimmed_weak = []
         for w in recent_weak:
+            w = w.strip()
             if len(w) > 120:
                 w = w[:117] + "..."
             trimmed_weak.append(w)
+        parts.append("Recent difficulties mentioned by the student: " + " | ".join(trimmed_weak))
     if cognitive_state:
+        parts.append("Current cognitive state: " + describe_cognitive_state(cognitive_state))
     if not parts:
+        return (
+            "No prior session memory. Treat this as early stage of the conversation; "
+            "start simple and ask a quick check-up question."
+        )
     return " | ".join(parts)
 def detect_language(message: str, preference: str) -> str:
     """Choose the reply language for a message.

     Args:
         message: The student's text. ``None`` is tolerated and treated as
             an empty string.
         preference: An explicit language choice. ``"English"`` and ``"中文"``
             are returned as-is; any other value triggers auto-detection.

     Returns:
         ``"中文"`` when auto-detecting and the message contains at least one
         character in the U+4E00–U+9FFF range, otherwise ``"English"``.
     """
     if preference in ("English", "中文"):
         return preference
     # `or ""` keeps re.search from raising TypeError when message is None.
     if re.search(r"[\u4e00-\u9fff]", message or ""):
         return "中文"
     return "English"
+def build_error_message(e: Exception, lang: str, op: str = "chat") -> str:
     if lang == "中文":
         prefix = {
             "chat": "抱歉，刚刚在和模型对话时出现了一点问题。",
     return prefix_en + " Please try again in a moment or rephrase your request."
 def render_session_status(
     learning_mode: str,
     weaknesses: Optional[List[str]],
     return "\n".join(lines)
+# -----------------------
+# Similarity helpers (kept)
+# -----------------------
 def _normalize_text(text: str) -> str:
+    text = text.lower().strip()
     text = re.sub(r"[^\w\s]", " ", text)
     text = re.sub(r"\s+", " ", text)
     return text
     tokens_b = set(b.split())
     if not tokens_a or not tokens_b:
         return 0.0
+    return len(tokens_a & tokens_b) / len(tokens_a | tokens_b)
 def cosine_similarity(a: List[float], b: List[float]) -> float:
     messages: List[Dict[str, str]],
     lang: str,
     op: str = "chat",
+    temperature: float = 0.5,
 ) -> str:
     preferred_model = model_name or DEFAULT_MODEL
     last_error: Optional[Exception] = None
     return build_error_message(last_error or Exception("unknown error"), lang, op)
+def _take_recent_history(history: List[Tuple[str, str]], max_turns: int) -> List[Tuple[str, str]]:  # return at most the last `max_turns` entries of `history`
+    if not history:  # empty (or None) history: nothing to carry over
+        return []
+    if max_turns <= 0:  # explicit guard: history[-0:] would return the whole list rather than nothing
+        return []
+    return history[-max_turns:]
 def build_messages(
     user_message: str,
     history: List[Tuple[str, str]],
     cognitive_state: Optional[Dict[str, int]],
     rag_context: Optional[str] = None,
 ) -> List[Dict[str, str]]:
+    """
+    SPEED: reduce tokens by:
+    - one consolidated system message
+    - limit history turns
+    - limit topics / weaknesses
+    - truncate rag_context
+    """
+    trimmed_history = _take_recent_history(history, MAX_HISTORY_TURNS)
     topics = course_outline if course_outline else DEFAULT_COURSE_TOPICS
     topics = (topics or [])[:MAX_TOPICS]
+    topics_text = " | ".join(topics)
+    weak_list = (weaknesses or [])[-MAX_WEAKNESSES:]
+    weak_text = " | ".join(weak_list) if weak_list else ""
     session_memory_text = build_session_memory_summary(
+        history=trimmed_history,
+        weaknesses=weak_list,
         cognitive_state=cognitive_state,
     )
+    mode_instruction = LEARNING_MODE_INSTRUCTIONS.get(learning_mode, "")
+    # RAG context double-safety truncate
+    rag_block = ""
+    if rag_context:
+        rag_context = rag_context[:MAX_RAG_CHARS_IN_PROMPT]
+        rag_block = (
+            "\n\nRelevant excerpts (use as primary grounding; prefer excerpts if conflict):\n"
+            + rag_context
+        )
+    # Language directive
+    lang_line = ""
     if language_preference == "English":
+        lang_line = "\nAnswer in English."
     elif language_preference == "中文":
+        lang_line = "\n请用中文回答。"
+    # Cognitive state directive (short)
+    cog_line = ""
+    if cognitive_state:
+        confusion = cognitive_state.get("confusion", 0)
+        mastery = cognitive_state.get("mastery", 0)
+        if confusion >= 2 and confusion >= mastery + 1:
+            cog_line = "\nStudent is under HIGH cognitive load: be concise, stepwise, concrete; check understanding."
+        elif mastery >= 2 and mastery >= confusion + 1:
+            cog_line = "\nStudent seems comfortable: you may go slightly deeper and connect concepts."
+        else:
+            cog_line = "\nStudent state is mixed: keep moderate pace and ask brief check questions."
+    # Doc type hint (short)
+    doc_line = ""
+    if doc_type and doc_type != "Syllabus":
+        doc_line = f"\nStudent uploaded supporting material: {doc_type}."
+    consolidated_system = (
+        CLARE_SYSTEM_PROMPT
+        + f"\n\nLearning mode: {learning_mode}. {mode_instruction}"
+        + f"\n\nCourse topics context: {topics_text}"
+        + (f"\nStudent difficulties (recent): {weak_text}" if weak_text else "\nStudent difficulties (recent): none")
+        + f"\nSession memory (this chat only): {session_memory_text}"
+        + doc_line
+        + cog_line
+        + lang_line
+        + rag_block
+    )
+    messages: List[Dict[str, str]] = [{"role": "system", "content": consolidated_system}]
+    for u, a in trimmed_history:
+        messages.append({"role": "user", "content": u})
+        if a is not None:
+            messages.append({"role": "assistant", "content": a})
     messages.append({"role": "user", "content": user_message})
     return messages
         messages=messages,
         lang=language_preference,
         op="chat",
+        temperature=0.5,
     )
+    history = history + [(message, answer)]
     return answer, history
 def export_conversation(
     history: List[Tuple[str, str]],
     course_outline: List[str],
     if weaknesses:
         lines.append("- Observed student difficulties:\n")
+        for w in weaknesses[-5:]:
             lines.append(f"  - {w}\n")
     lines.append("\n---\n\n")
+    for user, assistant in history:
         lines.append(f"**Student:** {user}\n\n")
         lines.append(f"**Clare:** {assistant}\n\n")
         lines.append("---\n\n")
     return "".join(lines)
 @traceable(run_type="chain", name="generate_quiz_from_history")
 def generate_quiz_from_history(
     history: List[Tuple[str, str]],
     language_preference: str,
 ) -> str:
     conversation_text = ""
+    for user, assistant in history[-8:]:
         conversation_text += f"Student: {user}\nClare: {assistant}\n"
     topics_text = "; ".join((course_outline or [])[:8])
         {
             "role": "system",
             "content": (
+                "Create a short concept quiz (3 questions). Mix MCQ and short-answer. "
+                "Then provide an Answer Key. Adjust difficulty to cognitive state."
             ),
         },
         {"role": "system", "content": f"Course topics: {topics_text}"},
     if language_preference == "中文":
         messages.append({"role": "system", "content": "请用中文给出问题和答案。"})
     quiz_text = safe_chat_completion(
         model_name=model_name,
         messages=messages,
     return quiz_text
 @traceable(run_type="chain", name="summarize_conversation")
 def summarize_conversation(
     history: List[Tuple[str, str]],
     language_preference: str,
 ) -> str:
     conversation_text = ""
+    for user, assistant in history[-10:]:
         conversation_text += f"Student: {user}\nClare: {assistant}\n"
     topics_text = "; ".join((course_outline or [])[:8])
         {
             "role": "system",
             "content": (
+                "Produce a concept-only summary in bullet points. "
+                "Include definitions, key ideas, examples, takeaways. No personal chatter."
             ),
         },
         {"role": "system", "content": f"Course topics: {topics_text}"},
         {"role": "system", "content": f"Cognitive state: {cog_text}"},
         {
             "role": "user",
+            "content": "Recent conversation:\n\n" + conversation_text + "\n\nSummarize key concepts only.",
         },
     ]
     if language_preference == "中文":
+        messages.append({"role": "system", "content": "请用中文要点总结，只保留知识点，使用条目符号。"})
     summary_text = safe_chat_completion(
         model_name=model_name,