test_AI_Agent

Sleeping

App Files Files Community

SarahXia0405 committed on Dec 2, 2025

Commit

74f25bb

verified ·

1 Parent(s): b3aa8c6

Create clare_core.py

Browse files

Files changed (1) hide show

clare_core.py +670 -0

clare_core.py ADDED Viewed

	@@ -0,0 +1,670 @@

+# clare_core.py
+import re
+import math
+from typing import List, Dict, Tuple, Optional
+from docx import Document
+from config import (
+    client,
+    DEFAULT_MODEL,
+    EMBEDDING_MODEL,
+    DEFAULT_COURSE_TOPICS,
+    CLARE_SYSTEM_PROMPT,
+    LEARNING_MODE_INSTRUCTIONS,
+)
# ---------- Syllabus parsing ----------
def parse_syllabus_docx(file_path: str, max_lines: int = 15) -> List[str]:
    """
    Extract a rough list of topic lines from a .docx syllabus.

    The parsing is deliberately simple: the first ``max_lines`` non-empty
    paragraphs are taken as topic lines. The goal is only to give Clare some
    course context, not to recover the exact structure of the document.

    Args:
        file_path: Path of the .docx file, handed to ``Document``.
        max_lines: Maximum number of topic lines to return. A value of zero
            or less yields an empty list.

    Returns:
        The stripped paragraph texts in document order. If opening or reading
        the document raises, a one-item list containing an
        ``"[Error parsing syllabus: ...]"`` marker is returned instead.
    """
    # The limit used to be checked only after appending, so a limit of 0
    # (or a negative one) still produced one topic line.
    if max_lines <= 0:
        return []

    topics: List[str] = []
    try:
        for para in Document(file_path).paragraphs:
            text = para.text.strip()
            if not text:
                continue
            topics.append(text)
            if len(topics) >= max_lines:
                break
    except Exception as e:
        # Best effort: surface the failure as a visible placeholder topic
        # rather than propagating the exception to the caller.
        topics = [f"[Error parsing syllabus: {e}]"]
    return topics
# ---------- Simple "weakness" detection ----------
# Substrings that flag a student message as expressing confusion.
# English entries are written in lower case.
WEAKNESS_KEYWORDS = [
    # English phrases
    "don't understand", "do not understand", "not understand",
    "not sure", "confused",
    "hard to", "difficult", "struggle",
    # Chinese phrases
    "不会", "不懂", "看不懂", "搞不清", "很难",
]
# ---------- Simple "mastery" detection ----------
# Substrings that flag a student message as expressing understanding.
# English entries are written in lower case.
MASTERY_KEYWORDS = [
    # English phrases
    "got it", "makes sense",
    "now i see", "i see",
    "understand now", "clear now",
    "easy", "no problem",
    # Chinese phrases
    "没问题", "懂了", "明白了", "清楚了",
]
def update_weaknesses_from_message(message: str, weaknesses: List[str]) -> List[str]:
    """
    Record ``message`` as a difficulty when it contains a weakness keyword.

    Args:
        message: The student's message. Matching is a substring test against
            its lower-cased form using ``WEAKNESS_KEYWORDS``.
        weaknesses: Difficulties collected so far; ``None`` is accepted and
            treated as empty. The argument itself is never modified.

    Returns:
        A new list holding the previous difficulties, followed by ``message``
        if a keyword matched. A list is always returned, never ``None``.
    """
    # Work on a copy: the old code appended in place for a non-empty input
    # but silently created a fresh list for an empty one, and returned None
    # when given None and nothing matched.
    collected = list(weaknesses) if weaknesses else []
    lowered = message.lower()
    if any(keyword in lowered for keyword in WEAKNESS_KEYWORDS):
        collected.append(message)
    return collected
def update_cognitive_state_from_message(
    message: str,
    state: Optional[Dict[str, int]],
) -> Dict[str, int]:
    """
    Update the simple cognitive-state counters from one student message.

    - A confusion keyword in the message adds 1 to ``"confusion"``.
    - A mastery keyword in the message adds 1 to ``"mastery"``.

    Both checks are independent, so a single message can bump both counters.
    When ``state`` is ``None`` a fresh dict with both counters at 0 is
    created; otherwise the given dict is updated in place and returned.
    """
    counters = {"confusion": 0, "mastery": 0} if state is None else state
    text = message.lower()
    keyword_groups = (
        ("confusion", WEAKNESS_KEYWORDS),
        ("mastery", MASTERY_KEYWORDS),
    )
    for counter_name, keywords in keyword_groups:
        if any(keyword in text for keyword in keywords):
            counters[counter_name] = counters.get(counter_name, 0) + 1
    return counters
def describe_cognitive_state(state: Optional[Dict[str, int]]) -> str:
    """
    Turn the confusion/mastery counters into a one-line description.

    Returns ``"unknown"`` for a missing or empty state. Otherwise the
    description reports high cognitive load when confusion is at least 2 and
    exceeds mastery by at least 1, reports a comfortable student in the
    mirrored case, and falls back to a "mixed or uncertain" description.
    """
    if not state:
        return "unknown"

    confusion = state.get("confusion", 0)
    mastery = state.get("mastery", 0)

    mostly_confused = confusion >= 2 and confusion >= mastery + 1
    if mostly_confused:
        return "student shows signs of HIGH cognitive load (often confused)."

    mostly_confident = mastery >= 2 and mastery >= confusion + 1
    if mostly_confident:
        return "student seems COMFORTABLE; material may be slightly easy."

    return "mixed or uncertain cognitive state."
# ---------- Session memory ----------
def build_session_memory_summary(
    history: List[Tuple[str, str]],
    weaknesses: Optional[List[str]],
    cognitive_state: Optional[Dict[str, int]],
    max_questions: int = 4,
    max_weaknesses: int = 3,
) -> str:
    """
    Build a short "memory summary" that is only meaningful within this session.

    The summary is made of up to three parts, joined with ``" | "``:
    - the most recent student questions,
    - the most recent messages the student found difficult,
    - a description of the current cognitive state.

    Args:
        history: ``(student_message, assistant_reply)`` pairs, oldest first.
            ``None`` is treated as empty.
        weaknesses: Messages recorded as difficulties, oldest first.
        cognitive_state: Confusion/mastery counters; skipped when falsy.
        max_questions: How many of the latest questions to include. Zero or
            less means none.
        max_weaknesses: How many of the latest difficulties to include. Zero
            or less means none.

    Returns:
        The joined summary, or a fixed "No prior session memory" hint when no
        part could be produced. Each quoted message is stripped and cut to at
        most 120 characters (117 plus ``"..."``).
    """

    def _latest(items, count):
        # items[-0:] is the whole list, so a non-positive limit must be
        # handled explicitly instead of being used as a slice bound.
        return list(items[-count:]) if count > 0 else []

    def _clip(text, limit=120):
        # Treat a missing message as empty text instead of failing on strip().
        text = (text or "").strip()
        if len(text) > limit:
            text = text[: limit - 3] + "..."
        return text

    parts: List[str] = []

    # Most recent questions (student side of each turn only).
    questions = [_clip(question) for (question, _answer) in _latest(history or [], max_questions)]
    if questions:
        parts.append("Recent student questions: " + " | ".join(questions))

    # Most recent difficulties.
    difficulties = [_clip(item) for item in _latest(weaknesses or [], max_weaknesses)]
    if difficulties:
        parts.append("Recent difficulties mentioned by the student: " + " | ".join(difficulties))

    # Current cognitive state.
    if cognitive_state:
        parts.append("Current cognitive state: " + describe_cognitive_state(cognitive_state))

    if not parts:
        return (
            "No prior session memory. You can treat this as an early stage of the conversation; "
            "start with simple explanations and ask a quick check-up question."
        )
    return " | ".join(parts)
# ---------- Language detection (used for Auto mode) ----------
def detect_language(message: str, preference: str) -> str:
    """
    Decide which language the reply should use.

    preference:
      - 'English' -> always English
      - '中文' -> always Chinese
      - anything else (e.g. 'Auto') -> Chinese if the message contains at
        least one character in the range U+4E00..U+9FFF, otherwise English
    """
    forced_choices = ("English", "中文")
    if preference in forced_choices:
        return preference

    # Auto mode: a single matching character is enough to pick Chinese.
    has_chinese = re.search(r"[\u4e00-\u9fff]", message) is not None
    return "中文" if has_chinese else "English"
# ---------- Session status display ----------
def render_session_status(
    learning_mode: str,
    weaknesses: Optional[List[str]],
    cognitive_state: Optional[Dict[str, int]],
) -> str:
    """
    Render the session status as a small Markdown snippet.

    The snippet has a heading, the learning mode, the cognitive-state
    description and either the last three recorded difficulties or a
    "none yet" placeholder. Lines are joined with newlines.
    """
    report: List[str] = [
        "### Session status\n",
        f"- Learning mode: **{learning_mode}**",
        f"- Cognitive state: {describe_cognitive_state(cognitive_state)}",
    ]
    if not weaknesses:
        report.append("- Recent difficulties: *(none yet)*")
    else:
        report.append("- Recent difficulties (last 3):")
        report.extend(f"  - {item}" for item in weaknesses[-3:])
    return "\n".join(report)
+# ---------- Same Question Check helpers ----------
+def _normalize_text(text: str) -> str:
+    """
+    将文本转为小写、去除标点和多余空格，用于简单相似度计算。
+    """
+    text = text.lower().strip()
+    text = re.sub(r"[^\w\s]", " ", text)
+    text = re.sub(r"\s+", " ", text)
+    return text
+def _jaccard_similarity(a: str, b: str) -> float:
+    tokens_a = set(a.split())
+    tokens_b = set(b.split())
+    if not tokens_a or not tokens_b:
+        return 0.0
+    return len(tokens_a & tokens_b) / len(tokens_a | tokens_b)
def cosine_similarity(a: List[float], b: List[float]) -> float:
    """
    Cosine similarity of two equal-length vectors.

    Returns 0.0 when either vector is empty, when the lengths differ, or
    when either vector has zero magnitude.
    """
    if not a or not b or len(a) != len(b):
        return 0.0

    magnitude_a = math.sqrt(sum(component * component for component in a))
    magnitude_b = math.sqrt(sum(component * component for component in b))
    if magnitude_a == 0 or magnitude_b == 0:
        return 0.0

    inner_product = sum(left * right for left, right in zip(a, b))
    return inner_product / (magnitude_a * magnitude_b)
def get_embedding(text: str) -> Optional[List[float]]:
    """
    Encode ``text`` as a vector via the embeddings endpoint of ``client``.

    The request uses ``EMBEDDING_MODEL`` with ``text`` as the only input and
    returns the embedding of the first result. Any exception is printed
    (so it shows up in the Space log for troubleshooting) and ``None`` is
    returned instead.
    """
    try:
        response = client.embeddings.create(model=EMBEDDING_MODEL, input=[text])
        vector = response.data[0].embedding
    except Exception as e:
        # Written to the Space log to make failures easy to diagnose.
        print(f"[Embedding error] {repr(e)}")
        return None
    return vector
def find_similar_past_question(
    message: str,
    history: List[Tuple[str, str]],
    jaccard_threshold: float = 0.65,
    embedding_threshold: float = 0.85,
    max_turns_to_check: int = 6,
) -> Optional[Tuple[str, str, float]]:
    """
    Look for an earlier question in the recent history that resembles ``message``.

    Only the latest ``max_turns_to_check`` turns are examined, newest first.
    Detection has two stages:

    1. Token-level Jaccard similarity on normalized text. An identical
       normalized question is returned immediately with score 1.0; otherwise
       the best-scoring turn is returned if it reaches ``jaccard_threshold``.
    2. Cosine similarity of embeddings, used only when stage 1 found
       nothing. The best-scoring turn is returned if it reaches
       ``embedding_threshold``.

    Returns ``(past_question, past_answer, similarity_score)`` or ``None``.
    On equal scores the more recent turn wins.
    """
    query = _normalize_text(message)
    if not query:
        return None

    # Newest turns first, capped at the requested window size.
    newest_first = list(reversed(history))[: max(max_turns_to_check, 0)]

    # Stage 1: Jaccard on normalized text.
    top_score = 0.0
    top_turn: Optional[Tuple[str, str]] = None
    for past_question, past_answer in newest_first:
        candidate = _normalize_text(past_question)
        if not candidate:
            continue
        if candidate == query:
            return past_question, past_answer, 1.0
        score = _jaccard_similarity(query, candidate)
        if score > top_score:
            top_score, top_turn = score, (past_question, past_answer)
    if top_turn is not None and top_score >= jaccard_threshold:
        return top_turn[0], top_turn[1], top_score

    # Stage 2: semantic similarity via embeddings.
    if not history:
        return None
    query_vector = get_embedding(message)
    if query_vector is None:
        return None

    top_score = 0.0
    top_turn = None
    for past_question, past_answer in newest_first:
        past_vector = get_embedding(past_question)
        if past_vector is None:
            continue
        score = cosine_similarity(query_vector, past_vector)
        if score > top_score:
            top_score, top_turn = score, (past_question, past_answer)
    if top_turn is not None and top_score >= embedding_threshold:
        return top_turn[0], top_turn[1], top_score
    return None
# ---------- Build messages ----------
def build_messages(
    user_message: str,
    history: List[Tuple[str, str]],
    language_preference: str,
    learning_mode: str,
    doc_type: str,
    course_outline: Optional[List[str]],
    weaknesses: Optional[List[str]],
    cognitive_state: Optional[Dict[str, int]],
) -> List[Dict[str, str]]:
    """
    Assemble the chat message list for one turn.

    The list starts with a series of system messages, in this order: the
    base system prompt, the learning-mode instruction (if the mode is known),
    the syllabus context (``course_outline`` or the default topics), a note
    about a non-syllabus upload, the student's last five difficulties,
    guidance derived from the cognitive state, an explicit language
    instruction for "English" / "中文", and the session memory summary.
    Then come the previous turns (assistant replies that are ``None`` are
    skipped) and finally the current ``user_message``.
    """
    messages: List[Dict[str, str]] = []

    def system(content: str) -> None:
        messages.append({"role": "system", "content": content})

    system(CLARE_SYSTEM_PROMPT)

    # Learning mode
    if learning_mode in LEARNING_MODE_INSTRUCTIONS:
        mode_instruction = LEARNING_MODE_INSTRUCTIONS[learning_mode]
        system(f"Current learning mode: {learning_mode}. {mode_instruction}")

    # Course outline
    topics = course_outline or DEFAULT_COURSE_TOPICS
    system(
        "Here is the course syllabus context. Use this to stay aligned "
        "with the course topics when answering: "
        + " | ".join(topics)
    )

    # Hint about the type of uploaded file
    if doc_type and doc_type != "Syllabus":
        system(
            f"The student also uploaded a {doc_type} document as supporting material. "
            "You do not see the full content directly, but you may assume it is relevant "
            "to the same course and topics."
        )

    # Hint about the student's weak spots
    if weaknesses:
        system(
            "The student seems to struggle with the following questions or topics. "
            "Be extra gentle and clear when these appear: " + " | ".join(weaknesses[-5:])
        )

    # Hint about the cognitive state
    if cognitive_state:
        confusion = cognitive_state.get("confusion", 0)
        mastery = cognitive_state.get("mastery", 0)
        if confusion >= 2 and confusion >= mastery + 1:
            guidance = (
                "The student is currently under HIGH cognitive load. "
                "Use simpler language, shorter steps, and more concrete examples. "
                "Avoid long derivations in a single answer, and check understanding "
                "frequently."
            )
        elif mastery >= 2 and mastery >= confusion + 1:
            guidance = (
                "The student seems comfortable with the material. "
                "You may increase difficulty slightly, introduce deeper follow-up "
                "questions, and connect concepts across topics."
            )
        else:
            guidance = (
                "The student's cognitive state is mixed or uncertain. "
                "Keep explanations clear and moderately paced, and probe for "
                "understanding with short questions."
            )
        system(guidance)

    # Language preference; any other value adds no instruction
    language_notes = {
        "English": "Please answer in English.",
        "中文": "请用中文回答学生的问题。",
    }
    if language_preference in language_notes:
        system(language_notes[language_preference])

    # In-session memory summary
    session_memory_text = build_session_memory_summary(
        history=history,
        weaknesses=weaknesses,
        cognitive_state=cognitive_state,
    )
    system(
        "Here is a short summary of this session's memory (only within the current chat; "
        "it is not persisted across sessions). Use it to stay consistent with the "
        "student's previous questions, difficulties, and cognitive state: "
        + session_memory_text
    )

    # Previous turns
    for past_user, past_assistant in history:
        messages.append({"role": "user", "content": past_user})
        if past_assistant is not None:
            messages.append({"role": "assistant", "content": past_assistant})

    # Current input
    messages.append({"role": "user", "content": user_message})
    return messages
def chat_with_clare(
    message: str,
    history: List[Tuple[str, str]],
    model_name: str,
    language_preference: str,
    learning_mode: str,
    doc_type: str,
    course_outline: Optional[List[str]],
    weaknesses: Optional[List[str]],
    cognitive_state: Optional[Dict[str, int]],
) -> Tuple[str, List[Tuple[str, str]]]:
    """
    Send one student message to the chat model and extend the history.

    The prompt is built with ``build_messages`` and sent through
    ``client.chat.completions.create`` using ``model_name`` (or
    ``DEFAULT_MODEL`` when it is empty) at temperature 0.5. If building the
    prompt or calling the model raises, the answer becomes a
    "⚠️ Error talking to the model: ..." text instead.

    Returns:
        ``(answer, new_history)`` where ``new_history`` is a new list made of
        ``history`` plus the ``(message, answer)`` pair; the list passed in
        is not modified.
    """
    try:
        prompt = build_messages(
            user_message=message,
            history=history,
            language_preference=language_preference,
            learning_mode=learning_mode,
            doc_type=doc_type,
            course_outline=course_outline,
            weaknesses=weaknesses,
            cognitive_state=cognitive_state,
        )
        completion = client.chat.completions.create(
            model=model_name or DEFAULT_MODEL,
            messages=prompt,
            temperature=0.5,
        )
        answer = completion.choices[0].message.content
    except Exception as e:
        answer = f"⚠️ Error talking to the model: {e}"

    updated_history = history + [(message, answer)]
    return answer, updated_history
# ---------- Export the conversation as Markdown ----------
def export_conversation(
    history: List[Tuple[str, str]],
    course_outline: List[str],
    learning_mode_val: str,
    weaknesses: List[str],
    cognitive_state: Optional[Dict[str, int]],
) -> str:
    """
    Render the session as a Markdown document.

    The document starts with a header block (learning mode, the first five
    course topics, a cognitive-state snapshot and up to the last five
    recorded difficulties), followed by every turn as a
    "**Student:** / **Clare:**" pair separated by horizontal rules.

    Args:
        history: ``(student_message, assistant_reply)`` pairs; ``None`` is
            treated as an empty conversation.
        course_outline: Course topics. When missing or empty,
            ``DEFAULT_COURSE_TOPICS`` is used, mirroring ``build_messages``.
        learning_mode_val: Name of the active learning mode.
        weaknesses: Recorded difficulties; may be empty or ``None``.
        cognitive_state: Confusion/mastery counters, or ``None``.

    Returns:
        The Markdown text.
    """
    # Same fallback as build_messages, so the export shows the topics the
    # model was actually given and a missing outline no longer raises.
    topics = course_outline if course_outline else DEFAULT_COURSE_TOPICS

    lines: List[str] = [
        "# Clare – Conversation Export\n",
        f"- Learning mode: **{learning_mode_val}**\n",
        "- Course topics (short): " + "; ".join(list(topics)[:5]) + "\n",
        f"- Cognitive state snapshot: {describe_cognitive_state(cognitive_state)}\n",
    ]
    if weaknesses:
        lines.append("- Observed student difficulties:\n")
        lines.extend(f"  - {w}\n" for w in weaknesses[-5:])
    lines.append("\n---\n\n")

    for user, assistant in history or []:
        # A turn can have no reply yet; do not print the literal "None".
        reply = assistant if assistant is not None else "*(no response)*"
        lines.append(f"**Student:** {user}\n\n")
        lines.append(f"**Clare:** {reply}\n\n")
        lines.append("---\n\n")
    return "".join(lines)
# ---------- Generate a 3-question quiz ----------
def generate_quiz_from_history(
    history: List[Tuple[str, str]],
    course_outline: List[str],
    weaknesses: List[str],
    cognitive_state: Optional[Dict[str, int]],
    model_name: str,
    language_preference: str,
) -> str:
    """
    Ask the model for a short concept quiz based on the recent conversation.

    The prompt contains the base system prompt, the quiz instructions, the
    first eight course topics, up to the last five recorded difficulties,
    the cognitive-state description and the last eight turns of the
    conversation.

    Args:
        history: ``(student_message, assistant_reply)`` pairs; ``None`` is
            treated as empty and missing replies are rendered as empty text.
        course_outline: Course topics. When missing or empty,
            ``DEFAULT_COURSE_TOPICS`` is used, mirroring ``build_messages``.
        weaknesses: Recorded difficulties; "N/A" is sent when there are none.
        cognitive_state: Confusion/mastery counters, or ``None``.
        model_name: Model to use; falls back to ``DEFAULT_MODEL`` when empty.
        language_preference: "中文" or "English" adds an explicit language
            instruction; any other value adds none.

    Returns:
        The quiz text from the model, or a "⚠️ Error generating quiz: ..."
        text if the request raises.
    """
    turns = []
    for user, assistant in (history or [])[-8:]:
        reply = assistant if assistant is not None else ""
        turns.append(f"Student: {user}\nClare: {reply}\n")
    conversation_text = "".join(turns)

    # Same fallback as build_messages; also avoids slicing a missing outline.
    topics = course_outline if course_outline else DEFAULT_COURSE_TOPICS
    topics_text = "; ".join(list(topics)[:8])
    weakness_text = "; ".join(weaknesses[-5:]) if weaknesses else "N/A"
    cog_text = describe_cognitive_state(cognitive_state)

    messages = [
        {"role": "system", "content": CLARE_SYSTEM_PROMPT},
        {
            "role": "system",
            "content": (
                "Now your task is to create a **short concept quiz** for the student. "
                "Based on the conversation and course topics, generate **3 questions** "
                "(a mix of multiple-choice and short-answer is fine). After listing the "
                "questions, provide an answer key at the end under a heading 'Answer Key'. "
                "Number the questions Q1, Q2, Q3. Adjust the difficulty according to the "
                "student's cognitive state."
            ),
        },
        {
            "role": "system",
            "content": f"Course topics: {topics_text}",
        },
        {
            "role": "system",
            "content": f"Student known difficulties: {weakness_text}",
        },
        {
            "role": "system",
            "content": f"Student cognitive state: {cog_text}",
        },
        {
            "role": "user",
            "content": (
                "Here is the recent conversation between you and the student:\n\n"
                + conversation_text
                + "\n\nPlease create the quiz now."
            ),
        },
    ]

    # Honour an explicit language choice in both directions, as
    # build_messages does; previously only "中文" had any effect here.
    if language_preference == "中文":
        messages.append(
            {
                "role": "system",
                "content": "请用中文给出问题和答案。",
            }
        )
    elif language_preference == "English":
        messages.append(
            {"role": "system", "content": "Please answer in English."}
        )

    try:
        response = client.chat.completions.create(
            model=model_name or DEFAULT_MODEL,
            messages=messages,
            temperature=0.5,
        )
        quiz_text = response.choices[0].message.content
    except Exception as e:
        quiz_text = f"⚠️ Error generating quiz: {e}"
    return quiz_text
# ---------- Concept summary (key knowledge points) ----------
def summarize_conversation(
    history: List[Tuple[str, str]],
    course_outline: List[str],
    weaknesses: List[str],
    cognitive_state: Optional[Dict[str, int]],
    model_name: str,
    language_preference: str,
) -> str:
    """
    Ask the model for a concept-only summary of the recent conversation.

    The prompt contains the base system prompt, the summary instructions,
    the first eight course topics, up to the last five recorded
    difficulties, the cognitive-state description and the last ten turns of
    the conversation.

    Args:
        history: ``(student_message, assistant_reply)`` pairs; ``None`` is
            treated as empty and missing replies are rendered as empty text.
        course_outline: Course topics. When missing or empty,
            ``DEFAULT_COURSE_TOPICS`` is used, mirroring ``build_messages``.
        weaknesses: Recorded difficulties; "N/A" is sent when there are none.
        cognitive_state: Confusion/mastery counters, or ``None``.
        model_name: Model to use; falls back to ``DEFAULT_MODEL`` when empty.
        language_preference: "中文" or "English" adds an explicit language
            instruction; any other value adds none.

    Returns:
        The summary text from the model, or a
        "⚠️ Error generating summary: ..." text if the request raises.
    """
    turns = []
    for user, assistant in (history or [])[-10:]:
        reply = assistant if assistant is not None else ""
        turns.append(f"Student: {user}\nClare: {reply}\n")
    conversation_text = "".join(turns)

    # Same fallback as build_messages; also avoids slicing a missing outline.
    topics = course_outline if course_outline else DEFAULT_COURSE_TOPICS
    topics_text = "; ".join(list(topics)[:8])
    weakness_text = "; ".join(weaknesses[-5:]) if weaknesses else "N/A"
    cog_text = describe_cognitive_state(cognitive_state)

    messages = [
        {"role": "system", "content": CLARE_SYSTEM_PROMPT},
        {
            "role": "system",
            "content": (
                "Your task now is to produce a **concept-only summary** of this tutoring "
                "session. Only include knowledge points, definitions, key formulas, "
                "examples, and main takeaways. Do **not** include any personal remarks, "
                "jokes, or off-topic chat. Write in clear bullet points. This summary "
                "should be suitable for the student to paste into their study notes. "
                "Take into account what the student struggled with and their cognitive state."
            ),
        },
        {
            "role": "system",
            "content": f"Course topics context: {topics_text}",
        },
        {
            "role": "system",
            "content": f"Student known difficulties: {weakness_text}",
        },
        {
            "role": "system",
            "content": f"Student cognitive state: {cog_text}",
        },
        {
            "role": "user",
            "content": (
                "Here is the recent conversation between you and the student:\n\n"
                + conversation_text
                + "\n\nPlease summarize only the concepts and key ideas learned."
            ),
        },
    ]

    # Honour an explicit language choice in both directions, as
    # build_messages does; previously only "中文" had any effect here.
    if language_preference == "中文":
        messages.append(
            {
                "role": "system",
                "content": "请用中文给出要点总结，只保留知识点和结论，使用条目符号。"
            }
        )
    elif language_preference == "English":
        messages.append(
            {"role": "system", "content": "Please answer in English."}
        )

    try:
        response = client.chat.completions.create(
            model=model_name or DEFAULT_MODEL,
            messages=messages,
            temperature=0.4,
        )
        summary_text = response.choices[0].message.content
    except Exception as e:
        summary_text = f"⚠️ Error generating summary: {e}"
    return summary_text