AI_Agent_Final_V2

Sleeping

App Files Files Community

SarahXia0405 committed on Dec 31, 2025

Commit

ed85c71

verified ·

1 Parent(s): 09fcf0d

Update api/server.py

Browse files

Files changed (1) hide show

api/server.py +57 -88

api/server.py CHANGED Viewed

@@ -128,15 +128,40 @@ def _get_session(user_id: str) -> Dict[str, Any]:
             "course_outline": DEFAULT_COURSE_TOPICS,
             "rag_chunks": list(MODULE10_CHUNKS_CACHE),
             "model_name": DEFAULT_MODEL,
-            # ✅ NEW: keep track of uploaded files and their chunks
-            "uploaded_chunks_by_file": {},     # Dict[str, List[Dict[str, Any]]]
-            "last_uploaded_filename": None,    # Optional[str]
-            "uploaded_filenames": [],          # List[str]
         }
     return SESSIONS[user_id]
 # ----------------------------
 # Warmup
 # ----------------------------
@@ -373,55 +398,6 @@ class FeedbackReq(BaseModel):
     timestamp_ms: Optional[int] = None
-# ----------------------------
-# Helpers: prefer last uploaded file when user asks "read/summarize uploaded file"
-# ----------------------------
-def _wants_last_uploaded_file(msg: str) -> bool:
-    t = (msg or "").lower()
-    triggers = [
-        "summarize the uploaded file",
-        "summarise the uploaded file",
-        "summarize uploaded file",
-        "uploaded file",
-        "read this",
-        "can you see that file",
-        "can you see the file",
-        "read the file",
-        "summarize the file i uploaded",
-        "summarize the document i uploaded",
-        "summarize the document",
-        "总结我上传的文件",
-        "总结上传的文件",
-        "读一下我上传的",
-        "能看到我上传的文件吗",
-        "看一下我上传的文件",
-    ]
-    return any(k in t for k in triggers)
-def _concat_chunks_text(chunks: List[Dict[str, Any]], max_chars: int = 2000) -> str:
-    if not chunks:
-        return ""
-    out: List[str] = []
-    total = 0
-    for c in chunks:
-        # common keys: "text" / "content" / "chunk"
-        txt = c.get("text") or c.get("content") or c.get("chunk") or ""
-        txt = (txt or "").strip()
-        if not txt:
-            continue
-        remain = max_chars - total
-        if remain <= 0:
-            break
-        if len(txt) > remain:
-            txt = txt[:remain]
-        out.append(txt)
-        total += len(txt) + 1
-        if total >= max_chars:
-            break
-    return "\n\n".join(out)
 # ----------------------------
 # API Routes
 # ----------------------------
@@ -469,31 +445,17 @@ def chat(req: ChatReq):
     sess["cognitive_state"] = update_cognitive_state_from_message(msg, sess["cognitive_state"])
     marks_ms["cognitive_update_done"] = (time.time() - t0) * 1000.0
-    # ✅ RAG selection:
-    # If user explicitly asks to read/summarize the uploaded file, prefer last uploaded file chunks
-    rag_context_text, rag_used_chunks = "", []
-    try:
-        if _wants_last_uploaded_file(msg):
-            last_fn = sess.get("last_uploaded_filename")
-            by_file = sess.get("uploaded_chunks_by_file") or {}
-            last_chunks = by_file.get(last_fn) if last_fn else None
-            if last_chunks:
-                rag_context_text = _concat_chunks_text(last_chunks, max_chars=2000)
-                rag_used_chunks = list(last_chunks)[:6]  # keep refs small/stable
-            else:
-                # fallback: if no last upload available, do normal retrieval
-                rag_context_text, rag_used_chunks = retrieve_relevant_chunks(msg, sess["rag_chunks"])
-        else:
-            if len(msg) < 20 and ("?" not in msg):
-                rag_context_text, rag_used_chunks = "", []
-            else:
-                rag_context_text, rag_used_chunks = retrieve_relevant_chunks(msg, sess["rag_chunks"])
-    except Exception as e:
-        print(f"[chat] rag error: {repr(e)}")
         rag_context_text, rag_used_chunks = "", []
     marks_ms["rag_retrieve_done"] = (time.time() - t0) * 1000.0
     try:
         answer, new_history, run_id = chat_with_clare(
             message=msg,
@@ -537,7 +499,7 @@ def chat(req: ChatReq):
         for c in (rag_used_chunks or [])
     ]
-    rag_context_chars = len(rag_context_text or "")
     rag_used_chunks_count = len(rag_used_chunks or [])
     history_len = len(sess["history"])
@@ -592,6 +554,11 @@ def quiz_start(req: QuizStartReq):
         "Module 10 quiz", sess["rag_chunks"]
     )
     try:
         answer, new_history, run_id = chat_with_clare(
             message=quiz_instruction,
@@ -689,19 +656,21 @@ async def upload(
         print(f"[upload] rag build error: {repr(e)}")
         new_chunks = []
-    # ✅ NEW: remember this upload as "last uploaded file"
     try:
-        sess["uploaded_chunks_by_file"] = sess.get("uploaded_chunks_by_file") or {}
-        sess["uploaded_chunks_by_file"][safe_name] = new_chunks
-        sess["last_uploaded_filename"] = safe_name
-        lst = sess.get("uploaded_filenames") or []
-        if safe_name not in lst:
-            lst.append(safe_name)
-        sess["uploaded_filenames"] = lst
     except Exception as e:
-        print(f"[upload] session remember failed: {repr(e)}")
-    status_md = f"✅ Loaded base reading + uploaded {doc_type} file: {safe_name} (chunks={len(new_chunks)})."
     _log_event_to_langsmith(
         {
@@ -718,7 +687,7 @@ async def upload(
         }
     )
-    return {"ok": True, "added_chunks": len(new_chunks), "status_md": status_md, "filename": safe_name}
 @app.post("/api/feedback")

             "course_outline": DEFAULT_COURSE_TOPICS,
             "rag_chunks": list(MODULE10_CHUNKS_CACHE),
             "model_name": DEFAULT_MODEL,
+            "uploaded_files": [],  # ✅ NEW: track uploaded file metadata for prompting/debug
         }
+    # ✅ NEW: backfill for existing sessions created before this change
+    if "uploaded_files" not in SESSIONS[user_id]:
+        SESSIONS[user_id]["uploaded_files"] = []
     return SESSIONS[user_id]
+# ✅ NEW: helper to build a deterministic “what files are loaded” hint for the LLM
+def _build_upload_hint(sess: Dict[str, Any]) -> str:
+    files = sess.get("uploaded_files") or []
+    if not files:
+        # Still mention that base reading is available
+        return (
+            "Files available to you in this session:\n"
+            "- Base reading: module10_responsible_ai.pdf (pre-loaded)\n"
+            "If the student asks about an uploaded file but none exist, ask them to upload."
+        )
+    lines = [
+        "Files available to you in this session:",
+        "- Base reading: module10_responsible_ai.pdf (pre-loaded)",
+    ]
+    # show last few only to keep prompt small
+    for f in files[-5:]:
+        fn = (f.get("filename") or "").strip()
+        dt = (f.get("doc_type") or "").strip()
+        chunks = f.get("added_chunks")
+        lines.append(f"- Uploaded: {fn} (doc_type={dt}, added_chunks={chunks})")
+    lines.append(
+        "When the student asks to summarize/read 'the uploaded file', interpret it as the MOST RECENT uploaded file unless specified."
+    )
+    return "\n".join(lines)
 # ----------------------------
 # Warmup
 # ----------------------------
     timestamp_ms: Optional[int] = None
 # ----------------------------
 # API Routes
 # ----------------------------
     sess["cognitive_state"] = update_cognitive_state_from_message(msg, sess["cognitive_state"])
     marks_ms["cognitive_update_done"] = (time.time() - t0) * 1000.0
+    if len(msg) < 20 and ("?" not in msg):
         rag_context_text, rag_used_chunks = "", []
+    else:
+        rag_context_text, rag_used_chunks = retrieve_relevant_chunks(msg, sess["rag_chunks"])
     marks_ms["rag_retrieve_done"] = (time.time() - t0) * 1000.0
+    # ✅ NEW: prepend deterministic upload/file-state hint so the model never says “no file”
+    upload_hint = _build_upload_hint(sess)
+    if upload_hint:
+        rag_context_text = (upload_hint + "\n\n---\n\n" + (rag_context_text or "")).strip()
     try:
         answer, new_history, run_id = chat_with_clare(
             message=msg,
         for c in (rag_used_chunks or [])
     ]
+    rag_context_chars = len((rag_context_text or ""))
     rag_used_chunks_count = len(rag_used_chunks or [])
     history_len = len(sess["history"])
         "Module 10 quiz", sess["rag_chunks"]
     )
+    # ✅ NEW: same hint for quiz start as well
+    upload_hint = _build_upload_hint(sess)
+    if upload_hint:
+        rag_context_text = (upload_hint + "\n\n---\n\n" + (rag_context_text or "")).strip()
     try:
         answer, new_history, run_id = chat_with_clare(
             message=quiz_instruction,
         print(f"[upload] rag build error: {repr(e)}")
         new_chunks = []
+    # ✅ NEW: record upload metadata for prompting/debug
     try:
+        sess["uploaded_files"] = sess.get("uploaded_files") or []
+        sess["uploaded_files"].append(
+            {
+                "filename": safe_name,
+                "doc_type": doc_type,
+                "added_chunks": len(new_chunks),
+                "ts": int(time.time()),
+            }
+        )
     except Exception as e:
+        print(f"[upload] uploaded_files record error: {repr(e)}")
+    status_md = f"✅ Loaded base reading + uploaded {doc_type} file."
     _log_event_to_langsmith(
         {
         }
     )
+    return {"ok": True, "added_chunks": len(new_chunks), "status_md": status_md}
 @app.post("/api/feedback")