SarahXia0405 committed on
Commit
ed85c71
·
verified ·
1 Parent(s): 09fcf0d

Update api/server.py

Browse files
Files changed (1) hide show
  1. api/server.py +57 -88
api/server.py CHANGED
@@ -128,15 +128,40 @@ def _get_session(user_id: str) -> Dict[str, Any]:
128
  "course_outline": DEFAULT_COURSE_TOPICS,
129
  "rag_chunks": list(MODULE10_CHUNKS_CACHE),
130
  "model_name": DEFAULT_MODEL,
131
-
132
- # ✅ NEW: keep track of uploaded files and their chunks
133
- "uploaded_chunks_by_file": {}, # Dict[str, List[Dict[str, Any]]]
134
- "last_uploaded_filename": None, # Optional[str]
135
- "uploaded_filenames": [], # List[str]
136
  }
 
 
 
137
  return SESSIONS[user_id]
138
 
139
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
140
  # ----------------------------
141
  # Warmup
142
  # ----------------------------
@@ -373,55 +398,6 @@ class FeedbackReq(BaseModel):
373
  timestamp_ms: Optional[int] = None
374
 
375
 
376
- # ----------------------------
377
- # Helpers: prefer last uploaded file when user asks "read/summarize uploaded file"
378
- # ----------------------------
379
- def _wants_last_uploaded_file(msg: str) -> bool:
380
- t = (msg or "").lower()
381
- triggers = [
382
- "summarize the uploaded file",
383
- "summarise the uploaded file",
384
- "summarize uploaded file",
385
- "uploaded file",
386
- "read this",
387
- "can you see that file",
388
- "can you see the file",
389
- "read the file",
390
- "summarize the file i uploaded",
391
- "summarize the document i uploaded",
392
- "summarize the document",
393
- "总结我上传的文件",
394
- "总结上传的文件",
395
- "读一下我上传的",
396
- "能看到我上传的文件吗",
397
- "看一下我上传的文件",
398
- ]
399
- return any(k in t for k in triggers)
400
-
401
-
402
- def _concat_chunks_text(chunks: List[Dict[str, Any]], max_chars: int = 2000) -> str:
403
- if not chunks:
404
- return ""
405
- out: List[str] = []
406
- total = 0
407
- for c in chunks:
408
- # common keys: "text" / "content" / "chunk"
409
- txt = c.get("text") or c.get("content") or c.get("chunk") or ""
410
- txt = (txt or "").strip()
411
- if not txt:
412
- continue
413
- remain = max_chars - total
414
- if remain <= 0:
415
- break
416
- if len(txt) > remain:
417
- txt = txt[:remain]
418
- out.append(txt)
419
- total += len(txt) + 1
420
- if total >= max_chars:
421
- break
422
- return "\n\n".join(out)
423
-
424
-
425
  # ----------------------------
426
  # API Routes
427
  # ----------------------------
@@ -469,31 +445,17 @@ def chat(req: ChatReq):
469
  sess["cognitive_state"] = update_cognitive_state_from_message(msg, sess["cognitive_state"])
470
  marks_ms["cognitive_update_done"] = (time.time() - t0) * 1000.0
471
 
472
- # RAG selection:
473
- # If user explicitly asks to read/summarize the uploaded file, prefer last uploaded file chunks
474
- rag_context_text, rag_used_chunks = "", []
475
- try:
476
- if _wants_last_uploaded_file(msg):
477
- last_fn = sess.get("last_uploaded_filename")
478
- by_file = sess.get("uploaded_chunks_by_file") or {}
479
- last_chunks = by_file.get(last_fn) if last_fn else None
480
- if last_chunks:
481
- rag_context_text = _concat_chunks_text(last_chunks, max_chars=2000)
482
- rag_used_chunks = list(last_chunks)[:6] # keep refs small/stable
483
- else:
484
- # fallback: if no last upload available, do normal retrieval
485
- rag_context_text, rag_used_chunks = retrieve_relevant_chunks(msg, sess["rag_chunks"])
486
- else:
487
- if len(msg) < 20 and ("?" not in msg):
488
- rag_context_text, rag_used_chunks = "", []
489
- else:
490
- rag_context_text, rag_used_chunks = retrieve_relevant_chunks(msg, sess["rag_chunks"])
491
- except Exception as e:
492
- print(f"[chat] rag error: {repr(e)}")
493
  rag_context_text, rag_used_chunks = "", []
494
-
 
495
  marks_ms["rag_retrieve_done"] = (time.time() - t0) * 1000.0
496
 
 
 
 
 
 
497
  try:
498
  answer, new_history, run_id = chat_with_clare(
499
  message=msg,
@@ -537,7 +499,7 @@ def chat(req: ChatReq):
537
  for c in (rag_used_chunks or [])
538
  ]
539
 
540
- rag_context_chars = len(rag_context_text or "")
541
  rag_used_chunks_count = len(rag_used_chunks or [])
542
  history_len = len(sess["history"])
543
 
@@ -592,6 +554,11 @@ def quiz_start(req: QuizStartReq):
592
  "Module 10 quiz", sess["rag_chunks"]
593
  )
594
 
 
 
 
 
 
595
  try:
596
  answer, new_history, run_id = chat_with_clare(
597
  message=quiz_instruction,
@@ -689,19 +656,21 @@ async def upload(
689
  print(f"[upload] rag build error: {repr(e)}")
690
  new_chunks = []
691
 
692
- # ✅ NEW: remember this upload as "last uploaded file"
693
  try:
694
- sess["uploaded_chunks_by_file"] = sess.get("uploaded_chunks_by_file") or {}
695
- sess["uploaded_chunks_by_file"][safe_name] = new_chunks
696
- sess["last_uploaded_filename"] = safe_name
697
- lst = sess.get("uploaded_filenames") or []
698
- if safe_name not in lst:
699
- lst.append(safe_name)
700
- sess["uploaded_filenames"] = lst
 
 
701
  except Exception as e:
702
- print(f"[upload] session remember failed: {repr(e)}")
703
 
704
- status_md = f"✅ Loaded base reading + uploaded {doc_type} file: {safe_name} (chunks={len(new_chunks)})."
705
 
706
  _log_event_to_langsmith(
707
  {
@@ -718,7 +687,7 @@ async def upload(
718
  }
719
  )
720
 
721
- return {"ok": True, "added_chunks": len(new_chunks), "status_md": status_md, "filename": safe_name}
722
 
723
 
724
  @app.post("/api/feedback")
 
128
  "course_outline": DEFAULT_COURSE_TOPICS,
129
  "rag_chunks": list(MODULE10_CHUNKS_CACHE),
130
  "model_name": DEFAULT_MODEL,
131
+ "uploaded_files": [], # ✅ NEW: track uploaded file metadata for prompting/debug
 
 
 
 
132
  }
133
+ # ✅ NEW: backfill for existing sessions created before this change
134
+ if "uploaded_files" not in SESSIONS[user_id]:
135
+ SESSIONS[user_id]["uploaded_files"] = []
136
  return SESSIONS[user_id]
137
 
138
 
139
+ # ✅ NEW: helper to build a deterministic “what files are loaded” hint for the LLM
140
+ def _build_upload_hint(sess: Dict[str, Any]) -> str:
141
+ files = sess.get("uploaded_files") or []
142
+ if not files:
143
+ # Still mention that base reading is available
144
+ return (
145
+ "Files available to you in this session:\n"
146
+ "- Base reading: module10_responsible_ai.pdf (pre-loaded)\n"
147
+ "If the student asks about an uploaded file but none exist, ask them to upload."
148
+ )
149
+ lines = [
150
+ "Files available to you in this session:",
151
+ "- Base reading: module10_responsible_ai.pdf (pre-loaded)",
152
+ ]
153
+ # show last few only to keep prompt small
154
+ for f in files[-5:]:
155
+ fn = (f.get("filename") or "").strip()
156
+ dt = (f.get("doc_type") or "").strip()
157
+ chunks = f.get("added_chunks")
158
+ lines.append(f"- Uploaded: {fn} (doc_type={dt}, added_chunks={chunks})")
159
+ lines.append(
160
+ "When the student asks to summarize/read 'the uploaded file', interpret it as the MOST RECENT uploaded file unless specified."
161
+ )
162
+ return "\n".join(lines)
163
+
164
+
165
  # ----------------------------
166
  # Warmup
167
  # ----------------------------
 
398
  timestamp_ms: Optional[int] = None
399
 
400
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
401
  # ----------------------------
402
  # API Routes
403
  # ----------------------------
 
445
  sess["cognitive_state"] = update_cognitive_state_from_message(msg, sess["cognitive_state"])
446
  marks_ms["cognitive_update_done"] = (time.time() - t0) * 1000.0
447
 
448
+ if len(msg) < 20 and ("?" not in msg):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
449
  rag_context_text, rag_used_chunks = "", []
450
+ else:
451
+ rag_context_text, rag_used_chunks = retrieve_relevant_chunks(msg, sess["rag_chunks"])
452
  marks_ms["rag_retrieve_done"] = (time.time() - t0) * 1000.0
453
 
454
+ # ✅ NEW: prepend deterministic upload/file-state hint so the model never says “no file”
455
+ upload_hint = _build_upload_hint(sess)
456
+ if upload_hint:
457
+ rag_context_text = (upload_hint + "\n\n---\n\n" + (rag_context_text or "")).strip()
458
+
459
  try:
460
  answer, new_history, run_id = chat_with_clare(
461
  message=msg,
 
499
  for c in (rag_used_chunks or [])
500
  ]
501
 
502
+ rag_context_chars = len((rag_context_text or ""))
503
  rag_used_chunks_count = len(rag_used_chunks or [])
504
  history_len = len(sess["history"])
505
 
 
554
  "Module 10 quiz", sess["rag_chunks"]
555
  )
556
 
557
+ # ✅ NEW: same hint for quiz start as well
558
+ upload_hint = _build_upload_hint(sess)
559
+ if upload_hint:
560
+ rag_context_text = (upload_hint + "\n\n---\n\n" + (rag_context_text or "")).strip()
561
+
562
  try:
563
  answer, new_history, run_id = chat_with_clare(
564
  message=quiz_instruction,
 
656
  print(f"[upload] rag build error: {repr(e)}")
657
  new_chunks = []
658
 
659
+ # ✅ NEW: record upload metadata for prompting/debug
660
  try:
661
+ sess["uploaded_files"] = sess.get("uploaded_files") or []
662
+ sess["uploaded_files"].append(
663
+ {
664
+ "filename": safe_name,
665
+ "doc_type": doc_type,
666
+ "added_chunks": len(new_chunks),
667
+ "ts": int(time.time()),
668
+ }
669
+ )
670
  except Exception as e:
671
+ print(f"[upload] uploaded_files record error: {repr(e)}")
672
 
673
+ status_md = f"✅ Loaded base reading + uploaded {doc_type} file."
674
 
675
  _log_event_to_langsmith(
676
  {
 
687
  }
688
  )
689
 
690
+ return {"ok": True, "added_chunks": len(new_chunks), "status_md": status_md}
691
 
692
 
693
  @app.post("/api/feedback")