Spaces:
Sleeping
Sleeping
Update api/server.py
Browse files- api/server.py +59 -6
api/server.py
CHANGED
|
@@ -391,7 +391,6 @@ def login(req: LoginReq):
|
|
| 391 |
sess["name"] = name
|
| 392 |
return {"ok": True, "user": {"name": name, "user_id": user_id}}
|
| 393 |
|
| 394 |
-
|
| 395 |
@app.post("/api/chat")
|
| 396 |
def chat(req: ChatReq):
|
| 397 |
user_id = (req.user_id or "").strip()
|
|
@@ -412,6 +411,37 @@ def chat(req: ChatReq):
|
|
| 412 |
"run_id": None,
|
| 413 |
}
|
| 414 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 415 |
t0 = time.time()
|
| 416 |
marks_ms: Dict[str, float] = {"start": 0.0}
|
| 417 |
|
|
@@ -424,10 +454,33 @@ def chat(req: ChatReq):
|
|
| 424 |
sess["cognitive_state"] = update_cognitive_state_from_message(msg, sess["cognitive_state"])
|
| 425 |
marks_ms["cognitive_update_done"] = (time.time() - t0) * 1000.0
|
| 426 |
|
| 427 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 428 |
rag_context_text, rag_used_chunks = "", []
|
| 429 |
-
|
| 430 |
-
rag_context_text, rag_used_chunks = retrieve_relevant_chunks(msg, sess["rag_chunks"])
|
| 431 |
marks_ms["rag_retrieve_done"] = (time.time() - t0) * 1000.0
|
| 432 |
|
| 433 |
try:
|
|
@@ -496,7 +549,7 @@ def chat(req: ChatReq):
|
|
| 496 |
"learning_mode": req.learning_mode,
|
| 497 |
"doc_type": req.doc_type,
|
| 498 |
"refs": refs,
|
| 499 |
-
"run_id": run_id,
|
| 500 |
}
|
| 501 |
)
|
| 502 |
|
|
@@ -507,7 +560,7 @@ def chat(req: ChatReq):
|
|
| 507 |
),
|
| 508 |
"refs": refs,
|
| 509 |
"latency_ms": total_ms,
|
| 510 |
-
"run_id": run_id,
|
| 511 |
}
|
| 512 |
|
| 513 |
|
|
|
|
| 391 |
sess["name"] = name
|
| 392 |
return {"ok": True, "user": {"name": name, "user_id": user_id}}
|
| 393 |
|
|
|
|
| 394 |
@app.post("/api/chat")
|
| 395 |
def chat(req: ChatReq):
|
| 396 |
user_id = (req.user_id or "").strip()
|
|
|
|
| 411 |
"run_id": None,
|
| 412 |
}
|
| 413 |
|
| 414 |
+
# ----------------------------
|
| 415 |
+
# RAG query normalization (short file-intent prompts)
|
| 416 |
+
# ----------------------------
|
| 417 |
+
def _looks_like_file_request(text: str) -> bool:
|
| 418 |
+
t = (text or "").strip().lower()
|
| 419 |
+
if not t:
|
| 420 |
+
return False
|
| 421 |
+
triggers = [
|
| 422 |
+
"read this",
|
| 423 |
+
"summarize",
|
| 424 |
+
"summary",
|
| 425 |
+
"can you see",
|
| 426 |
+
"see that file",
|
| 427 |
+
"see the file",
|
| 428 |
+
"that file",
|
| 429 |
+
"this file",
|
| 430 |
+
"the file",
|
| 431 |
+
"attached",
|
| 432 |
+
"attachment",
|
| 433 |
+
"upload",
|
| 434 |
+
"uploaded",
|
| 435 |
+
"document",
|
| 436 |
+
"pdf",
|
| 437 |
+
"ppt",
|
| 438 |
+
"slides",
|
| 439 |
+
"docx",
|
| 440 |
+
"analyze",
|
| 441 |
+
"explain this doc",
|
| 442 |
+
]
|
| 443 |
+
return any(k in t for k in triggers)
|
| 444 |
+
|
| 445 |
t0 = time.time()
|
| 446 |
marks_ms: Dict[str, float] = {"start": 0.0}
|
| 447 |
|
|
|
|
| 454 |
sess["cognitive_state"] = update_cognitive_state_from_message(msg, sess["cognitive_state"])
|
| 455 |
marks_ms["cognitive_update_done"] = (time.time() - t0) * 1000.0
|
| 456 |
|
| 457 |
+
# ✅ Key fix:
|
| 458 |
+
# - DO NOT gate RAG purely by message length.
|
| 459 |
+
# - For very short generic messages (e.g. "hi"), skip to save latency.
|
| 460 |
+
# - For short file-intent messages (e.g. "Read this"), force a better retrieval query.
|
| 461 |
+
rag_context_text, rag_used_chunks = "", []
|
| 462 |
+
try:
|
| 463 |
+
chunks = sess.get("rag_chunks") or []
|
| 464 |
+
has_chunks = len(chunks) > 0
|
| 465 |
+
|
| 466 |
+
if has_chunks:
|
| 467 |
+
is_file_intent = _looks_like_file_request(msg)
|
| 468 |
+
is_too_short_generic = (len(msg) < 8) and (not is_file_intent)
|
| 469 |
+
|
| 470 |
+
if is_too_short_generic:
|
| 471 |
+
rag_context_text, rag_used_chunks = "", []
|
| 472 |
+
else:
|
| 473 |
+
retrieval_query = msg
|
| 474 |
+
if is_file_intent:
|
| 475 |
+
# Make retrieval robust even when user message is vague.
|
| 476 |
+
# Include doc_type to bias retrieval toward recently uploaded material.
|
| 477 |
+
retrieval_query = f"uploaded document ({req.doc_type}) key content and relevant excerpts for: {msg}"
|
| 478 |
+
|
| 479 |
+
rag_context_text, rag_used_chunks = retrieve_relevant_chunks(retrieval_query, chunks)
|
| 480 |
+
except Exception as e:
|
| 481 |
+
print(f"[chat] rag retrieve error: {repr(e)}")
|
| 482 |
rag_context_text, rag_used_chunks = "", []
|
| 483 |
+
|
|
|
|
| 484 |
marks_ms["rag_retrieve_done"] = (time.time() - t0) * 1000.0
|
| 485 |
|
| 486 |
try:
|
|
|
|
| 549 |
"learning_mode": req.learning_mode,
|
| 550 |
"doc_type": req.doc_type,
|
| 551 |
"refs": refs,
|
| 552 |
+
"run_id": run_id,
|
| 553 |
}
|
| 554 |
)
|
| 555 |
|
|
|
|
| 560 |
),
|
| 561 |
"refs": refs,
|
| 562 |
"latency_ms": total_ms,
|
| 563 |
+
"run_id": run_id,
|
| 564 |
}
|
| 565 |
|
| 566 |
|