Spaces:
Sleeping
Sleeping
Update api/server.py
Browse files- api/server.py +128 -37
api/server.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
# api/server.py
|
| 2 |
import os
|
| 3 |
import time
|
| 4 |
-
from typing import Dict
|
| 5 |
|
| 6 |
from fastapi import FastAPI, UploadFile, File, Form, Request
|
| 7 |
from fastapi.responses import FileResponse, JSONResponse
|
|
@@ -22,6 +22,58 @@ from api.clare_core import (
|
|
| 22 |
summarize_conversation,
|
| 23 |
)
|
| 24 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
# ----------------------------
|
| 26 |
# Paths / Constants
|
| 27 |
# ----------------------------
|
|
@@ -40,7 +92,6 @@ WEB_ASSETS = os.path.join(WEB_DIST, "assets")
|
|
| 40 |
# ----------------------------
|
| 41 |
app = FastAPI(title="Clare API")
|
| 42 |
|
| 43 |
-
# Same-origin for Docker Space doesn't need CORS, but leaving it open helps if you later split FE/BE.
|
| 44 |
app.add_middleware(
|
| 45 |
CORSMiddleware,
|
| 46 |
allow_origins=["*"],
|
|
@@ -52,11 +103,9 @@ app.add_middleware(
|
|
| 52 |
# ----------------------------
|
| 53 |
# Static hosting (Vite build)
|
| 54 |
# ----------------------------
|
| 55 |
-
# Mount /assets so <script src="/assets/..."> works.
|
| 56 |
if os.path.isdir(WEB_ASSETS):
|
| 57 |
app.mount("/assets", StaticFiles(directory=WEB_ASSETS), name="assets")
|
| 58 |
|
| 59 |
-
# Optional: serve other static files in build root (e.g., favicon) under /static
|
| 60 |
if os.path.isdir(WEB_DIST):
|
| 61 |
app.mount("/static", StaticFiles(directory=WEB_DIST), name="static")
|
| 62 |
|
|
@@ -99,11 +148,8 @@ def _get_session(user_id: str) -> Dict:
|
|
| 99 |
"weaknesses": [],
|
| 100 |
"cognitive_state": {"confusion": 0, "mastery": 0},
|
| 101 |
"course_outline": DEFAULT_COURSE_TOPICS,
|
| 102 |
-
# preload base reading
|
| 103 |
"rag_chunks": list(MODULE10_CHUNKS_CACHE),
|
| 104 |
"model_name": DEFAULT_MODEL,
|
| 105 |
-
# ✅ NEW: track last syllabus filename for refs fallback
|
| 106 |
-
"last_syllabus_file": None,
|
| 107 |
}
|
| 108 |
return SESSIONS[user_id]
|
| 109 |
|
|
@@ -135,6 +181,22 @@ class SummaryReq(BaseModel):
|
|
| 135 |
language_preference: str = "Auto"
|
| 136 |
|
| 137 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 138 |
# ----------------------------
|
| 139 |
# API Routes
|
| 140 |
# ----------------------------
|
|
@@ -175,7 +237,6 @@ def chat(req: ChatReq):
|
|
| 175 |
sess["weaknesses"] = update_weaknesses_from_message(msg, sess["weaknesses"])
|
| 176 |
sess["cognitive_state"] = update_cognitive_state_from_message(msg, sess["cognitive_state"])
|
| 177 |
|
| 178 |
-
# RAG
|
| 179 |
rag_context_text, rag_used_chunks = retrieve_relevant_chunks(msg, sess["rag_chunks"])
|
| 180 |
|
| 181 |
start_ts = time.time()
|
|
@@ -204,24 +265,27 @@ def chat(req: ChatReq):
|
|
| 204 |
for c in (rag_used_chunks or [])
|
| 205 |
]
|
| 206 |
|
| 207 |
-
#
|
| 208 |
-
|
| 209 |
-
|
| 210 |
-
|
| 211 |
-
|
| 212 |
-
|
| 213 |
-
|
| 214 |
-
|
| 215 |
-
|
| 216 |
-
|
| 217 |
-
|
| 218 |
-
|
| 219 |
-
|
| 220 |
-
|
| 221 |
-
|
| 222 |
-
|
| 223 |
-
|
| 224 |
-
|
|
|
|
|
|
|
|
|
|
| 225 |
|
| 226 |
return {
|
| 227 |
"reply": answer,
|
|
@@ -249,7 +313,6 @@ async def upload(
|
|
| 249 |
|
| 250 |
sess = _get_session(user_id)
|
| 251 |
|
| 252 |
-
# Save to /tmp (sanitize filename)
|
| 253 |
safe_name = os.path.basename(file.filename).replace("..", "_")
|
| 254 |
tmp_path = os.path.join("/tmp", safe_name)
|
| 255 |
|
|
@@ -257,11 +320,6 @@ async def upload(
|
|
| 257 |
with open(tmp_path, "wb") as f:
|
| 258 |
f.write(content)
|
| 259 |
|
| 260 |
-
# ✅ NEW: remember the latest syllabus filename for refs fallback
|
| 261 |
-
if doc_type == "Syllabus":
|
| 262 |
-
sess["last_syllabus_file"] = os.path.basename(file.filename) or safe_name
|
| 263 |
-
|
| 264 |
-
# Update topics only for syllabus
|
| 265 |
if doc_type == "Syllabus":
|
| 266 |
class _F:
|
| 267 |
pass
|
|
@@ -273,7 +331,6 @@ async def upload(
|
|
| 273 |
except Exception as e:
|
| 274 |
print(f"[upload] syllabus parse error: {repr(e)}")
|
| 275 |
|
| 276 |
-
# Update rag chunks for any doc
|
| 277 |
try:
|
| 278 |
new_chunks = build_rag_chunks_from_file(tmp_path, doc_type) or []
|
| 279 |
sess["rag_chunks"] = (sess["rag_chunks"] or []) + new_chunks
|
|
@@ -285,6 +342,43 @@ async def upload(
|
|
| 285 |
return {"ok": True, "added_chunks": len(new_chunks), "status_md": status_md}
|
| 286 |
|
| 287 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 288 |
@app.post("/api/export")
|
| 289 |
def api_export(req: ExportReq):
|
| 290 |
user_id = (req.user_id or "").strip()
|
|
@@ -323,17 +417,14 @@ def api_summary(req: SummaryReq):
|
|
| 323 |
@app.get("/api/memoryline")
def memoryline(user_id: str):
    # Look up the session for the stripped user_id; the result is discarded.
    _ = _get_session((user_id or "").strip())
    # v1: hard-coding these values is fine; the frontend only renders them.
    return {"next_review_label": "T+7", "progress_pct": 0.4}
|
| 328 |
|
| 329 |
|
| 330 |
# ----------------------------
|
| 331 |
-
# SPA Fallback
|
| 332 |
# ----------------------------
|
| 333 |
-
# If user refreshes /some/route, FE router needs index.html.
|
| 334 |
@app.get("/{full_path:path}")
|
| 335 |
def spa_fallback(full_path: str, request: Request):
|
| 336 |
-
# Do not hijack API/static paths
|
| 337 |
if (
|
| 338 |
full_path.startswith("api/")
|
| 339 |
or full_path.startswith("assets/")
|
|
|
|
| 1 |
# api/server.py
|
| 2 |
import os
|
| 3 |
import time
|
| 4 |
+
from typing import Dict, Any, Optional, List
|
| 5 |
|
| 6 |
from fastapi import FastAPI, UploadFile, File, Form, Request
|
| 7 |
from fastapi.responses import FileResponse, JSONResponse
|
|
|
|
| 22 |
summarize_conversation,
|
| 23 |
)
|
| 24 |
|
| 25 |
+
# ----------------------------
# LangSmith (Dataset logging)
# ----------------------------
# Configure these in the HF Space settings.
# Required:
#   LANGSMITH_API_KEY=...
# Optional:
#   LANGSMITH_DATASET_NAME=clare_user_events
#   LANGSMITH_PROJECT=...
try:
    from langsmith import Client as LangSmithClient  # type: ignore
except Exception:
    # langsmith is an optional dependency; without it event logging is disabled.
    LangSmithClient = None  # type: ignore

# A variable that is set but blank would otherwise strip down to an empty
# dataset name, so fall back to the default in that case as well.
LS_DATASET_NAME = os.getenv("LANGSMITH_DATASET_NAME", "").strip() or "clare_user_events"
LS_PROJECT = os.getenv("LANGSMITH_PROJECT", "").strip()

# Stays None whenever logging is unavailable (package missing, no API key,
# or client construction failed); callers must check for None before use.
_ls_client = None
# Strip the key before testing it so a whitespace-only value counts as unset.
if LangSmithClient is not None and (os.getenv("LANGSMITH_API_KEY") or "").strip():
    try:
        _ls_client = LangSmithClient()
    except Exception as e:
        # Best effort: a broken LangSmith setup must not stop the API from starting.
        print(f"[langsmith] init failed: {repr(e)}")
        _ls_client = None
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
def log_event_to_langsmith(
    *,
    inputs: Dict[str, Any],
    outputs: Dict[str, Any],
    metadata: Dict[str, Any],
) -> None:
    """
    Record one event as an Example row in the configured LangSmith dataset.

    The dataset is used as an event log, mirroring the earlier Gradio setup.
    Does nothing when no LangSmith client is available. Any failure while
    writing is printed and swallowed, so logging never breaks the caller.

    Args:
        inputs: Example inputs passed through to ``create_example``.
        outputs: Example outputs passed through to ``create_example``.
        metadata: Example metadata; the caller's dict is not mutated.
    """
    client = _ls_client
    if client is None:
        return

    try:
        example_metadata = metadata
        # The project is optional; the dataset alone is enough for filtering
        # and analysis. When set, it is attached as an extra metadata key.
        if LS_PROJECT:
            example_metadata = {**metadata, "langsmith_project": LS_PROJECT}

        client.create_example(
            dataset_name=LS_DATASET_NAME,
            inputs=inputs,
            outputs=outputs,
            metadata=example_metadata,
        )
    except Exception as e:
        print(f"[langsmith] create_example failed: {repr(e)}")
|
| 75 |
+
|
| 76 |
+
|
| 77 |
# ----------------------------
|
| 78 |
# Paths / Constants
|
| 79 |
# ----------------------------
|
|
|
|
| 92 |
# ----------------------------
|
| 93 |
app = FastAPI(title="Clare API")
|
| 94 |
|
|
|
|
| 95 |
app.add_middleware(
|
| 96 |
CORSMiddleware,
|
| 97 |
allow_origins=["*"],
|
|
|
|
| 103 |
# ----------------------------
|
| 104 |
# Static hosting (Vite build)
|
| 105 |
# ----------------------------
|
|
|
|
| 106 |
if os.path.isdir(WEB_ASSETS):
|
| 107 |
app.mount("/assets", StaticFiles(directory=WEB_ASSETS), name="assets")
|
| 108 |
|
|
|
|
| 109 |
if os.path.isdir(WEB_DIST):
|
| 110 |
app.mount("/static", StaticFiles(directory=WEB_DIST), name="static")
|
| 111 |
|
|
|
|
| 148 |
"weaknesses": [],
|
| 149 |
"cognitive_state": {"confusion": 0, "mastery": 0},
|
| 150 |
"course_outline": DEFAULT_COURSE_TOPICS,
|
|
|
|
| 151 |
"rag_chunks": list(MODULE10_CHUNKS_CACHE),
|
| 152 |
"model_name": DEFAULT_MODEL,
|
|
|
|
|
|
|
| 153 |
}
|
| 154 |
return SESSIONS[user_id]
|
| 155 |
|
|
|
|
| 181 |
language_preference: str = "Auto"
|
| 182 |
|
| 183 |
|
| 184 |
+
class FeedbackReq(BaseModel):
    # Request body accepted by POST /api/feedback.

    # Minimal fields the frontend sends.
    user_id: str
    rating: str  # "helpful" | "not_helpful"
    assistant_message_id: str
    assistant_text: str

    # Optional fields, used for richer analysis.
    user_text: Optional[str] = None
    comment: Optional[str] = None
    refs: Optional[List[str]] = None
    learning_mode: Optional[str] = None
    doc_type: Optional[str] = None
    timestamp_ms: Optional[float] = None
|
| 198 |
+
|
| 199 |
+
|
| 200 |
# ----------------------------
|
| 201 |
# API Routes
|
| 202 |
# ----------------------------
|
|
|
|
| 237 |
sess["weaknesses"] = update_weaknesses_from_message(msg, sess["weaknesses"])
|
| 238 |
sess["cognitive_state"] = update_cognitive_state_from_message(msg, sess["cognitive_state"])
|
| 239 |
|
|
|
|
| 240 |
rag_context_text, rag_used_chunks = retrieve_relevant_chunks(msg, sess["rag_chunks"])
|
| 241 |
|
| 242 |
start_ts = time.time()
|
|
|
|
| 265 |
for c in (rag_used_chunks or [])
|
| 266 |
]
|
| 267 |
|
| 268 |
+
# 可选:把 chat_turn 也写入 dataset(你以前 Gradio 有)
|
| 269 |
+
try:
|
| 270 |
+
log_event_to_langsmith(
|
| 271 |
+
inputs={
|
| 272 |
+
"question": msg,
|
| 273 |
+
"student_id": user_id,
|
| 274 |
+
},
|
| 275 |
+
outputs={
|
| 276 |
+
"answer": answer,
|
| 277 |
+
},
|
| 278 |
+
metadata={
|
| 279 |
+
"event_type": "chat_turn",
|
| 280 |
+
"timestamp": time.time(),
|
| 281 |
+
"latency_ms": latency_ms,
|
| 282 |
+
"learning_mode": req.learning_mode,
|
| 283 |
+
"language": resolved_lang,
|
| 284 |
+
"doc_type": req.doc_type,
|
| 285 |
+
},
|
| 286 |
+
)
|
| 287 |
+
except Exception:
|
| 288 |
+
pass
|
| 289 |
|
| 290 |
return {
|
| 291 |
"reply": answer,
|
|
|
|
| 313 |
|
| 314 |
sess = _get_session(user_id)
|
| 315 |
|
|
|
|
| 316 |
safe_name = os.path.basename(file.filename).replace("..", "_")
|
| 317 |
tmp_path = os.path.join("/tmp", safe_name)
|
| 318 |
|
|
|
|
| 320 |
with open(tmp_path, "wb") as f:
|
| 321 |
f.write(content)
|
| 322 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 323 |
if doc_type == "Syllabus":
|
| 324 |
class _F:
|
| 325 |
pass
|
|
|
|
| 331 |
except Exception as e:
|
| 332 |
print(f"[upload] syllabus parse error: {repr(e)}")
|
| 333 |
|
|
|
|
| 334 |
try:
|
| 335 |
new_chunks = build_rag_chunks_from_file(tmp_path, doc_type) or []
|
| 336 |
sess["rag_chunks"] = (sess["rag_chunks"] or []) + new_chunks
|
|
|
|
| 342 |
return {"ok": True, "added_chunks": len(new_chunks), "status_md": status_md}
|
| 343 |
|
| 344 |
|
| 345 |
+
@app.post("/api/feedback")
def feedback(req: FeedbackReq):
    # Validate a feedback submission and record it as a LangSmith dataset event.
    # Returns a 400 JSON error for a blank user_id or an unknown rating;
    # otherwise returns {"ok": True}, even if the logging step itself fails.
    user_id = (req.user_id or "").strip()
    if not user_id:
        return JSONResponse({"ok": False, "error": "Missing user_id"}, status_code=400)

    rating = (req.rating or "").strip().lower()
    if rating not in {"helpful", "not_helpful"}:
        return JSONResponse({"ok": False, "error": "rating must be helpful|not_helpful"}, status_code=400)

    # Write to the LangSmith dataset (same pattern as the earlier Gradio app).
    try:
        event_inputs = {
            # May be empty: feedback can target the assistant reply alone.
            "question": req.user_text or "",
            "student_id": user_id,
            "assistant_message_id": req.assistant_message_id,
        }
        event_outputs = {
            "answer": req.assistant_text or "",
        }
        event_metadata = {
            "event_type": "feedback",
            "rating": rating,
            "comment": (req.comment or "").strip(),
            "learning_mode": req.learning_mode or "",
            "doc_type": req.doc_type or "",
            "refs": req.refs or [],
            # Fall back to the server clock when the client sent no timestamp.
            "timestamp_ms": req.timestamp_ms or (time.time() * 1000.0),
        }
        log_event_to_langsmith(
            inputs=event_inputs,
            outputs=event_outputs,
            metadata=event_metadata,
        )
    except Exception as e:
        print(f"[feedback] log failed: {repr(e)}")

    return {"ok": True}
|
| 380 |
+
|
| 381 |
+
|
| 382 |
@app.post("/api/export")
|
| 383 |
def api_export(req: ExportReq):
|
| 384 |
user_id = (req.user_id or "").strip()
|
|
|
|
| 417 |
@app.get("/api/memoryline")
def memoryline(user_id: str):
    # Return the review-timeline payload for the frontend.
    # The values are fixed constants and do not depend on session contents.
    normalized_id = (user_id or "").strip()
    # Called only for its effect on the session store; the result is unused.
    # NOTE(review): presumably creates the session when absent - confirm in _get_session.
    _get_session(normalized_id)
    return {"next_review_label": "T+7", "progress_pct": 0.4}
|
| 421 |
|
| 422 |
|
| 423 |
# ----------------------------
|
| 424 |
+
# SPA Fallback
|
| 425 |
# ----------------------------
|
|
|
|
| 426 |
@app.get("/{full_path:path}")
|
| 427 |
def spa_fallback(full_path: str, request: Request):
|
|
|
|
| 428 |
if (
|
| 429 |
full_path.startswith("api/")
|
| 430 |
or full_path.startswith("assets/")
|