SarahXia0405 committed on
Commit
3474589
·
verified ·
1 Parent(s): 4936922

Update api/server.py

Browse files
Files changed (1) hide show
  1. api/server.py +128 -37
api/server.py CHANGED
@@ -1,7 +1,7 @@
1
  # api/server.py
2
  import os
3
  import time
4
- from typing import Dict
5
 
6
  from fastapi import FastAPI, UploadFile, File, Form, Request
7
  from fastapi.responses import FileResponse, JSONResponse
@@ -22,6 +22,58 @@ from api.clare_core import (
22
  summarize_conversation,
23
  )
24
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  # ----------------------------
26
  # Paths / Constants
27
  # ----------------------------
@@ -40,7 +92,6 @@ WEB_ASSETS = os.path.join(WEB_DIST, "assets")
40
  # ----------------------------
41
  app = FastAPI(title="Clare API")
42
 
43
- # Same-origin for Docker Space doesn't need CORS, but leaving it open helps if you later split FE/BE.
44
  app.add_middleware(
45
  CORSMiddleware,
46
  allow_origins=["*"],
@@ -52,11 +103,9 @@ app.add_middleware(
52
  # ----------------------------
53
  # Static hosting (Vite build)
54
  # ----------------------------
55
- # Mount /assets so <script src="/assets/..."> works.
56
  if os.path.isdir(WEB_ASSETS):
57
  app.mount("/assets", StaticFiles(directory=WEB_ASSETS), name="assets")
58
 
59
- # Optional: serve other static files in build root (e.g., favicon) under /static
60
  if os.path.isdir(WEB_DIST):
61
  app.mount("/static", StaticFiles(directory=WEB_DIST), name="static")
62
 
@@ -99,11 +148,8 @@ def _get_session(user_id: str) -> Dict:
99
  "weaknesses": [],
100
  "cognitive_state": {"confusion": 0, "mastery": 0},
101
  "course_outline": DEFAULT_COURSE_TOPICS,
102
- # preload base reading
103
  "rag_chunks": list(MODULE10_CHUNKS_CACHE),
104
  "model_name": DEFAULT_MODEL,
105
- # ✅ NEW: track last syllabus filename for refs fallback
106
- "last_syllabus_file": None,
107
  }
108
  return SESSIONS[user_id]
109
 
@@ -135,6 +181,22 @@ class SummaryReq(BaseModel):
135
  language_preference: str = "Auto"
136
 
137
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
138
  # ----------------------------
139
  # API Routes
140
  # ----------------------------
@@ -175,7 +237,6 @@ def chat(req: ChatReq):
175
  sess["weaknesses"] = update_weaknesses_from_message(msg, sess["weaknesses"])
176
  sess["cognitive_state"] = update_cognitive_state_from_message(msg, sess["cognitive_state"])
177
 
178
- # RAG
179
  rag_context_text, rag_used_chunks = retrieve_relevant_chunks(msg, sess["rag_chunks"])
180
 
181
  start_ts = time.time()
@@ -204,24 +265,27 @@ def chat(req: ChatReq):
204
  for c in (rag_used_chunks or [])
205
  ]
206
 
207
- # NEW: Syllabus chat should show syllabus in refs (fallback if retrieval didn't hit it)
208
- doc_type_norm = (req.doc_type or "").strip().lower()
209
- if doc_type_norm == "syllabus":
210
- syllabus_name = sess.get("last_syllabus_file") or "uploaded_syllabus"
211
- # We consider it a hit if any ref source_file contains the syllabus filename
212
- def _has_syllabus_ref() -> bool:
213
- for r in refs:
214
- sf = (r.get("source_file") or "").strip()
215
- if not sf:
216
- continue
217
- # robust matching: exact basename containment OR endswith
218
- if syllabus_name in sf or sf.endswith(syllabus_name):
219
- return True
220
- return False
221
-
222
- if not _has_syllabus_ref():
223
- # put syllabus at the front, preserve existing refs after
224
- refs = [{"source_file": syllabus_name, "section": "syllabus_outline"}] + refs
 
 
 
225
 
226
  return {
227
  "reply": answer,
@@ -249,7 +313,6 @@ async def upload(
249
 
250
  sess = _get_session(user_id)
251
 
252
- # Save to /tmp (sanitize filename)
253
  safe_name = os.path.basename(file.filename).replace("..", "_")
254
  tmp_path = os.path.join("/tmp", safe_name)
255
 
@@ -257,11 +320,6 @@ async def upload(
257
  with open(tmp_path, "wb") as f:
258
  f.write(content)
259
 
260
- # ✅ NEW: remember the latest syllabus filename for refs fallback
261
- if doc_type == "Syllabus":
262
- sess["last_syllabus_file"] = os.path.basename(file.filename) or safe_name
263
-
264
- # Update topics only for syllabus
265
  if doc_type == "Syllabus":
266
  class _F:
267
  pass
@@ -273,7 +331,6 @@ async def upload(
273
  except Exception as e:
274
  print(f"[upload] syllabus parse error: {repr(e)}")
275
 
276
- # Update rag chunks for any doc
277
  try:
278
  new_chunks = build_rag_chunks_from_file(tmp_path, doc_type) or []
279
  sess["rag_chunks"] = (sess["rag_chunks"] or []) + new_chunks
@@ -285,6 +342,43 @@ async def upload(
285
  return {"ok": True, "added_chunks": len(new_chunks), "status_md": status_md}
286
 
287
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
288
  @app.post("/api/export")
289
  def api_export(req: ExportReq):
290
  user_id = (req.user_id or "").strip()
@@ -323,17 +417,14 @@ def api_summary(req: SummaryReq):
323
  @app.get("/api/memoryline")
324
  def memoryline(user_id: str):
325
  _ = _get_session((user_id or "").strip())
326
- # v1: 写死也没问题;前端只渲染
327
  return {"next_review_label": "T+7", "progress_pct": 0.4}
328
 
329
 
330
  # ----------------------------
331
- # SPA Fallback (important!)
332
  # ----------------------------
333
- # If user refreshes /some/route, FE router needs index.html.
334
  @app.get("/{full_path:path}")
335
  def spa_fallback(full_path: str, request: Request):
336
- # Do not hijack API/static paths
337
  if (
338
  full_path.startswith("api/")
339
  or full_path.startswith("assets/")
 
1
  # api/server.py
2
  import os
3
  import time
4
+ from typing import Dict, Any, Optional, List
5
 
6
  from fastapi import FastAPI, UploadFile, File, Form, Request
7
  from fastapi.responses import FileResponse, JSONResponse
 
22
  summarize_conversation,
23
  )
24
 
25
# ----------------------------
# LangSmith (Dataset logging)
# ----------------------------
# Configuration is read from environment variables (set them in the HF Space):
#   LANGSMITH_API_KEY       - required; without it no client is created
#   LANGSMITH_DATASET_NAME  - optional, defaults to "clare_user_events"
#   LANGSMITH_PROJECT       - optional, defaults to ""
try:
    from langsmith import Client as LangSmithClient  # type: ignore
except Exception:
    # Import failed for any reason: leave the name bound to None so that no
    # client is created below.
    LangSmithClient = None  # type: ignore

LS_DATASET_NAME = os.environ.get("LANGSMITH_DATASET_NAME", "clare_user_events").strip()
LS_PROJECT = os.environ.get("LANGSMITH_PROJECT", "").strip()

# Stays None unless the SDK imported, an API key is present, and the client
# constructor succeeded.
_ls_client = None
if LangSmithClient is not None:
    if os.environ.get("LANGSMITH_API_KEY"):
        try:
            _ls_client = LangSmithClient()
        except Exception as init_err:
            print(f"[langsmith] init failed: {init_err!r}")
            _ls_client = None
48
+
49
+
50
def log_event_to_langsmith(
    *,
    inputs: Dict[str, Any],
    outputs: Dict[str, Any],
    metadata: Dict[str, Any],
) -> None:
    """Record one event as an Example row in the configured LangSmith dataset.

    Best-effort: returns immediately when no LangSmith client was
    initialised, and any exception raised while building or sending the
    example is printed instead of being propagated.

    Args:
        inputs: Forwarded as the example's ``inputs``.
        outputs: Forwarded as the example's ``outputs``.
        metadata: Forwarded as the example's ``metadata``. When
            ``LS_PROJECT`` is non-empty, a copy carrying an extra
            ``"langsmith_project"`` key is sent instead; the caller's dict
            is not modified.
    """
    client = _ls_client
    if client is None:
        return

    try:
        # The project tag is optional; the dataset alone is enough for
        # filtering and analysis.
        example_metadata = (
            {**metadata, "langsmith_project": LS_PROJECT} if LS_PROJECT else metadata
        )
        client.create_example(
            inputs=inputs,
            outputs=outputs,
            metadata=example_metadata,
            dataset_name=LS_DATASET_NAME,
        )
    except Exception as err:
        print(f"[langsmith] create_example failed: {err!r}")
75
+
76
+
77
  # ----------------------------
78
  # Paths / Constants
79
  # ----------------------------
 
92
  # ----------------------------
93
  app = FastAPI(title="Clare API")
94
 
 
95
  app.add_middleware(
96
  CORSMiddleware,
97
  allow_origins=["*"],
 
103
  # ----------------------------
104
  # Static hosting (Vite build)
105
  # ----------------------------
 
106
  if os.path.isdir(WEB_ASSETS):
107
  app.mount("/assets", StaticFiles(directory=WEB_ASSETS), name="assets")
108
 
 
109
  if os.path.isdir(WEB_DIST):
110
  app.mount("/static", StaticFiles(directory=WEB_DIST), name="static")
111
 
 
148
  "weaknesses": [],
149
  "cognitive_state": {"confusion": 0, "mastery": 0},
150
  "course_outline": DEFAULT_COURSE_TOPICS,
 
151
  "rag_chunks": list(MODULE10_CHUNKS_CACHE),
152
  "model_name": DEFAULT_MODEL,
 
 
153
  }
154
  return SESSIONS[user_id]
155
 
 
181
  language_preference: str = "Auto"
182
 
183
 
184
# Request body accepted by POST /api/feedback.
class FeedbackReq(BaseModel):
    # --- Minimum set of fields the front end sends ---
    user_id: str
    rating: str  # "helpful" | "not_helpful"
    assistant_message_id: str
    assistant_text: str

    # --- Optional context that allows richer analysis ---
    user_text: Optional[str] = None
    comment: Optional[str] = None
    refs: Optional[List[str]] = None
    learning_mode: Optional[str] = None
    doc_type: Optional[str] = None
    timestamp_ms: Optional[float] = None
198
+
199
+
200
  # ----------------------------
201
  # API Routes
202
  # ----------------------------
 
237
  sess["weaknesses"] = update_weaknesses_from_message(msg, sess["weaknesses"])
238
  sess["cognitive_state"] = update_cognitive_state_from_message(msg, sess["cognitive_state"])
239
 
 
240
  rag_context_text, rag_used_chunks = retrieve_relevant_chunks(msg, sess["rag_chunks"])
241
 
242
  start_ts = time.time()
 
265
  for c in (rag_used_chunks or [])
266
  ]
267
 
268
+ # Optional: also write each chat_turn to the dataset (the earlier Gradio version did this)
269
+ try:
270
+ log_event_to_langsmith(
271
+ inputs={
272
+ "question": msg,
273
+ "student_id": user_id,
274
+ },
275
+ outputs={
276
+ "answer": answer,
277
+ },
278
+ metadata={
279
+ "event_type": "chat_turn",
280
+ "timestamp": time.time(),
281
+ "latency_ms": latency_ms,
282
+ "learning_mode": req.learning_mode,
283
+ "language": resolved_lang,
284
+ "doc_type": req.doc_type,
285
+ },
286
+ )
287
+ except Exception:
288
+ pass
289
 
290
  return {
291
  "reply": answer,
 
313
 
314
  sess = _get_session(user_id)
315
 
 
316
  safe_name = os.path.basename(file.filename).replace("..", "_")
317
  tmp_path = os.path.join("/tmp", safe_name)
318
 
 
320
  with open(tmp_path, "wb") as f:
321
  f.write(content)
322
 
 
 
 
 
 
323
  if doc_type == "Syllabus":
324
  class _F:
325
  pass
 
331
  except Exception as e:
332
  print(f"[upload] syllabus parse error: {repr(e)}")
333
 
 
334
  try:
335
  new_chunks = build_rag_chunks_from_file(tmp_path, doc_type) or []
336
  sess["rag_chunks"] = (sess["rag_chunks"] or []) + new_chunks
 
342
  return {"ok": True, "added_chunks": len(new_chunks), "status_md": status_md}
343
 
344
 
345
@app.post("/api/feedback")
def feedback(req: FeedbackReq):
    # Validates a helpful / not_helpful rating for an assistant message and
    # forwards it to the LangSmith dataset logger. Responds 400 when user_id
    # is blank or the rating is not one of the two accepted values; otherwise
    # responds {"ok": True} even if logging fails.
    user_id = (req.user_id or "").strip()
    if not user_id:
        return JSONResponse({"ok": False, "error": "Missing user_id"}, status_code=400)

    rating = (req.rating or "").strip().lower()
    if rating != "helpful" and rating != "not_helpful":
        return JSONResponse({"ok": False, "error": "rating must be helpful|not_helpful"}, status_code=400)

    # Write the event to the LangSmith dataset (same pattern as the earlier
    # Gradio version). A logging failure is printed and never fails the request.
    try:
        event_inputs = {
            # May be empty: the user can rate an assistant reply on its own.
            "question": req.user_text or "",
            "student_id": user_id,
            "assistant_message_id": req.assistant_message_id,
        }
        event_outputs = {"answer": req.assistant_text or ""}
        event_metadata = {
            "event_type": "feedback",
            "rating": rating,
            "comment": (req.comment or "").strip(),
            "learning_mode": req.learning_mode or "",
            "doc_type": req.doc_type or "",
            "refs": req.refs or [],
            # Falls back to the server clock (in milliseconds) when the
            # client sent no timestamp.
            "timestamp_ms": req.timestamp_ms or (time.time() * 1000.0),
        }
        log_event_to_langsmith(
            inputs=event_inputs,
            outputs=event_outputs,
            metadata=event_metadata,
        )
    except Exception as err:
        print(f"[feedback] log failed: {err!r}")

    return {"ok": True}
380
+
381
+
382
  @app.post("/api/export")
383
  def api_export(req: ExportReq):
384
  user_id = (req.user_id or "").strip()
 
417
@app.get("/api/memoryline")
def memoryline(user_id: str):
    # Look up the session for the trimmed user id; the returned session is
    # not used here (presumably the call creates it if missing; confirm
    # against _get_session).
    _get_session((user_id or "").strip())

    # Hard-coded payload: the same label and progress value for every user.
    payload = {"next_review_label": "T+7", "progress_pct": 0.4}
    return payload
421
 
422
 
423
  # ----------------------------
424
+ # SPA Fallback
425
  # ----------------------------
 
426
  @app.get("/{full_path:path}")
427
  def spa_fallback(full_path: str, request: Request):
 
428
  if (
429
  full_path.startswith("api/")
430
  or full_path.startswith("assets/")