SarahXia0405 committed on
Commit
adf4467
·
verified ·
1 Parent(s): 072270b

Update api/server.py

Browse files
Files changed (1) hide show
  1. api/server.py +44 -110
api/server.py CHANGED
@@ -1,8 +1,8 @@
1
  # api/server.py
2
  import os
3
  import time
4
- import threading
5
- from typing import Dict, List, Optional, Any
6
 
7
  from fastapi import FastAPI, UploadFile, File, Form, Request
8
  from fastapi.responses import FileResponse, JSONResponse
@@ -23,15 +23,12 @@ from api.clare_core import (
23
  summarize_conversation,
24
  )
25
 
26
- # ✅ LangSmith
27
  try:
28
  from langsmith import Client
29
  except Exception:
30
  Client = None
31
 
32
- # ----------------------------
33
- # Paths / Constants
34
- # ----------------------------
35
  API_DIR = os.path.dirname(__file__)
36
 
37
  MODULE10_PATH = os.path.join(API_DIR, "module10_responsible_ai.pdf")
@@ -44,13 +41,6 @@ WEB_ASSETS = os.path.join(WEB_DIST, "assets")
44
  LS_DATASET_NAME = os.getenv("LS_DATASET_NAME", "clare_user_events").strip()
45
  LS_PROJECT = os.getenv("LANGSMITH_PROJECT", os.getenv("LANGCHAIN_PROJECT", "")).strip()
46
 
47
- # ✅ Profiling / Debug controls
48
- RETURN_LATENCY_BREAKDOWN = os.getenv("RETURN_LATENCY_BREAKDOWN", "1").strip() == "1"
49
- ASYNC_LANGSMITH_LOGGING = os.getenv("ASYNC_LANGSMITH_LOGGING", "1").strip() == "1"
50
-
51
- # ----------------------------
52
- # App
53
- # ----------------------------
54
  app = FastAPI(title="Clare API")
55
 
56
  app.add_middleware(
@@ -61,9 +51,6 @@ app.add_middleware(
61
  allow_headers=["*"],
62
  )
63
 
64
- # ----------------------------
65
- # Static hosting (Vite build)
66
- # ----------------------------
67
  if os.path.isdir(WEB_ASSETS):
68
  app.mount("/assets", StaticFiles(directory=WEB_ASSETS), name="assets")
69
 
@@ -81,9 +68,6 @@ def index():
81
  )
82
 
83
 
84
- # ----------------------------
85
- # In-memory session store (MVP)
86
- # ----------------------------
87
  SESSIONS: Dict[str, Dict] = {}
88
 
89
 
@@ -115,34 +99,6 @@ def _get_session(user_id: str) -> Dict:
115
  return SESSIONS[user_id]
116
 
117
 
118
- # ----------------------------
119
- # Profiling helpers
120
- # ----------------------------
121
- def _now_ms() -> float:
122
- return time.perf_counter() * 1000.0
123
-
124
-
125
- class _Perf:
126
- def __init__(self):
127
- self.t0 = _now_ms()
128
- self.marks: Dict[str, float] = {}
129
-
130
- def mark(self, name: str):
131
- self.marks[name] = _now_ms() - self.t0
132
-
133
- def summary(self) -> Dict[str, Any]:
134
- keys = list(self.marks.keys())
135
- prev = 0.0
136
- seg = {}
137
- for k in keys:
138
- seg[k] = self.marks[k] - prev
139
- prev = self.marks[k]
140
- return {"marks_ms": dict(self.marks), "segments_ms": seg, "total_ms": prev}
141
-
142
-
143
- # ----------------------------
144
- # LangSmith helpers
145
- # ----------------------------
146
  _ls_client = None
147
  if Client is not None:
148
  try:
@@ -152,12 +108,7 @@ if Client is not None:
152
  _ls_client = None
153
 
154
 
155
- def _log_event_to_langsmith_sync(data: Dict):
156
- """
157
- Create an Example in LangSmith Dataset.
158
- Inputs/Outputs show up as "Inputs" / "Reference Outputs".
159
- Everything else goes into metadata columns.
160
- """
161
  if _ls_client is None:
162
  return
163
 
@@ -183,25 +134,6 @@ def _log_event_to_langsmith_sync(data: Dict):
183
  print("[langsmith] log failed:", repr(e))
184
 
185
 
186
def _log_event_to_langsmith(data: Dict):
    """Log an event to LangSmith, off the request thread by default.

    When ASYNC_LANGSMITH_LOGGING is enabled the synchronous logger runs on a
    daemon thread so the API response never waits on network I/O; otherwise
    it is invoked inline. No-op when no LangSmith client is configured.
    """
    if not ASYNC_LANGSMITH_LOGGING:
        # Synchronous fallback: the caller pays the network latency.
        _log_event_to_langsmith_sync(data)
        return

    if _ls_client is None:
        # Nothing to schedule without a client.
        return

    worker = threading.Thread(
        target=_log_event_to_langsmith_sync,
        args=(data,),
        daemon=True,
    )
    worker.start()
200
-
201
-
202
- # ----------------------------
203
- # Schemas
204
- # ----------------------------
205
  class LoginReq(BaseModel):
206
  name: str
207
  user_id: str
@@ -228,23 +160,17 @@ class SummaryReq(BaseModel):
228
 
229
  class FeedbackReq(BaseModel):
230
  user_id: str
231
- rating: str # "helpful" | "not_helpful"
232
  assistant_message_id: Optional[str] = None
233
-
234
  assistant_text: str
235
  user_text: Optional[str] = ""
236
-
237
  comment: Optional[str] = ""
238
-
239
  refs: Optional[List[str]] = []
240
  learning_mode: Optional[str] = None
241
  doc_type: Optional[str] = None
242
  timestamp_ms: Optional[int] = None
243
 
244
 
245
- # ----------------------------
246
- # API Routes
247
- # ----------------------------
248
  @app.post("/api/login")
249
  def login(req: LoginReq):
250
  user_id = (req.user_id or "").strip()
@@ -259,8 +185,8 @@ def login(req: LoginReq):
259
 
260
  @app.post("/api/chat")
261
  def chat(req: ChatReq):
262
- perf = _Perf()
263
- perf.mark("start")
264
 
265
  user_id = (req.user_id or "").strip()
266
  msg = (req.message or "").strip()
@@ -277,24 +203,25 @@ def chat(req: ChatReq):
277
  ),
278
  "refs": [],
279
  "latency_ms": 0.0,
 
280
  }
281
 
282
- # 1) language detect
283
  resolved_lang = detect_language(msg, req.language_preference)
284
- perf.mark("language_detect_done")
285
 
286
- # 2) state updates (cheap)
287
  sess["weaknesses"] = update_weaknesses_from_message(msg, sess["weaknesses"])
288
- perf.mark("weakness_update_done")
289
 
290
  sess["cognitive_state"] = update_cognitive_state_from_message(msg, sess["cognitive_state"])
291
- perf.mark("cognitive_update_done")
292
 
293
- # 3) RAG retrieval (can be heavy if chunks huge)
294
  rag_context_text, rag_used_chunks = retrieve_relevant_chunks(msg, sess["rag_chunks"])
295
- perf.mark("rag_retrieve_done")
 
 
 
 
296
 
297
- # 4) LLM
298
  try:
299
  answer, new_history = chat_with_clare(
300
  message=msg,
@@ -312,8 +239,9 @@ def chat(req: ChatReq):
312
  print(f"[chat] error: {repr(e)}")
313
  return JSONResponse({"error": f"chat failed: {repr(e)}"}, status_code=500)
314
 
315
- perf.mark("llm_done")
316
 
 
317
  sess["history"] = new_history
318
 
319
  refs = [
@@ -321,10 +249,22 @@ def chat(req: ChatReq):
321
  for c in (rag_used_chunks or [])
322
  ]
323
 
324
- breakdown = perf.summary()
325
- latency_ms_total = float(breakdown["total_ms"])
 
 
 
 
 
 
 
 
 
 
 
 
 
326
 
327
- # 5) LangSmith log (async by default)
328
  _log_event_to_langsmith(
329
  {
330
  "experiment_id": "RESP_AI_W10",
@@ -332,9 +272,8 @@ def chat(req: ChatReq):
332
  "student_name": sess.get("name", ""),
333
  "event_type": "chat_turn",
334
  "timestamp": time.time(),
335
- "latency_ms": latency_ms_total,
336
- "latency_breakdown": breakdown, # ✅ now visible in metadata
337
-
338
  "question": msg,
339
  "answer": answer,
340
  "model_name": sess["model_name"],
@@ -342,26 +281,24 @@ def chat(req: ChatReq):
342
  "learning_mode": req.learning_mode,
343
  "doc_type": req.doc_type,
344
  "refs": refs,
345
-
346
- # ✅ helpful debug dimensions
347
- "rag_used_chunks_count": len(rag_used_chunks or []),
348
- "rag_context_chars": len(rag_context_text or ""),
349
- "history_len": len(sess.get("history") or []),
350
  }
351
  )
352
- perf.mark("langsmith_log_scheduled")
353
 
354
- resp = {
355
  "reply": answer,
356
  "session_status_md": render_session_status(
357
  req.learning_mode, sess["weaknesses"], sess["cognitive_state"]
358
  ),
359
  "refs": refs,
360
- "latency_ms": latency_ms_total,
 
 
 
 
361
  }
362
- if RETURN_LATENCY_BREAKDOWN:
363
- resp["latency_breakdown"] = breakdown
364
- return resp
365
 
366
 
367
  @app.post("/api/upload")
@@ -500,9 +437,6 @@ def memoryline(user_id: str):
500
  return {"next_review_label": "T+7", "progress_pct": 0.4}
501
 
502
 
503
- # ----------------------------
504
- # SPA Fallback
505
- # ----------------------------
506
  @app.get("/{full_path:path}")
507
  def spa_fallback(full_path: str, request: Request):
508
  if (
 
1
  # api/server.py
2
  import os
3
  import time
4
+ import json
5
+ from typing import Dict, List, Optional
6
 
7
  from fastapi import FastAPI, UploadFile, File, Form, Request
8
  from fastapi.responses import FileResponse, JSONResponse
 
23
  summarize_conversation,
24
  )
25
 
 
26
  try:
27
  from langsmith import Client
28
  except Exception:
29
  Client = None
30
 
31
+
 
 
32
  API_DIR = os.path.dirname(__file__)
33
 
34
  MODULE10_PATH = os.path.join(API_DIR, "module10_responsible_ai.pdf")
 
41
  LS_DATASET_NAME = os.getenv("LS_DATASET_NAME", "clare_user_events").strip()
42
  LS_PROJECT = os.getenv("LANGSMITH_PROJECT", os.getenv("LANGCHAIN_PROJECT", "")).strip()
43
 
 
 
 
 
 
 
 
44
  app = FastAPI(title="Clare API")
45
 
46
  app.add_middleware(
 
51
  allow_headers=["*"],
52
  )
53
 
 
 
 
54
  if os.path.isdir(WEB_ASSETS):
55
  app.mount("/assets", StaticFiles(directory=WEB_ASSETS), name="assets")
56
 
 
68
  )
69
 
70
 
 
 
 
71
  SESSIONS: Dict[str, Dict] = {}
72
 
73
 
 
99
  return SESSIONS[user_id]
100
 
101
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
102
  _ls_client = None
103
  if Client is not None:
104
  try:
 
108
  _ls_client = None
109
 
110
 
111
+ def _log_event_to_langsmith(data: Dict):
 
 
 
 
 
112
  if _ls_client is None:
113
  return
114
 
 
134
  print("[langsmith] log failed:", repr(e))
135
 
136
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
137
  class LoginReq(BaseModel):
138
  name: str
139
  user_id: str
 
160
 
161
class FeedbackReq(BaseModel):
    """User feedback payload for a rated assistant reply."""

    user_id: str
    # "helpful" | "not_helpful"
    rating: str
    # Client-side id of the rated assistant message, when the client tracks one.
    assistant_message_id: Optional[str] = None
    # The assistant reply text being rated.
    assistant_text: str
    # The user turn that preceded the rated reply, if any.
    user_text: Optional[str] = ""
    # Optional free-form comment from the user.
    comment: Optional[str] = ""
    # presumably the reference labels shown alongside the reply — confirm against the chat route
    refs: Optional[List[str]] = []
    learning_mode: Optional[str] = None
    doc_type: Optional[str] = None
    # NOTE(review): looks like epoch milliseconds from the client — confirm
    timestamp_ms: Optional[int] = None
172
 
173
 
 
 
 
174
  @app.post("/api/login")
175
  def login(req: LoginReq):
176
  user_id = (req.user_id or "").strip()
 
185
 
186
  @app.post("/api/chat")
187
  def chat(req: ChatReq):
188
+ t0 = time.time()
189
+ marks = {"start": 0.0}
190
 
191
  user_id = (req.user_id or "").strip()
192
  msg = (req.message or "").strip()
 
203
  ),
204
  "refs": [],
205
  "latency_ms": 0.0,
206
+ "latency_breakdown": {"total_ms": 0.0, "marks_ms": {}, "segments_ms": {}},
207
  }
208
 
 
209
  resolved_lang = detect_language(msg, req.language_preference)
210
+ marks["language_detect_done"] = (time.time() - t0) * 1000.0
211
 
 
212
  sess["weaknesses"] = update_weaknesses_from_message(msg, sess["weaknesses"])
213
+ marks["weakness_update_done"] = (time.time() - t0) * 1000.0
214
 
215
  sess["cognitive_state"] = update_cognitive_state_from_message(msg, sess["cognitive_state"])
216
+ marks["cognitive_update_done"] = (time.time() - t0) * 1000.0
217
 
 
218
  rag_context_text, rag_used_chunks = retrieve_relevant_chunks(msg, sess["rag_chunks"])
219
+ marks["rag_retrieve_done"] = (time.time() - t0) * 1000.0
220
+
221
+ rag_context_chars = len(rag_context_text or "")
222
+ used_chunks_count = len(rag_used_chunks or [])
223
+ history_len = len(sess.get("history") or [])
224
 
 
225
  try:
226
  answer, new_history = chat_with_clare(
227
  message=msg,
 
239
  print(f"[chat] error: {repr(e)}")
240
  return JSONResponse({"error": f"chat failed: {repr(e)}"}, status_code=500)
241
 
242
+ marks["llm_done"] = (time.time() - t0) * 1000.0
243
 
244
+ total_ms = (time.time() - t0) * 1000.0
245
  sess["history"] = new_history
246
 
247
  refs = [
 
249
  for c in (rag_used_chunks or [])
250
  ]
251
 
252
+ # segments_ms:给你一眼看“每段耗时”
253
+ marks_ms = dict(marks)
254
+ segments_ms = {}
255
+ order = ["start", "language_detect_done", "weakness_update_done", "cognitive_update_done", "rag_retrieve_done", "llm_done"]
256
+ prev = 0.0
257
+ for k in order[1:]:
258
+ cur = marks_ms.get(k, prev)
259
+ segments_ms[k] = max(0.0, cur - prev)
260
+ prev = cur
261
+
262
+ latency_breakdown = {
263
+ "marks_ms": marks_ms,
264
+ "segments_ms": segments_ms,
265
+ "total_ms": total_ms,
266
+ }
267
 
 
268
  _log_event_to_langsmith(
269
  {
270
  "experiment_id": "RESP_AI_W10",
 
272
  "student_name": sess.get("name", ""),
273
  "event_type": "chat_turn",
274
  "timestamp": time.time(),
275
+ "latency_ms": total_ms,
276
+ "latency_breakdown": json.dumps(latency_breakdown, ensure_ascii=False),
 
277
  "question": msg,
278
  "answer": answer,
279
  "model_name": sess["model_name"],
 
281
  "learning_mode": req.learning_mode,
282
  "doc_type": req.doc_type,
283
  "refs": refs,
284
+ "history_len": history_len,
285
+ "rag_context_chars": rag_context_chars,
286
+ "rag_used_chunks_count": used_chunks_count,
 
 
287
  }
288
  )
 
289
 
290
+ return {
291
  "reply": answer,
292
  "session_status_md": render_session_status(
293
  req.learning_mode, sess["weaknesses"], sess["cognitive_state"]
294
  ),
295
  "refs": refs,
296
+ "latency_ms": total_ms,
297
+ "latency_breakdown": latency_breakdown,
298
+ "rag_context_chars": rag_context_chars,
299
+ "rag_used_chunks_count": used_chunks_count,
300
+ "history_len": history_len,
301
  }
 
 
 
302
 
303
 
304
  @app.post("/api/upload")
 
437
  return {"next_review_label": "T+7", "progress_pct": 0.4}
438
 
439
 
 
 
 
440
  @app.get("/{full_path:path}")
441
  def spa_fallback(full_path: str, request: Request):
442
  if (