SarahXia0405 committed on
Commit
bed7526
·
verified ·
1 Parent(s): 64004f1

Update api/server.py

Browse files
Files changed (1) hide show
  1. api/server.py +31 -46
api/server.py CHANGED
@@ -42,7 +42,7 @@ WEB_INDEX = os.path.join(WEB_DIST, "index.html")
42
  WEB_ASSETS = os.path.join(WEB_DIST, "assets")
43
 
44
  LS_DATASET_NAME = os.getenv("LS_DATASET_NAME", "clare_user_events").strip()
45
- LS_PROJECT = os.getenv("LANGSMITH_PROJECT", os.getenv("LANGCHAIN_PROJECT", "")).strip() # optional
46
 
47
  EXPERIMENT_ID = os.getenv("CLARE_EXPERIMENT_ID", "RESP_AI_W10").strip()
48
 
@@ -55,13 +55,10 @@ WARMUP_DONE = False
55
  WARMUP_ERROR: Optional[str] = None
56
  WARMUP_STARTED = False
57
 
58
- # warmup knobs
59
  CLARE_ENABLE_WARMUP = os.getenv("CLARE_ENABLE_WARMUP", "1").strip() == "1"
60
  CLARE_WARMUP_BLOCK_READY = os.getenv("CLARE_WARMUP_BLOCK_READY", "0").strip() == "1"
61
 
62
- # langsmith knobs (important for latency)
63
  CLARE_ENABLE_LANGSMITH_LOG = os.getenv("CLARE_ENABLE_LANGSMITH_LOG", "0").strip() == "1"
64
- # If true, logging is done in background thread to avoid blocking /api/chat
65
  CLARE_LANGSMITH_ASYNC = os.getenv("CLARE_LANGSMITH_ASYNC", "1").strip() == "1"
66
 
67
  # ----------------------------
@@ -113,7 +110,6 @@ def _preload_module10_chunks() -> List[Dict[str, Any]]:
113
  return []
114
 
115
 
116
- # Preload at import time (fast path for requests)
117
  MODULE10_CHUNKS_CACHE = _preload_module10_chunks()
118
 
119
 
@@ -133,28 +129,18 @@ def _get_session(user_id: str) -> Dict[str, Any]:
133
 
134
 
135
  # ----------------------------
136
- # Warmup (runs once, background)
137
  # ----------------------------
138
  def _do_warmup_once():
139
- """
140
- Warm OpenAI connection + touch module10 chunks cache.
141
- Best-effort; should never crash the app.
142
- """
143
  global WARMUP_DONE, WARMUP_ERROR, WARMUP_STARTED
144
  if WARMUP_STARTED:
145
  return
146
  WARMUP_STARTED = True
147
 
148
  try:
149
- # Warm OpenAI network / TLS / keep-alive
150
  from api.config import client
151
-
152
- # cheapest call: models.list() (no token usage)
153
  client.models.list()
154
-
155
- # Touch module10 cache (already loaded at import; this is just a safety)
156
  _ = MODULE10_CHUNKS_CACHE
157
-
158
  WARMUP_DONE = True
159
  WARMUP_ERROR = None
160
  except Exception as e:
@@ -174,7 +160,7 @@ def _on_startup():
174
 
175
 
176
  # ----------------------------
177
- # LangSmith helpers (optional; default OFF)
178
  # ----------------------------
179
  _ls_client = None
180
  if (Client is not None) and CLARE_ENABLE_LANGSMITH_LOG:
@@ -186,10 +172,6 @@ if (Client is not None) and CLARE_ENABLE_LANGSMITH_LOG:
186
 
187
 
188
  def _log_event_to_langsmith(data: Dict[str, Any]):
189
- """
190
- Create an Example in LangSmith Dataset.
191
- Best-effort and non-blocking by default (async thread).
192
- """
193
  if _ls_client is None:
194
  return
195
 
@@ -201,6 +183,8 @@ def _log_event_to_langsmith(data: Dict[str, Any]):
201
  "student_name": data.get("student_name", ""),
202
  }
203
  outputs = {"answer": data.get("answer", "")}
 
 
204
  metadata = {k: v for k, v in data.items() if k not in ("question", "answer")}
205
 
206
  if LS_PROJECT:
@@ -222,11 +206,10 @@ def _log_event_to_langsmith(data: Dict[str, Any]):
222
 
223
 
224
  # ----------------------------
225
- # Health endpoints (pure lightweight)
226
  # ----------------------------
227
  @app.get("/health")
228
  def health():
229
- # do not touch LLM/RAG/disk heavy work here
230
  return {
231
  "ok": True,
232
  "uptime_s": round(time.time() - APP_START_TS, 3),
@@ -243,13 +226,10 @@ def health():
243
 
244
  @app.get("/ready")
245
  def ready():
246
- # readiness probe: optionally block until warmup completes
247
  if not CLARE_ENABLE_WARMUP or not CLARE_WARMUP_BLOCK_READY:
248
  return {"ready": True}
249
-
250
  if WARMUP_DONE:
251
  return {"ready": True}
252
-
253
  return JSONResponse({"ready": False, "error": WARMUP_ERROR}, status_code=503)
254
 
255
 
@@ -281,6 +261,10 @@ class SummaryReq(BaseModel):
281
 
282
 
283
  class FeedbackReq(BaseModel):
 
 
 
 
284
  user_id: str
285
  rating: str # "helpful" | "not_helpful"
286
  assistant_message_id: Optional[str] = None
@@ -288,11 +272,12 @@ class FeedbackReq(BaseModel):
288
  assistant_text: str
289
  user_text: Optional[str] = ""
290
 
291
- tags: Optional[List[str]] = [] # ✅ NEW: user selected chips/tags
292
-
293
  comment: Optional[str] = ""
294
 
 
 
295
  refs: Optional[List[str]] = []
 
296
  learning_mode: Optional[str] = None
297
  doc_type: Optional[str] = None
298
  timestamp_ms: Optional[int] = None
@@ -332,33 +317,24 @@ def chat(req: ChatReq):
332
  "latency_ms": 0.0,
333
  }
334
 
335
- # ----------------------------
336
- # Latency breakdown marks (ms)
337
- # ----------------------------
338
  t0 = time.time()
339
  marks_ms: Dict[str, float] = {"start": 0.0}
340
 
341
- # language detect
342
  resolved_lang = detect_language(msg, req.language_preference)
343
  marks_ms["language_detect_done"] = (time.time() - t0) * 1000.0
344
 
345
- # weakness update
346
  sess["weaknesses"] = update_weaknesses_from_message(msg, sess["weaknesses"])
347
  marks_ms["weakness_update_done"] = (time.time() - t0) * 1000.0
348
 
349
- # cognitive update
350
  sess["cognitive_state"] = update_cognitive_state_from_message(msg, sess["cognitive_state"])
351
  marks_ms["cognitive_update_done"] = (time.time() - t0) * 1000.0
352
 
353
- # rag retrieve (optional micro-gate for very short messages)
354
  if len(msg) < 20 and ("?" not in msg):
355
  rag_context_text, rag_used_chunks = "", []
356
  else:
357
  rag_context_text, rag_used_chunks = retrieve_relevant_chunks(msg, sess["rag_chunks"])
358
-
359
  marks_ms["rag_retrieve_done"] = (time.time() - t0) * 1000.0
360
 
361
- # llm
362
  try:
363
  answer, new_history = chat_with_clare(
364
  message=msg,
@@ -379,7 +355,6 @@ def chat(req: ChatReq):
379
  marks_ms["llm_done"] = (time.time() - t0) * 1000.0
380
  total_ms = marks_ms["llm_done"]
381
 
382
- # segments (delta)
383
  ordered = [
384
  "start",
385
  "language_detect_done",
@@ -403,12 +378,10 @@ def chat(req: ChatReq):
403
  for c in (rag_used_chunks or [])
404
  ]
405
 
406
- # extra metadata fields
407
  rag_context_chars = len(rag_context_text or "")
408
  rag_used_chunks_count = len(rag_used_chunks or [])
409
  history_len = len(sess["history"])
410
 
411
- # ✅ log chat_turn to LangSmith (optional; async by default)
412
  _log_event_to_langsmith(
413
  {
414
  "experiment_id": EXPERIMENT_ID,
@@ -514,6 +487,14 @@ def api_feedback(req: FeedbackReq):
514
  if rating not in ("helpful", "not_helpful"):
515
  return JSONResponse({"ok": False, "error": "Invalid rating"}, status_code=400)
516
 
 
 
 
 
 
 
 
 
517
  _log_event_to_langsmith(
518
  {
519
  "experiment_id": EXPERIMENT_ID,
@@ -521,16 +502,20 @@ def api_feedback(req: FeedbackReq):
521
  "student_name": student_name,
522
  "event_type": "feedback",
523
  "timestamp": time.time(),
 
524
  "rating": rating,
525
  "assistant_message_id": req.assistant_message_id,
526
- "question": (req.user_text or "").strip(),
527
- "answer": (req.assistant_text or "").strip(),
528
- "tags": req.tags or [], # NEW
529
- "comment": (req.comment or "").strip(),
530
- "refs": req.refs or [],
 
 
 
 
531
  "learning_mode": req.learning_mode,
532
  "doc_type": req.doc_type,
533
- "timestamp_ms": req.timestamp_ms,
534
  }
535
  )
536
 
 
42
  WEB_ASSETS = os.path.join(WEB_DIST, "assets")
43
 
44
  LS_DATASET_NAME = os.getenv("LS_DATASET_NAME", "clare_user_events").strip()
45
+ LS_PROJECT = os.getenv("LANGSMITH_PROJECT", os.getenv("LANGCHAIN_PROJECT", "")).strip()
46
 
47
  EXPERIMENT_ID = os.getenv("CLARE_EXPERIMENT_ID", "RESP_AI_W10").strip()
48
 
 
55
  WARMUP_ERROR: Optional[str] = None
56
  WARMUP_STARTED = False
57
 
 
58
  CLARE_ENABLE_WARMUP = os.getenv("CLARE_ENABLE_WARMUP", "1").strip() == "1"
59
  CLARE_WARMUP_BLOCK_READY = os.getenv("CLARE_WARMUP_BLOCK_READY", "0").strip() == "1"
60
 
 
61
  CLARE_ENABLE_LANGSMITH_LOG = os.getenv("CLARE_ENABLE_LANGSMITH_LOG", "0").strip() == "1"
 
62
  CLARE_LANGSMITH_ASYNC = os.getenv("CLARE_LANGSMITH_ASYNC", "1").strip() == "1"
63
 
64
  # ----------------------------
 
110
  return []
111
 
112
 
 
113
  MODULE10_CHUNKS_CACHE = _preload_module10_chunks()
114
 
115
 
 
129
 
130
 
131
  # ----------------------------
132
+ # Warmup
133
  # ----------------------------
134
  def _do_warmup_once():
 
 
 
 
135
  global WARMUP_DONE, WARMUP_ERROR, WARMUP_STARTED
136
  if WARMUP_STARTED:
137
  return
138
  WARMUP_STARTED = True
139
 
140
  try:
 
141
  from api.config import client
 
 
142
  client.models.list()
 
 
143
  _ = MODULE10_CHUNKS_CACHE
 
144
  WARMUP_DONE = True
145
  WARMUP_ERROR = None
146
  except Exception as e:
 
160
 
161
 
162
  # ----------------------------
163
+ # LangSmith helpers
164
  # ----------------------------
165
  _ls_client = None
166
  if (Client is not None) and CLARE_ENABLE_LANGSMITH_LOG:
 
172
 
173
 
174
  def _log_event_to_langsmith(data: Dict[str, Any]):
 
 
 
 
175
  if _ls_client is None:
176
  return
177
 
 
183
  "student_name": data.get("student_name", ""),
184
  }
185
  outputs = {"answer": data.get("answer", "")}
186
+
187
+ # keep metadata clean and JSON-serializable
188
  metadata = {k: v for k, v in data.items() if k not in ("question", "answer")}
189
 
190
  if LS_PROJECT:
 
206
 
207
 
208
  # ----------------------------
209
+ # Health endpoints
210
  # ----------------------------
211
  @app.get("/health")
212
  def health():
 
213
  return {
214
  "ok": True,
215
  "uptime_s": round(time.time() - APP_START_TS, 3),
 
226
 
227
  @app.get("/ready")
228
  def ready():
 
229
  if not CLARE_ENABLE_WARMUP or not CLARE_WARMUP_BLOCK_READY:
230
  return {"ready": True}
 
231
  if WARMUP_DONE:
232
  return {"ready": True}
 
233
  return JSONResponse({"ready": False, "error": WARMUP_ERROR}, status_code=503)
234
 
235
 
 
261
 
262
 
263
  class FeedbackReq(BaseModel):
264
+ # IMPORTANT: allow extra fields so FE can evolve without breaking backend
265
+ class Config:
266
+ extra = "ignore"
267
+
268
  user_id: str
269
  rating: str # "helpful" | "not_helpful"
270
  assistant_message_id: Optional[str] = None
 
272
  assistant_text: str
273
  user_text: Optional[str] = ""
274
 
 
 
275
  comment: Optional[str] = ""
276
 
277
+ # optional structured fields
278
+ tags: Optional[List[str]] = []
279
  refs: Optional[List[str]] = []
280
+
281
  learning_mode: Optional[str] = None
282
  doc_type: Optional[str] = None
283
  timestamp_ms: Optional[int] = None
 
317
  "latency_ms": 0.0,
318
  }
319
 
 
 
 
320
  t0 = time.time()
321
  marks_ms: Dict[str, float] = {"start": 0.0}
322
 
 
323
  resolved_lang = detect_language(msg, req.language_preference)
324
  marks_ms["language_detect_done"] = (time.time() - t0) * 1000.0
325
 
 
326
  sess["weaknesses"] = update_weaknesses_from_message(msg, sess["weaknesses"])
327
  marks_ms["weakness_update_done"] = (time.time() - t0) * 1000.0
328
 
 
329
  sess["cognitive_state"] = update_cognitive_state_from_message(msg, sess["cognitive_state"])
330
  marks_ms["cognitive_update_done"] = (time.time() - t0) * 1000.0
331
 
 
332
  if len(msg) < 20 and ("?" not in msg):
333
  rag_context_text, rag_used_chunks = "", []
334
  else:
335
  rag_context_text, rag_used_chunks = retrieve_relevant_chunks(msg, sess["rag_chunks"])
 
336
  marks_ms["rag_retrieve_done"] = (time.time() - t0) * 1000.0
337
 
 
338
  try:
339
  answer, new_history = chat_with_clare(
340
  message=msg,
 
355
  marks_ms["llm_done"] = (time.time() - t0) * 1000.0
356
  total_ms = marks_ms["llm_done"]
357
 
 
358
  ordered = [
359
  "start",
360
  "language_detect_done",
 
378
  for c in (rag_used_chunks or [])
379
  ]
380
 
 
381
  rag_context_chars = len(rag_context_text or "")
382
  rag_used_chunks_count = len(rag_used_chunks or [])
383
  history_len = len(sess["history"])
384
 
 
385
  _log_event_to_langsmith(
386
  {
387
  "experiment_id": EXPERIMENT_ID,
 
487
  if rating not in ("helpful", "not_helpful"):
488
  return JSONResponse({"ok": False, "error": "Invalid rating"}, status_code=400)
489
 
490
+ # normalize fields
491
+ assistant_text = (req.assistant_text or "").strip()
492
+ user_text = (req.user_text or "").strip()
493
+ comment = (req.comment or "").strip()
494
+ refs = req.refs or []
495
+ tags = req.tags or []
496
+ timestamp_ms = int(req.timestamp_ms or int(time.time() * 1000))
497
+
498
  _log_event_to_langsmith(
499
  {
500
  "experiment_id": EXPERIMENT_ID,
 
502
  "student_name": student_name,
503
  "event_type": "feedback",
504
  "timestamp": time.time(),
505
+ "timestamp_ms": timestamp_ms,
506
  "rating": rating,
507
  "assistant_message_id": req.assistant_message_id,
508
+
509
+ # Keep the Example readable:
510
+ "question": user_text, # what user asked (optional)
511
+ "answer": assistant_text, # the assistant response being rated
512
+
513
+ # metadata
514
+ "comment": comment,
515
+ "tags": tags,
516
+ "refs": refs,
517
  "learning_mode": req.learning_mode,
518
  "doc_type": req.doc_type,
 
519
  }
520
  )
521