SarahXia0405 committed on
Commit 67873f5 · verified · 1 Parent(s): ddd286e

Update api/server.py

Files changed (1)
  1. api/server.py +137 -25
api/server.py CHANGED
@@ -1,6 +1,7 @@
  # api/server.py
  import os
  import time
+ import threading
  from typing import Dict, List, Optional, Any, Tuple

  from fastapi import FastAPI, UploadFile, File, Form, Request
@@ -22,7 +23,7 @@ from api.clare_core import (
      summarize_conversation,
  )

- # ✅ LangSmith
+ # ✅ LangSmith (optional)
  try:
      from langsmith import Client
  except Exception:
@@ -45,6 +46,24 @@ LS_PROJECT = os.getenv("LANGSMITH_PROJECT", os.getenv("LANGCHAIN_PROJECT", "")).

  EXPERIMENT_ID = os.getenv("CLARE_EXPERIMENT_ID", "RESP_AI_W10").strip()

+ # ----------------------------
+ # Health / Warmup (cold start mitigation)
+ # ----------------------------
+ APP_START_TS = time.time()
+
+ WARMUP_DONE = False
+ WARMUP_ERROR: Optional[str] = None
+ WARMUP_STARTED = False
+
+ # warmup knobs
+ CLARE_ENABLE_WARMUP = os.getenv("CLARE_ENABLE_WARMUP", "1").strip() == "1"
+ CLARE_WARMUP_BLOCK_READY = os.getenv("CLARE_WARMUP_BLOCK_READY", "0").strip() == "1"
+
+ # langsmith knobs (important for latency)
+ CLARE_ENABLE_LANGSMITH_LOG = os.getenv("CLARE_ENABLE_LANGSMITH_LOG", "0").strip() == "1"
+ # If true, logging is done in background thread to avoid blocking /api/chat
+ CLARE_LANGSMITH_ASYNC = os.getenv("CLARE_LANGSMITH_ASYNC", "1").strip() == "1"
+
  # ----------------------------
  # App
  # ----------------------------
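The flags above are plain environment variables read once at import time, so they have to be set before api/server.py is imported. A minimal local-launch sketch that toggles them; the uvicorn invocation, port 7860, and import path are assumptions, not part of this commit:

    import os

    # Flip the knobs before the server module is imported (they are read at import time).
    os.environ["CLARE_ENABLE_WARMUP"] = "1"         # start background warmup on startup
    os.environ["CLARE_WARMUP_BLOCK_READY"] = "1"    # /ready returns 503 until warmup finishes
    os.environ["CLARE_ENABLE_LANGSMITH_LOG"] = "0"  # keep LangSmith logging off for latency
    os.environ["CLARE_LANGSMITH_ASYNC"] = "1"       # if logging is on, run it in a background thread

    import uvicorn
    from api.server import app  # assumed import path, matching this repo's layout

    if __name__ == "__main__":
        uvicorn.run(app, host="0.0.0.0", port=7860)  # port is an assumption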
@@ -94,6 +113,7 @@ def _preload_module10_chunks() -> List[Dict[str, Any]]:
      return []


+ # Preload at import time (fast path for requests)
  MODULE10_CHUNKS_CACHE = _preload_module10_chunks()

@@ -113,10 +133,51 @@ def _get_session(user_id: str) -> Dict[str, Any]:


  # ----------------------------
- # LangSmith helpers
+ # Warmup (runs once, background)
+ # ----------------------------
+ def _do_warmup_once():
+     """
+     Warm OpenAI connection + touch module10 chunks cache.
+     Best-effort; should never crash the app.
+     """
+     global WARMUP_DONE, WARMUP_ERROR, WARMUP_STARTED
+     if WARMUP_STARTED:
+         return
+     WARMUP_STARTED = True
+
+     try:
+         # Warm OpenAI network / TLS / keep-alive
+         from api.config import client
+
+         # cheapest call: models.list() (no token usage)
+         client.models.list()
+
+         # Touch module10 cache (already loaded at import; this is just a safety)
+         _ = MODULE10_CHUNKS_CACHE
+
+         WARMUP_DONE = True
+         WARMUP_ERROR = None
+     except Exception as e:
+         WARMUP_DONE = False
+         WARMUP_ERROR = repr(e)
+
+
+ def _start_warmup_background():
+     if not CLARE_ENABLE_WARMUP:
+         return
+     threading.Thread(target=_do_warmup_once, daemon=True).start()
+
+
+ @app.on_event("startup")
+ def _on_startup():
+     _start_warmup_background()
+
+
+ # ----------------------------
+ # LangSmith helpers (optional; default OFF)
  # ----------------------------
  _ls_client = None
- if Client is not None:
+ if (Client is not None) and CLARE_ENABLE_LANGSMITH_LOG:
      try:
          _ls_client = Client()
      except Exception as e:
@@ -127,29 +188,69 @@ if Client is not None:
  def _log_event_to_langsmith(data: Dict[str, Any]):
      """
      Create an Example in LangSmith Dataset.
+     Best-effort and non-blocking by default (async thread).
      """
      if _ls_client is None:
          return
-     try:
-         inputs = {
-             "question": data.get("question", ""),
-             "student_id": data.get("student_id", ""),
-             "student_name": data.get("student_name", ""),
-         }
-         outputs = {"answer": data.get("answer", "")}
-         metadata = {k: v for k, v in data.items() if k not in ("question", "answer")}

-         if LS_PROJECT:
-             metadata.setdefault("langsmith_project", LS_PROJECT)
+     def _do():
+         try:
+             inputs = {
+                 "question": data.get("question", ""),
+                 "student_id": data.get("student_id", ""),
+                 "student_name": data.get("student_name", ""),
+             }
+             outputs = {"answer": data.get("answer", "")}
+             metadata = {k: v for k, v in data.items() if k not in ("question", "answer")}
+
+             if LS_PROJECT:
+                 metadata.setdefault("langsmith_project", LS_PROJECT)
+
+             _ls_client.create_example(
+                 inputs=inputs,
+                 outputs=outputs,
+                 metadata=metadata,
+                 dataset_name=LS_DATASET_NAME,
+             )
+         except Exception as e:
+             print("[langsmith] log failed:", repr(e))

-         _ls_client.create_example(
-             inputs=inputs,
-             outputs=outputs,
-             metadata=metadata,
-             dataset_name=LS_DATASET_NAME,
-         )
-     except Exception as e:
-         print("[langsmith] log failed:", repr(e))
+     if CLARE_LANGSMITH_ASYNC:
+         threading.Thread(target=_do, daemon=True).start()
+     else:
+         _do()
+
+
+ # ----------------------------
+ # Health endpoints (pure lightweight)
+ # ----------------------------
+ @app.get("/health")
+ def health():
+     # do not touch LLM/RAG/disk heavy work here
+     return {
+         "ok": True,
+         "uptime_s": round(time.time() - APP_START_TS, 3),
+         "warmup_enabled": CLARE_ENABLE_WARMUP,
+         "warmup_started": bool(WARMUP_STARTED),
+         "warmup_done": bool(WARMUP_DONE),
+         "warmup_error": WARMUP_ERROR,
+         "ready": bool(WARMUP_DONE) if CLARE_WARMUP_BLOCK_READY else True,
+         "langsmith_enabled": bool(CLARE_ENABLE_LANGSMITH_LOG),
+         "langsmith_async": bool(CLARE_LANGSMITH_ASYNC),
+         "ts": int(time.time()),
+     }
+
+
+ @app.get("/ready")
+ def ready():
+     # readiness probe: optionally block until warmup completes
+     if not CLARE_ENABLE_WARMUP or not CLARE_WARMUP_BLOCK_READY:
+         return {"ready": True}
+
+     if WARMUP_DONE:
+         return {"ready": True}
+
+     return JSONResponse({"ready": False, "error": WARMUP_ERROR}, status_code=503)


  # ----------------------------
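The new /health and /ready handlers return small JSON payloads and never touch the LLM or RAG path, so an external deployment probe can poll them cheaply. A minimal probe sketch; the base URL, port, and timeout values are assumptions:

    import json
    import time
    import urllib.request

    BASE_URL = "http://localhost:7860"  # assumed; use the deployment URL in practice

    def wait_until_ready(timeout_s: float = 60.0, interval_s: float = 2.0) -> bool:
        """Poll /ready until it returns HTTP 200 or the timeout expires."""
        deadline = time.time() + timeout_s
        while time.time() < deadline:
            try:
                with urllib.request.urlopen(f"{BASE_URL}/ready", timeout=5) as resp:
                    if resp.status == 200:
                        return True
            except Exception:
                pass  # 503 while warming up, or server not reachable yet
            time.sleep(interval_s)
        return False

    if wait_until_ready():
        with urllib.request.urlopen(f"{BASE_URL}/health", timeout=5) as resp:
            print(json.dumps(json.load(resp), indent=2))  # uptime, warmup flags, LangSmith knobs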
@@ -247,8 +348,12 @@ def chat(req: ChatReq):
      sess["cognitive_state"] = update_cognitive_state_from_message(msg, sess["cognitive_state"])
      marks_ms["cognitive_update_done"] = (time.time() - t0) * 1000.0

-     # rag retrieve
-     rag_context_text, rag_used_chunks = retrieve_relevant_chunks(msg, sess["rag_chunks"])
+     # rag retrieve (optional micro-gate for very short messages)
+     if len(msg) < 20 and ("?" not in msg):
+         rag_context_text, rag_used_chunks = "", []
+     else:
+         rag_context_text, rag_used_chunks = retrieve_relevant_chunks(msg, sess["rag_chunks"])
+
      marks_ms["rag_retrieve_done"] = (time.time() - t0) * 1000.0

      # llm
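The micro-gate above skips retrieval only when a message is both shorter than 20 characters and contains no question mark; either condition alone still triggers retrieval. A standalone sketch of the same predicate, with an invented helper name for illustration:

    def should_skip_rag(msg: str) -> bool:
        """Mirror of the gate in the hunk above: skip retrieval for very short, non-question messages."""
        return len(msg) < 20 and ("?" not in msg)

    assert should_skip_rag("thanks!") is True    # short, no question mark -> skip retrieval
    assert should_skip_rag("why?") is False      # short but a question -> retrieve
    assert should_skip_rag("please explain module 10 ventilation modes") is False  # long -> retrieve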
@@ -273,7 +378,14 @@ def chat(req: ChatReq):
      total_ms = marks_ms["llm_done"]

      # segments (delta)
-     ordered = ["start", "language_detect_done", "weakness_update_done", "cognitive_update_done", "rag_retrieve_done", "llm_done"]
+     ordered = [
+         "start",
+         "language_detect_done",
+         "weakness_update_done",
+         "cognitive_update_done",
+         "rag_retrieve_done",
+         "llm_done",
+     ]
      segments_ms: Dict[str, float] = {}
      for i in range(1, len(ordered)):
          a = ordered[i - 1]
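Each mark in marks_ms is a cumulative offset from t0, and consecutive names in ordered yield one per-stage delta. A small worked sketch with made-up numbers; the loop body past `a = ordered[i - 1]` is cut off in the hunk, so the subtraction and key naming here are assumptions:

    from typing import Dict

    # Hypothetical cumulative marks (ms since t0), in the same order as `ordered` above.
    marks_ms: Dict[str, float] = {
        "start": 0.0,
        "language_detect_done": 12.5,
        "weakness_update_done": 18.0,
        "cognitive_update_done": 25.0,
        "rag_retrieve_done": 140.0,
        "llm_done": 1890.0,
    }

    ordered = ["start", "language_detect_done", "weakness_update_done",
               "cognitive_update_done", "rag_retrieve_done", "llm_done"]

    segments_ms: Dict[str, float] = {}
    for i in range(1, len(ordered)):
        a = ordered[i - 1]
        b = ordered[i]
        segments_ms[f"{a}->{b}"] = marks_ms[b] - marks_ms[a]

    # e.g. the rag_retrieve_done->llm_done segment dominates: 1890.0 - 140.0 = 1750.0 ms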
@@ -294,7 +406,7 @@ def chat(req: ChatReq):
      rag_used_chunks_count = len(rag_used_chunks or [])
      history_len = len(sess["history"])

-     # ✅ log chat_turn to LangSmith
+     # ✅ log chat_turn to LangSmith (optional; async by default)
      _log_event_to_langsmith(
          {
              "experiment_id": EXPERIMENT_ID,
 