raviix46 committed on
Commit
aba4ae4
·
verified ·
1 Parent(s): c2756e4

Update api.py

Browse files
Files changed (1) hide show
  1. api.py +9 -0
api.py CHANGED
@@ -1,3 +1,5 @@
 
 
1
  from fastapi import FastAPI, Query, HTTPException
2
  from pydantic import BaseModel
3
  from typing import List, Optional, Any
@@ -60,6 +62,7 @@ class AskResponse(BaseModel):
60
  rewrite: str
61
  retrieved: List[RetrievedChunk]
62
  trace_id: str
 
63
 
64
 
65
  class SwitchThreadRequest(BaseModel):
@@ -103,6 +106,9 @@ def api_ask(
103
  # combine body + query flag (OR)
104
  search_flag = bool(payload.search_outside_thread or search_outside_thread)
105
 
 
 
 
106
  # rewrite using thread + entity memory
107
  rewrite = rewrite_query(payload.text, session)
108
 
@@ -117,6 +123,8 @@ def api_ask(
117
  # build answer
118
  answer, citations = build_answer(payload.text, rewrite, retrieved)
119
 
 
 
120
  # log and get trace_id
121
  trace_id = log_trace(payload.session_id, payload.text, rewrite, retrieved, answer, citations)
122
 
@@ -150,6 +158,7 @@ def api_ask(
150
  rewrite=rewrite,
151
  retrieved=retrieved_out,
152
  trace_id=trace_id,
 
153
  )
154
 
155
 
 
1
+ # api.py
2
+ import time
3
  from fastapi import FastAPI, Query, HTTPException
4
  from pydantic import BaseModel
5
  from typing import List, Optional, Any
 
62
  rewrite: str
63
  retrieved: List[RetrievedChunk]
64
  trace_id: str
65
+ latency_sec: float # ⬅️ latency included in response
66
 
67
 
68
  class SwitchThreadRequest(BaseModel):
 
106
  # combine body + query flag (OR)
107
  search_flag = bool(payload.search_outside_thread or search_outside_thread)
108
 
109
+ # ---- measure latency for core RAG pipeline ----
110
+ t0 = time.perf_counter()
111
+
112
  # rewrite using thread + entity memory
113
  rewrite = rewrite_query(payload.text, session)
114
 
 
123
  # build answer
124
  answer, citations = build_answer(payload.text, rewrite, retrieved)
125
 
126
+ elapsed = time.perf_counter() - t0 # seconds
127
+
128
  # log and get trace_id
129
  trace_id = log_trace(payload.session_id, payload.text, rewrite, retrieved, answer, citations)
130
 
 
158
  rewrite=rewrite,
159
  retrieved=retrieved_out,
160
  trace_id=trace_id,
161
+ latency_sec=elapsed,
162
  )
163
 
164