Spaces:
Sleeping
Update api.py
Browse files
api.py
CHANGED
|
@@ -1,3 +1,5 @@
|
|
|
|
|
|
|
|
| 1 |
from fastapi import FastAPI, Query, HTTPException
|
| 2 |
from pydantic import BaseModel
|
| 3 |
from typing import List, Optional, Any
|
|
@@ -60,6 +62,7 @@ class AskResponse(BaseModel):
|
|
| 60 |
rewrite: str
|
| 61 |
retrieved: List[RetrievedChunk]
|
| 62 |
trace_id: str
|
|
|
|
| 63 |
|
| 64 |
|
| 65 |
class SwitchThreadRequest(BaseModel):
|
|
@@ -103,6 +106,9 @@ def api_ask(
|
|
| 103 |
# combine body + query flag (OR)
|
| 104 |
search_flag = bool(payload.search_outside_thread or search_outside_thread)
|
| 105 |
|
|
|
|
|
|
|
|
|
|
| 106 |
# rewrite using thread + entity memory
|
| 107 |
rewrite = rewrite_query(payload.text, session)
|
| 108 |
|
|
@@ -117,6 +123,8 @@ def api_ask(
|
|
| 117 |
# build answer
|
| 118 |
answer, citations = build_answer(payload.text, rewrite, retrieved)
|
| 119 |
|
|
|
|
|
|
|
| 120 |
# log and get trace_id
|
| 121 |
trace_id = log_trace(payload.session_id, payload.text, rewrite, retrieved, answer, citations)
|
| 122 |
|
|
@@ -150,6 +158,7 @@ def api_ask(
|
|
| 150 |
rewrite=rewrite,
|
| 151 |
retrieved=retrieved_out,
|
| 152 |
trace_id=trace_id,
|
|
|
|
| 153 |
)
|
| 154 |
|
| 155 |
|
|
|
|
| 1 |
+
# api.py
|
| 2 |
+
import time
|
| 3 |
from fastapi import FastAPI, Query, HTTPException
|
| 4 |
from pydantic import BaseModel
|
| 5 |
from typing import List, Optional, Any
|
|
|
|
| 62 |
rewrite: str
|
| 63 |
retrieved: List[RetrievedChunk]
|
| 64 |
trace_id: str
|
| 65 |
+
latency_sec: float # ⬅️ latency included in response
|
| 66 |
|
| 67 |
|
| 68 |
class SwitchThreadRequest(BaseModel):
|
|
|
|
| 106 |
# combine body + query flag (OR)
|
| 107 |
search_flag = bool(payload.search_outside_thread or search_outside_thread)
|
| 108 |
|
| 109 |
+
# ---- measure latency for core RAG pipeline ----
|
| 110 |
+
t0 = time.perf_counter()
|
| 111 |
+
|
| 112 |
# rewrite using thread + entity memory
|
| 113 |
rewrite = rewrite_query(payload.text, session)
|
| 114 |
|
|
|
|
| 123 |
# build answer
|
| 124 |
answer, citations = build_answer(payload.text, rewrite, retrieved)
|
| 125 |
|
| 126 |
+
elapsed = time.perf_counter() - t0 # seconds
|
| 127 |
+
|
| 128 |
# log and get trace_id
|
| 129 |
trace_id = log_trace(payload.session_id, payload.text, rewrite, retrieved, answer, citations)
|
| 130 |
|
|
|
|
| 158 |
rewrite=rewrite,
|
| 159 |
retrieved=retrieved_out,
|
| 160 |
trace_id=trace_id,
|
| 161 |
+
latency_sec=elapsed,
|
| 162 |
)
|
| 163 |
|
| 164 |
|