Spaces:
Running
Running
feat: add environment variables for reranking and timeout settings; enhance logging for retrieval and generation durations
Browse files
- Dockerfile +3 -0
- Dockerfile.backend +3 -0
- src/rag_chatbot/api/routes/query.py +18 -2
Dockerfile
CHANGED
|
@@ -236,6 +236,9 @@ ENV NODE_ENV=production
|
|
| 236 |
ENV HF_HOME=/app/.cache
|
| 237 |
ENV PREWARM_ON_STARTUP=true
|
| 238 |
ENV PREWARM_TOP_K=1
|
|
|
|
|
|
|
|
|
|
| 239 |
|
| 240 |
# -----------------------------------------------------------------------------
|
| 241 |
# Install System Dependencies
|
|
|
|
| 236 |
ENV HF_HOME=/app/.cache
|
| 237 |
ENV PREWARM_ON_STARTUP=true
|
| 238 |
ENV PREWARM_TOP_K=1
|
| 239 |
+
ENV USE_RERANKER=false
|
| 240 |
+
ENV TOP_K=4
|
| 241 |
+
ENV PROVIDER_TIMEOUT_MS=12000
|
| 242 |
|
| 243 |
# -----------------------------------------------------------------------------
|
| 244 |
# Install System Dependencies
|
Dockerfile.backend
CHANGED
|
@@ -98,6 +98,9 @@ ENV PYTHONUNBUFFERED=1
|
|
| 98 |
ENV PYTHONPATH=/app/src
|
| 99 |
ENV PREWARM_ON_STARTUP=true
|
| 100 |
ENV PREWARM_TOP_K=1
|
|
|
|
|
|
|
|
|
|
| 101 |
|
| 102 |
# -----------------------------------------------------------------------------
|
| 103 |
# Create Non-Root User
|
|
|
|
| 98 |
ENV PYTHONPATH=/app/src
|
| 99 |
ENV PREWARM_ON_STARTUP=true
|
| 100 |
ENV PREWARM_TOP_K=1
|
| 101 |
+
ENV USE_RERANKER=false
|
| 102 |
+
ENV TOP_K=4
|
| 103 |
+
ENV PROVIDER_TIMEOUT_MS=12000
|
| 104 |
|
| 105 |
# -----------------------------------------------------------------------------
|
| 106 |
# Create Non-Root User
|
src/rag_chatbot/api/routes/query.py
CHANGED
|
@@ -1151,6 +1151,7 @@ def _create_router() -> APIRouter: # noqa: PLR0915
|
|
| 1151 |
# Retrieve context chunks for streaming
|
| 1152 |
# ---------------------------------------------------------------
|
| 1153 |
logger.debug("Retrieving context for streaming with top_k=%d", top_k)
|
|
|
|
| 1154 |
|
| 1155 |
try:
|
| 1156 |
retrieval_results = retriever.retrieve(query_text, top_k=top_k)
|
|
@@ -1162,7 +1163,9 @@ def _create_router() -> APIRouter: # noqa: PLR0915
|
|
| 1162 |
) from e
|
| 1163 |
|
| 1164 |
logger.info(
|
| 1165 |
-
"Retrieved %d context chunks for streaming",
|
|
|
|
|
|
|
| 1166 |
)
|
| 1167 |
|
| 1168 |
# ---------------------------------------------------------------
|
|
@@ -1263,6 +1266,7 @@ def _create_router() -> APIRouter: # noqa: PLR0915
|
|
| 1263 |
# using Reciprocal Rank Fusion for optimal results.
|
| 1264 |
# =====================================================================
|
| 1265 |
logger.debug("Retrieving context with top_k=%d", top_k)
|
|
|
|
| 1266 |
|
| 1267 |
try:
|
| 1268 |
retrieval_results = retriever.retrieve(query_text, top_k=top_k)
|
|
@@ -1273,7 +1277,11 @@ def _create_router() -> APIRouter: # noqa: PLR0915
|
|
| 1273 |
detail=f"Retrieval failed: {e}",
|
| 1274 |
) from e
|
| 1275 |
|
| 1276 |
-
logger.info(
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1277 |
|
| 1278 |
# =====================================================================
|
| 1279 |
# Step 4: Build context strings and LLM request (with history)
|
|
@@ -1314,6 +1322,7 @@ def _create_router() -> APIRouter: # noqa: PLR0915
|
|
| 1314 |
# =====================================================================
|
| 1315 |
# Step 6: Generate response with fallback handling
|
| 1316 |
# =====================================================================
|
|
|
|
| 1317 |
try:
|
| 1318 |
llm_response: LLMResponse = await registry.generate(llm_request)
|
| 1319 |
except Exception as e:
|
|
@@ -1346,6 +1355,13 @@ def _create_router() -> APIRouter: # noqa: PLR0915
|
|
| 1346 |
detail=f"LLM generation failed: {e}",
|
| 1347 |
) from e
|
| 1348 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1349 |
# =====================================================================
|
| 1350 |
# Step 7: Build and return response
|
| 1351 |
# =====================================================================
|
|
|
|
| 1151 |
# Retrieve context chunks for streaming
|
| 1152 |
# ---------------------------------------------------------------
|
| 1153 |
logger.debug("Retrieving context for streaming with top_k=%d", top_k)
|
| 1154 |
+
retrieval_start = time.perf_counter()
|
| 1155 |
|
| 1156 |
try:
|
| 1157 |
retrieval_results = retriever.retrieve(query_text, top_k=top_k)
|
|
|
|
| 1163 |
) from e
|
| 1164 |
|
| 1165 |
logger.info(
|
| 1166 |
+
"Retrieved %d context chunks for streaming in %d ms",
|
| 1167 |
+
len(retrieval_results),
|
| 1168 |
+
int((time.perf_counter() - retrieval_start) * 1000),
|
| 1169 |
)
|
| 1170 |
|
| 1171 |
# ---------------------------------------------------------------
|
|
|
|
| 1266 |
# using Reciprocal Rank Fusion for optimal results.
|
| 1267 |
# =====================================================================
|
| 1268 |
logger.debug("Retrieving context with top_k=%d", top_k)
|
| 1269 |
+
retrieval_start = time.perf_counter()
|
| 1270 |
|
| 1271 |
try:
|
| 1272 |
retrieval_results = retriever.retrieve(query_text, top_k=top_k)
|
|
|
|
| 1277 |
detail=f"Retrieval failed: {e}",
|
| 1278 |
) from e
|
| 1279 |
|
| 1280 |
+
logger.info(
|
| 1281 |
+
"Retrieved %d context chunks in %d ms",
|
| 1282 |
+
len(retrieval_results),
|
| 1283 |
+
int((time.perf_counter() - retrieval_start) * 1000),
|
| 1284 |
+
)
|
| 1285 |
|
| 1286 |
# =====================================================================
|
| 1287 |
# Step 4: Build context strings and LLM request (with history)
|
|
|
|
| 1322 |
# =====================================================================
|
| 1323 |
# Step 6: Generate response with fallback handling
|
| 1324 |
# =====================================================================
|
| 1325 |
+
generation_start = time.perf_counter()
|
| 1326 |
try:
|
| 1327 |
llm_response: LLMResponse = await registry.generate(llm_request)
|
| 1328 |
except Exception as e:
|
|
|
|
| 1355 |
detail=f"LLM generation failed: {e}",
|
| 1356 |
) from e
|
| 1357 |
|
| 1358 |
+
logger.info(
|
| 1359 |
+
"LLM generation completed in %d ms (provider=%s, model=%s)",
|
| 1360 |
+
int((time.perf_counter() - generation_start) * 1000),
|
| 1361 |
+
llm_response.provider,
|
| 1362 |
+
llm_response.model,
|
| 1363 |
+
)
|
| 1364 |
+
|
| 1365 |
# =====================================================================
|
| 1366 |
# Step 7: Build and return response
|
| 1367 |
# =====================================================================
|