sadickam committed on
Commit
aa91e86
·
1 Parent(s): eebb865

feat: add environment variables for reranking and timeout settings; enhance logging for retrieval and generation durations

Browse files
Dockerfile CHANGED
@@ -236,6 +236,9 @@ ENV NODE_ENV=production
236
  ENV HF_HOME=/app/.cache
237
  ENV PREWARM_ON_STARTUP=true
238
  ENV PREWARM_TOP_K=1
 
 
 
239
 
240
  # -----------------------------------------------------------------------------
241
  # Install System Dependencies
 
236
  ENV HF_HOME=/app/.cache
237
  ENV PREWARM_ON_STARTUP=true
238
  ENV PREWARM_TOP_K=1
239
+ ENV USE_RERANKER=false
240
+ ENV TOP_K=4
241
+ ENV PROVIDER_TIMEOUT_MS=12000
242
 
243
  # -----------------------------------------------------------------------------
244
  # Install System Dependencies
Dockerfile.backend CHANGED
@@ -98,6 +98,9 @@ ENV PYTHONUNBUFFERED=1
98
  ENV PYTHONPATH=/app/src
99
  ENV PREWARM_ON_STARTUP=true
100
  ENV PREWARM_TOP_K=1
 
 
 
101
 
102
  # -----------------------------------------------------------------------------
103
  # Create Non-Root User
 
98
  ENV PYTHONPATH=/app/src
99
  ENV PREWARM_ON_STARTUP=true
100
  ENV PREWARM_TOP_K=1
101
+ ENV USE_RERANKER=false
102
+ ENV TOP_K=4
103
+ ENV PROVIDER_TIMEOUT_MS=12000
104
 
105
  # -----------------------------------------------------------------------------
106
  # Create Non-Root User
src/rag_chatbot/api/routes/query.py CHANGED
@@ -1151,6 +1151,7 @@ def _create_router() -> APIRouter: # noqa: PLR0915
1151
  # Retrieve context chunks for streaming
1152
  # ---------------------------------------------------------------
1153
  logger.debug("Retrieving context for streaming with top_k=%d", top_k)
 
1154
 
1155
  try:
1156
  retrieval_results = retriever.retrieve(query_text, top_k=top_k)
@@ -1162,7 +1163,9 @@ def _create_router() -> APIRouter: # noqa: PLR0915
1162
  ) from e
1163
 
1164
  logger.info(
1165
- "Retrieved %d context chunks for streaming", len(retrieval_results)
 
 
1166
  )
1167
 
1168
  # ---------------------------------------------------------------
@@ -1263,6 +1266,7 @@ def _create_router() -> APIRouter: # noqa: PLR0915
1263
  # using Reciprocal Rank Fusion for optimal results.
1264
  # =====================================================================
1265
  logger.debug("Retrieving context with top_k=%d", top_k)
 
1266
 
1267
  try:
1268
  retrieval_results = retriever.retrieve(query_text, top_k=top_k)
@@ -1273,7 +1277,11 @@ def _create_router() -> APIRouter: # noqa: PLR0915
1273
  detail=f"Retrieval failed: {e}",
1274
  ) from e
1275
 
1276
- logger.info("Retrieved %d context chunks", len(retrieval_results))
 
 
 
 
1277
 
1278
  # =====================================================================
1279
  # Step 4: Build context strings and LLM request (with history)
@@ -1314,6 +1322,7 @@ def _create_router() -> APIRouter: # noqa: PLR0915
1314
  # =====================================================================
1315
  # Step 6: Generate response with fallback handling
1316
  # =====================================================================
 
1317
  try:
1318
  llm_response: LLMResponse = await registry.generate(llm_request)
1319
  except Exception as e:
@@ -1346,6 +1355,13 @@ def _create_router() -> APIRouter: # noqa: PLR0915
1346
  detail=f"LLM generation failed: {e}",
1347
  ) from e
1348
 
 
 
 
 
 
 
 
1349
  # =====================================================================
1350
  # Step 7: Build and return response
1351
  # =====================================================================
 
1151
  # Retrieve context chunks for streaming
1152
  # ---------------------------------------------------------------
1153
  logger.debug("Retrieving context for streaming with top_k=%d", top_k)
1154
+ retrieval_start = time.perf_counter()
1155
 
1156
  try:
1157
  retrieval_results = retriever.retrieve(query_text, top_k=top_k)
 
1163
  ) from e
1164
 
1165
  logger.info(
1166
+ "Retrieved %d context chunks for streaming in %d ms",
1167
+ len(retrieval_results),
1168
+ int((time.perf_counter() - retrieval_start) * 1000),
1169
  )
1170
 
1171
  # ---------------------------------------------------------------
 
1266
  # using Reciprocal Rank Fusion for optimal results.
1267
  # =====================================================================
1268
  logger.debug("Retrieving context with top_k=%d", top_k)
1269
+ retrieval_start = time.perf_counter()
1270
 
1271
  try:
1272
  retrieval_results = retriever.retrieve(query_text, top_k=top_k)
 
1277
  detail=f"Retrieval failed: {e}",
1278
  ) from e
1279
 
1280
+ logger.info(
1281
+ "Retrieved %d context chunks in %d ms",
1282
+ len(retrieval_results),
1283
+ int((time.perf_counter() - retrieval_start) * 1000),
1284
+ )
1285
 
1286
  # =====================================================================
1287
  # Step 4: Build context strings and LLM request (with history)
 
1322
  # =====================================================================
1323
  # Step 6: Generate response with fallback handling
1324
  # =====================================================================
1325
+ generation_start = time.perf_counter()
1326
  try:
1327
  llm_response: LLMResponse = await registry.generate(llm_request)
1328
  except Exception as e:
 
1355
  detail=f"LLM generation failed: {e}",
1356
  ) from e
1357
 
1358
+ logger.info(
1359
+ "LLM generation completed in %d ms (provider=%s, model=%s)",
1360
+ int((time.perf_counter() - generation_start) * 1000),
1361
+ llm_response.provider,
1362
+ llm_response.model,
1363
+ )
1364
+
1365
  # =====================================================================
1366
  # Step 7: Build and return response
1367
  # =====================================================================