Spaces:

sadickam
/

pythermalcomfort_Chat

Running

App Files Community

sadickam committed on Mar 9

Commit

aa91e86

1 Parent(s): eebb865

feat: add environment variables for reranking and timeout settings; enhance logging for retrieval and generation durations

Browse files

Files changed (3) hide show

Dockerfile +3 -0
Dockerfile.backend +3 -0
src/rag_chatbot/api/routes/query.py +18 -2

Dockerfile CHANGED Viewed

@@ -236,6 +236,9 @@ ENV NODE_ENV=production
 ENV HF_HOME=/app/.cache
 ENV PREWARM_ON_STARTUP=true
 ENV PREWARM_TOP_K=1
 # -----------------------------------------------------------------------------
 # Install System Dependencies

 ENV HF_HOME=/app/.cache
 ENV PREWARM_ON_STARTUP=true
 ENV PREWARM_TOP_K=1
+ENV USE_RERANKER=false
+ENV TOP_K=4
+ENV PROVIDER_TIMEOUT_MS=12000
 # -----------------------------------------------------------------------------
 # Install System Dependencies

Dockerfile.backend CHANGED Viewed

@@ -98,6 +98,9 @@ ENV PYTHONUNBUFFERED=1
 ENV PYTHONPATH=/app/src
 ENV PREWARM_ON_STARTUP=true
 ENV PREWARM_TOP_K=1
 # -----------------------------------------------------------------------------
 # Create Non-Root User

 ENV PYTHONPATH=/app/src
 ENV PREWARM_ON_STARTUP=true
 ENV PREWARM_TOP_K=1
+ENV USE_RERANKER=false
+ENV TOP_K=4
+ENV PROVIDER_TIMEOUT_MS=12000
 # -----------------------------------------------------------------------------
 # Create Non-Root User

src/rag_chatbot/api/routes/query.py CHANGED Viewed

@@ -1151,6 +1151,7 @@ def _create_router() -> APIRouter:  # noqa: PLR0915
             # Retrieve context chunks for streaming
             # ---------------------------------------------------------------
             logger.debug("Retrieving context for streaming with top_k=%d", top_k)
             try:
                 retrieval_results = retriever.retrieve(query_text, top_k=top_k)
@@ -1162,7 +1163,9 @@ def _create_router() -> APIRouter:  # noqa: PLR0915
                 ) from e
             logger.info(
-                "Retrieved %d context chunks for streaming", len(retrieval_results)
             )
             # ---------------------------------------------------------------
@@ -1263,6 +1266,7 @@ def _create_router() -> APIRouter:  # noqa: PLR0915
         # using Reciprocal Rank Fusion for optimal results.
         # =====================================================================
         logger.debug("Retrieving context with top_k=%d", top_k)
         try:
             retrieval_results = retriever.retrieve(query_text, top_k=top_k)
@@ -1273,7 +1277,11 @@ def _create_router() -> APIRouter:  # noqa: PLR0915
                 detail=f"Retrieval failed: {e}",
             ) from e
-        logger.info("Retrieved %d context chunks", len(retrieval_results))
         # =====================================================================
         # Step 4: Build context strings and LLM request (with history)
@@ -1314,6 +1322,7 @@ def _create_router() -> APIRouter:  # noqa: PLR0915
         # =====================================================================
         # Step 6: Generate response with fallback handling
         # =====================================================================
         try:
             llm_response: LLMResponse = await registry.generate(llm_request)
         except Exception as e:
@@ -1346,6 +1355,13 @@ def _create_router() -> APIRouter:  # noqa: PLR0915
                 detail=f"LLM generation failed: {e}",
             ) from e
         # =====================================================================
         # Step 7: Build and return response
         # =====================================================================

             # Retrieve context chunks for streaming
             # ---------------------------------------------------------------
             logger.debug("Retrieving context for streaming with top_k=%d", top_k)
+            retrieval_start = time.perf_counter()
             try:
                 retrieval_results = retriever.retrieve(query_text, top_k=top_k)
                 ) from e
             logger.info(
+                "Retrieved %d context chunks for streaming in %d ms",
+                len(retrieval_results),
+                int((time.perf_counter() - retrieval_start) * 1000),
             )
             # ---------------------------------------------------------------
         # using Reciprocal Rank Fusion for optimal results.
         # =====================================================================
         logger.debug("Retrieving context with top_k=%d", top_k)
+        retrieval_start = time.perf_counter()
         try:
             retrieval_results = retriever.retrieve(query_text, top_k=top_k)
                 detail=f"Retrieval failed: {e}",
             ) from e
+        logger.info(
+            "Retrieved %d context chunks in %d ms",
+            len(retrieval_results),
+            int((time.perf_counter() - retrieval_start) * 1000),
+        )
         # =====================================================================
         # Step 4: Build context strings and LLM request (with history)
         # =====================================================================
         # Step 6: Generate response with fallback handling
         # =====================================================================
+        generation_start = time.perf_counter()
         try:
             llm_response: LLMResponse = await registry.generate(llm_request)
         except Exception as e:
                 detail=f"LLM generation failed: {e}",
             ) from e
+        logger.info(
+            "LLM generation completed in %d ms (provider=%s, model=%s)",
+            int((time.perf_counter() - generation_start) * 1000),
+            llm_response.provider,
+            llm_response.model,
+        )
         # =====================================================================
         # Step 7: Build and return response
         # =====================================================================