Spaces:
Running
Running
Commit ·
2798de4
1
Parent(s): fa2f7b0
fix: add stream error handling + startup diagnostics, bump CACHEBUST
Browse files
- Dockerfile +1 -1
- src/api/main.py +10 -0
Dockerfile
CHANGED
|
@@ -15,7 +15,7 @@ RUN pip install --no-cache-dir -r requirements.txt
|
|
| 15 |
|
| 16 |
# Cache-bust: forces Docker to re-copy source code on every build
|
| 17 |
# This ensures HuggingFace always gets the latest code from git
|
| 18 |
-
ARG CACHEBUST=
|
| 19 |
|
| 20 |
# Copy source code
|
| 21 |
COPY src/ ./src/
|
|
|
|
| 15 |
|
| 16 |
# Cache-bust: forces Docker to re-copy source code on every build
|
| 17 |
# This ensures HuggingFace always gets the latest code from git
|
| 18 |
+
ARG CACHEBUST=20260414_3
|
| 19 |
|
| 20 |
# Copy source code
|
| 21 |
COPY src/ ./src/
|
src/api/main.py
CHANGED
|
@@ -49,6 +49,7 @@ class FeedbackRequest(BaseModel):
|
|
| 49 |
total_time_ms: float
|
| 50 |
from src.rag.pipeline import RAGPipeline, ConversationTurn
|
| 51 |
from src.utils.logger import setup_logger, get_logger
|
|
|
|
| 52 |
|
| 53 |
|
| 54 |
setup_logger()
|
|
@@ -77,6 +78,11 @@ async def lifespan(app: FastAPI):
|
|
| 77 |
# We store it on app.state so all request handlers can access it
|
| 78 |
app.state.rag_pipeline = RAGPipeline()
|
| 79 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 80 |
elapsed = time.time() - start
|
| 81 |
logger.info(f"API ready in {elapsed:.1f}s")
|
| 82 |
|
|
@@ -193,6 +199,10 @@ async def stream_query_papers(
|
|
| 193 |
filter_year_gte = query_input.filter_year_gte,
|
| 194 |
):
|
| 195 |
loop.call_soon_threadsafe(queue.put_nowait, chunk)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 196 |
finally:
|
| 197 |
loop.call_soon_threadsafe(queue.put_nowait, SENTINEL)
|
| 198 |
|
|
|
|
| 49 |
total_time_ms: float
|
| 50 |
from src.rag.pipeline import RAGPipeline, ConversationTurn
|
| 51 |
from src.utils.logger import setup_logger, get_logger
|
| 52 |
+
from config.settings import HF_API_KEY
|
| 53 |
|
| 54 |
|
| 55 |
setup_logger()
|
|
|
|
| 78 |
# We store it on app.state so all request handlers can access it
|
| 79 |
app.state.rag_pipeline = RAGPipeline()
|
| 80 |
|
| 81 |
+
# Log the active model chain for deployment verification
|
| 82 |
+
from src.rag.llm_client import MultiModelClient
|
| 83 |
+
logger.info(f"Model chain: {MultiModelClient.MODEL_CHAIN}")
|
| 84 |
+
logger.info(f"HF_API_KEY configured: {bool(HF_API_KEY)}")
|
| 85 |
+
|
| 86 |
elapsed = time.time() - start
|
| 87 |
logger.info(f"API ready in {elapsed:.1f}s")
|
| 88 |
|
|
|
|
| 199 |
filter_year_gte = query_input.filter_year_gte,
|
| 200 |
):
|
| 201 |
loop.call_soon_threadsafe(queue.put_nowait, chunk)
|
| 202 |
+
except Exception as e:
|
| 203 |
+
logger.error(f"Stream pipeline error: {e}", exc_info=True)
|
| 204 |
+
error_event = f'data: {json.dumps({"error": str(e)})}\n\n'
|
| 205 |
+
loop.call_soon_threadsafe(queue.put_nowait, error_event)
|
| 206 |
finally:
|
| 207 |
loop.call_soon_threadsafe(queue.put_nowait, SENTINEL)
|
| 208 |
|