Update app.py
Browse files
app.py
CHANGED
|
@@ -94,49 +94,34 @@ async def chat_endpoint(request: ChatRequest):
|
|
| 94 |
}
|
| 95 |
|
| 96 |
async def event_generator():
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
chunk = event["data"]["chunk"]
|
| 112 |
-
content = chunk.content if hasattr(chunk, "content") else str(chunk)
|
| 113 |
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
yield f"data: {
|
| 118 |
-
|
| 119 |
-
# Update last yield time
|
| 120 |
-
last_yield_time = asyncio.get_event_loop().time()
|
| 121 |
-
|
| 122 |
-
# CRITICAL: Force immediate flush
|
| 123 |
-
await asyncio.sleep(0.01)
|
| 124 |
-
|
| 125 |
-
yield "data: [DONE]\n\n"
|
| 126 |
-
|
| 127 |
-
except Exception as e:
|
| 128 |
-
error_msg = json.dumps({"error": str(e)})
|
| 129 |
-
yield f"data: {error_msg}\n\n"
|
| 130 |
|
| 131 |
return StreamingResponse(
|
| 132 |
event_generator(),
|
| 133 |
media_type="text/event-stream",
|
| 134 |
headers={
|
| 135 |
-
"Cache-Control": "no-cache
|
| 136 |
"Connection": "keep-alive",
|
| 137 |
-
"Content-
|
| 138 |
"X-Accel-Buffering": "no",
|
| 139 |
-
"Transfer-Encoding": "chunked",
|
| 140 |
},
|
| 141 |
)
|
| 142 |
|
|
|
|
| 94 |
}
|
| 95 |
|
| 96 |
async def event_generator():
    """Stream chat-model output as Server-Sent Events (SSE) frames.

    Yields:
        str: First a single SSE comment frame (":" followed by 2048 spaces
        of padding), then one "data: <text>" frame, terminated by a blank
        line, for every non-empty ``on_chat_model_stream`` chunk. Line
        breaks inside the text are sent as the two-character sequence
        backslash + "n" so that each frame stays on one physical line.
    """
    # A line starting with ":" is an SSE comment, so clients ignore it. The
    # 2 KB of padding is intended to push a buffering proxy (e.g. Nginx) into
    # flushing the response immediately.
    padding = " " * 2048
    yield f": {padding}\n\n"

    async for event in rag_app.astream_events(inputs, config=config, version="v1"):
        # Only token-level chat model output is forwarded to the client.
        if event["event"] != "on_chat_model_stream":
            continue

        # NOTE(review): assumes chunk.content is a str — confirm that the
        # configured model never returns list-style content blocks here.
        content = event["data"]["chunk"].content
        if not content:
            # Nothing to send for an empty chunk.
            continue

        # Whitespace-only chunks (a lone space or newline token) are real
        # output and must be forwarded; dropping them glues words together
        # and loses paragraph breaks on the client side.
        #
        # CR and CRLF are SSE line terminators as well, so fold them into LF
        # before escaping; otherwise a stray "\r" would end the data field
        # early.
        data = (
            content.replace("\r\n", "\n")
            .replace("\r", "\n")
            .replace("\n", "\\n")
        )
        yield f"data: {data}\n\n"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 116 |
|
| 117 |
return StreamingResponse(
|
| 118 |
event_generator(),
|
| 119 |
media_type="text/event-stream",
|
| 120 |
headers={
|
| 121 |
+
"Cache-Control": "no-cache",
|
| 122 |
"Connection": "keep-alive",
|
| 123 |
+
"Content-Encoding": "none", # [TRICK 2] Disable compression
|
| 124 |
"X-Accel-Buffering": "no",
|
|
|
|
| 125 |
},
|
| 126 |
)
|
| 127 |
|