# File-viewer header: Spaces — Running; file size: 6,930 bytes; revision f992c25.
"""Agent chat streaming endpoint (SSE)."""
from __future__ import annotations
import asyncio
import json
import logging
import time
from typing import Any
from fastapi import APIRouter, Request
from fastapi.responses import StreamingResponse
from pydantic import BaseModel
from app.agent.graph import run_agent_stream
from app.storage.repository import (
create_session_async,
get_session_async,
list_messages_async,
save_message_async,
)
# Router on which this module's endpoints are registered (see the
# ``@router.post`` decorator below).
router = APIRouter()
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# SSE-level keep-alive: number of seconds the background ticker sleeps
# between keep-alive ticks. The intent is that intermediate proxies
# (HF Space, Cloudflare, nginx) don't kill an idle connection while the
# agent is thinking. The client ignores SSE comments.
SSE_KEEPALIVE_INTERVAL: float = 15.0
class ChatRequest(BaseModel):
    """Request body accepted by the streaming chat endpoint."""

    # Text of the user's question; also used (truncated to 30 characters)
    # as the title when a new session is created.
    query: str
    # Session to continue. When omitted, or when the session lookup finds
    # nothing, the endpoint creates a fresh session.
    session_id: str | None = None
    # Owner recorded on a newly created session.
    user_id: str = "default"
def _sse(event: str, data: dict[str, Any]) -> str:
return f"event: {event}\ndata: {json.dumps(data, ensure_ascii=False)}\n\n"
def _sse_keepalive() -> str:
"""SSE comment line — clients ignore it but the connection stays alive."""
return ": keep-alive\n\n"
@router.post("/chat/stream")
async def chat_stream(body: ChatRequest, request: Request) -> StreamingResponse:
    """Stream agent progress as Server-Sent Events.

    The response body is produced by an async generator that:

    1. resolves the chat session (creating one when needed) and persists
       the user's message;
    2. forwards every event yielded by ``run_agent_stream`` as an SSE frame;
    3. emits a keep-alive comment plus a ``heartbeat`` event every
       ``SSE_KEEPALIVE_INTERVAL`` seconds;
    4. stops when the agent finishes, the client disconnects, or an error
       occurs (reported to the client as an ``error`` event);
    5. persists the assistant's final message, if one was produced.

    Args:
        body: Chat request carrying the query, optional session id and user id.
        request: Incoming request, polled to detect client disconnects.

    Returns:
        A ``text/event-stream`` response with caching and proxy buffering
        disabled via response headers.
    """

    async def event_gen():
        session_id = body.session_id
        if not session_id or not await get_session_async(session_id):
            # Missing or unknown session: create one titled after the query
            # and tell the client which session it is now talking to.
            title = body.query[:30] or "新对话"
            session_id = await create_session_async(
                title=title,
                user_id=body.user_id,
            )
            yield _sse("session", {"session_id": session_id, "title": title})

        # Load the prior history *before* persisting the new user message.
        # Filtering the just-saved message out afterwards by role + content
        # would also drop earlier turns in which the user asked the exact
        # same question.
        history = await list_messages_async(session_id)
        await save_message_async(session_id=session_id, role="user", content=body.query)

        yield _sse("ping", {"ts": 0})

        # Keep-alive ticker: a background task drops a marker into
        # ``ticker_q`` every SSE_KEEPALIVE_INTERVAL seconds; the consumer
        # loop below turns each marker into a keep-alive frame.
        stop = asyncio.Event()
        ticker_q: asyncio.Queue[str] = asyncio.Queue()

        async def _ticker() -> None:
            try:
                while not stop.is_set():
                    await asyncio.sleep(SSE_KEEPALIVE_INTERVAL)
                    if stop.is_set():
                        return
                    await ticker_q.put("tick")
            except asyncio.CancelledError:
                return

        ticker = asyncio.create_task(_ticker())

        final_text = ""
        tool_calls_log: list[dict[str, Any]] = []

        agent_iter = run_agent_stream(
            user_query=body.query, history=history, session_id=session_id
        )

        # Bridge: a background task copies the agent's events into a bounded
        # queue so the consumer can wait on agent events and ticks together.
        agent_q: asyncio.Queue[Any] = asyncio.Queue(maxsize=64)

        async def _pump_agent() -> None:
            try:
                async for ev in agent_iter:
                    await agent_q.put(ev)
            except Exception as exc:  # noqa: BLE001
                # Hand the failure to the consumer, which re-raises it.
                await agent_q.put(exc)
            # Sentinel: agent done. Deliberately NOT in a ``finally``: on
            # cancellation nobody is reading the queue any more, and a
            # blocking put on a full queue would never complete, hanging
            # the cleanup that awaits this task.
            await agent_q.put(None)

        pump = asyncio.create_task(_pump_agent())

        try:
            while True:
                # If client disconnected, stop
                if await request.is_disconnected():
                    logger.info("client disconnected, aborting stream")
                    break

                # Wait for an agent event or a keep-alive tick, but never
                # longer than one second so the disconnect check above is
                # re-run periodically.
                queue_wait: asyncio.Task[Any] = asyncio.create_task(agent_q.get())
                tick_wait: asyncio.Task[Any] = asyncio.create_task(ticker_q.get())
                waiters = {queue_wait, tick_wait}
                try:
                    done, _pending = await asyncio.wait(
                        waiters,
                        timeout=1.0,
                        return_when=asyncio.FIRST_COMPLETED,
                    )
                finally:
                    # Cancel whatever is still pending, including when this
                    # generator is cancelled or closed mid-wait. Cancelling
                    # an already finished task is a no-op.
                    for waiter in waiters:
                        waiter.cancel()

                if queue_wait in done:
                    ev = queue_wait.result()
                    if ev is None:
                        # Agent finished
                        break
                    if isinstance(ev, Exception):
                        raise ev
                    event_name = ev.get("event", "")
                    event_data = ev.get("data", {})
                    yield _sse(event_name, event_data)
                    if event_name == "tool_result":
                        tool_calls_log.append(event_data)
                    if event_name == "message_final":
                        final_text = event_data.get("content", "")
                elif tick_wait in done:
                    # Keep-alive tick: emit a no-op SSE comment AND a proper
                    # `event: heartbeat`. Per the original author's note the
                    # frontend's heartbeat handler shows a "thinking…"
                    # placeholder, which matters when the LLM takes 20-30s
                    # before producing its first chunk.
                    yield _sse_keepalive()
                    yield _sse(
                        "heartbeat",
                        {
                            "ts": time.time(),
                            "in_think": True,
                            "pending_chars": 0,
                            "source": "endpoint_keepalive",
                        },
                    )
                # else: the one-second window elapsed; loop to re-check
                # for a client disconnect.
        except Exception as exc:  # noqa: BLE001
            logger.exception("stream failed")
            yield _sse("error", {"message": str(exc)})
        finally:
            stop.set()
            for task in (ticker, pump):
                task.cancel()
                try:
                    await task
                except (asyncio.CancelledError, Exception):  # noqa: BLE001
                    # Best-effort teardown: the outcome of the background
                    # tasks no longer matters once the stream is ending.
                    pass

        if final_text:
            await save_message_async(
                session_id=session_id,
                role="assistant",
                content=final_text,
                tool_calls=tool_calls_log or None,
                thinking={"trace": tool_calls_log},
            )

    return StreamingResponse(
        event_gen(),
        media_type="text/event-stream",
        headers={
            "Cache-Control": "no-cache",
            "X-Accel-Buffering": "no",
            "Connection": "keep-alive",
        },
    )
|