File size: 6,930 Bytes
f992c25
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
"""Agent chat streaming endpoint (SSE)."""
from __future__ import annotations

import asyncio
import json
import logging
import time
from typing import Any

from fastapi import APIRouter, Request
from fastapi.responses import StreamingResponse
from pydantic import BaseModel

from app.agent.graph import run_agent_stream
from app.storage.repository import (
    create_session_async,
    get_session_async,
    list_messages_async,
    save_message_async,
)

# Router that the chat streaming route in this module is registered on.
router = APIRouter()
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)

# SSE-level keep-alive period, in seconds. While the agent is thinking the
# stream may otherwise carry no bytes, and intermediate proxies (HF Space,
# Cloudflare, nginx) can drop a connection they consider idle. Every
# SSE_KEEPALIVE_INTERVAL seconds the endpoint therefore emits an SSE comment
# line (which clients ignore) together with a `heartbeat` event.
SSE_KEEPALIVE_INTERVAL = 15.0


class ChatRequest(BaseModel):
    # Request body accepted by POST /chat/stream.

    # The user's message; it is persisted as a "user" message and passed to
    # the agent as the query.
    query: str
    # Session to continue. When omitted, or when no such session exists,
    # the endpoint creates a new session and reports its id to the client.
    session_id: str | None = None
    # Owner recorded on a newly created session.
    user_id: str = "default"


def _sse(event: str, data: dict[str, Any]) -> str:
    return f"event: {event}\ndata: {json.dumps(data, ensure_ascii=False)}\n\n"


def _sse_keepalive() -> str:
    """SSE comment line — clients ignore it but the connection stays alive."""
    return ": keep-alive\n\n"


@router.post("/chat/stream")
async def chat_stream(body: ChatRequest, request: Request) -> StreamingResponse:
    """Stream agent progress as Server-Sent Events.

    The response body is produced by an async generator that:

    1. Resolves the session (creating one and emitting a ``session`` event
       when ``body.session_id`` is missing or unknown).
    2. Persists the user message and loads the session history.
    3. Relays every event produced by ``run_agent_stream`` as an SSE frame,
       interleaved with keep-alive frames every ``SSE_KEEPALIVE_INTERVAL``
       seconds, while polling for client disconnects.
    4. Persists the assistant reply once a ``message_final`` event was seen.

    Args:
        body: Parsed request payload (query, optional session id, user id).
        request: Incoming request, used only to detect client disconnects.

    Returns:
        A ``text/event-stream`` response with buffering disabled.
    """

    async def event_gen():
        # How often the loop wakes up just to re-check for a disconnect.
        disconnect_poll_s = 1.0
        # Title for a newly created session: first 30 chars of the query,
        # or a fixed placeholder when the query is empty.
        title = body.query[:30] or "新对话"

        session_id = body.session_id
        if not session_id or not await get_session_async(session_id):
            session_id = await create_session_async(title=title, user_id=body.user_id)
            yield _sse("session", {"session_id": session_id, "title": title})

        # Persist user message
        await save_message_async(session_id=session_id, role="user", content=body.query)

        # Load history, excluding the message we just stored (the agent gets
        # it separately as `user_query`).
        # NOTE(review): this also drops any *earlier* user message whose text
        # equals the current query — confirm that is intended.
        history = await list_messages_async(session_id)
        history = [
            m for m in history
            if not (m["role"] == "user" and m["content"] == body.query)
        ]

        yield _sse("ping", {"ts": 0})

        final_text = ""
        tool_calls_log: list[dict[str, Any]] = []
        agent_iter = run_agent_stream(
            user_query=body.query, history=history, session_id=session_id
        )

        stop = asyncio.Event()
        # Keep-alive "tick" markers, ferried from the ticker task.
        ticker_q: asyncio.Queue[str] = asyncio.Queue()
        # Bridge: agent events (dicts), an Exception on failure, or None as
        # the end-of-stream sentinel. Bounded so a slow client back-pressures
        # the agent instead of buffering without limit.
        agent_q: asyncio.Queue[Any] = asyncio.Queue(maxsize=64)

        async def _ticker() -> None:
            """Enqueue a tick every SSE_KEEPALIVE_INTERVAL seconds until stopped."""
            while not stop.is_set():
                await asyncio.sleep(SSE_KEEPALIVE_INTERVAL)
                if stop.is_set():
                    return
                await ticker_q.put("tick")

        async def _pump_agent() -> None:
            """Drain the agent stream into `agent_q`."""
            try:
                async for ev in agent_iter:
                    await agent_q.put(ev)
            except Exception as exc:  # noqa: BLE001
                # The consumer re-raises this and stops reading, so no
                # sentinel is needed after it.
                await agent_q.put(exc)
            else:
                await agent_q.put(None)  # sentinel: agent done
            finally:
                # Deliberately NO queue.put() here: when this task is
                # cancelled while the bounded queue is full, a put would
                # block forever and deadlock the endpoint's cleanup.
                # If we were cancelled while the agent generator was
                # suspended at a `yield`, close it explicitly so its own
                # cleanup runs now rather than at garbage collection.
                aclose = getattr(agent_iter, "aclose", None)
                if aclose is not None:
                    await aclose()

        # Waiter tasks are kept across loop iterations and only replaced once
        # their result has been consumed; this avoids creating and cancelling
        # tasks on every wake-up and guarantees a dequeued item is never lost.
        queue_wait: asyncio.Task[Any] | None = None
        tick_wait: asyncio.Task[Any] | None = None
        ticker = asyncio.create_task(_ticker())
        pump = asyncio.create_task(_pump_agent())

        try:
            while True:
                # If client disconnected, stop
                if await request.is_disconnected():
                    logger.info("client disconnected, aborting stream")
                    break

                if queue_wait is None:
                    queue_wait = asyncio.create_task(agent_q.get())
                if tick_wait is None:
                    tick_wait = asyncio.create_task(ticker_q.get())

                # Wake on an agent event, a keep-alive tick, or after a short
                # timeout so the disconnect check above runs periodically.
                done, _pending = await asyncio.wait(
                    {queue_wait, tick_wait},
                    timeout=disconnect_poll_s,
                    return_when=asyncio.FIRST_COMPLETED,
                )

                if queue_wait in done:
                    ev = queue_wait.result()
                    queue_wait = None
                    if ev is None:
                        # Agent finished
                        break
                    if isinstance(ev, Exception):
                        raise ev
                    event_name = ev.get("event", "")
                    event_data = ev.get("data", {})
                    yield _sse(event_name, event_data)
                    if event_name == "tool_result":
                        tool_calls_log.append(event_data)
                    if event_name == "message_final":
                        final_text = event_data.get("content", "")
                elif tick_wait in done:
                    # A tick that completed together with an agent event is
                    # not consumed above; it stays done and is handled on the
                    # next iteration.
                    tick_wait.result()
                    tick_wait = None
                    # Keep-alive tick: emit a no-op SSE comment AND a
                    # proper `event: heartbeat` so the frontend's
                    # heartbeat handler fires (it adds a "💭 思考中…"
                    # placeholder, which is critical when the LLM is
                    # taking 20-30s and the synthesizer is idle waiting
                    # for the first chunk).
                    yield _sse_keepalive()
                    yield _sse(
                        "heartbeat",
                        {
                            "ts": time.time(),
                            "in_think": True,
                            "pending_chars": 0,
                            "source": "endpoint_keepalive",
                        },
                    )
                # else: timeout — just a disconnect-check wake-up; loop again
        except Exception as exc:  # noqa: BLE001
            logger.exception("stream failed")
            yield _sse("error", {"message": str(exc)})
        finally:
            stop.set()
            background = [
                t for t in (ticker, pump, queue_wait, tick_wait) if t is not None
            ]
            for t in background:
                t.cancel()
            # Best-effort teardown: collect each task's outcome (including
            # its own cancellation) so nothing is left pending or unretrieved.
            await asyncio.gather(*background, return_exceptions=True)

        if final_text:
            await save_message_async(
                session_id=session_id,
                role="assistant",
                content=final_text,
                tool_calls=tool_calls_log or None,
                thinking={"trace": tool_calls_log},
            )

    return StreamingResponse(
        event_gen(),
        media_type="text/event-stream",
        headers={
            "Cache-Control": "no-cache",
            "X-Accel-Buffering": "no",
            "Connection": "keep-alive",
        },
    )