File size: 3,564 Bytes
3aaa9f7
f8692e0
3aaa9f7
f8692e0
3aaa9f7
 
f8692e0
3aaa9f7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f8692e0
3aaa9f7
 
 
 
d90067c
 
f8692e0
 
 
034245d
 
 
 
 
 
 
 
 
f8692e0
034245d
 
 
 
f8692e0
 
034245d
f8692e0
 
 
 
 
3aaa9f7
 
 
 
 
 
 
 
 
 
 
 
 
 
f8692e0
 
3aaa9f7
 
 
 
 
034245d
 
 
 
 
 
 
 
 
 
3aaa9f7
 
 
 
 
 
d90067c
 
 
 
 
 
 
 
 
 
3aaa9f7
 
f8692e0
 
 
 
 
 
3aaa9f7
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
import asyncio
import json
import logging
from typing import AsyncGenerator, List

from fastapi import FastAPI, HTTPException
from fastapi.responses import StreamingResponse
from pydantic import BaseModel, Field

from agent import agent_router
from model import get_model_manager


# Module-level setup: configure logging once at import time, then build the
# ASGI application object that uvicorn serves (see the run command at the
# bottom of this file).
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Single FastAPI instance; routes below attach to it via decorators.
app = FastAPI(
    title="General AI Assistant Backend",
    description="Production-ready FastAPI backend with tools, memory, and CPU-friendly LLM inference.",
    version="1.0.0",
)


class ChatRequest(BaseModel):
    """Request body for POST /chat; pydantic enforces the length bounds."""

    user_id: str = Field(..., min_length=1, max_length=128)  # required, non-empty
    message: str = Field(..., min_length=1, max_length=4000)  # required, capped at 4000 chars
    stream: bool = Field(default=False)  # True selects the SSE streaming path


class ChatResponse(BaseModel):
    """Response body for the non-streaming POST /chat path."""

    response: str  # final text produced by the agent
    route_used: str  # which internal route handled the turn (e.g. "llm")
    tools_used: List[str]  # names of tools invoked during the turn
    stream_enabled: bool  # always False on this path; streaming uses SSE instead


def _next_event_or_none(iterator):
    try:
        return next(iterator)
    except StopIteration:
        return None


async def _sse_stream_from_agent(user_id: str, message: str) -> AsyncGenerator[str, None]:
    """Yield SSE ``data:`` frames from the agent's synchronous event stream.

    Each event dict produced by ``agent_router.stream_respond`` is copied,
    tagged with ``"stream_enabled": True``, and serialized as one SSE frame.
    The blocking iterator is advanced on a worker thread so the event loop
    stays responsive while the model generates.
    """
    events = agent_router.stream_respond(user_id, message)

    while (event := await asyncio.to_thread(_next_event_or_none, events)) is not None:
        frame = dict(event)
        frame["stream_enabled"] = True
        yield f"data: {json.dumps(frame, ensure_ascii=True)}\n\n"
        # Yield control so frames are flushed promptly between events.
        await asyncio.sleep(0)


# NOTE(review): @app.on_event is deprecated in recent FastAPI releases in
# favor of a lifespan handler passed to FastAPI(...); consider migrating
# together with the app construction above.
@app.on_event("startup")
async def startup_event() -> None:
    """Eagerly load the model at startup so the first request is not slow.

    The (presumably blocking) load runs in a worker thread via
    asyncio.to_thread so the event loop is not stalled during startup.
    """
    # Warm load the model so first request latency is lower.
    logger.info("Loading language model...")
    await asyncio.to_thread(get_model_manager().load)
    logger.info("Model loaded.")


@app.get("/health")
async def health() -> dict:
    """Liveness probe: always reports the service as up."""
    return dict(status="ok")


def _coerce_agent_reply(raw) -> tuple:
    """Normalize the agent's reply into ``(text, route_used, tools_used)``.

    ``agent_router.respond`` may return either a dict (with optional
    "response"/"route_used"/"tools_used" keys) or a bare string; both are
    reduced to stripped text, a route label (default "llm"), and a list of
    tool-name strings.
    """
    if isinstance(raw, dict):
        text = str(raw.get("response", "")).strip()
        route_used = str(raw.get("route_used", "llm"))
        tools_used = [str(t) for t in raw.get("tools_used", [])]
    else:
        text = str(raw).strip()
        route_used = "llm"
        tools_used = []
    return text, route_used, tools_used


@app.post("/chat")
async def chat(payload: ChatRequest):
    """Handle one chat turn.

    Returns either a StreamingResponse of SSE frames (when ``payload.stream``
    is true) or a ChatResponse with the full reply.

    Raises:
        HTTPException 400: the message is empty after stripping whitespace.
        HTTPException 500: the model produced an empty reply, or any
            unexpected internal error occurred.
    """
    try:
        message = payload.message.strip()
        if not message:
            raise HTTPException(status_code=400, detail="Message cannot be empty.")

        if payload.stream:
            return StreamingResponse(
                _sse_stream_from_agent(payload.user_id, message),
                media_type="text/event-stream",
                headers={
                    "Cache-Control": "no-cache",
                    "Connection": "keep-alive",
                },
            )

        # The agent call is synchronous/blocking; run it off the event loop.
        raw = await asyncio.to_thread(
            agent_router.respond,
            payload.user_id,
            message,
        )
        text, route_used, tools_used = _coerce_agent_reply(raw)

        if not text:
            raise HTTPException(status_code=500, detail="Model returned an empty response.")

        return ChatResponse(
            response=text,
            route_used=route_used,
            tools_used=tools_used,
            stream_enabled=False,
        )
    except HTTPException:
        # Deliberate HTTP errors pass through unchanged.
        raise
    except Exception as exc:
        # Full traceback goes to the server log only. Do NOT interpolate the
        # exception text into the client-facing detail: it can leak file
        # paths, prompts, or other internal state to untrusted callers.
        logger.exception("Chat endpoint failed")
        raise HTTPException(status_code=500, detail="Internal server error.") from exc


# For local execution and Hugging Face Spaces startup command:
# uvicorn main:app --host 0.0.0.0 --port 7860