File size: 2,028 Bytes
ef18673
 
 
 
a21a30f
ef18673
 
 
1e799aa
ef18673
4c64fd6
b4f432f
ef18673
 
a21a30f
ef18673
 
4c64fd6
 
 
 
 
a21a30f
4c64fd6
a21a30f
 
4c64fd6
 
1e799aa
 
 
 
 
 
 
ef18673
 
 
1e799aa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b4f432f
 
 
 
 
 
 
4c64fd6
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
"""CPU and llama.cpp serving helpers."""

from __future__ import annotations

import logging
import shutil

from fastapi import FastAPI
from pydantic import BaseModel

from serve.control_plane import build_control_router, get_runtime_access_info


# ASGI application object; the route decorators and router registration below
# all attach to this instance.
app = FastAPI(title="SAGE CPU Server")
# Messages go through the logger named "uvicorn.error"
# (presumably so they show up alongside uvicorn's own server output — confirm).
_LOGGER = logging.getLogger("uvicorn.error")


def _print_startup_banner() -> None:
    """Log the URLs of the browser control UI and its login password.

    Reads ``local_url``, ``public_url`` and ``password`` from the mapping
    returned by ``get_runtime_access_info()``. The public URL line is only
    emitted when a public URL is set.
    """
    info = get_runtime_access_info()

    # Fall back to the default loopback address when no local URL is reported.
    local_base = info["local_url"]
    if not local_base:
        local_base = "http://127.0.0.1:8001"
    local_base = local_base.rstrip("/")
    public_base = info["public_url"]

    # Trailing slashes are stripped before formatting so every logged URL
    # ends with exactly one "/".
    _LOGGER.info("SAGE local URL: %s/", local_base)
    if public_base:
        _LOGGER.info("SAGE public URL: %s/", public_base.rstrip("/"))
    _LOGGER.info("SAGE login password: %s", info["password"])


class ChatRequest(BaseModel):
    """Request schema for the browser chat surface.

    Accepted as the argument of the ``chat`` handler registered on ``/chat``;
    that handler does not read any of the fields.
    """

    # Prompt text sent by the client; required (no default).
    prompt: str
    # Defaults to 64 when omitted. Presumably an upper bound on generated
    # tokens — not consumed anywhere in this module, verify against the GPU server.
    max_new_tokens: int = 64


@app.get("/health")
def health() -> dict[str, object]:
    """Build the health report for the CPU server.

    Returns:
        A dict with ``status`` (always ``"ok"``), ``llama_cpp_available``
        (whether a ``llama-server`` executable is found on ``PATH``) and
        ``chat`` (the result of ``chat_status()``).
    """
    payload: dict[str, object] = {"status": "ok"}
    # shutil.which returns None when the executable cannot be located.
    llama_server_path = shutil.which("llama-server")
    payload["llama_cpp_available"] = llama_server_path is not None
    payload["chat"] = chat_status()
    return payload


def chat_status() -> dict[str, object]:
    """Describe whether browser chat can be used on this server.

    Returns:
        A fresh dict on every call with ``available`` set to ``False`` and a
        ``warning`` message pointing at the server that does support chat.
    """
    notice = "Browser chat is only wired to the PyTorch GPU server in this repo. Use serve.server:app for direct interaction."
    return dict(available=False, warning=notice)


@app.get("/chat/status")
def get_chat_status() -> dict[str, object]:
    """Serve the ``chat_status()`` payload over HTTP at ``/chat/status``."""
    status = chat_status()
    return status


@app.post("/chat")
def chat(_: ChatRequest) -> dict[str, object]:
    """Reject chat requests with an explanatory payload.

    The request body is accepted but ignored. The response always carries
    ``success=False``, a ``detail`` message equal to the chat-status warning,
    and every key of ``chat_status()`` merged in after those two.
    """
    status = chat_status()
    reply: dict[str, object] = {"success": False, "detail": status["warning"]}
    # Merge last so the status keys follow (and would override) the keys above.
    reply.update(status)
    return reply


def _health_action(_: dict[str, object]) -> dict[str, object]:
    """Return the ``health()`` report, ignoring the dict argument.

    Registered under the ``"health_check"`` key passed to
    ``build_control_router``.
    """
    report = health()
    return report


# Mount the control-plane router, handing it the "health_check" action so it
# can invoke this module's health report.
app.include_router(build_control_router({"health_check": _health_action}))


@app.on_event("startup")
def _startup_banner() -> None:
    """Log the access banner when the application's "startup" event fires."""
    # NOTE(review): FastAPI documents ``on_event`` as deprecated in favor of
    # lifespan handlers — confirm against the pinned version before migrating.
    _print_startup_banner()