| """CPU and llama.cpp serving helpers.""" |
|
|
| from __future__ import annotations |
|
|
| import logging |
| import shutil |
|
|
| from fastapi import FastAPI |
| from pydantic import BaseModel |
|
|
| from serve.control_plane import build_control_router, get_runtime_access_info |
|
|
|
|
# ASGI application object; the route handlers in this module register on it.
app = FastAPI(title="SAGE CPU Server")

# Messages are emitted through the logger named "uvicorn.error".
_LOGGER = logging.getLogger("uvicorn.error")
|
|
|
|
def _print_startup_banner() -> None:
    """Log the URLs and login password for the browser control UI.

    The details come from ``get_runtime_access_info()`` and are written to
    ``_LOGGER`` at INFO level. When the reported local URL is empty the
    fallback ``http://127.0.0.1:8001`` is used; the public URL line is only
    logged when a public URL is set.
    """
    info = get_runtime_access_info()

    # Trailing slashes are stripped because the format string re-adds one.
    local = info["local_url"] or "http://127.0.0.1:8001"
    local = local.rstrip("/")
    public = info["public_url"]

    _LOGGER.info("SAGE local URL: %s/", local)
    if public:
        _LOGGER.info("SAGE public URL: %s/", public.rstrip("/"))
    _LOGGER.info("SAGE login password: %s", info["password"])
|
|
|
|
class ChatRequest(BaseModel):
    """Body schema accepted by the ``POST /chat`` endpoint."""

    # Required: there is no default value.
    prompt: str
    # Optional: falls back to 64 when the client omits it.
    max_new_tokens: int = 64
|
|
|
|
@app.get("/health")
def health() -> dict[str, object]:
    """Report server health, llama.cpp availability and chat readiness.

    Returns:
        A dict with ``status`` fixed to ``"ok"``, ``llama_cpp_available``
        telling whether ``shutil.which`` finds a ``llama-server`` executable,
        and ``chat`` holding the result of ``chat_status()``.
    """
    llama_server_path = shutil.which("llama-server")
    return {
        "status": "ok",
        "llama_cpp_available": llama_server_path is not None,
        "chat": chat_status(),
    }
|
|
|
|
def chat_status() -> dict[str, object]:
    """Describe whether browser chat can be used on the CPU server.

    Returns:
        A freshly built dict whose ``available`` is always ``False`` and whose
        ``warning`` directs the caller to ``serve.server:app`` instead.
    """
    warning = (
        "Browser chat is only wired to the PyTorch GPU server in this repo. "
        "Use serve.server:app for direct interaction."
    )
    return {"available": False, "warning": warning}
|
|
|
|
@app.get("/chat/status")
def get_chat_status() -> dict[str, object]:
    """Serve the ``chat_status()`` payload at ``GET /chat/status``."""
    status = chat_status()
    return status
|
|
|
|
@app.post("/chat")
def chat(_: ChatRequest) -> dict[str, object]:
    """Answer every chat request with an explanatory failure payload.

    The parsed request body is accepted but never read.

    Returns:
        A dict with ``success`` set to ``False``, ``detail`` carrying the
        warning text from ``chat_status()``, followed by every key of that
        same status dict.
    """
    status = chat_status()
    return {"success": False, "detail": status["warning"], **status}
|
|
|
|
def _health_action(_: dict[str, object]) -> dict[str, object]:
    """Adapt ``health()`` to a callable that takes one dict argument.

    The argument is ignored; the result is exactly what ``health()`` returns.
    """
    return health()
|
|
|
|
# Attach the router produced by build_control_router, handing it
# _health_action under the "health_check" key.
app.include_router(
    build_control_router(
        {"health_check": _health_action},
    )
)
|
|
|
|
@app.on_event("startup")
def _startup_banner() -> None:
    """Emit the startup banner when the application's startup event fires."""
    _print_startup_banner()
|
|