from __future__ import annotations from typing import Any import gradio as gr from core.deployment import DeploymentPolicy, current_policy from models.local_backend_config import ( LocalBackendConfig, build_llama_server_command, load_local_backend_config, local_backend_summary, save_local_backend_config, ) from models.model_catalog import ModelInfo, model_summary, validate_catalog from models.ollama_service import OllamaService from models.openai_compatible_service import OpenAICompatibleService from models.service_factory import backend_statuses from models.sglang_runner import SGLangConfig, SGLangService, build_sglang_run_plan from ui.progress import CLICK_PROGRESS from ui.server_controls import create_serving_controls def build_status_tab( catalog: dict[str, ModelInfo], policy: DeploymentPolicy | None = None, ) -> None: active_policy = policy or current_policy() gr.Markdown("Model and backend status. Real backend checks will be added after local setup.") gr.Dataframe( headers=[ "config_id", "hf_id", "type", "backend", "parameters_b", "context_length", "thinking", "capabilities", ], value=[ [ model.config_id, model.hf_id, model.type, model.backend, model.parameters_b, model.context_length, model.thinking_mode, ", ".join( sorted( { capability for capabilities in model.backend_capabilities.values() for capability in capabilities } ) ), ] for model in catalog.values() ], label="Configured models", interactive=False, ) gr.JSON(validate_catalog(catalog), label="Validation warnings") gr.Dataframe( headers=["backend", "available", "detail"], value=[ [status.name, status.available, status.detail] for status in backend_statuses(active_policy) ], label="Backend status", interactive=False, ) selected = gr.Dropdown(list(catalog), value=next(iter(catalog)), label="Inspect model") details = gr.JSON(model_summary(catalog[next(iter(catalog))]), label="Details") def inspect(model_id: str) -> dict: return model_summary(catalog[model_id]) selected.change(inspect, selected, details) build_llama_cpp_setup_panel() build_openai_compatible_setup_panel() build_sglang_setup_panel(catalog) build_ollama_setup_panel(catalog) def build_sglang_setup_panel(catalog: dict[str, ModelInfo]) -> None: gr.Markdown("### SGLang local setup") controls = create_sglang_controls(catalog) command = gr.Textbox(label="SGLang start command", interactive=False) summary = gr.JSON(label="SGLang plan/status") def config_from_inputs( url: str, server_host: str, server_port: int | float, tp: int | float, parser: str, ) -> SGLangConfig: return SGLangConfig( base_url=url.strip() or "http://127.0.0.1:30000", host=server_host.strip() or "127.0.0.1", port=int(server_port), tp_size=int(tp), tool_parser=parser.strip() or "minicpm", ) def prepare_sglang( model_id: str, url: str, server_host: str, server_port: int | float, tp: int | float, parser: str, ) -> tuple[str, dict]: config = config_from_inputs(url, server_host, server_port, tp, parser) plan = build_sglang_run_plan(catalog[model_id], config) return " ".join(plan.start_command), plan.to_dict() def check_sglang(url: str) -> dict: status = SGLangService.status(url.strip() or "http://127.0.0.1:30000") return {"backend": status.name, "available": status.available, "detail": status.detail} def stop_sglang(model_id: str, url: str) -> str: service = SGLangService( catalog[model_id], SGLangConfig(base_url=url.strip() or "http://127.0.0.1:30000"), ) return service.stop_server() controls["prepare"].click( prepare_sglang, [ controls["selected"], controls["base_url"], controls["host"], controls["port"], controls["parallel"], controls["tool_parser"], ], [command, summary], show_progress=CLICK_PROGRESS, ) controls["check"].click( check_sglang, controls["base_url"], summary, show_progress=CLICK_PROGRESS, ) controls["stop"].click( stop_sglang, [controls["selected"], controls["base_url"]], command, show_progress=CLICK_PROGRESS, ) def create_sglang_controls(catalog: dict[str, ModelInfo]) -> dict[str, Any]: controls = create_serving_controls(catalog, "SGLang", "http://127.0.0.1:30000", 30000) controls["tool_parser"] = gr.Textbox(label="Tool parser", value="minicpm") with gr.Row(): controls["prepare"] = gr.Button("Prepare SGLang command", variant="primary") controls["check"] = gr.Button("Check SGLang") controls["stop"] = gr.Button("Request SGLang stop") return controls def build_openai_compatible_setup_panel() -> None: gr.Markdown("### LM Studio / OpenAI-compatible local setup") local_config = load_local_backend_config() base_url = gr.Textbox( label="Server URL", value=local_config.openai_compatible_base_url, placeholder="http://127.0.0.1:1234", ) model_name = gr.Textbox( label="Served model name", value=local_config.openai_compatible_model_name, placeholder="Optional; leave blank to use the selected HF model ID", ) prepare = gr.Button("Save OpenAI-compatible config", variant="primary") check = gr.Button("Check server") summary = gr.JSON(local_backend_summary(local_config), label="OpenAI-compatible config") status = gr.JSON(label="OpenAI-compatible status") def save_openai_config(url: str, served_model_name: str) -> dict: current = load_local_backend_config() config = LocalBackendConfig( llama_cpp_server_url=current.llama_cpp_server_url, llama_server_path=current.llama_server_path, openai_compatible_base_url=url.strip() or LocalBackendConfig.openai_compatible_base_url, openai_compatible_model_name=served_model_name.strip(), gguf_path=current.gguf_path, mmproj_path=current.mmproj_path, n_ctx=current.n_ctx, n_gpu_layers=current.n_gpu_layers, ) save_local_backend_config(config) return local_backend_summary(config) def check_openai_server(url: str) -> dict: server_status = OpenAICompatibleService.status( url.strip() or LocalBackendConfig.openai_compatible_base_url ) return { "backend": server_status.name, "available": server_status.available, "detail": server_status.detail, } prepare.click( save_openai_config, [base_url, model_name], summary, show_progress=CLICK_PROGRESS, ) check.click(check_openai_server, base_url, status, show_progress=CLICK_PROGRESS) def build_ollama_setup_panel(catalog: dict[str, ModelInfo]) -> None: gr.Markdown("### Ollama local setup") selected = gr.Dropdown(list(catalog), value=next(iter(catalog)), label="Model config") ollama_name = gr.Textbox( label="Ollama model name", placeholder="Example: minicpm-v or a local Ollama model tag", ) refresh = gr.Button("List local Ollama models") prepare = gr.Button("Prepare pull command", variant="primary") local_models = gr.JSON(label="Local Ollama models") pull_command = gr.Textbox(label="Ollama pull command", interactive=False) def list_models() -> dict: models = OllamaService.list_local_models() return { "models": models, "note": "Empty means Ollama is not running, not installed, or has no local models.", } def prepare_pull(model_id: str, model_name: str) -> str: name = model_name.strip() or catalog[model_id].hf_id return " ".join(OllamaService.pull_command(name)) refresh.click(list_models, outputs=local_models, show_progress=CLICK_PROGRESS) prepare.click( prepare_pull, [selected, ollama_name], pull_command, show_progress=CLICK_PROGRESS, ) def build_llama_cpp_setup_panel() -> None: gr.Markdown("### llama.cpp local setup") local_config = load_local_backend_config() server_url = gr.Textbox( label="llama-server URL", value=local_config.llama_cpp_server_url, ) server_path = gr.Textbox( label="llama-server executable", value=local_config.llama_server_path, placeholder="C:\\llama-b9587-bin-win-cuda-13.3-x64\\llama-server.exe", ) gguf_path = gr.Textbox( label="GGUF model path", value=local_config.gguf_path, placeholder="C:\\models\\MiniCPM5-1B-Q4_K_M.gguf", ) gguf_file = gr.File(label="Pick GGUF model", file_types=[".gguf"], type="filepath") mmproj_path = gr.Textbox( label="mmproj path", value=local_config.mmproj_path, placeholder="Optional vision projector GGUF path", ) mmproj_file = gr.File(label="Pick mmproj", file_types=[".gguf"], type="filepath") with gr.Row(): n_ctx = gr.Number(label="Context length", value=local_config.n_ctx, precision=0) n_gpu_layers = gr.Number( label="GPU layers", value=local_config.n_gpu_layers, precision=0, ) prepare = gr.Button("Prepare local model config", variant="primary") command = gr.Textbox(label="llama-server command", interactive=False) local_summary = gr.JSON(local_backend_summary(local_config), label="Local backend config") def prepare_local_config( url: str, executable_path: str, model_path: str, model_file: str | None, projector_path: str, projector_file: str | None, context_length: int | float, gpu_layers: int | float, ) -> tuple[str, dict]: current = load_local_backend_config() config = LocalBackendConfig( llama_cpp_server_url=url or "http://127.0.0.1:8080", llama_server_path=executable_path, openai_compatible_base_url=current.openai_compatible_base_url, openai_compatible_model_name=current.openai_compatible_model_name, gguf_path=model_file or model_path, mmproj_path=projector_file or projector_path, n_ctx=int(context_length), n_gpu_layers=int(gpu_layers), ) save_local_backend_config(config) built_command = build_llama_server_command(config) return " ".join(built_command), local_backend_summary(config) prepare.click( prepare_local_config, [ server_url, server_path, gguf_path, gguf_file, mmproj_path, mmproj_file, n_ctx, n_gpu_layers, ], [command, local_summary], show_progress=CLICK_PROGRESS, )