Spaces:

build-small-hackathon
/

workbench

Running on Zero

GitHub Actions

Initial ZeroGPU deployment with spaces shim

7f9dfed 23 days ago

4.58 kB

	from __future__ import annotations

	import importlib.util
	import shutil
	from collections.abc import Callable
	from dataclasses import asdict, dataclass
	from typing import Any

	import requests

	from models.base import BackendStatus
	from models.model_catalog import ModelInfo
	from models.server_helpers import host_port_args, request_openai_chat_text


	@dataclass(frozen=True)
	class SGLangConfig:
	    """Immutable settings for reaching and launching a local SGLang server.

	    Every field has a default, so ``SGLangConfig()`` describes a single-GPU
	    server on the loopback interface. Instances are frozen: assigning to a
	    field after construction raises.

	    NOTE(review): ``base_url`` is stored independently of ``host``/``port``;
	    nothing in this class keeps them in sync, so override them together.
	    """

	    # Root URL used for the health, chat and shutdown endpoints.
	    base_url: str = "http://127.0.0.1:30000"

	    # Bind address and port handed to the server launch command.
	    host: str = "127.0.0.1"
	    port: int = 30000

	    # Value emitted after ``--tp-size`` in the launch command.
	    tp_size: int = 1

	    # Value emitted after ``--tool-call-parser`` in the launch command.
	    tool_parser: str = "minicpm"

	    # Generation settings forwarded with each chat request.
	    temperature: float = 0.7
	    max_tokens: int = 512

	    # Timeout forwarded with each chat request.
	    timeout_seconds: float = 120


	@dataclass(frozen=True)
	class SGLangRunPlan:
	    """Frozen description of how to start, probe, use and stop an SGLang server.

	    The plan is data only; constructing it runs nothing.
	    """

	    # Argument vector for launching the server process.
	    start_command: list[str]

	    # Endpoints for shutdown, health probing and chat completions.
	    stop_url: str
	    health_url: str
	    chat_url: str

	    # Flags describing startup behaviour, as set by whoever builds the plan.
	    startup_downloads: bool
	    auto_model_load: bool

	    # Human-readable remarks shown alongside the plan.
	    notes: list[str]

	    def to_dict(self) -> dict[str, Any]:
	        """Return the plan as a plain dictionary keyed by field name.

	        ``dataclasses.asdict`` copies list fields, so mutating the result
	        does not affect this instance.
	        """
	        as_mapping: dict[str, Any] = asdict(self)
	        return as_mapping


	class SGLangService:
	    """SGLang OpenAI-compatible client and local server command planner.

	    Bundles one model with an ``SGLangConfig`` and exposes a health probe
	    (``status``), a launch plan (``run_plan``), a chat call (``chat``) and a
	    shutdown request (``stop_server``). The HTTP callables are injectable so
	    the service can be exercised without a running server.
	    """

	    def __init__(
	        self,
	        model: ModelInfo,
	        config: SGLangConfig | None = None,
	        get_func: Callable[..., requests.Response] = requests.get,
	        post_func: Callable[..., requests.Response] = requests.post,
	    ) -> None:
	        """Store the model, configuration and HTTP callables.

	        Args:
	            model: Model whose ``hf_id`` is sent with chat requests.
	            config: Server settings; a default ``SGLangConfig()`` is used
	                when omitted (or when a falsy value is passed).
	            get_func: Callable used for GET requests (health probe).
	            post_func: Callable used for POST requests (chat, shutdown).
	        """
	        self.model = model
	        self.config = config or SGLangConfig()
	        self.get_func = get_func
	        self.post_func = post_func

	    @staticmethod
	    def status(
	        base_url: str = "http://127.0.0.1:30000",
	        which_func: Callable[[str], str | None] = shutil.which,
	        find_spec: Callable[[str], object | None] = importlib.util.find_spec,
	        get_func: Callable[..., requests.Response] = requests.get,
	    ) -> BackendStatus:
	        """Probe ``<base_url>/health`` and report the result as a BackendStatus.

	        Args:
	            base_url: Server root URL; a trailing slash is stripped.
	            which_func: Executable lookup, used to check for ``python``.
	            find_spec: Module lookup, used to check for the ``sglang`` package.
	            get_func: Callable used to issue the GET request (2 second timeout).

	        Returns:
	            A ``BackendStatus`` built as ``("sglang", <bool>, <message>)``: the
	            flag is True only when the health request succeeds with an OK
	            response; otherwise the message carries the connection error or
	            the HTTP status code.
	        """
	        # Evaluated up front; only consulted to enrich the unreachable message.
	        has_cli = which_func("python") is not None and find_spec("sglang") is not None
	        try:
	            response = get_func(f"{base_url.rstrip('/')}/health", timeout=2)
	        except requests.RequestException as exc:
	            detail = f"SGLang server is not reachable: {exc}"
	            if not has_cli:
	                detail += " Python package sglang is also not installed."
	            return BackendStatus("sglang", False, detail)
	        if response.ok:
	            return BackendStatus("sglang", True, "SGLang server is reachable.")
	        return BackendStatus("sglang", False, f"SGLang responded with HTTP {response.status_code}.")

	    def run_plan(self) -> SGLangRunPlan:
	        """Return the launch/stop plan for this service's model and config."""
	        return build_sglang_run_plan(self.model, self.config)

	    def chat(self, system_prompt: str, user_prompt: str) -> str:
	        """Send one system/user prompt pair to the server and return text.

	        The server is health-checked first. If the check does not report it
	        as available, a bracketed "[SGLang unavailable]" message containing
	        the status detail is returned instead of making the chat request.

	        Args:
	            system_prompt: System message for the conversation.
	            user_prompt: User message for the conversation.

	        Returns:
	            The string produced by ``request_openai_chat_text``, or the
	            unavailable message described above.
	        """
	        status = self.status(self.config.base_url, get_func=self.get_func)
	        if not status.available:
	            return (
	                "[SGLang unavailable]\n\n"
	                f"{status.detail}\n\n"
	                "Install SGLang, start the planned local server command, then retry."
	            )

	        return request_openai_chat_text(
	            self.post_func,
	            self.config.base_url,
	            self.model.hf_id,
	            system_prompt,
	            user_prompt,
	            self.config.temperature,
	            self.config.max_tokens,
	            self.config.timeout_seconds,
	            "SGLang",
	        )

	    def stop_server(self) -> str:
	        """POST to ``<base_url>/shutdown`` and return a status message.

	        Request failures and non-success HTTP statuses are caught and
	        reported in the returned string rather than raised.
	        """
	        try:
	            response = self.post_func(
	                f"{self.config.base_url.rstrip('/')}/shutdown",
	                timeout=5,
	            )
	            response.raise_for_status()
	        except requests.RequestException as exc:
	            return f"[SGLang stop failed]\n\n{exc}"
	        return "SGLang shutdown request sent."


	def build_sglang_run_plan(
	    model: ModelInfo,
	    config: SGLangConfig | None = None,
	) -> SGLangRunPlan:
	    """Assemble the launch command and endpoint URLs for an SGLang server.

	    Nothing is executed; the function only builds an ``SGLangRunPlan``.

	    Args:
	        model: Model whose ``hf_id`` is passed as ``--model-path``.
	        config: Server settings; a default ``SGLangConfig()`` is used when
	            omitted (or when a falsy value is passed).

	    Returns:
	        A plan whose ``start_command`` runs ``python -m sglang.launch_server``
	        with the model path, the arguments produced by ``host_port_args``,
	        ``--tp-size`` and ``--tool-call-parser``; whose URLs are derived from
	        ``config.base_url`` with any trailing slash removed; and whose
	        ``startup_downloads`` and ``auto_model_load`` flags are both False.
	    """
	    cfg = config or SGLangConfig()
	    # Normalise once so every endpoint is joined with exactly one slash.
	    base_url = cfg.base_url.rstrip("/")
	    return SGLangRunPlan(
	        start_command=[
	            "python",
	            "-m",
	            "sglang.launch_server",
	            "--model-path",
	            model.hf_id,
	            *host_port_args(cfg.host, cfg.port),
	            "--tp-size",
	            str(cfg.tp_size),
	            "--tool-call-parser",
	            cfg.tool_parser,
	        ],
	        stop_url=f"{base_url}/shutdown",
	        health_url=f"{base_url}/health",
	        chat_url=f"{base_url}/v1/chat/completions",
	        startup_downloads=False,
	        auto_model_load=False,
	        notes=[
	            "The workbench does not start SGLang on app startup.",
	            "Run the command explicitly after installing SGLang and choosing hardware.",
	            "MiniCPM tool parsing is configured through --tool-call-parser.",
	        ],
	    )