Spaces:

build-small-hackathon
/

workbench

Sleeping

GitHub Actions

Initial ZeroGPU deployment with spaces shim

7f9dfed 17 days ago

4.08 kB

	from __future__ import annotations

	import shutil
	from collections.abc import Callable
	from dataclasses import dataclass
	from pathlib import Path
	from typing import Any

	from models.model_catalog import ModelInfo

	# Quantization labels accepted by build_export_plan; any other value makes
	# that function raise ValueError.
	QUANTIZATION_CHOICES = ["F16", "Q4_K_M", "Q5_K_M", "Q8_0"]


	@dataclass(frozen=True)
	class ToolStatus:
	    """Lookup result for one local export-related command-line tool.

	    Instances are immutable: the dataclass is declared frozen.
	    """

	    # Executable name that was looked up.
	    name: str
	    # True when the lookup returned a location for the executable.
	    available: bool
	    # Location reported by the lookup; detect_llama_cpp_tools stores ""
	    # here when nothing was found.
	    path: str


	@dataclass(frozen=True)
	class ExportPlan:
	    """Non-executing GGUF export plan.

	    Bundles the identifiers, output location and command lines that
	    describe how a model could be downloaded, converted and quantized.
	    Nothing in this class runs any of the commands it stores.
	    """

	    model_id: str
	    hf_id: str
	    quantization: str
	    output_dir: str
	    official_gguf_repo: str
	    official_gguf_file: str
	    # Each command is an argv-style list of strings.
	    download_command: list[str]
	    convert_command: list[str]
	    quantize_command: list[str]
	    tool_statuses: list[ToolStatus]
	    notes: list[str]

	    def as_dict(self) -> dict[str, Any]:
	        """Return the plan as a plain dictionary.

	        Every list in the result, and every per-tool dictionary under
	        ``"tools"``, is a fresh copy. Handing out ``tool.__dict__`` or the
	        stored lists directly would let a caller mutate this frozen plan
	        (and its tool entries) through the returned value.

	        Returns:
	            A dictionary with one entry per field (``tool_statuses`` is
	            exposed as ``"tools"``, a list of attribute dictionaries) plus
	            the constant flags ``"executes_commands"`` and
	            ``"startup_downloads"``, both ``False``.
	        """
	        return {
	            "model_id": self.model_id,
	            "hf_id": self.hf_id,
	            "quantization": self.quantization,
	            "output_dir": self.output_dir,
	            "official_gguf_repo": self.official_gguf_repo,
	            "official_gguf_file": self.official_gguf_file,
	            "download_command": list(self.download_command),
	            "convert_command": list(self.convert_command),
	            "quantize_command": list(self.quantize_command),
	            # dict(vars(...)) snapshots the attributes instead of exposing
	            # the instance's own __dict__.
	            "tools": [dict(vars(tool)) for tool in self.tool_statuses],
	            "notes": list(self.notes),
	            "executes_commands": False,
	            "startup_downloads": False,
	        }


	def detect_llama_cpp_tools(
	    which_func: Callable[[str], str | None] = shutil.which,
	) -> list[ToolStatus]:
	    """Report which llama.cpp executables can be located.

	    Args:
	        which_func: Callable that takes an executable name and returns its
	            location, or ``None`` when it cannot be found. Defaults to
	            ``shutil.which``; a different callable can be injected, e.g.
	            for testing.

	    Returns:
	        One ``ToolStatus`` per tool, in the order ``llama-server``,
	        ``llama-cli``, ``llama-quantize``. ``available`` is the truthiness
	        of the lookup result and ``path`` is that result, or ``""`` when
	        the lookup returned nothing.
	    """
	    tool_names = ("llama-server", "llama-cli", "llama-quantize")
	    # Resolve each name exactly once, then derive both fields from the same
	    # result rather than relying on keyword-argument evaluation order.
	    lookups = ((name, which_func(name)) for name in tool_names)
	    return [
	        ToolStatus(name=name, available=bool(found), path=found or "")
	        for name, found in lookups
	    ]


	def list_exported_files(output_dir: str | Path = "exports") -> list[list[str]]:
	    """List every regular file below *output_dir*, recursively.

	    Args:
	        output_dir: Directory to scan. Defaults to ``"exports"``.

	    Returns:
	        One ``[path, size_in_bytes]`` row per file, both values rendered as
	        strings, ordered by sorted path. An empty list when *output_dir*
	        does not exist.
	    """
	    root = Path(output_dir)
	    if not root.exists():
	        return []

	    # rglob("*") also yields directories; only files become rows.
	    return [
	        [str(path), str(path.stat().st_size)]
	        for path in sorted(root.rglob("*"))
	        if path.is_file()
	    ]


	def build_export_plan(
	    model: ModelInfo,
	    quantization: str,
	    output_dir: str | Path = "exports",
	    tools: list[ToolStatus] | None = None,
	) -> ExportPlan:
	    """Assemble a GGUF export plan for *model* without running anything.

	    Args:
	        model: Catalog entry; its ``config_id``, ``hf_id``, ``display_name``
	            and ``gguf`` attributes are read.
	        quantization: Target quantization label; must be one of
	            ``QUANTIZATION_CHOICES``.
	        output_dir: Base directory; the plan uses the sub-directory named
	            after ``model.config_id``. Defaults to ``"exports"``.
	        tools: Tool statuses to record in the plan. When ``None`` they are
	            obtained from ``detect_llama_cpp_tools()``. An explicitly
	            passed list, including an empty one, is used as given.

	    Returns:
	        An ``ExportPlan`` holding the download, convert and quantize
	        command lines. ``download_command`` is empty when the model has no
	        GGUF repo configured.

	    Raises:
	        ValueError: If *quantization* is not in ``QUANTIZATION_CHOICES``.
	    """
	    if quantization not in QUANTIZATION_CHOICES:
	        raise ValueError(f"Unsupported quantization: {quantization}")

	    model_output_dir = Path(output_dir) / model.config_id
	    gguf = model.gguf or {}
	    repo = str(gguf.get("repo", ""))
	    # Use the configured file name when present; otherwise derive one from
	    # the display name and the requested quantization.
	    official_file = str(
	        gguf.get("main_file")
	        or f"{model.display_name.replace(' ', '-')}-{quantization}.gguf"
	    )
	    base_gguf = model_output_dir / "converted-f16.gguf"
	    quantized_gguf = model_output_dir / official_file

	    notes = [
	        "This plan does not execute downloads, conversion, or quantization.",
	        "Run commands manually after installing dependencies and verifying paths.",
	    ]
	    if not repo:
	        notes.append("No official GGUF repo is configured for this model.")

	    # There is nothing to download without a configured repo.
	    download_command = (
	        [
	            "huggingface-cli",
	            "download",
	            repo,
	            official_file,
	            "--local-dir",
	            str(model_output_dir),
	        ]
	        if repo
	        else []
	    )

	    convert_command = [
	        "python",
	        # Fixed script location emitted verbatim; the notes above ask the
	        # user to verify paths before running.
	        "path\\to\\llama.cpp\\convert_hf_to_gguf.py",
	        model.hf_id,
	        "--outfile",
	        str(base_gguf),
	    ]

	    quantize_command = [
	        "llama-quantize",
	        str(base_gguf),
	        str(quantized_gguf),
	        quantization,
	    ]

	    # Only None triggers auto-detection, so a caller can pass [] on purpose.
	    tool_statuses = tools if tools is not None else detect_llama_cpp_tools()

	    return ExportPlan(
	        model_id=model.config_id,
	        hf_id=model.hf_id,
	        quantization=quantization,
	        output_dir=str(model_output_dir),
	        official_gguf_repo=repo,
	        official_gguf_file=official_file,
	        download_command=download_command,
	        convert_command=convert_command,
	        quantize_command=quantize_command,
	        tool_statuses=tool_statuses,
	        notes=notes,
	    )