# workbench/training/export.py
# GitHub Actions
# Initial ZeroGPU deployment with spaces shim
# 7f9dfed
# Raw
# History Blame Contribute Delete
# 4.08 kB
from __future__ import annotations
import shutil
from collections.abc import Callable
from dataclasses import dataclass
from pathlib import Path
from typing import Any
from models.model_catalog import ModelInfo
# Quantization type names accepted by build_export_plan; any other value makes
# it raise ValueError. The chosen name is passed as the final argument of the
# generated llama-quantize command.
QUANTIZATION_CHOICES = ["F16", "Q4_K_M", "Q5_K_M", "Q8_0"]
@dataclass(frozen=True)
class ToolStatus:
    """Immutable record describing whether one export-related tool was found."""

    # Executable name that was looked up.
    name: str
    # Whether the lookup produced a location for the executable.
    available: bool
    # Location reported by the lookup; detect_llama_cpp_tools stores "" here
    # when nothing was found.
    path: str
@dataclass(frozen=True)
class ExportPlan:
    """Non-executing GGUF export plan.

    The plan only carries the commands and metadata as data; nothing in this
    class runs them.

    Attributes:
        model_id: Identifier of the model the plan was built for.
        hf_id: Hugging Face id handed to the conversion command.
        quantization: Requested quantization type name.
        output_dir: Directory the commands read from and write to.
        official_gguf_repo: Configured GGUF repository, or "" when none.
        official_gguf_file: GGUF file name used by the download and quantize
            commands.
        download_command: Argument list for the download step (may be empty).
        convert_command: Argument list for the conversion step.
        quantize_command: Argument list for the quantization step.
        tool_statuses: Availability records for the local tools.
        notes: Human-readable remarks attached to the plan.
    """

    model_id: str
    hf_id: str
    quantization: str
    output_dir: str
    official_gguf_repo: str
    official_gguf_file: str
    download_command: list[str]
    convert_command: list[str]
    quantize_command: list[str]
    tool_statuses: list[ToolStatus]
    notes: list[str]

    def as_dict(self) -> dict[str, Any]:
        """Return the plan as a plain dictionary.

        Every list and every per-tool mapping in the result is a fresh copy,
        so callers may modify the returned structure without altering this
        frozen plan or the ToolStatus objects it holds.

        Returns:
            A dict with the plan fields, the tool statuses under ``"tools"``
            (one dict per tool), and the constant flags
            ``"executes_commands"`` and ``"startup_downloads"`` set to False.
        """
        return {
            "model_id": self.model_id,
            "hf_id": self.hf_id,
            "quantization": self.quantization,
            "output_dir": self.output_dir,
            "official_gguf_repo": self.official_gguf_repo,
            "official_gguf_file": self.official_gguf_file,
            "download_command": list(self.download_command),
            "convert_command": list(self.convert_command),
            "quantize_command": list(self.quantize_command),
            # Copy each instance dict: handing out the live __dict__ would let
            # callers rewrite the attributes of a frozen ToolStatus.
            "tools": [dict(vars(tool)) for tool in self.tool_statuses],
            "notes": list(self.notes),
            "executes_commands": False,
            "startup_downloads": False,
        }
def detect_llama_cpp_tools(
    which_func: Callable[[str], str | None] = shutil.which,
) -> list[ToolStatus]:
    """Look up the llama.cpp command-line tools and report what was found.

    Args:
        which_func: Resolver that maps an executable name to its location, or
            returns None when it cannot be found. Defaults to ``shutil.which``;
            a replacement can be injected, e.g. for tests.

    Returns:
        One ToolStatus per tool, in the order llama-server, llama-cli,
        llama-quantize. ``path`` is "" for tools the resolver did not find.
    """
    statuses: list[ToolStatus] = []
    for tool_name in ("llama-server", "llama-cli", "llama-quantize"):
        located = which_func(tool_name)
        statuses.append(
            ToolStatus(name=tool_name, available=bool(located), path=located or "")
        )
    return statuses
def list_exported_files(output_dir: str | Path = "exports") -> list[list[str]]:
    """Enumerate every file below *output_dir* together with its size.

    Args:
        output_dir: Directory to scan recursively.

    Returns:
        ``[path, size_in_bytes]`` rows (both as strings), ordered by sorted
        path. Directories are skipped. An empty list is returned when
        *output_dir* does not exist.
    """
    base = Path(output_dir)
    if base.exists():
        return [
            [str(entry), str(entry.stat().st_size)]
            for entry in sorted(base.rglob("*"))
            if entry.is_file()
        ]
    return []
def build_export_plan(
    model: ModelInfo,
    quantization: str,
    output_dir: str | Path = "exports",
    tools: list[ToolStatus] | None = None,
) -> ExportPlan:
    """Assemble a GGUF export plan for *model* without running anything.

    The function only builds argument lists and notes; it does not download,
    convert, or quantize.

    Args:
        model: Model description. The attributes read here are ``config_id``,
            ``hf_id``, ``display_name`` and ``gguf`` (a mapping that may hold
            ``"repo"`` and ``"main_file"``, or a falsy value).
        quantization: One of QUANTIZATION_CHOICES.
        output_dir: Root directory; the plan uses ``output_dir / config_id``.
        tools: Tool availability records to embed in the plan. When None,
            detect_llama_cpp_tools() is called. Any list that is passed,
            including an empty one, is used as given.

    Returns:
        The populated ExportPlan.

    Raises:
        ValueError: If *quantization* is not in QUANTIZATION_CHOICES.
    """
    if quantization not in QUANTIZATION_CHOICES:
        raise ValueError(f"Unsupported quantization: {quantization}")

    model_output_dir = Path(output_dir) / model.config_id
    gguf = model.gguf or {}
    repo = str(gguf.get("repo", ""))
    # Fall back to a name derived from the display name when the model
    # configuration does not specify a main GGUF file.
    official_file = str(
        gguf.get("main_file")
        or f"{model.display_name.replace(' ', '-')}-{quantization}.gguf"
    )
    base_gguf = model_output_dir / "converted-f16.gguf"
    # NOTE(review): this is the same directory and file name the download
    # command targets, so running both steps presumably overwrites the
    # downloaded file - confirm that is intended.
    quantized_gguf = model_output_dir / official_file

    notes = [
        "This plan does not execute downloads, conversion, or quantization.",
        "Run commands manually after installing dependencies and verifying paths.",
    ]

    download_command: list[str] = []
    if repo:
        download_command = [
            "huggingface-cli",
            "download",
            repo,
            official_file,
            "--local-dir",
            str(model_output_dir),
        ]
    else:
        notes.append("No official GGUF repo is configured for this model.")

    convert_command = [
        "python",
        "path\\to\\llama.cpp\\convert_hf_to_gguf.py",
        model.hf_id,
        "--outfile",
        str(base_gguf),
    ]
    quantize_command = [
        "llama-quantize",
        str(base_gguf),
        str(quantized_gguf),
        quantization,
    ]

    # Run detection only when the caller supplied nothing. An explicit empty
    # list is a valid answer and must not be replaced by a fresh detection
    # (the previous `tools or ...` form discarded it).
    tool_statuses = detect_llama_cpp_tools() if tools is None else tools

    return ExportPlan(
        model_id=model.config_id,
        hf_id=model.hf_id,
        quantization=quantization,
        output_dir=str(model_output_dir),
        official_gguf_repo=repo,
        official_gguf_file=official_file,
        download_command=download_command,
        convert_command=convert_command,
        quantize_command=quantize_command,
        tool_statuses=tool_statuses,
        notes=notes,
    )