# Spaces: Running on Zero
# File size: 4,075 Bytes
# 7f9dfed
from __future__ import annotations
import shutil
from collections.abc import Callable
from dataclasses import dataclass
from pathlib import Path
from typing import Any
from models.model_catalog import ModelInfo
# Quantization labels accepted by build_export_plan; any other value is
# rejected there with ValueError.
QUANTIZATION_CHOICES = ["F16", "Q4_K_M", "Q5_K_M", "Q8_0"]
@dataclass(frozen=True)
class ToolStatus:
    """Immutable record describing whether a local export tool was found.

    As populated by ``detect_llama_cpp_tools``:

    * ``name`` is the executable name that was looked up.
    * ``available`` is ``True`` when the lookup returned a location.
    * ``path`` is that location, or ``""`` when nothing was found.
    """

    # Field order is part of the positional constructor; keep it stable.
    name: str
    available: bool
    path: str
@dataclass(frozen=True)
class ExportPlan:
    """Non-executing GGUF export plan.

    Holds the command lines and metadata that describe an export. The class
    only stores and serialises data; it never runs any of the commands.
    """

    model_id: str
    hf_id: str
    quantization: str
    output_dir: str
    official_gguf_repo: str
    official_gguf_file: str
    # Each command is an argv-style list of strings.
    download_command: list[str]
    convert_command: list[str]
    quantize_command: list[str]
    tool_statuses: list[ToolStatus]
    notes: list[str]

    def as_dict(self) -> dict[str, Any]:
        """Return the plan as a plain dictionary.

        Lists and per-tool mappings are copied so that callers can modify the
        returned payload without altering this frozen plan or the tool status
        objects it refers to.

        Returns:
            A dict with one entry per field (tool statuses appear under
            ``"tools"`` as a list of dicts) plus the constant flags
            ``"executes_commands"`` and ``"startup_downloads"``, both
            ``False``.
        """
        return {
            "model_id": self.model_id,
            "hf_id": self.hf_id,
            "quantization": self.quantization,
            "output_dir": self.output_dir,
            "official_gguf_repo": self.official_gguf_repo,
            "official_gguf_file": self.official_gguf_file,
            "download_command": list(self.download_command),
            "convert_command": list(self.convert_command),
            "quantize_command": list(self.quantize_command),
            # Copy each attribute mapping instead of exposing the live
            # instance __dict__, which would let callers bypass frozen=True.
            "tools": [dict(vars(tool)) for tool in self.tool_statuses],
            "notes": list(self.notes),
            "executes_commands": False,
            "startup_downloads": False,
        }
def detect_llama_cpp_tools(
    which_func: Callable[[str], str | None] = shutil.which,
) -> list[ToolStatus]:
    """Report which llama.cpp executables can be located.

    Args:
        which_func: Lookup callable taking an executable name and returning
            its location or ``None``. Defaults to ``shutil.which``; a
            different callable can be supplied, e.g. to avoid touching the
            real environment.

    Returns:
        One ``ToolStatus`` per tool, in the order ``llama-server``,
        ``llama-cli``, ``llama-quantize``. ``path`` is ``""`` when the lookup
        returned nothing.
    """
    statuses: list[ToolStatus] = []
    for tool_name in ("llama-server", "llama-cli", "llama-quantize"):
        location = which_func(tool_name)
        statuses.append(
            ToolStatus(
                name=tool_name,
                available=bool(location),
                path=location or "",
            )
        )
    return statuses
def list_exported_files(output_dir: str | Path = "exports") -> list[list[str]]:
    """List every regular file found beneath ``output_dir``.

    Args:
        output_dir: Directory to scan recursively.

    Returns:
        Rows of ``[path, size_in_bytes]`` with both values as strings,
        ordered by sorted path. An empty list is returned when
        ``output_dir`` does not exist.
    """
    base = Path(output_dir)
    if not base.exists():
        return []
    return [
        [str(entry), str(entry.stat().st_size)]
        for entry in sorted(base.rglob("*"))
        if entry.is_file()
    ]
def build_export_plan(
    model: ModelInfo,
    quantization: str,
    output_dir: str | Path = "exports",
    tools: list[ToolStatus] | None = None,
) -> ExportPlan:
    """Assemble a non-executing GGUF export plan for ``model``.

    Only command lists and notes are built here; none of the commands are
    run. The single environment access is the optional tool lookup performed
    when ``tools`` is not supplied.

    Args:
        model: Catalog entry; ``config_id``, ``hf_id``, ``display_name`` and
            ``gguf`` are read from it.
        quantization: One of ``QUANTIZATION_CHOICES``.
        output_dir: Base directory; the plan targets the sub-directory named
            after ``model.config_id``.
        tools: Pre-computed tool statuses. When ``None``,
            ``detect_llama_cpp_tools()`` is called. An explicitly passed
            empty list is kept as-is rather than triggering detection.

    Returns:
        The populated ``ExportPlan``.

    Raises:
        ValueError: If ``quantization`` is not in ``QUANTIZATION_CHOICES``.
    """
    if quantization not in QUANTIZATION_CHOICES:
        raise ValueError(f"Unsupported quantization: {quantization}")

    model_output_dir = Path(output_dir) / model.config_id
    gguf = model.gguf or {}
    # `or ""` also covers a configured-but-null repo, which str() would
    # otherwise turn into the truthy literal "None".
    repo = str(gguf.get("repo") or "")
    official_file = str(
        gguf.get("main_file")
        or f"{model.display_name.replace(' ', '-')}-{quantization}.gguf"
    )
    base_gguf = model_output_dir / "converted-f16.gguf"
    # NOTE(review): the quantized output reuses official_file's name, so it
    # resolves to the same path the download command writes to -- confirm
    # this is intended.
    quantized_gguf = model_output_dir / official_file

    notes = [
        "This plan does not execute downloads, conversion, or quantization.",
        "Run commands manually after installing dependencies and verifying paths.",
    ]

    download_command: list[str] = []
    if repo:
        download_command = [
            "huggingface-cli",
            "download",
            repo,
            official_file,
            "--local-dir",
            str(model_output_dir),
        ]
    else:
        notes.append("No official GGUF repo is configured for this model.")

    convert_command = [
        "python",
        # Placeholder script location; presumably meant to be edited by the
        # person running the command.
        "path\\to\\llama.cpp\\convert_hf_to_gguf.py",
        model.hf_id,
        "--outfile",
        str(base_gguf),
    ]
    quantize_command = [
        "llama-quantize",
        str(base_gguf),
        str(quantized_gguf),
        quantization,
    ]

    # Distinguish "not provided" (None) from "provided but empty" ([]).
    tool_statuses = detect_llama_cpp_tools() if tools is None else tools

    return ExportPlan(
        model_id=model.config_id,
        hf_id=model.hf_id,
        quantization=quantization,
        output_dir=str(model_output_dir),
        official_gguf_repo=repo,
        official_gguf_file=official_file,
        download_command=download_command,
        convert_command=convert_command,
        quantize_command=quantize_command,
        tool_statuses=tool_statuses,
        notes=notes,
    )
|