Spaces:

build-small-hackathon
/

workbench

Running on Zero

File size: 4,075 Bytes

7f9dfed

from __future__ import annotations

import shutil
from collections.abc import Callable
from dataclasses import dataclass
from pathlib import Path
from typing import Any

from models.model_catalog import ModelInfo

# Quantization labels accepted by build_export_plan; any other value makes it
# raise ValueError.
QUANTIZATION_CHOICES = ["F16", "Q4_K_M", "Q5_K_M", "Q8_0"]


@dataclass(frozen=True)
class ToolStatus:
    """Availability of a local export-related tool.

    Immutable record; instances are produced by ``detect_llama_cpp_tools``,
    one per executable name it looks up.
    """

    # Executable name that was looked up (e.g. "llama-quantize").
    name: str
    # True when the lookup returned a non-empty location.
    available: bool
    # Location returned by the lookup, or "" when the tool was not found.
    path: str


@dataclass(frozen=True)
class ExportPlan:
    """Non-executing GGUF export plan.

    Bundles the command lines and metadata describing how a model could be
    downloaded, converted and quantized. The plan only describes commands;
    nothing in this class runs them.
    """

    # Identifier of the model the plan was built for.
    model_id: str
    # Hugging Face identifier passed to the conversion command.
    hf_id: str
    # Quantization label the plan targets.
    quantization: str
    # Directory the commands read from / write to.
    output_dir: str
    # Repository holding an official GGUF build; "" when none is configured.
    official_gguf_repo: str
    # File name of the GGUF artifact.
    official_gguf_file: str
    # argv-style command lists; download_command is empty when there is no repo.
    download_command: list[str]
    convert_command: list[str]
    quantize_command: list[str]
    # Availability of the local tools relevant to the plan.
    tool_statuses: list[ToolStatus]
    # Human-readable remarks shown alongside the plan.
    notes: list[str]

    def as_dict(self) -> dict[str, Any]:
        """Return the plan as a plain dictionary.

        List fields and per-tool mappings are shallow copies, so mutating the
        returned structure cannot alter this frozen plan or the ``ToolStatus``
        records it references.

        Returns:
            A dictionary with the plan fields (``tool_statuses`` is exposed
            under the ``"tools"`` key as a list of attribute dictionaries)
            plus the constant flags ``"executes_commands"`` and
            ``"startup_downloads"``, both ``False``.
        """
        return {
            "model_id": self.model_id,
            "hf_id": self.hf_id,
            "quantization": self.quantization,
            "output_dir": self.output_dir,
            "official_gguf_repo": self.official_gguf_repo,
            "official_gguf_file": self.official_gguf_file,
            "download_command": list(self.download_command),
            "convert_command": list(self.convert_command),
            "quantize_command": list(self.quantize_command),
            # Copy each instance dict: handing out the live ``__dict__`` would
            # let callers rewrite fields of a frozen dataclass.
            "tools": [dict(vars(tool)) for tool in self.tool_statuses],
            "notes": list(self.notes),
            "executes_commands": False,
            "startup_downloads": False,
        }


def detect_llama_cpp_tools(
    which_func: Callable[[str], str | None] = shutil.which,
) -> list[ToolStatus]:
    """Report which llama.cpp executables can be located.

    Looks up ``llama-server``, ``llama-cli`` and ``llama-quantize`` (in that
    order) through ``which_func``.

    Args:
        which_func: Lookup callable taking an executable name and returning
            its location or ``None``; defaults to ``shutil.which``.

    Returns:
        One ``ToolStatus`` per executable name. ``available`` is the
        truthiness of the lookup result and ``path`` is that result, or ""
        when the lookup returned nothing.
    """
    statuses: list[ToolStatus] = []
    for tool_name in ("llama-server", "llama-cli", "llama-quantize"):
        located = which_func(tool_name)
        statuses.append(
            ToolStatus(name=tool_name, available=bool(located), path=located or "")
        )
    return statuses


def list_exported_files(output_dir: str | Path = "exports") -> list[list[str]]:
    """List every regular file found beneath ``output_dir``.

    Args:
        output_dir: Directory to scan recursively; defaults to "exports".

    Returns:
        ``[path, size_in_bytes]`` rows (both as strings) ordered by sorted
        path, or an empty list when ``output_dir`` does not exist.
    """
    export_root = Path(output_dir)
    if export_root.exists():
        return [
            [str(entry), str(entry.stat().st_size)]
            for entry in sorted(export_root.rglob("*"))
            if entry.is_file()
        ]
    return []


def build_export_plan(
    model: ModelInfo,
    quantization: str,
    output_dir: str | Path = "exports",
    tools: list[ToolStatus] | None = None,
) -> ExportPlan:
    """Assemble a GGUF export plan for ``model`` without running anything.

    Args:
        model: Model description; ``config_id``, ``hf_id``, ``display_name``
            and ``gguf`` are read from it.
        quantization: Target quantization label; must be one of
            ``QUANTIZATION_CHOICES``.
        output_dir: Base directory; the plan works inside a sub-directory
            named after ``model.config_id``.
        tools: Tool availability records to embed in the plan. When omitted
            or empty, ``detect_llama_cpp_tools()`` is called instead.

    Returns:
        An ``ExportPlan`` holding download, convert and quantize command
        lists plus explanatory notes. The download command is empty when the
        model has no GGUF repo configured.

    Raises:
        ValueError: If ``quantization`` is not a supported choice.
    """
    if quantization not in QUANTIZATION_CHOICES:
        raise ValueError(f"Unsupported quantization: {quantization}")

    target_dir = Path(output_dir) / model.config_id
    gguf_config = model.gguf or {}
    gguf_repo = str(gguf_config.get("repo", ""))

    # Use the configured file name when present; otherwise derive one from
    # the display name and the requested quantization.
    configured_file = gguf_config.get("main_file")
    if configured_file:
        gguf_file = str(configured_file)
    else:
        gguf_file = str(
            f"{model.display_name.replace(' ', '-')}-{quantization}.gguf"
        )

    f16_path = target_dir / "converted-f16.gguf"
    quantized_path = target_dir / gguf_file

    plan_notes = [
        "This plan does not execute downloads, conversion, or quantization.",
        "Run commands manually after installing dependencies and verifying paths.",
    ]

    if gguf_repo:
        fetch_cmd = [
            "huggingface-cli",
            "download",
            gguf_repo,
            gguf_file,
            "--local-dir",
            str(target_dir),
        ]
    else:
        # Nothing to download from: record why the command is empty.
        plan_notes.append("No official GGUF repo is configured for this model.")
        fetch_cmd = []

    to_gguf_cmd = [
        "python",
        "path\\to\\llama.cpp\\convert_hf_to_gguf.py",
        model.hf_id,
        "--outfile",
        str(f16_path),
    ]
    quant_cmd = [
        "llama-quantize",
        str(f16_path),
        str(quantized_path),
        quantization,
    ]

    # An empty list is treated like None here: both fall back to detection.
    statuses = tools if tools else detect_llama_cpp_tools()

    return ExportPlan(
        model_id=model.config_id,
        hf_id=model.hf_id,
        quantization=quantization,
        output_dir=str(target_dir),
        official_gguf_repo=gguf_repo,
        official_gguf_file=gguf_file,
        download_command=fetch_cmd,
        convert_command=to_gguf_cmd,
        quantize_command=quant_cmd,
        tool_statuses=statuses,
        notes=plan_notes,
    )