File size: 4,075 Bytes
7f9dfed
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
from __future__ import annotations

import shutil
from collections.abc import Callable
from dataclasses import dataclass
from pathlib import Path
from typing import Any

from models.model_catalog import ModelInfo

# Quantization labels accepted by build_export_plan(); any other value makes
# it raise ValueError. The chosen label is also passed verbatim as the final
# argument of the generated llama-quantize command.
QUANTIZATION_CHOICES = ["F16", "Q4_K_M", "Q5_K_M", "Q8_0"]


@dataclass(frozen=True)
class ToolStatus:
    """Availability of a local export-related tool.

    Frozen dataclass: fields cannot be reassigned after construction.
    """

    # Executable name that was looked up (e.g. "llama-quantize").
    name: str
    # Whether the lookup found the executable.
    available: bool
    # Resolved location of the executable; detect_llama_cpp_tools() stores ""
    # here when the lookup returned nothing.
    path: str


@dataclass(frozen=True)
class ExportPlan:
    """Non-executing GGUF export plan.

    The plan only *describes* the commands a user could run to download,
    convert, and quantize a model; nothing in this class runs them. The
    dataclass is frozen, so fields cannot be reassigned after construction.
    """

    # Identifier of the model the plan was built for.
    model_id: str
    # Hugging Face identifier handed to the conversion command.
    hf_id: str
    # Quantization label requested for the export.
    quantization: str
    # Directory the generated commands read from and write to.
    output_dir: str
    # Repository holding a prebuilt GGUF; "" when none is configured.
    official_gguf_repo: str
    # File name of the GGUF artifact.
    official_gguf_file: str
    # argv-style command lists; download_command is empty when there is no
    # official repository to download from.
    download_command: list[str]
    convert_command: list[str]
    quantize_command: list[str]
    # Availability of the local tools relevant to the plan.
    tool_statuses: list[ToolStatus]
    # Human-readable remarks to show alongside the plan.
    notes: list[str]

    def as_dict(self) -> dict[str, Any]:
        """Return the plan as a plain dictionary.

        Every list in the result is a fresh shallow copy and every tool entry
        is a fresh dict, so callers may mutate the returned structure without
        altering this frozen plan or its ``ToolStatus`` objects.

        Returns:
            A dict with one key per field (``tool_statuses`` is exposed under
            the key ``"tools"`` as a list of attribute dicts), plus the
            constant flags ``"executes_commands"`` and ``"startup_downloads"``,
            both always ``False``.
        """
        return {
            "model_id": self.model_id,
            "hf_id": self.hf_id,
            "quantization": self.quantization,
            "output_dir": self.output_dir,
            "official_gguf_repo": self.official_gguf_repo,
            "official_gguf_file": self.official_gguf_file,
            # Copy the lists: handing out the plan's own objects would let a
            # caller change the "frozen" plan through the returned dict.
            "download_command": list(self.download_command),
            "convert_command": list(self.convert_command),
            "quantize_command": list(self.quantize_command),
            # vars() is the tool's live attribute dict; copy it so edits to
            # the export do not write through to the ToolStatus instance.
            "tools": [dict(vars(tool)) for tool in self.tool_statuses],
            "notes": list(self.notes),
            "executes_commands": False,
            "startup_downloads": False,
        }


def detect_llama_cpp_tools(
    which_func: Callable[[str], str | None] = shutil.which,
) -> list[ToolStatus]:
    """Report which llama.cpp executables the resolver can locate.

    Args:
        which_func: Resolver called once per tool name; it returns the
            executable's path or ``None``. Defaults to ``shutil.which``.

    Returns:
        One ``ToolStatus`` per probed tool, in the order ``llama-server``,
        ``llama-cli``, ``llama-quantize``. ``available`` is the truthiness of
        the resolved path, and ``path`` is that path or ``""`` when the
        resolver returned nothing.
    """
    statuses: list[ToolStatus] = []
    for executable in ("llama-server", "llama-cli", "llama-quantize"):
        resolved = which_func(executable)
        statuses.append(
            ToolStatus(
                name=executable,
                available=bool(resolved),
                path=resolved or "",
            )
        )
    return statuses


def list_exported_files(output_dir: str | Path = "exports") -> list[list[str]]:
    """List every regular file found under ``output_dir``, recursively.

    Args:
        output_dir: Directory to scan. Defaults to ``"exports"``.

    Returns:
        One ``[path, size_in_bytes]`` row per file, both as strings, ordered
        by sorted path. An empty list when ``output_dir`` does not exist.
    """
    base = Path(output_dir)
    if base.exists():
        return [
            [str(entry), str(entry.stat().st_size)]
            for entry in sorted(base.rglob("*"))
            if entry.is_file()  # directories are walked but not reported
        ]
    return []


def build_export_plan(
    model: ModelInfo,
    quantization: str,
    output_dir: str | Path = "exports",
    tools: list[ToolStatus] | None = None,
) -> ExportPlan:
    """Assemble a GGUF export plan for ``model`` without running anything.

    Args:
        model: Catalog entry. This function reads ``config_id``, ``hf_id``,
            ``display_name`` and ``gguf``; ``gguf`` is treated as a mapping
            with optional ``"repo"`` and ``"main_file"`` keys, and a falsy
            value is treated as empty.
        quantization: One of ``QUANTIZATION_CHOICES``.
        output_dir: Root directory; files are planned under
            ``output_dir / model.config_id``.
        tools: Tool statuses to embed in the plan. Only ``None`` triggers a
            call to ``detect_llama_cpp_tools()``; an explicitly passed list,
            including an empty one, is used as given.

    Returns:
        An ``ExportPlan`` holding the download, convert, and quantize
        commands as argv lists, plus explanatory notes.

    Raises:
        ValueError: If ``quantization`` is not in ``QUANTIZATION_CHOICES``.
    """
    if quantization not in QUANTIZATION_CHOICES:
        raise ValueError(f"Unsupported quantization: {quantization}")

    root = Path(output_dir)
    model_output_dir = root / model.config_id
    gguf = model.gguf or {}
    repo = str(gguf.get("repo", ""))
    # Fall back to a name derived from the display name when the catalog does
    # not name an official file.
    official_file = str(
        gguf.get("main_file")
        or f"{model.display_name.replace(' ', '-')}-{quantization}.gguf"
    )
    base_gguf = model_output_dir / "converted-f16.gguf"
    # NOTE(review): when "main_file" is configured, the quantize output shares
    # the official file's name (and the download target path) regardless of
    # the requested quantization - confirm this is intended.
    quantized_gguf = model_output_dir / official_file

    notes = [
        "This plan does not execute downloads, conversion, or quantization.",
        "Run commands manually after installing dependencies and verifying paths.",
    ]
    if not repo:
        notes.append("No official GGUF repo is configured for this model.")

    # Without an official repo there is nothing to download, so the command
    # stays empty.
    download_command: list[str] = []
    if repo:
        download_command = [
            "huggingface-cli",
            "download",
            repo,
            official_file,
            "--local-dir",
            str(model_output_dir),
        ]

    convert_command = [
        "python",
        # Placeholder location; the user must substitute their own checkout.
        "path\\to\\llama.cpp\\convert_hf_to_gguf.py",
        model.hf_id,
        "--outfile",
        str(base_gguf),
    ]

    quantize_command = [
        "llama-quantize",
        str(base_gguf),
        str(quantized_gguf),
        quantization,
    ]

    return ExportPlan(
        model_id=model.config_id,
        hf_id=model.hf_id,
        quantization=quantization,
        output_dir=str(model_output_dir),
        official_gguf_repo=repo,
        official_gguf_file=official_file,
        download_command=download_command,
        convert_command=convert_command,
        quantize_command=quantize_command,
        # Compare against None rather than truthiness: an explicit empty list
        # means "no tools" and must not trigger a probe of the host.
        tool_statuses=detect_llama_cpp_tools() if tools is None else tools,
        notes=notes,
    )