# gguf-converter / app.py
# Uploaded with huggingface_hub by 3morixd (commit b26c68a, verified), 4.77 kB.
import os
import shutil
import subprocess
import sys
import tempfile

import gradio as gr
import spaces
from huggingface_hub import HfApi, snapshot_download, upload_file
# Maps each quantization label offered in the UI to its lower-case code.
# Every code is just the label lower-cased, so the table is derived from
# the ordered tuple of labels.
QUANT_LEVELS = {
    label: label.lower()
    for label in ("Q2_K", "Q3_K_M", "Q4_K_M", "Q5_K_M", "Q6_K", "Q8_0", "F16")
}
def _resolve_quant_levels(quant_levels):
    """Normalize the user's selection into ``(name, outtype)`` pairs.

    Each item may be a label string (what the checkbox group sends) or an
    explicit ``(name, code)`` pair. A label missing from ``QUANT_LEVELS``
    yields ``(label, None)`` so the caller can report it.
    """
    pairs = []
    for level in quant_levels or []:
        if isinstance(level, str):
            pairs.append((level, QUANT_LEVELS.get(level)))
        else:
            name, code = level
            pairs.append((name, code))
    return pairs


@spaces.GPU(duration=300)
def convert_model(model_id, quant_levels, hf_token, progress=gr.Progress()):
    """Convert a Hugging Face model to GGUF and upload each file to its repo.

    Args:
        model_id: Hub repository ID of the model to convert; the generated
            files are uploaded back to this same repository.
        quant_levels: Selected quantization levels, either labels from
            ``QUANT_LEVELS`` or ``(name, code)`` pairs.
        hf_token: Hub token used for both the download and the uploads.
        progress: Gradio progress tracker.

    Returns:
        A 2-tuple of strings. Both elements carry the same text (a
        per-level report, or a single error message) because the UI binds
        both return values to the same textbox.
    """
    model_id = (model_id or "").strip()
    hf_token = (hf_token or "").strip()

    if not model_id:
        message = "❌ Please enter a model ID"
        return message, message
    if not hf_token:
        message = "❌ Please enter your HF token"
        return message, message

    levels = _resolve_quant_levels(quant_levels)
    if not levels:
        message = "❌ Please select at least one quantization level"
        return message, message

    progress(0.1, desc="Downloading model...")
    try:
        model_path = snapshot_download(
            model_id,
            token=hf_token,
            ignore_patterns=["*.gguf", "*.pth", "*.bin"],
        )
    except Exception as e:
        message = f"❌ Download failed: {e}"
        return message, message

    results = []
    safe_name = model_id.replace("/", "_")
    try:
        # A private scratch directory avoids name clashes between concurrent
        # runs and guarantees nothing is left behind in the temp area.
        with tempfile.TemporaryDirectory() as work_dir:
            for i, (q_name, q_code) in enumerate(levels):
                progress(0.2 + 0.7 * (i / len(levels)), desc=f"Converting {q_name}...")
                if q_code is None:
                    results.append(f"❌ {q_name}: unknown quantization level")
                    continue

                output_file = os.path.join(work_dir, f"{safe_name}_{q_name}.gguf")
                try:
                    # NOTE(review): convert_hf_to_gguf.py is believed to accept
                    # only a few --outtype values (f32/f16/bf16/q8_0/auto);
                    # K-quants usually need a second llama-quantize pass.
                    # Confirm against the vendored llama.cpp checkout.
                    cmd = [
                        sys.executable, "llama.cpp/convert_hf_to_gguf.py",
                        model_path,
                        "--outtype", q_code,
                        "--outfile", output_file,
                    ]
                    result = subprocess.run(cmd, capture_output=True, text=True, timeout=300)
                    if result.returncode != 0:
                        # The actual error is at the end of stderr (after any
                        # usage text or traceback frames), so keep the tail.
                        detail = result.stderr.strip()[-200:]
                        results.append(f"❌ {q_name}: conversion failed - {detail}")
                        continue

                    file_size = os.path.getsize(output_file) / 1024 / 1024
                    results.append(f"✅ {q_name}: {file_size:.0f}MB")

                    # Upload to the model repo
                    filename = f"{q_name.lower()}.gguf"
                    upload_file(
                        path_or_fileobj=output_file,
                        path_in_repo=filename,
                        repo_id=model_id,
                        token=hf_token,
                    )
                    results.append(f" → Uploaded as {filename}")
                except subprocess.TimeoutExpired:
                    results.append(f"❌ {q_name}: conversion timed out")
                except Exception as e:
                    results.append(f"❌ {q_name}: {e}")
                finally:
                    # Free disk space before the next level, even when the
                    # conversion or the upload failed.
                    if os.path.exists(output_file):
                        os.remove(output_file)
    finally:
        # Always drop the downloaded snapshot, whatever happened above.
        shutil.rmtree(model_path, ignore_errors=True)

    report = "\n".join(results)
    return report, report
# Build the Gradio UI: intro text, the three inputs, the convert button and a
# results box. Non-ASCII characters in the user-facing text are written as
# real Unicode (they were previously mis-encoded).
with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue"), title="dispatchAI GGUF Converter") as demo:
    # The Markdown bodies start at column 0 so the rendered text does not
    # depend on how the component treats leading indentation.
    gr.Markdown("""
# 🔄 dispatchAI GGUF Converter
Convert any HuggingFace model to GGUF format with multiple quantization levels.
Runs on ZeroGPU — free, fast, no local compute needed.
## How it works
1. Enter the model ID (e.g., `dispatchAI/SmolLM2-135M-Instruct-mobile`)
2. Select quant levels (Q4_K_M is the sweet spot for mobile)
3. Enter your HF token (needs write access to the model repo)
4. Click Convert — the GGUF files will be uploaded to the model repo
## Quant Level Guide
| Level | Size vs FP16 | Quality | Use Case |
|-------|-------------|---------|----------|
| Q2_K | ~25% | Low | Ultra-low RAM (1GB devices) |
| Q3_K_M | ~30% | Fair | Very constrained devices |
| Q4_K_M | ~40% | Good | **Sweet spot for mobile** |
| Q5_K_M | ~50% | Very Good | Quality-sensitive mobile |
| Q6_K | ~60% | Excellent | Near-lossless mobile |
| Q8_0 | ~70% | Excellent | High-quality, smaller than FP16 |
| F16 | 100% | Lossless | Reference / debugging |
""")

    with gr.Row():
        model_input = gr.Textbox(
            label="Model ID",
            placeholder="dispatchAI/SmolLM2-135M-Instruct-mobile",
            scale=3,
        )
        token_input = gr.Textbox(
            label="HF Token (write access)",
            type="password",  # keep the token masked on screen
            scale=2,
        )

    quant_checkboxes = gr.CheckboxGroup(
        choices=list(QUANT_LEVELS.keys()),
        value=["Q4_K_M", "Q5_K_M", "Q8_0"],
        label="Quantization Levels",
    )
    convert_btn = gr.Button("🔄 Convert", variant="primary", size="lg")
    output = gr.Textbox(label="Results", lines=15)

    # convert_model returns two strings; both are routed to the one results
    # textbox, so the output list names it twice.
    convert_btn.click(
        fn=convert_model,
        inputs=[model_input, quant_checkboxes, token_input],
        outputs=[output, output],
    )

    gr.Markdown("""
---
🚀 [dispatchAI](https://huggingface.co/dispatchAI) — Small. Mobile. Free. UAE-built.
""")

# Start the app only when run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()