Spaces:
Runtime error
Runtime error
| import gradio as gr | |
| import os | |
| import subprocess | |
| import tempfile | |
| import shutil | |
| from huggingface_hub import HfApi, snapshot_download, upload_file | |
| import spaces | |
# Quantization presets offered in the UI, keyed by display name.
# Each value is simply the lowercase spelling of its key.
QUANT_LEVELS = {
    label: label.lower()
    for label in ("Q2_K", "Q3_K_M", "Q4_K_M", "Q5_K_M", "Q6_K", "Q8_0", "F16")
}
def _resolve_quant_levels(quant_levels):
    """Normalise a quant selection into ``(display_name, outtype_code)`` pairs.

    Accepts either display names that are keys of ``QUANT_LEVELS`` (plain
    strings) or ready-made ``(name, code)`` pairs.

    Returns:
        A ``(resolved, unknown)`` tuple: the list of usable pairs, and the
        list of string entries that are not keys of ``QUANT_LEVELS``.
    """
    resolved = []
    unknown = []
    for entry in quant_levels or []:
        if isinstance(entry, str):
            code = QUANT_LEVELS.get(entry)
            if code is None:
                unknown.append(entry)
            else:
                resolved.append((entry, code))
        else:
            name, code = entry
            resolved.append((name, code))
    return resolved, unknown


# NOTE(review): this file imports `spaces` but no function is decorated with
# `@spaces.GPU`. If the Space runs on ZeroGPU hardware, confirm whether a
# decorated function is required at startup.
def convert_model(model_id, quant_levels, hf_token, progress=gr.Progress()):
    """Convert a HF model to GGUF at the selected quant levels and upload them.

    The model snapshot is downloaded once. For every selected level the
    llama.cpp conversion script is run in a subprocess, and the resulting
    file is uploaded to the same repository as ``<level>.gguf``.

    Args:
        model_id: Hub repository id, e.g. ``"org/model"``.
        quant_levels: Selected levels, either display names that are keys of
            ``QUANT_LEVELS`` or ``(name, code)`` pairs.
        hf_token: Hub token used for the download and the uploads.
        progress: Gradio progress tracker.

    Returns:
        A pair of identical strings holding the per-level report, one line
        per event. On validation or download failure the pair is
        ``(error_message, "")``.
    """
    model_id = (model_id or "").strip()
    hf_token = (hf_token or "").strip()
    if not model_id:
        return "❌ Please enter a model ID", ""
    if not hf_token:
        return "❌ Please enter your HF token", ""

    selected, unknown = _resolve_quant_levels(quant_levels)
    if not selected and not unknown:
        return "❌ Please select at least one quantization level", ""

    results = [f"❌ {name}: unknown quantization level" for name in unknown]

    progress(0.1, desc="Downloading model...")
    try:
        model_path = snapshot_download(
            model_id,
            token=hf_token,
            ignore_patterns=["*.gguf", "*.pth", "*.bin"],
        )
    except Exception as e:
        return f"❌ Download failed: {e}", ""

    file_stem = model_id.replace("/", "_")
    try:
        # A private scratch directory keeps concurrent requests for the same
        # model from overwriting each other's output files.
        with tempfile.TemporaryDirectory() as work_dir:
            for index, (q_name, q_code) in enumerate(selected):
                progress(
                    0.2 + 0.7 * (index / len(selected)),
                    desc=f"Converting {q_name}...",
                )
                output_file = os.path.join(work_dir, f"{file_stem}_{q_name}.gguf")
                # NOTE(review): confirm that convert_hf_to_gguf.py accepts
                # every code in QUANT_LEVELS for --outtype; K-quants such as
                # q4_k_m may need a separate llama-quantize pass.
                cmd = [
                    "python", "llama.cpp/convert_hf_to_gguf.py",
                    model_path,
                    "--outtype", q_code,
                    "--outfile", output_file,
                ]
                try:
                    result = subprocess.run(
                        cmd, capture_output=True, text=True, timeout=300
                    )
                    if result.returncode != 0:
                        # The tail of stderr carries the actual error; the
                        # head is usually start-up logging.
                        detail = result.stderr.strip()[-200:]
                        results.append(f"❌ {q_name}: conversion failed - {detail}")
                        continue

                    file_size = os.path.getsize(output_file) / 1024 / 1024
                    results.append(f"✅ {q_name}: {file_size:.0f}MB")

                    # Upload to the model repo
                    filename = f"{q_name.lower()}.gguf"
                    upload_file(
                        path_or_fileobj=output_file,
                        path_in_repo=filename,
                        repo_id=model_id,
                        token=hf_token,
                    )
                    results.append(f"  → Uploaded as {filename}")
                except subprocess.TimeoutExpired:
                    results.append(f"❌ {q_name}: conversion timed out")
                except Exception as e:
                    # Best effort: one failing level must not abort the rest.
                    results.append(f"❌ {q_name}: {e}")
                finally:
                    # Free disk space before the next level, on every path.
                    if os.path.exists(output_file):
                        os.remove(output_file)
    finally:
        # Remove the directory returned by snapshot_download.
        shutil.rmtree(model_path, ignore_errors=True)

    summary = "\n".join(results)
    return summary, summary
# Gradio UI: a model-id box, a token box, a quant-level selector and a single
# button that runs convert_model and shows its report.
with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue"), title="dispatchAI GGUF Converter") as demo:
    gr.Markdown("""
# 🚀 dispatchAI GGUF Converter

Convert any HuggingFace model to GGUF format with multiple quantization levels.
Runs on ZeroGPU — free, fast, no local compute needed.

## How it works

1. Enter the model ID (e.g., `dispatchAI/SmolLM2-135M-Instruct-mobile`)
2. Select quant levels (Q4_K_M is the sweet spot for mobile)
3. Enter your HF token (needs write access to the model repo)
4. Click Convert — the GGUF files will be uploaded to the model repo

## Quant Level Guide

| Level | Size vs FP16 | Quality | Use Case |
|-------|-------------|---------|----------|
| Q2_K | ~25% | Low | Ultra-low RAM (1GB devices) |
| Q3_K_M | ~30% | Fair | Very constrained devices |
| Q4_K_M | ~40% | Good | **Sweet spot for mobile** |
| Q5_K_M | ~50% | Very Good | Quality-sensitive mobile |
| Q6_K | ~60% | Excellent | Near-lossless mobile |
| Q8_0 | ~70% | Excellent | High-quality, smaller than FP16 |
| F16 | 100% | Lossless | Reference / debugging |
""")
    with gr.Row():
        model_input = gr.Textbox(
            label="Model ID",
            placeholder="dispatchAI/SmolLM2-135M-Instruct-mobile",
            scale=3,
        )
        token_input = gr.Textbox(
            label="HF Token (write access)",
            type="password",
            scale=2,
        )
    quant_checkboxes = gr.CheckboxGroup(
        choices=list(QUANT_LEVELS.keys()),
        value=["Q4_K_M", "Q5_K_M", "Q8_0"],
        label="Quantization Levels",
    )
    convert_btn = gr.Button("🚀 Convert", variant="primary", size="lg")
    output = gr.Textbox(label="Results", lines=15)
    convert_btn.click(
        fn=convert_model,
        inputs=[model_input, quant_checkboxes, token_input],
        # convert_model returns a pair, so the textbox fills both output slots.
        outputs=[output, output],
    )
    gr.Markdown("""
---
🚀 [dispatchAI](https://huggingface.co/dispatchAI) — Small. Mobile. Free. UAE-built.
""")

if __name__ == "__main__":
    demo.launch()