Spaces:

dispatchAI
/

gguf-converter

Runtime error

App Files Files Community

gguf-converter / app.py

3morixd

Upload app.py with huggingface_hub

b26c68a verified 9 days ago

Raw

History Blame Contribute Delete

4.77 kB

	import gradio as gr
	import os
	import subprocess
	import tempfile
	import shutil
	from huggingface_hub import HfApi, snapshot_download, upload_file
	import spaces

	# Quantization labels offered in the UI, mapped to the lowercase codes that
	# are handed to the converter's --outtype flag. Each code is simply the
	# lowercase form of its label; insertion order is the display order.
	QUANT_LEVELS = {
	    label: label.lower()
	    for label in (
	        "Q2_K",
	        "Q3_K_M",
	        "Q4_K_M",
	        "Q5_K_M",
	        "Q6_K",
	        "Q8_0",
	        "F16",
	    )
	}

	@spaces.GPU(duration=300)
	def convert_model(model_id, quant_levels, hf_token, progress=gr.Progress()):
	    """Convert a HF model to GGUF at each requested quant level and upload it.

	    Args:
	        model_id: Hub repo ID the model is downloaded from and the GGUF files
	            are uploaded back to.
	        quant_levels: Selected quantization levels. Each entry is either a
	            key of ``QUANT_LEVELS`` (what the checkbox group sends) or an
	            explicit ``(name, code)`` pair.
	        hf_token: Hugging Face token used for the download and the uploads.
	        progress: Gradio progress tracker used to report the current step.

	    Returns:
	        A 2-tuple of strings. After a conversion run both items hold the same
	        newline-joined per-level report; on a validation or download error
	        the first item is the error message and the second is empty.
	    """
	    import sys  # local import: only needed to locate the running interpreter

	    if not model_id:
	        return "❌ Please enter a model ID", ""
	    if not hf_token:
	        return "❌ Please enter your HF token", ""
	    if not quant_levels:
	        return "❌ Please select at least one quantization level", ""

	    results = []
	    safe_name = model_id.replace("/", "_")

	    # Everything (model snapshot and generated GGUF files) lives in one
	    # private temp directory so it is removed on every exit path, including
	    # the early return on download failure, without touching the shared
	    # Hub cache.
	    with tempfile.TemporaryDirectory(prefix="gguf_convert_") as work_dir:
	        progress(0.1, desc="Downloading model...")
	        try:
	            model_path = snapshot_download(
	                model_id,
	                token=hf_token,
	                local_dir=os.path.join(work_dir, "model"),
	                # Glob patterns: without the leading "*" nothing is matched.
	                ignore_patterns=["*.gguf", "*.pth", "*.bin"],
	            )
	        except Exception as e:
	            return f"❌ Download failed: {e}", ""

	        total = len(quant_levels)
	        for i, level in enumerate(quant_levels):
	            # The UI sends plain level names; resolve them to converter codes.
	            if isinstance(level, str):
	                q_name, q_code = level, QUANT_LEVELS.get(level)
	            else:
	                q_name, q_code = level
	            if not q_code:
	                results.append(f"❌ {q_name}: unknown quantization level")
	                continue

	            progress(0.2 + 0.7 * (i / total), desc=f"Converting {q_name}...")
	            output_file = os.path.join(work_dir, f"{safe_name}_{q_name}.gguf")

	            try:
	                # Convert using llama.cpp's convert script, run with the same
	                # interpreter (and therefore the same packages) as this app.
	                # NOTE(review): convert_hf_to_gguf.py's --outtype appears to
	                # accept only f32/f16/bf16/q8_0-style types; the K-quant codes
	                # probably need a separate llama-quantize step — confirm.
	                cmd = [
	                    sys.executable, "llama.cpp/convert_hf_to_gguf.py",
	                    model_path,
	                    "--outtype", q_code,
	                    "--outfile", output_file,
	                ]
	                result = subprocess.run(cmd, capture_output=True, text=True, timeout=300)

	                if result.returncode != 0:
	                    # The actual error is at the end of stderr, after the logs.
	                    tail = result.stderr.strip()[-200:]
	                    results.append(f"❌ {q_name}: conversion failed - {tail}")
	                    continue

	                file_size = os.path.getsize(output_file) / 1024 / 1024
	                results.append(f"✅ {q_name}: {file_size:.0f}MB")

	                # Upload to the model repo
	                filename = f"{q_name.lower()}.gguf"
	                upload_file(
	                    path_or_fileobj=output_file,
	                    path_in_repo=filename,
	                    repo_id=model_id,
	                    token=hf_token,
	                )
	                results.append(f" → Uploaded as {filename}")

	            except subprocess.TimeoutExpired:
	                results.append(f"❌ {q_name}: conversion timed out")
	            except Exception as e:
	                results.append(f"❌ {q_name}: {e}")
	            finally:
	                # Free disk space before the next level, also after failures.
	                if os.path.exists(output_file):
	                    os.remove(output_file)

	    report = "\n".join(results)
	    return report, report

	# Build the Gradio UI: intro text, the three inputs, the convert button and
	# a results box wired to convert_model.
	with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue"), title="dispatchAI GGUF Converter") as demo:
	    # Table rows use plain "|" delimiters: "\|" is an invalid Python escape
	    # and, in Markdown, an escaped pipe is literal text, not a cell border.
	    gr.Markdown("""
	# 🔄 dispatchAI GGUF Converter

	Convert any HuggingFace model to GGUF format with multiple quantization levels.
	Runs on ZeroGPU — free, fast, no local compute needed.

	## How it works
	1. Enter the model ID (e.g., `dispatchAI/SmolLM2-135M-Instruct-mobile`)
	2. Select quant levels (Q4_K_M is the sweet spot for mobile)
	3. Enter your HF token (needs write access to the model repo)
	4. Click Convert — the GGUF files will be uploaded to the model repo

	## Quant Level Guide
	| Level | Size vs FP16 | Quality | Use Case |
	|-------|-------------|---------|----------|
	| Q2_K | ~25% | Low | Ultra-low RAM (1GB devices) |
	| Q3_K_M | ~30% | Fair | Very constrained devices |
	| Q4_K_M | ~40% | Good | Sweet spot for mobile |
	| Q5_K_M | ~50% | Very Good | Quality-sensitive mobile |
	| Q6_K | ~60% | Excellent | Near-lossless mobile |
	| Q8_0 | ~70% | Excellent | High-quality, smaller than FP16 |
	| F16 | 100% | Lossless | Reference / debugging |
	""")

	    with gr.Row():
	        model_input = gr.Textbox(
	            label="Model ID",
	            placeholder="dispatchAI/SmolLM2-135M-Instruct-mobile",
	            scale=3
	        )
	        token_input = gr.Textbox(
	            label="HF Token (write access)",
	            type="password",
	            scale=2
	        )

	    quant_checkboxes = gr.CheckboxGroup(
	        choices=list(QUANT_LEVELS.keys()),
	        value=["Q4_K_M", "Q5_K_M", "Q8_0"],
	        label="Quantization Levels",
	    )

	    convert_btn = gr.Button("🔄 Convert", variant="primary", size="lg")

	    output = gr.Textbox(label="Results", lines=15)

	    # convert_model returns two strings, so the results box is listed once
	    # per returned value.
	    convert_btn.click(
	        fn=convert_model,
	        inputs=[model_input, quant_checkboxes, token_input],
	        outputs=[output, output]
	    )

	    gr.Markdown("""
	---
	🚀 [dispatchAI](https://huggingface.co/dispatchAI) — Small. Mobile. Free. UAE-built.
	""")

	# Start the Gradio server only when this file is executed as a script.
	if __name__ == "__main__":
	    demo.launch()