import gradio as gr
import os
import subprocess
import tempfile
import shutil
from huggingface_hub import HfApi, snapshot_download, upload_file
import spaces

# Maps the quantization label shown in the UI to the code handed to the
# converter's --outtype flag. Every code is simply the lowercase form of its
# label, so the table is derived from the ordered list of labels.
QUANT_LEVELS = {
    label: label.lower()
    for label in (
        "Q2_K",
        "Q3_K_M",
        "Q4_K_M",
        "Q5_K_M",
        "Q6_K",
        "Q8_0",
        "F16",
    )
}

def _convert_and_upload(model_path, model_id, q_name, q_code, work_dir, hf_token):
    """Convert one quant level to GGUF, upload it, and return the report lines.

    Args:
        model_path: Local directory holding the downloaded model snapshot.
        model_id: Hub repo ID; used for the temp file name and as upload target.
        q_name: Display name of the quant level (e.g. "Q4_K_M").
        q_code: Value passed to the converter's ``--outtype`` flag.
        work_dir: Existing scratch directory in which the GGUF file is written.
        hf_token: Token used for the upload.

    Returns:
        A list of human-readable result lines. Failures are reported as lines
        rather than raised, so one bad level does not abort the others.
    """
    lines = []
    safe_name = model_id.replace("/", "_")
    output_file = os.path.join(work_dir, f"{safe_name}_{q_name}.gguf")

    try:
        # Convert using llama.cpp's convert script.
        # NOTE(review): upstream convert_hf_to_gguf.py appears to accept only
        # f32/f16/bf16/q8_0/tq1_0/tq2_0/auto for --outtype; the K-quant codes
        # may need a separate llama-quantize pass — confirm against the
        # llama.cpp checkout this Space ships with.
        cmd = [
            "python", "llama.cpp/convert_hf_to_gguf.py",
            model_path,
            "--outtype", q_code,
            "--outfile", output_file,
        ]
        result = subprocess.run(cmd, capture_output=True, text=True, timeout=300)

        if result.returncode != 0:
            lines.append(f"❌ {q_name}: conversion failed - {result.stderr[:200]}")
            return lines

        file_size = os.path.getsize(output_file) / 1024 / 1024
        lines.append(f"✅ {q_name}: {file_size:.0f}MB")

        # Upload to the model repo
        filename = f"{q_name.lower()}.gguf"
        upload_file(
            path_or_fileobj=output_file,
            path_in_repo=filename,
            repo_id=model_id,
            token=hf_token,
        )
        lines.append(f"   → Uploaded as {filename}")
    except subprocess.TimeoutExpired:
        lines.append(f"❌ {q_name}: conversion timed out")
    except Exception as e:
        # Best-effort boundary: report the failure and let other levels run.
        lines.append(f"❌ {q_name}: {e}")
    finally:
        # Drop the (potentially large) file right away, also on failure, so
        # disk usage does not pile up across levels.
        if os.path.exists(output_file):
            os.remove(output_file)

    return lines


@spaces.GPU(duration=300)
def convert_model(model_id, quant_levels, hf_token, progress=gr.Progress()):
    """Convert a HF model to GGUF format with specified quant levels.

    Downloads the model snapshot, runs the llama.cpp conversion script once per
    requested level, and uploads each resulting file back to ``model_id``.

    Args:
        model_id: Hub repo ID to download from and upload to.
        quant_levels: Selected levels. Normally a list of level names (keys of
            ``QUANT_LEVELS``), which is what the checkbox group supplies;
            ``(name, outtype)`` pairs are accepted as well.
        hf_token: Hugging Face token used for both download and upload.
        progress: Gradio progress tracker (the default instance is the form
            Gradio expects in an event handler signature).

    Returns:
        A tuple of two identical strings containing the report (or the error
        message). Both slots carry the same text because the UI wires both
        outputs to the same textbox.
    """
    if not model_id:
        message = "❌ Please enter a model ID"
        return message, message
    if not hf_token:
        message = "❌ Please enter your HF token"
        return message, message

    levels = list(quant_levels) if quant_levels else []
    if not levels:
        # Avoid downloading a whole model when there is nothing to convert.
        message = "❌ Please select at least one quantization level"
        return message, message

    progress(0.1, desc="Downloading model...")
    try:
        model_path = snapshot_download(
            model_id,
            token=hf_token,
            ignore_patterns=["*.gguf", "*.pth", "*.bin"],
        )
    except Exception as e:
        message = f"❌ Download failed: {e}"
        return message, message

    results = []
    try:
        # A private scratch directory keeps concurrent requests for the same
        # model from clobbering each other's output files.
        with tempfile.TemporaryDirectory() as work_dir:
            for i, level in enumerate(levels):
                if isinstance(level, str):
                    q_name, q_code = level, QUANT_LEVELS.get(level)
                else:
                    q_name, q_code = level

                progress(0.2 + 0.7 * (i / len(levels)), desc=f"Converting {q_name}...")

                if q_code is None:
                    results.append(f"❌ {q_name}: unknown quantization level")
                    continue

                results.extend(
                    _convert_and_upload(
                        model_path, model_id, q_name, q_code, work_dir, hf_token
                    )
                )
    finally:
        # Cleanup
        # NOTE(review): snapshot_download presumably returns a path inside the
        # shared HF cache, so this may leave the underlying blob files behind —
        # confirm whether a dedicated local_dir would be preferable.
        shutil.rmtree(model_path, ignore_errors=True)

    summary = "\n".join(results)
    return summary, summary

# Declarative UI layout. Components are rendered in creation order, so the
# sequence of statements inside the context manager is the page layout.
with gr.Blocks(
    title="dispatchAI GGUF Converter",
    theme=gr.themes.Soft(primary_hue="blue"),
) as demo:
    # Intro, usage steps and the quant-level cheat sheet.
    gr.Markdown(value="""
    # 🔄 dispatchAI GGUF Converter
    
    Convert any HuggingFace model to GGUF format with multiple quantization levels.
    Runs on ZeroGPU — free, fast, no local compute needed.
    
    ## How it works
    1. Enter the model ID (e.g., `dispatchAI/SmolLM2-135M-Instruct-mobile`)
    2. Select quant levels (Q4_K_M is the sweet spot for mobile)
    3. Enter your HF token (needs write access to the model repo)
    4. Click Convert — the GGUF files will be uploaded to the model repo
    
    ## Quant Level Guide
    | Level | Size vs FP16 | Quality | Use Case |
    |-------|-------------|---------|----------|
    | Q2_K | ~25% | Low | Ultra-low RAM (1GB devices) |
    | Q3_K_M | ~30% | Fair | Very constrained devices |
    | Q4_K_M | ~40% | Good | **Sweet spot for mobile** |
    | Q5_K_M | ~50% | Very Good | Quality-sensitive mobile |
    | Q6_K | ~60% | Excellent | Near-lossless mobile |
    | Q8_0 | ~70% | Excellent | High-quality, smaller than FP16 |
    | F16 | 100% | Lossless | Reference / debugging |
    """)

    # Model ID and token share one row, split 3:2.
    with gr.Row():
        model_input = gr.Textbox(
            scale=3,
            placeholder="dispatchAI/SmolLM2-135M-Instruct-mobile",
            label="Model ID",
        )
        token_input = gr.Textbox(
            scale=2,
            type="password",
            label="HF Token (write access)",
        )

    # One checkbox per known quant level; three are pre-selected.
    quant_checkboxes = gr.CheckboxGroup(
        label="Quantization Levels",
        value=["Q4_K_M", "Q5_K_M", "Q8_0"],
        choices=list(QUANT_LEVELS),
    )

    convert_btn = gr.Button(value="🔄 Convert", size="lg", variant="primary")

    output = gr.Textbox(lines=15, label="Results")

    # convert_model returns two strings; both are routed to the same textbox.
    convert_btn.click(
        convert_model,
        inputs=[model_input, quant_checkboxes, token_input],
        outputs=[output, output],
    )

    # Footer.
    gr.Markdown(value="""
    ---
    🚀 [dispatchAI](https://huggingface.co/dispatchAI) — Small. Mobile. Free. UAE-built.
    """)

# Launch the app only when this file is run as a script; importing the module
# just builds `demo` without launching it.
if __name__ == "__main__":
    demo.launch()