Spaces:
Runtime error
Runtime error
| #!/usr/bin/env python3 | |
| """ | |
| #301: On-device readiness checker — a Gradio Space that evaluates whether a | |
| given model will run on a mobile device. | |
| Paste a HuggingFace model ID or upload a config, get a "will it run on a phone?" report: | |
| - Parameter count vs memory budget | |
| - Architecture compatibility | |
| - Quantization recommendations | |
| - Estimated phone farm performance | |
| - Recommended dispatchAI model alternatives | |
| """ | |
| import gradio as gr | |
| import json | |
| import requests | |
| from huggingface_hub import hf_hub_download, HfApi | |
| import os | |
# Hub access token. An unset or empty HF_TOKEN is normalized to None so the
# Hub client does not receive an empty string as an explicit credential.
token = os.environ.get("HF_TOKEN") or None
# Phone farm specs, keyed by the device label offered in the UI dropdown.
# usable_ram_gb is the RAM left after OS overhead; max_model_size_gb is the
# safe model-size limit that the "fits?" checks compare against.
PHONE_SPECS = {
    "Samsung S20 FE (Snapdragon 865, 8GB)": dict(
        chipset="Snapdragon 865",
        ram_gb=8,
        usable_ram_gb=6,
        cpu_cores=8,
        max_model_size_gb=4,
    ),
    "Samsung S23 (Snapdragon 8 Gen 2, 8GB)": dict(
        chipset="Snapdragon 8 Gen 2",
        ram_gb=8,
        usable_ram_gb=6,
        cpu_cores=8,
        max_model_size_gb=4,
    ),
    "iPhone 15 Pro (A17 Pro, 8GB)": dict(
        chipset="Apple A17 Pro",
        ram_gb=8,
        usable_ram_gb=6,
        cpu_cores=6,
        max_model_size_gb=4,
    ),
    "Budget Android (4GB RAM)": dict(
        chipset="Mid-range",
        ram_gb=4,
        usable_ram_gb=3,
        cpu_cores=8,
        max_model_size_gb=2,
    ),
}
# dispatchAI model catalog used for recommendations.
# Row layout: (repo id, parameters in millions, size in MB, task).
_DISPATCHAI_ROWS = (
    ("dispatchAI/SmolLM2-135M-Instruct-mobile", 135, 270, "chat"),
    ("dispatchAI/SmolLM2-360M-Instruct-mobile", 360, 720, "chat"),
    ("dispatchAI/Qwen2.5-0.5B-Instruct-mobile-int4", 500, 350, "chat"),
    ("dispatchAI/Qwen2.5-0.5B-Coder-mobile", 500, 350, "code"),
    ("dispatchAI/Llama-3.2-1B-Instruct-mobile", 1000, 2000, "chat"),
    ("dispatchAI/TinyLlama-1.1B-Chat-Q5-mobile", 1100, 450, "chat"),
    ("dispatchAI/Qwen2.5-1.5B-Instruct-Q5-mobile", 1500, 900, "chat"),
    ("dispatchAI/Gemma-2-2B-IT-Q5-mobile", 2000, 1300, "chat"),
    ("dispatchAI/Phi-3.5-mini-instruct-Q5-mobile", 2000, 1300, "chat"),
    ("dispatchAI/Gemma-2B-Arabic-mobile", 2000, 1300, "arabic"),
)

DISPATCHAI_MODELS = [
    {"id": repo_id, "params_m": params_m, "size_mb": size_mb, "task": task}
    for repo_id, params_m, size_mb, task in _DISPATCHAI_ROWS
]
def fetch_model_info(model_id):
    """Fetch a model's config.json and total weight-file size from HuggingFace.

    Args:
        model_id: Hub repository id, e.g. "Qwen/Qwen2.5-0.5B-Instruct".

    Returns:
        (config, size_mb) on success, where config is the parsed config.json
        and size_mb is the summed size, in MB (1e6 bytes), of every
        .safetensors / .bin / .gguf file in the repo (0 when unknown).
        (None, error_message) when the config cannot be fetched or parsed.
    """
    try:
        config_path = hf_hub_download(model_id, "config.json", token=token)
        with open(config_path, "r", encoding="utf-8") as config_file:
            config = json.load(config_file)

        api = HfApi(token=token)
        weight_files = [
            name
            for name in api.list_repo_files(model_id, token=token)
            if name.endswith((".safetensors", ".bin", ".gguf"))
        ]

        size_mb = 0
        if weight_files:
            try:
                # One batched request instead of one HTTP round-trip per file.
                entries = api.get_paths_info(
                    model_id, weight_files, repo_type="model", token=token
                )
            except Exception:
                # Size lookup is best-effort: the caller can estimate the
                # size from the config when it stays at 0.
                entries = []
            size_mb = sum((getattr(entry, "size", None) or 0) for entry in entries) / 1e6
        return config, size_mb
    except Exception as e:
        # Callers detect failure by config being None and show this message.
        return None, str(e)
def estimate_params(config):
    """Estimate a transformer's parameter count, in millions, from its config.

    The rough formula is:
        attention:  4 * hidden^2 per layer (Q, K, V, O)
        MLP:        2 * hidden * intermediate per layer
        embeddings: vocab * hidden

    Missing keys and explicit ``None`` values use the defaults: 0 for
    hidden_size / layer count / vocab_size, and ``4 * hidden_size`` for
    intermediate_size. The layer count is read from ``num_hidden_layers``,
    then from ``num_layers``.

    Args:
        config: Parsed config.json mapping.

    Returns:
        Estimated parameters in millions (float), or 0 when ``config`` is not
        a mapping or holds non-numeric values.
    """
    try:
        hidden = config.get("hidden_size") or 0
        layers = config.get("num_hidden_layers")
        if layers is None:
            layers = config.get("num_layers") or 0
        vocab = config.get("vocab_size") or 0
        intermediate = config.get("intermediate_size")
        if intermediate is None:
            intermediate = hidden * 4

        attention_params = 4 * hidden * hidden * layers
        mlp_params = 2 * hidden * intermediate * layers
        embed_params = vocab * hidden
        total = attention_params + mlp_params + embed_params
        return total / 1e6  # in millions
    except (AttributeError, TypeError):
        # config is not a mapping, or a field is not numeric.
        return 0
# Device used when the requested target is missing or unknown.
_DEFAULT_DEVICE = "Samsung S20 FE (Snapdragon 865, 8GB)"

# Readiness tiers by estimated parameter count, smallest first:
# (exclusive upper bound in millions, estimated speed, rating, final advice).
# S20 FE reference: ~18 t/s for 135M, ~10 t/s for 500M, ~6 t/s for 1B,
# ~3 t/s for 2B.
_ADVICE_READY = (
    "✅ **This model is ready for mobile deployment.** "
    "Use Q4_K_M or Q5_K_M GGUF for best size/quality balance."
)
_ADVICE_USABLE = (
    "⚠️ **This model is usable but may be slow.** "
    "Consider Q4_K_M quantization and test on target hardware."
)
_ADVICE_SLOW = (
    "⚠️ **This model will be slow on mobile.** "
    "Consider a smaller alternative from dispatchAI."
)
_ADVICE_TOO_LARGE = (
    "❌ **This model is too large for mobile deployment.** "
    "Use a dispatchAI alternative above."
)
_READINESS_TIERS = (
    (200, "15-20 t/s", "🟢 Excellent", _ADVICE_READY),
    (600, "8-12 t/s", "🟢 Good", _ADVICE_READY),
    (1200, "5-7 t/s", "🟡 Usable", _ADVICE_USABLE),
    (2500, "2-4 t/s", "🟠 Slow", _ADVICE_SLOW),
)
_FALLBACK_TIER = ("< 2 t/s", "🔴 Too large", _ADVICE_TOO_LARGE)


def _fit_mark(size_mb, budget_mb):
    """Return a check mark when size_mb is under budget_mb, else a cross."""
    return "✅" if size_mb < budget_mb else "❌"


def check_readiness(model_id, target_device):
    """Build a Markdown report on whether a model will run on a device.

    Args:
        model_id: HuggingFace model id entered by the user; may be empty or
            None.
        target_device: A key of PHONE_SPECS. A missing or unknown value falls
            back to the Samsung S20 FE profile.

    Returns:
        A Markdown string: the readiness report, or a short message when the
        id is empty, the config cannot be fetched, or the parameter count
        cannot be estimated.
    """
    model_id = (model_id or "").strip()
    if not model_id:
        return "Please enter a HuggingFace model ID."

    # On failure the second item is an error message. On success it is the
    # measured repo size, which this report does not display.
    config, size_or_error = fetch_model_info(model_id)
    if not config:
        if isinstance(size_or_error, str):
            return (
                f"❌ **Error fetching model info**: {size_or_error}"
                "\n\nCheck the model ID and try again."
            )
        return f"❌ Could not fetch config for `{model_id}`"

    if target_device not in PHONE_SPECS:
        target_device = _DEFAULT_DEVICE
    specs = PHONE_SPECS[target_device]

    params_m = estimate_params(config)
    if not params_m or params_m <= 0:
        # Without a parameter estimate every size would read 0MB and the
        # model would be rated "Excellent", so stop here instead.
        return (
            f"⚠️ **Could not estimate the parameter count for `{model_id}`.** "
            "Its `config.json` does not provide the fields this estimate needs "
            "(`hidden_size`, `num_hidden_layers`, `vocab_size`)."
        )

    model_type = config.get("model_type", "unknown")
    hidden_size = config.get("hidden_size", 0)
    # Same layer-count fallback that estimate_params applies.
    num_layers = config.get("num_hidden_layers", config.get("num_layers", 0))

    # Estimated size in MB per format (bytes per parameter: 2, 1, 0.625, 0.5).
    size_by_format = {
        "FP16": params_m * 2,
        "Q8": params_m * 1,
        "Q5_K_M": params_m * 0.625,
        "Q4_K_M": params_m * 0.5,
    }
    budget_mb = specs["max_model_size_gb"] * 1024

    est_tps, rating, advice = _FALLBACK_TIER
    for upper_bound, tier_tps, tier_rating, tier_advice in _READINESS_TIERS:
        if params_m < upper_bound:
            est_tps, rating, advice = tier_tps, tier_rating, tier_advice
            break

    # Catalog models between 0.5x and 1.2x the estimate; otherwise the three
    # largest models that are still smaller than the estimate.
    recommendations = [
        m for m in DISPATCHAI_MODELS
        if params_m * 0.5 <= m["params_m"] <= params_m * 1.2
    ]
    if not recommendations:
        smaller = [m for m in DISPATCHAI_MODELS if m["params_m"] < params_m]
        recommendations = sorted(smaller, key=lambda m: m["params_m"], reverse=True)[:3]
    rec_text = "\n".join(
        f"- [`{m['id']}`](https://huggingface.co/{m['id']}) — {m['params_m']}M params, {m['size_mb']}MB"
        for m in recommendations[:5]
    )

    device_label = target_device.split("(")[0].strip()
    lines = [
        "## 📱 On-Device Readiness Report",
        f"### Model: `{model_id}`",
        "| Property | Value |",
        "|----------|-------|",
        f"| Architecture | {model_type} |",
        f"| Hidden size | {hidden_size} |",
        f"| Layers | {num_layers} |",
        f"| Estimated params | ~{params_m:.0f}M |",
        "### Size estimates by quantization",
        f"| Format | Size | Fits {device_label}? |",
        "|--------|------|------|",
    ]
    lines.extend(
        f"| {fmt} | {size_mb:.0f}MB | {_fit_mark(size_mb, budget_mb)} |"
        for fmt, size_mb in size_by_format.items()
    )
    lines.extend([
        "### Performance estimate (Snapdragon 865)",
        "| Metric | Value |",
        "|--------|-------|",
        f"| Estimated speed | {est_tps} |",
        f"| Readiness | {rating} |",
        f"### Target device: {target_device}",
        "| Property | Value |",
        "|----------|-------|",
        f"| Chipset | {specs['chipset']} |",
        f"| RAM | {specs['ram_gb']}GB |",
        f"| Max model size | {specs['max_model_size_gb']}GB |",
        "### Recommended dispatchAI alternatives",
        rec_text if rec_text else "No close matches found.",
        "### Recommendation",
        advice,
    ])
    return "\n".join(lines)
| # Custom CSS | |
| custom_css = """ | |
| .gradio-container { background: #0A0F1A !important; color: #F5F7FA !important; } | |
| h1, h2, h3 { color: #1FE0E6 !important; } | |
| .gr-button { background: linear-gradient(135deg, #2E6BFF, #1FE0E6) !important; color: #0A0F1A !important; } | |
| """ | |
# UI: a model-id textbox, a target-device dropdown and a button that renders
# the Markdown report returned by check_readiness.
with gr.Blocks(css=custom_css, title="On-Device Readiness Checker") as demo:
    gr.Markdown("""
    # 📱 On-Device Readiness Checker
    **Will your model run on a phone?** Paste a HuggingFace model ID and find out.
    Powered by [dispatchAI](https://huggingface.co/dispatchAI) — mobile AI that runs.
    """)
    with gr.Row():
        model_input = gr.Textbox(
            label="HuggingFace Model ID",
            placeholder="e.g., Qwen/Qwen2.5-0.5B-Instruct",
            scale=3
        )
        device_input = gr.Dropdown(
            choices=list(PHONE_SPECS.keys()),
            value="Samsung S20 FE (Snapdragon 865, 8GB)",
            label="Target Device",
            scale=2
        )
        check_btn = gr.Button("Check Readiness", variant="primary", scale=1)
    report_output = gr.Markdown(label="Readiness Report")
    check_btn.click(
        fn=check_readiness,
        inputs=[model_input, device_input],
        outputs=report_output
    )
    gr.Markdown("""
    ---
    ### How it works
    1. Fetches the model's `config.json` from HuggingFace
    2. Estimates parameter count and size for each quantization level
    3. Compares against the target device's memory budget
    4. Estimates inference speed based on real phone farm benchmarks
    5. Recommends dispatchAI mobile-optimized alternatives
    ### Try these models
    - `Qwen/Qwen2.5-0.5B-Instruct` — small model, should pass
    - `Qwen/Qwen2.5-7B-Instruct` — large model, should fail
    - `meta-llama/Llama-3.2-1B-Instruct` — borderline
    - `HuggingFaceTB/SmolLM2-135M-Instruct` — tiny, excellent
    ---
    *Dispatch AI (FZE), Sharjah SRTI Free Zone, License No. 10818.*
    """)

if __name__ == "__main__":
    demo.launch()