Spaces:

dispatchAI
/

model-recommender

Runtime error

App Files Files Community

model-recommender / app.py

3morixd

Upload app.py with huggingface_hub

153626c verified 1 day ago

Raw

History Blame Contribute Delete

10.6 kB

	#!/usr/bin/env python3
	"""
	#301: On-device readiness checker — a Gradio Space that evaluates whether a
	given model will run on a mobile device.

	Paste a HuggingFace model ID or upload a config, get a "will it run on a phone?" report:
	- Parameter count vs memory budget
	- Architecture compatibility
	- Quantization recommendations
	- Estimated phone farm performance
	- Recommended dispatchAI model alternatives
	"""
	import gradio as gr
	import json
	import requests
	from huggingface_hub import hf_hub_download, HfApi
	import os

	# Hub access token. An unset or empty HF_TOKEN is normalised to None so the
	# Hub client receives "no explicit token" rather than an empty-string token.
	token = os.environ.get("HF_TOKEN") or None

	# Hardware profiles of the phones a model can be checked against, keyed by
	# the label shown in the device dropdown.
	#   usable_ram_gb     - RAM left after OS overhead
	#   max_model_size_gb - safe upper bound for model weights on that device
	PHONE_SPECS = {
	    "Samsung S20 FE (Snapdragon 865, 8GB)": dict(
	        chipset="Snapdragon 865",
	        ram_gb=8,
	        usable_ram_gb=6,
	        cpu_cores=8,
	        max_model_size_gb=4,
	    ),
	    "Samsung S23 (Snapdragon 8 Gen 2, 8GB)": dict(
	        chipset="Snapdragon 8 Gen 2",
	        ram_gb=8,
	        usable_ram_gb=6,
	        cpu_cores=8,
	        max_model_size_gb=4,
	    ),
	    "iPhone 15 Pro (A17 Pro, 8GB)": dict(
	        chipset="Apple A17 Pro",
	        ram_gb=8,
	        usable_ram_gb=6,
	        cpu_cores=6,
	        max_model_size_gb=4,
	    ),
	    "Budget Android (4GB RAM)": dict(
	        chipset="Mid-range",
	        ram_gb=4,
	        usable_ram_gb=3,
	        cpu_cores=8,
	        max_model_size_gb=2,
	    ),
	}

	# Catalog of dispatchAI mobile models offered as alternatives in the report.
	# Row layout: (repo id, parameters in millions, download size in MB, task).
	DISPATCHAI_MODELS = [
	    dict(zip(("id", "params_m", "size_mb", "task"), row))
	    for row in (
	        ("dispatchAI/SmolLM2-135M-Instruct-mobile", 135, 270, "chat"),
	        ("dispatchAI/SmolLM2-360M-Instruct-mobile", 360, 720, "chat"),
	        ("dispatchAI/Qwen2.5-0.5B-Instruct-mobile-int4", 500, 350, "chat"),
	        ("dispatchAI/Qwen2.5-0.5B-Coder-mobile", 500, 350, "code"),
	        ("dispatchAI/Llama-3.2-1B-Instruct-mobile", 1000, 2000, "chat"),
	        ("dispatchAI/TinyLlama-1.1B-Chat-Q5-mobile", 1100, 450, "chat"),
	        ("dispatchAI/Qwen2.5-1.5B-Instruct-Q5-mobile", 1500, 900, "chat"),
	        ("dispatchAI/Gemma-2-2B-IT-Q5-mobile", 2000, 1300, "chat"),
	        ("dispatchAI/Phi-3.5-mini-instruct-Q5-mobile", 2000, 1300, "chat"),
	        ("dispatchAI/Gemma-2B-Arabic-mobile", 2000, 1300, "arabic"),
	    )
	]

	def fetch_model_info(model_id):
	    """Fetch a model's ``config.json`` and the total size of its weight files.

	    Args:
	        model_id: HuggingFace repo ID, e.g. ``"Qwen/Qwen2.5-0.5B-Instruct"``.

	    Returns:
	        ``(config, size_mb)`` on success: ``config`` is the parsed
	        ``config.json`` and ``size_mb`` is the summed size, in units of 10^6
	        bytes, of every ``.safetensors`` / ``.bin`` / ``.gguf`` file in the
	        repo (``0`` when no size could be obtained). A repo that ships the
	        same weights in several formats is counted once per format.

	        ``(None, error_message)`` when the config download or the file
	        listing fails; ordinary errors are reported this way, not raised.
	    """
	    weight_suffixes = (".safetensors", ".bin", ".gguf")
	    try:
	        config_path = hf_hub_download(model_id, "config.json", token=token)
	        with open(config_path, "r", encoding="utf-8") as config_file:
	            config = json.load(config_file)

	        api = HfApi(token=token)
	        weight_files = [
	            name
	            for name in api.list_repo_files(model_id, token=token)
	            if name.endswith(weight_suffixes)
	        ]

	        size_mb = 0
	        if weight_files:
	            try:
	                # One batched request instead of one round-trip per shard.
	                entries = api.get_paths_info(
	                    model_id, weight_files, repo_type="model", token=token
	                )
	            except Exception:
	                # Size is best-effort: the caller falls back to an estimate
	                # when it is 0, so a failed lookup must not fail the fetch.
	                entries = []
	            # Skip entries without a usable size (e.g. folders).
	            size_mb = sum(
	                entry.size / 1e6
	                for entry in entries
	                if getattr(entry, "size", None)
	            )

	        return config, size_mb
	    except Exception as e:
	        return None, str(e)

	def estimate_params(config):
	    """Estimate a transformer's parameter count, in millions, from its config.

	    The estimate is deliberately rough:
	      - attention:  4 * hidden^2 per layer (Q, K, V, O projections)
	      - MLP:        2 * hidden * intermediate per layer
	      - embeddings: vocab * hidden

	    Args:
	        config: Parsed ``config.json`` dict. Reads ``hidden_size``,
	            ``num_hidden_layers`` (falling back to ``num_layers``),
	            ``vocab_size`` and ``intermediate_size`` (defaulting to
	            ``4 * hidden_size``). Missing or ``null`` fields count as absent.

	    Returns:
	        The estimated parameter count in millions, or ``0`` when ``config``
	        is not a mapping or its fields are not numeric.
	    """
	    try:
	        hidden = config.get("hidden_size") or 0
	        layers = config.get("num_hidden_layers", config.get("num_layers")) or 0
	        vocab = config.get("vocab_size") or 0

	        # A null intermediate_size must not zero the whole estimate; treat it
	        # like a missing key and use the conventional 4x expansion.
	        intermediate = config.get("intermediate_size")
	        if intermediate is None:
	            intermediate = hidden * 4

	        attention_params = 4 * hidden * hidden * layers
	        mlp_params = 2 * hidden * intermediate * layers
	        embed_params = vocab * hidden

	        total = attention_params + mlp_params + embed_params
	        return total / 1e6  # in millions
	    except (AttributeError, TypeError):
	        # config is not dict-like, or a field holds a non-numeric value.
	        return 0

	def check_readiness(model_id, target_device):
	    """Build a Markdown on-device readiness report for a HuggingFace model.

	    Args:
	        model_id: HuggingFace repo ID entered by the user; surrounding
	            whitespace is ignored.
	        target_device: A key of ``PHONE_SPECS``. Any other value (including
	            ``None``) falls back to the Samsung S20 FE profile.

	    Returns:
	        A Markdown string. On success this is the full report (model
	        properties, per-quantization size/fit table, speed estimate, device
	        specs, dispatchAI alternatives, recommendation). Otherwise it is a
	        short message explaining why no report could be produced: empty
	        model ID, fetch failure, or a config from which no parameter count
	        could be estimated.
	    """
	    default_device = "Samsung S20 FE (Snapdragon 865, 8GB)"

	    if not model_id or not model_id.strip():
	        return "Please enter a HuggingFace model ID."
	    model_id = model_id.strip()

	    # On failure fetch_model_info returns (None, error_message); on success
	    # the second item is the repo weight size, which the report does not use.
	    config, detail = fetch_model_info(model_id)
	    if not config:
	        if isinstance(detail, str):
	            return f"❌ Error fetching model info: {detail}\n\nCheck the model ID and try again."
	        return f"❌ Could not fetch config for `{model_id}`"

	    # Keep the displayed device name and the specs used in agreement.
	    if target_device not in PHONE_SPECS:
	        target_device = default_device
	    specs = PHONE_SPECS[target_device]
	    device_short_name = target_device.split("(")[0].strip()

	    params_m = estimate_params(config)
	    model_type = config.get("model_type", "unknown")
	    hidden_size = config.get("hidden_size", 0)
	    # Same fallback as estimate_params so the table matches the estimate.
	    num_layers = config.get("num_hidden_layers", config.get("num_layers", 0))

	    if params_m <= 0:
	        # Without a parameter count every tier below would read as
	        # "Excellent", so refuse to rate the model instead.
	        return (
	            f"⚠️ Could not estimate the parameter count for `{model_id}` "
	            f"(architecture: {model_type}). Its config.json does not provide the "
	            "fields this estimate needs (`hidden_size`, layer count, `vocab_size`)."
	        )

	    # Bytes per parameter for each format; params_m is in millions, so the
	    # product is in MB.
	    quant_sizes_mb = [
	        ("FP16", params_m * 2),
	        ("Q8", params_m * 1),
	        ("Q5_K_M", params_m * 0.625),
	        ("Q4_K_M", params_m * 0.5),
	    ]
	    budget_mb = specs["max_model_size_gb"] * 1024
	    size_rows = [
	        f"| {fmt} | {size:.0f}MB | {'✅' if size < budget_mb else '❌'} |"
	        for fmt, size in quant_sizes_mb
	    ]

	    # Phone farm performance estimate (based on real benchmarks)
	    # S20 FE: ~18 t/s for 135M, ~10 t/s for 500M, ~6 t/s for 1B, ~3 t/s for 2B
	    ready = "✅ This model is ready for mobile deployment. Use Q4_K_M or Q5_K_M GGUF for best size/quality balance."
	    usable = "⚠️ This model is usable but may be slow. Consider Q4_K_M quantization and test on target hardware."
	    slow = "⚠️ This model will be slow on mobile. Consider a smaller alternative from dispatchAI."
	    too_large = "❌ This model is too large for mobile deployment. Use a dispatchAI alternative above."
	    # (exclusive upper bound in millions of params, speed, rating, advice)
	    tiers = [
	        (200, "15-20 t/s", "🟢 Excellent", ready),
	        (600, "8-12 t/s", "🟢 Good", ready),
	        (1200, "5-7 t/s", "🟡 Usable", usable),
	        (2500, "2-4 t/s", "🟠 Slow", slow),
	    ]
	    est_tps, rating, advice = "< 2 t/s", "🔴 Too large", too_large
	    for limit, tier_tps, tier_rating, tier_advice in tiers:
	        if params_m < limit:
	            est_tps, rating, advice = tier_tps, tier_rating, tier_advice
	            break

	    # Alternatives of comparable size (50%-120% of the estimate); failing
	    # that, the three largest catalog models below the estimate.
	    recommendations = [
	        m for m in DISPATCHAI_MODELS
	        if params_m * 0.5 <= m["params_m"] <= params_m * 1.2
	    ]
	    if not recommendations:
	        smaller = [m for m in DISPATCHAI_MODELS if m["params_m"] < params_m]
	        recommendations = sorted(smaller, key=lambda m: m["params_m"], reverse=True)[:3]

	    rec_text = "\n".join(
	        f"- [`{m['id']}`](https://huggingface.co/{m['id']}) — {m['params_m']}M params, {m['size_mb']}MB"
	        for m in recommendations[:5]
	    )

	    # Assembled line by line so the Markdown never depends on source
	    # indentation; table cells use plain "|" so the tables render.
	    lines = [
	        "## 📱 On-Device Readiness Report",
	        "",
	        f"### Model: `{model_id}`",
	        "",
	        "| Property | Value |",
	        "|----------|-------|",
	        f"| Architecture | {model_type} |",
	        f"| Hidden size | {hidden_size} |",
	        f"| Layers | {num_layers} |",
	        f"| Estimated params | ~{params_m:.0f}M |",
	        "",
	        "### Size estimates by quantization",
	        "",
	        f"| Format | Size | Fits {device_short_name}? |",
	        "|--------|------|------|",
	        *size_rows,
	        "",
	        "### Performance estimate (Snapdragon 865)",
	        "",
	        "| Metric | Value |",
	        "|--------|-------|",
	        f"| Estimated speed | {est_tps} |",
	        f"| Readiness | {rating} |",
	        "",
	        f"### Target device: {target_device}",
	        "",
	        "| Property | Value |",
	        "|----------|-------|",
	        f"| Chipset | {specs['chipset']} |",
	        f"| RAM | {specs['ram_gb']}GB |",
	        f"| Max model size | {specs['max_model_size_gb']}GB |",
	        "",
	        "### Recommended dispatchAI alternatives",
	        "",
	        rec_text if rec_text else "No close matches found.",
	        "",
	        "### Recommendation",
	        "",
	        advice,
	    ]
	    return "\n".join(lines)

	# Dark dispatchAI theme applied on top of Gradio's default styling.
	custom_css = """
	.gradio-container { background: #0A0F1A !important; color: #F5F7FA !important; }
	h1, h2, h3 { color: #1FE0E6 !important; }
	.gr-button { background: linear-gradient(135deg, #2E6BFF, #1FE0E6) !important; color: #0A0F1A !important; }
	"""

	# Static Markdown copy, kept out of the layout code below.
	_HEADER_MD = """
	# 📱 On-Device Readiness Checker

	Will your model run on a phone? Paste a HuggingFace model ID and find out.

	Powered by [dispatchAI](https://huggingface.co/dispatchAI) — mobile AI that runs.
	"""

	_FOOTER_MD = """
	---
	### How it works

	1. Fetches the model's `config.json` from HuggingFace
	2. Estimates parameter count and size for each quantization level
	3. Compares against the target device's memory budget
	4. Estimates inference speed based on real phone farm benchmarks
	5. Recommends dispatchAI mobile-optimized alternatives

	### Try these models

	- `Qwen/Qwen2.5-0.5B-Instruct` — small model, should pass
	- `Qwen/Qwen2.5-7B-Instruct` — large model, should fail
	- `meta-llama/Llama-3.2-1B-Instruct` — borderline
	- `HuggingFaceTB/SmolLM2-135M-Instruct` — tiny, excellent

	---
	Dispatch AI (FZE), Sharjah SRTI Free Zone, License No. 10818.
	"""

	# Page layout: intro, one input row (model ID, device, button), the report
	# area, then the explanatory footer.
	with gr.Blocks(title="On-Device Readiness Checker", css=custom_css) as demo:
	    gr.Markdown(_HEADER_MD)

	    with gr.Row():
	        model_input = gr.Textbox(
	            scale=3,
	            label="HuggingFace Model ID",
	            placeholder="e.g., Qwen/Qwen2.5-0.5B-Instruct",
	        )
	        device_input = gr.Dropdown(
	            scale=2,
	            label="Target Device",
	            choices=list(PHONE_SPECS),
	            value="Samsung S20 FE (Snapdragon 865, 8GB)",
	        )
	        check_btn = gr.Button("Check Readiness", variant="primary", scale=1)

	    report_output = gr.Markdown(label="Readiness Report")

	    # Clicking the button renders check_readiness' Markdown into the report.
	    check_btn.click(check_readiness, [model_input, device_input], report_output)

	    gr.Markdown(_FOOTER_MD)

	if __name__ == "__main__":
	    demo.launch()