3morixd committed on
Commit
48a542a
·
verified ·
1 Parent(s): c30d215

Upload app.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +81 -0
app.py ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import pandas as pd
3
+
4
# Column names shared by every catalogue row, in display order.
_MODEL_FIELDS = ("Model", "Params", "Size_MB", "RAM_MB", "Task", "Quant", "Speed_tps")

# One tuple per model, positionally aligned with _MODEL_FIELDS.
_MODEL_ROWS = [
    ("SmolLM2-135M-Instruct-mobile", "135M", 270, 400, "Chat", "FP16", 25.5),
    ("SmolLM2-360M-Instruct-mobile", "360M", 720, 700, "Chat", "FP16", 21.0),
    ("Qwen2.5-0.5B-Instruct-mobile-int4", "500M", 350, 550, "Chat", "INT4", 20.0),
    ("Llama-3.2-1B-Instruct-Q4-mobile", "1B", 700, 1100, "Chat", "Q4", 18.2),
    ("Llama-3.2-1B-Instruct-Q6-mobile", "1B", 1100, 1300, "Chat", "Q6", 16.8),
    ("TinyLlama-1.1B-Chat-Q5-mobile", "1.1B", 800, 1200, "Chat", "Q5", 17.5),
    ("Qwen2.5-0.5B-Coder-mobile", "500M", 1000, 1500, "Code", "FP16", 20.0),
    ("Qwen2.5-Coder-1.5B-mobile", "1.5B", 3000, 4000, "Code", "FP16", 10.5),
    ("Qwen2.5-Math-1.5B-mobile", "1.5B", 3000, 4000, "Math", "FP16", 10.5),
    ("Gemma-2B-Arabic-mobile", "2B", 5000, 5500, "Arabic", "FP16", 8.0),
    ("Gemma-2-2B-IT-Q5-mobile", "2B", 1500, 2200, "Chat", "Q5", 12.0),
    ("Llama-3.2-3B-Instruct-Q5-mobile", "3B", 2100, 2700, "Chat", "Q5", 8.5),
    ("Llama-3.2-1B-FunctionCall-mobile", "1B", 2500, 3000, "Function Call", "FP16", 12.0),
    ("Moondream2-Vision-Q5-mobile", "1.9B", 1400, 2000, "Vision", "Q5", 8.5),
    ("EmbeddingGemma-300M-Q8-mobile", "300M", 300, 500, "Embedding", "Q8", 22.0),
]

# Model catalogue as a list of dicts keyed by _MODEL_FIELDS.
MODELS = [dict(zip(_MODEL_FIELDS, row)) for row in _MODEL_ROWS]

# Tabular view of the catalogue; this is what `recommend` filters and sorts.
df = pd.DataFrame(MODELS)

# Phone tier label -> RAM budget in MB (1 GB counted as 1024 MB).
PHONE_PROFILES = {
    label: gigabytes * 1024
    for label, gigabytes in (
        ("Low-end (2GB RAM)", 2),
        ("Mid-range (4GB RAM)", 4),
        ("High-end (6GB RAM)", 6),
        ("Flagship (8GB+ RAM)", 8),
    )
}

# Choices for the task dropdown; "Any" disables task filtering in `recommend`.
TASKS = [
    "Chat",
    "Code",
    "Math",
    "Arabic",
    "Function Call",
    "Vision",
    "Embedding",
    "Any",
]
32
+
33
def _param_count(label):
    """Convert a parameter-count label such as "135M" or "1.5B" to a number.

    Labels that cannot be parsed become NaN, which pandas places last when
    sorting.
    """
    scale = {"K": 1e3, "M": 1e6, "B": 1e9, "T": 1e12}
    text = str(label).strip().upper()
    try:
        if text[-1:] in scale:
            return float(text[:-1]) * scale[text[-1]]
        return float(text)
    except ValueError:
        return float("nan")


def recommend(phone_profile, task, priority):
    """Return up to five catalogue models that fit the chosen phone and task.

    Args:
        phone_profile: Key of ``PHONE_PROFILES``; its value is the RAM budget
            in MB that a model's ``RAM_MB`` must not exceed.
        task: Value matched against the ``Task`` column, or ``"Any"`` to skip
            task filtering.
        priority: ``"Smallest size"`` (ascending ``Size_MB``), ``"Fastest"``
            (descending ``Speed_tps``) or ``"Best quality"`` (descending
            parameter count). Any other value leaves the rows in table order.

    Returns:
        A DataFrame holding at most five rows of ``df``, or a one-row
        DataFrame with a single ``Error`` column when no model matches.

    Raises:
        KeyError: If ``phone_profile`` is not a key of ``PHONE_PROFILES``.
    """
    ram = PHONE_PROFILES[phone_profile]

    # Build one boolean mask; indexing with it yields a new frame, so the
    # module-level table is never modified.
    fits = df["RAM_MB"] <= ram
    if task != "Any":
        fits &= df["Task"] == task
    filtered = df[fits]

    if filtered.empty:
        return pd.DataFrame([{"Error": f"No models fit in {ram}MB RAM for task '{task}'. Try a different phone or task."}])

    if priority == "Smallest size":
        filtered = filtered.sort_values("Size_MB")
    elif priority == "Fastest":
        filtered = filtered.sort_values("Speed_tps", ascending=False)
    elif priority == "Best quality":
        # Parameter count is used as a rough proxy for quality. "Params" holds
        # strings like "135M" / "1.5B", so it must be compared numerically:
        # plain string ordering would rank "500M" above "3B".
        filtered = filtered.sort_values(
            "Params",
            ascending=False,
            kind="mergesort",
            key=lambda column: column.map(_param_count),
        )

    return filtered.head(5)
54
+
55
# Gradio front end: three selectors feed `recommend`, and its DataFrame result
# is shown in a table component.
with gr.Blocks(
    title="dispatchAI Model Recommender",
    theme=gr.themes.Soft(primary_hue="blue"),
) as demo:
    gr.Markdown("""
    # 📱 dispatchAI Mobile Model Recommender

    Find the perfect dispatchAI model for your phone and use case.
    """)

    with gr.Row():
        phone = gr.Dropdown(
            label="Your Phone",
            choices=list(PHONE_PROFILES),
            value="Mid-range (4GB RAM)",
        )
        task = gr.Dropdown(
            label="Primary Task",
            choices=TASKS,
            value="Chat",
        )
        priority = gr.Radio(
            ["Smallest size", "Fastest", "Best quality"],
            label="Priority",
            value="Smallest size",
        )

    btn = gr.Button("Find My Model", variant="primary", size="lg")
    table = gr.DataFrame(label="Recommended Models")

    # The same inputs drive both the button click and the load event, so the
    # table can be filled from the default selections as well as on demand.
    selectors = [phone, task, priority]
    btn.click(fn=recommend, inputs=selectors, outputs=table)
    demo.load(fn=recommend, inputs=selectors, outputs=table)

    gr.Markdown("""
    ---
    All benchmarks measured on **Snapdragon 865 (Samsung S20 FE)**.

    🚀 [dispatchAI](https://huggingface.co/dispatchAI) — Small. Mobile. Free. UAE-built.
    """)

# Start the server only when run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()