import gradio as gr
import requests
import os
import urllib.parse
# Newline character, used to assemble Markdown and email bodies throughout.
NL = chr(10)
# Map user-facing modality choices to HF Hub task tags
TASK_MAP = {
    "Text in -> text out": ["text-generation", "text2text-generation"],
    "Images in (docs, UI, scans)": ["image-to-text", "visual-question-answering"],
    "Audio in (speech)": ["automatic-speech-recognition"],
    "Structured data (tables, logs)": ["tabular-classification", "tabular-regression"],
    "Code": ["text-generation"],
}
# Map the data-sensitivity radio choice to an HF Hub license tag used when
# querying the model API; None means "do not filter by license".
LICENSE_FILTER = {
    "Standard SaaS API is fine": None,
    "Must stay in a specific cloud region": None,
    "Strict: prefer on-prem / VPC only": "apache-2.0",
}
def fetch_top_models(tasks, license_filter=None, top_n=5):
    """Query the HuggingFace Hub API for the most-downloaded models.

    Parameters
    ----------
    tasks : list[str]
        HF pipeline tags to query (e.g. ``"text-generation"``).
    license_filter : str | None
        Optional license tag (e.g. ``"apache-2.0"``) to restrict results.
    top_n : int
        Maximum number of models returned overall.

    Returns
    -------
    list[tuple[str, str, int, int]]
        ``(model_id, task, downloads, likes)`` tuples sorted by downloads
        descending, truncated to ``top_n``. Lookup is best-effort: network
        or decode failures for one task are skipped, so the result may be
        empty.
    """
    seen = set()
    results = []
    for task in tasks:
        params = {
            "pipeline_tag": task,
            "sort": "downloads",
            "direction": "-1",
            "limit": "20",
        }
        if license_filter:
            params["license"] = license_filter
        try:
            resp = requests.get(
                "https://huggingface.co/api/models",
                params=params,
                timeout=15,
            )
        except requests.RequestException:
            # Best-effort: skip this task on network/HTTP errors, but let
            # real programming errors surface instead of swallowing them.
            continue
        if resp.status_code != 200:
            continue
        try:
            data = resp.json()
        except ValueError:
            # Malformed/non-JSON response body.
            continue
        for m in data:
            mid = m.get("modelId", m.get("id", ""))
            if mid and mid not in seen:
                seen.add(mid)
                downloads = m.get("downloads", 0) or 0
                likes = m.get("likes", 0) or 0
                results.append((mid, task, downloads, likes))
                # Cap collection per task so one popular tag cannot
                # monopolize the candidate pool.
                if len(results) >= top_n * len(tasks):
                    break
    results.sort(key=lambda x: x[2], reverse=True)
    return results[:top_n]
def format_model_table(models):
    """Render ``(model_id, task, downloads, likes)`` tuples as a Markdown table.

    Returns a fallback message string when *models* is empty.
    """
    if not models:
        return "No models found via HF API. Please try again."
    lines = [
        "| # | Model | Task | Downloads | Likes |",
        "|---|-------|------|-----------|-------|",
    ]
    for rank, (model_id, task, downloads, likes) in enumerate(models, start=1):
        # Zero counts render as "N/A" rather than a misleading "0".
        dl_cell = f"{downloads:,}" if downloads else "N/A"
        like_cell = f"{likes:,}" if likes else "N/A"
        lines.append(
            f"| {rank} | [{model_id}](https://huggingface.co/{model_id})"
            f" | `{task}` | {dl_cell} | {like_cell} |"
        )
    # Trailing NL matches the original line-by-line concatenation.
    return NL.join(lines) + NL
def recommend_model(
    modality, outputs, domains, data_sensitivity, volume, latency, context_size, customization
):
    """Build the full Markdown strategy report from the wizard's form inputs.

    Queries the HF Hub live via fetch_top_models, then assembles a model
    table, a deployment recommendation, tailored tips, and a rollout guide.
    Returns ``(summary_markdown, gr.update(visible=True))`` — the second
    value reveals the lead-capture section.

    NOTE(review): the ``outputs`` parameter is received from the UI but not
    used anywhere in this function — confirm whether that is intentional.
    """
    # Collect HF tasks to query
    hf_tasks = []
    for m in modality:
        hf_tasks.extend(TASK_MAP.get(m, []))
    # Deduplicate, keep order
    seen_t = set()
    unique_tasks = []
    for t in hf_tasks:
        if t not in seen_t:
            seen_t.add(t)
            unique_tasks.append(t)
    # Fall back to plain text generation when no modality was selected.
    if not unique_tasks:
        unique_tasks = ["text-generation"]
    license_filter = LICENSE_FILTER.get(data_sensitivity)
    # Fetch live models
    live_models = fetch_top_models(unique_tasks, license_filter=license_filter, top_n=5)
    model_table = format_model_table(live_models)
    # Deployment path: strict privacy wins over volume, which wins over default.
    if data_sensitivity == "Strict: prefer on-prem / VPC only":
        deploy_path = "π **Private Self-Hosted** - Run top open-source models above via Ollama or vLLM on your own infra."
    elif volume == "100,000+":
        deploy_path = "π° **Cost-Optimized Scale** - Use provisioned throughput for closed models, or self-host on GPU clusters."
    else:
        deploy_path = "β‘ **Serverless API** - Closed models: OpenAI/Anthropic/Google APIs. Open-source: HF Inference Endpoints."
    # Smart tips: one per matching constraint, with a generic fallback.
    tips = []
    if any(d in ["Healthcare", "Finance", "Legal"] for d in domains):
        tips.append("π **Zero Data Retention (ZDR)** - For regulated domains, enable ZDR on OpenAI/Anthropic/Google or use on-prem.")
    if latency == "< 500 ms (Instant)":
        tips.append("β‘ **Pick a smaller distilled variant** - Sort by 'likes' and look for 7B/8B versions of the top model.")
    if context_size == "32K - 200K tokens (Long)":
        tips.append("π **Add a RAG layer** - Even for long-context models, pair with Pinecone/Weaviate/pgvector for accuracy.")
    if "Style fine-tuning" in customization:
        tips.append("π¨ **Fine-tune with QLoRA** - Take the top model from the table above and fine-tune on 1k-5k domain examples.")
    if "RAG + Tool Calling" in customization:
        tips.append("π§ **Enable Tool Calling** - Most top models support function calling / tool use. Check the model card for schema.")
    if not tips:
        tips.append("β¨ Start with the #1 model in the table above and iterate. A great system prompt gets you 80% of the way.")
    # Joined so each tip lands on its own "- " bullet line in the summary.
    tips_text = (NL + "- ").join(tips)
    how_to = (
        "### π Your 3-Step Rollout Guide" + NL +
        "1. **Sandbox (Week 1):** Clone the top 3 models from the table above. Run 50-100 real queries from your dataset." + NL +
        "2. **Evaluate (Week 2):** Score on accuracy, latency, and cost per 1K tokens. Eliminate bottom performers." + NL +
        "3. **Deploy (Week 3):** Integrate the winner via API or self-hosted endpoint. Set up monitoring with LangSmith or Helicone." + NL
    )
    tasks_label = ", ".join(f"`{t}`" for t in unique_tasks)
    license_label = f"license: `{license_filter}`" if license_filter else "all licenses"
    # Assemble the final Markdown report shown in the output panel.
    summary = (
        "## π Your Live AI Model Strategy" + NL + NL +
        "### π¦ Top Models on HuggingFace Hub Right Now" + NL +
        f"*Live from HF Hub API | Tasks: {tasks_label} | Filter: {license_label} | Sorted by downloads*" + NL + NL +
        model_table + NL + NL +
        "### πΊοΈ Best Deployment Path" + NL +
        deploy_path + NL + NL +
        "### π‘ Pro-Tips for Your Use Case" + NL +
        "- " + tips_text + NL + NL +
        how_to
    )
    return summary, gr.update(visible=True)
def handle_lead(name, email, company, infra):
    """Validate the lead form and build a pre-filled ``mailto:`` link.

    Parameters are the raw values of the four lead-capture inputs.

    Returns
    -------
    tuple
        ``(status_markdown, mail_button_update)``. When *email* is empty,
        a warning is shown and the mail button stays hidden; otherwise the
        button is revealed with a mailto URL carrying the request body.
    """
    if not email:
        return gr.update(value="β οΈ Please provide an email."), gr.update(visible=False)
    # Percent-encode both subject and body so user text is URL-safe.
    subject = urllib.parse.quote(f"Architecture Review Request: {company}")
    body = urllib.parse.quote(
        f"Hi AnkTechsol Team,{NL}{NL}"
        f"I just used your AI Model Selection Wizard and would like a custom architecture PDF and cost estimate for my project.{NL}{NL}"
        f"Name: {name}{NL}"
        f"Company: {company}{NL}"
        f"Current Infrastructure: {infra}{NL}{NL}"
        f"Looking forward to hearing from you!"
    )
    mailto_url = f"mailto:colab@anktechsol.com?subject={subject}&body={body}"
    # BUG FIX: this f-string was broken across two physical source lines
    # (a SyntaxError); rejoined into a single literal.
    msg = f"β Thanks {name}! Click the button below to open your mail client and send your request to our architecture team."
    return msg, gr.update(value="Open Mail Client to Send βοΈ", link=mailto_url, visible=True)
# ---------------------------------------------------------------------------
# Gradio UI: a two-column input form, the generated strategy output, and a
# lead-capture section revealed only after a strategy has been produced.
# NOTE(review): original indentation was lost; component nesting below is
# reconstructed from context — confirm layout against the deployed app.
# ---------------------------------------------------------------------------
with gr.Blocks(theme=gr.themes.Soft(primary_hue="orange", secondary_hue="slate"), title="AI Model Picker | AnkTechsol") as demo:
    gr.Markdown("# π AI Model Selection Wizard")
    gr.Markdown("**Pick the perfect AI brain for your use case.** This tool queries the HuggingFace Hub live and returns the top trending models for your exact task - no hardcoded lists. By [AnkTechsol](https://anktechsol.com).")
    with gr.Row():
        # Left column: what the user is trying to build.
        with gr.Column(scale=1):
            gr.Markdown("### π Step 1: Describe Your Task")
            modality = gr.CheckboxGroup(label="What inputs are you working with?", choices=["Text in -> text out", "Images in (docs, UI, scans)", "Audio in (speech)", "Structured data (tables, logs)", "Code"])
            outputs = gr.CheckboxGroup(label="What output do you need?", choices=["Natural language answer / summary", "Classification / tagging", "Field extraction from text/PDF", "Content generation (copy, emails)", "Scoring / ranking / decision"])
            domains = gr.CheckboxGroup(label="Your Industry / Domain", choices=["General / consumer", "Ecommerce / SaaS", "Finance", "Healthcare", "Legal", "Internal enterprise knowledge"])
        # Right column: operational constraints.
        with gr.Column(scale=1):
            gr.Markdown("### βοΈ Step 2: Set Your Constraints")
            data_sensitivity = gr.Radio(label="Data Privacy Requirements", choices=["Standard SaaS API is fine", "Must stay in a specific cloud region", "Strict: prefer on-prem / VPC only"], value="Standard SaaS API is fine")
            volume = gr.Radio(label="Expected Daily Request Volume", choices=["< 1,000", "1,000 - 100,000", "100,000+"], value="< 1,000")
            latency = gr.Radio(label="Latency Requirement", choices=["< 500 ms (Instant)", "0.5 - 5 s (Standard)", "> 5 s (Batch)"], value="0.5 - 5 s (Standard)")
            context_size = gr.Radio(label="Max Context Window Needed", choices=["< 4K tokens (Short)", "4K - 32K tokens (Medium)", "32K - 200K tokens (Long)"], value="4K - 32K tokens (Medium)")
            customization = gr.CheckboxGroup(label="Customization Needs", choices=["Prompt engineering only", "Style fine-tuning", "RAG + Tool Calling"])
    # Value-proposition summary shown beneath the form.
    gr.Markdown("### π What you'll get:")
    gr.Markdown("- **Top 5 Live Models** from HF Hub matched to your task" + NL +
                "- **Optimized Deployment Path** (Cloud vs On-Prem)" + NL +
                "- **3-Week Implementation Roadmap**" + NL +
                "- **Tailored Cost & Latency Pro-Tips**")
    submit_btn = gr.Button("β¨ Fetch Live Models and Generate My Strategy", variant="primary", size="lg")
    gr.Markdown("> π *Querying HuggingFace Hub live - may take 5-10 seconds. Please wait after clicking.*")
    gr.Markdown("---")
    # Strategy report target, filled in by recommend_model.
    output_md = gr.Markdown(label="Your Live AI Strategy")
    # Lead-capture form; hidden until recommend_model makes it visible.
    with gr.Column(visible=False) as lead_section:
        gr.Markdown("---")
        gr.Markdown("### π§ Get Your Custom Architecture PDF")
        gr.Markdown("We'll take your results above and generate a detailed 1-page architecture diagram and cost estimate for your stack.")
        with gr.Row():
            name = gr.Textbox(label="Name", placeholder="Anuj Karn")
            email = gr.Textbox(label="Work Email", placeholder="anuj@anktechsol.com")
        with gr.Row():
            company = gr.Textbox(label="Company / Project", placeholder="AnkTechsol")
            infra = gr.Dropdown(label="Current Infra", choices=["AWS", "GCP", "Azure", "On-Prem", "Other/None"], value="AWS")
        lead_btn = gr.Button("π₯ Generate My Architecture Request", variant="primary")
        lead_status = gr.Markdown("")
        # Hidden button that handle_lead re-targets with a mailto: link.
        mail_btn = gr.Button("Open Mail Client βοΈ", visible=False, variant="secondary")
    # Event wiring: generate strategy, then process the lead form.
    submit_btn.click(
        fn=recommend_model,
        inputs=[modality, outputs, domains, data_sensitivity, volume, latency, context_size, customization],
        outputs=[output_md, lead_section]
    )
    lead_btn.click(
        fn=handle_lead,
        inputs=[name, email, company, infra],
        outputs=[lead_status, mail_btn]
    )
    # Static footer / contact section.
    gr.Markdown("---")
    gr.Markdown("### βοΈ Ready to build?")
    gr.Markdown("Contact us at [**colab@anktechsol.com**](mailto:colab@anktechsol.com) to review these results with our architecture team and map your production roadmap.")
    gr.Markdown("---")
    gr.Markdown("### π οΈ Why Teams Trust AnkTechsol")
    gr.Markdown("We help startups and enterprises go from AI confusion to production-grade AI systems. Fine-tuning, RAG pipelines, GPU inferencing as a service - we deliver.")
    gr.Markdown("**[Visit anktechsol.com](https://anktechsol.com) | [LinkedIn](https://www.linkedin.com/company/anktechsol)**")
if __name__ == "__main__":
    demo.launch()