File size: 11,598 Bytes
006db03
 
 
2017640
006db03
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2017640
006db03
 
2017640
006db03
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2017640
006db03
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2017640
 
 
 
 
 
 
 
 
 
 
 
 
 
 
006db03
 
 
 
 
 
 
 
 
 
 
2017640
006db03
 
 
 
 
 
 
 
 
2017640
 
 
 
006db03
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2017640
006db03
2017640
006db03
 
2017640
 
006db03
 
2017640
006db03
 
 
2017640
006db03
 
2017640
 
 
 
006db03
 
 
 
 
 
2017640
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
import gradio as gr
import requests
import os
import urllib.parse

# Newline as a named constant; used throughout when assembling multi-line
# Markdown strings for the report and the mailto body.
NL = chr(10)

# Map user-facing modality choices to HF Hub task tags
# (each checkbox label maps to one or more pipeline tags queried against the
# HF Hub API; unknown labels simply contribute no tags).
TASK_MAP = {
    "Text in -> text out": ["text-generation", "text2text-generation"],
    "Images in (docs, UI, scans)": ["image-to-text", "visual-question-answering"],
    "Audio in (speech)": ["automatic-speech-recognition"],
    "Structured data (tables, logs)": ["tabular-classification", "tabular-regression"],
    "Code": ["text-generation"],
}

# Maps the data-sensitivity radio choice to an HF Hub license slug used to
# filter model search results; None means "no license filter applied".
# Only the strict on-prem option restricts to apache-2.0 models.
LICENSE_FILTER = {
    "Standard SaaS API is fine": None,
    "Must stay in a specific cloud region": None,
    "Strict: prefer on-prem / VPC only": "apache-2.0",
}

def fetch_top_models(tasks, license_filter=None, top_n=5):
    """Query the HF Hub API for the most-downloaded models per task.

    Args:
        tasks: Iterable of HF pipeline tags (e.g. "text-generation").
        license_filter: Optional license slug (e.g. "apache-2.0") to
            restrict results; None applies no license filter.
        top_n: Maximum number of models to return overall.

    Returns:
        List of (model_id, task, downloads, likes) tuples, de-duplicated by
        model id, sorted by downloads descending, at most ``top_n`` long.
        A failing query for one task is skipped so the other tasks can
        still contribute results (best-effort behavior).
    """
    seen = set()
    results = []
    # Collect at most top_n candidates per requested task before trimming
    # to the global top_n below.
    max_candidates = top_n * len(tasks)
    for task in tasks:
        if len(results) >= max_candidates:
            break
        params = {
            "pipeline_tag": task,
            "sort": "downloads",
            "direction": "-1",
            "limit": "20",
        }
        if license_filter:
            params["license"] = license_filter
        try:
            resp = requests.get(
                "https://huggingface.co/api/models",
                params=params,
                timeout=15,
            )
            if resp.status_code != 200:
                continue
            data = resp.json()
        except (requests.RequestException, ValueError):
            # Best-effort: network errors or malformed JSON for one task
            # must not abort the remaining task queries.
            continue
        for m in data:
            mid = m.get("modelId", m.get("id", ""))
            if mid and mid not in seen:
                seen.add(mid)
                downloads = m.get("downloads", 0) or 0
                likes = m.get("likes", 0) or 0
                results.append((mid, task, downloads, likes))
                if len(results) >= max_candidates:
                    break

    # x[2] is the download count.
    results.sort(key=lambda x: x[2], reverse=True)
    return results[:top_n]

def format_model_table(models):
    """Render (model_id, task, downloads, likes) tuples as a Markdown table.

    Args:
        models: List of (model_id, task, downloads, likes) tuples as
            produced by ``fetch_top_models``.

    Returns:
        A Markdown table string (one trailing newline per row, including
        the last), or a fallback message when ``models`` is empty.
        Zero/falsy download or like counts render as "N/A".
    """
    if not models:
        return "No models found via HF API. Please try again."
    # Build rows in a list and join once instead of quadratic string +=.
    lines = [
        "| # | Model | Task | Downloads | Likes |",
        "|---|-------|------|-----------|-------|",
    ]
    for i, (mid, task, dl, lk) in enumerate(models, 1):
        dl_fmt = f"{dl:,}" if dl else "N/A"
        lk_fmt = f"{lk:,}" if lk else "N/A"
        lines.append(
            f"| {i} | [{mid}](https://huggingface.co/{mid}) "
            f"| `{task}` | {dl_fmt} | {lk_fmt} |"
        )
    return "\n".join(lines) + "\n"

def recommend_model(
    modality, outputs, domains, data_sensitivity, volume, latency, context_size, customization
):
    """Build the full Markdown strategy report from the wizard's inputs.

    Parameters mirror the Gradio inputs declared below: ``modality``,
    ``outputs``, ``domains`` and ``customization`` are lists of selected
    checkbox labels; ``data_sensitivity``, ``volume``, ``latency`` and
    ``context_size`` are single radio-button values.

    NOTE(review): ``outputs`` is accepted but never read in this body —
    confirm whether it should influence the recommendation.

    Returns:
        Tuple of (summary_markdown, gr.update(visible=True)); the update
        reveals the lead-capture section after a report is generated.
    """
    # Collect HF tasks to query
    hf_tasks = []
    for m in modality:
        hf_tasks.extend(TASK_MAP.get(m, []))

    # Deduplicate, keep order
    seen_t = set()
    unique_tasks = []
    for t in hf_tasks:
        if t not in seen_t:
            seen_t.add(t)
            unique_tasks.append(t)

    # Fall back to plain text generation when no modality was selected.
    if not unique_tasks:
        unique_tasks = ["text-generation"]

    # None unless the strict on-prem option was chosen (see LICENSE_FILTER).
    license_filter = LICENSE_FILTER.get(data_sensitivity)

    # Fetch live models
    live_models = fetch_top_models(unique_tasks, license_filter=license_filter, top_n=5)
    model_table = format_model_table(live_models)

    # Deployment path: strict privacy wins over volume, which wins over the
    # default serverless recommendation.
    if data_sensitivity == "Strict: prefer on-prem / VPC only":
        deploy_path = "πŸš€ **Private Self-Hosted** - Run top open-source models above via Ollama or vLLM on your own infra."
    elif volume == "100,000+":
        deploy_path = "πŸ’° **Cost-Optimized Scale** - Use provisioned throughput for closed models, or self-host on GPU clusters."
    else:
        deploy_path = "⚑ **Serverless API** - Closed models: OpenAI/Anthropic/Google APIs. Open-source: HF Inference Endpoints."

    # Smart tips: each matching constraint appends one bullet; the literal
    # strings below must match the choices declared in the UI.
    tips = []
    if any(d in ["Healthcare", "Finance", "Legal"] for d in domains):
        tips.append("πŸ”’ **Zero Data Retention (ZDR)** - For regulated domains, enable ZDR on OpenAI/Anthropic/Google or use on-prem.")
    if latency == "< 500 ms (Instant)":
        tips.append("⚑ **Pick a smaller distilled variant** - Sort by 'likes' and look for 7B/8B versions of the top model.")
    if context_size == "32K - 200K tokens (Long)":
        tips.append("πŸ“š **Add a RAG layer** - Even for long-context models, pair with Pinecone/Weaviate/pgvector for accuracy.")
    if "Style fine-tuning" in customization:
        tips.append("🎨 **Fine-tune with QLoRA** - Take the top model from the table above and fine-tune on 1k-5k domain examples.")
    if "RAG + Tool Calling" in customization:
        tips.append("πŸ”§ **Enable Tool Calling** - Most top models support function calling / tool use. Check the model card for schema.")

    # Guarantee at least one tip so the section is never empty.
    if not tips:
        tips.append("✨ Start with the #1 model in the table above and iterate. A great system prompt gets you 80% of the way.")

    # Joined as Markdown bullets; the leading "- " is prepended in `summary`.
    tips_text = (NL + "- ").join(tips)

    how_to = (
        "### πŸ“‹ Your 3-Step Rollout Guide" + NL +
        "1. **Sandbox (Week 1):** Clone the top 3 models from the table above. Run 50-100 real queries from your dataset." + NL +
        "2. **Evaluate (Week 2):** Score on accuracy, latency, and cost per 1K tokens. Eliminate bottom performers." + NL +
        "3. **Deploy (Week 3):** Integrate the winner via API or self-hosted endpoint. Set up monitoring with LangSmith or Helicone." + NL
    )

    tasks_label = ", ".join(f"`{t}`" for t in unique_tasks)
    license_label = f"license: `{license_filter}`" if license_filter else "all licenses"

    summary = (
        "## πŸŽ‰ Your Live AI Model Strategy" + NL + NL +
        "### πŸ“¦ Top Models on HuggingFace Hub Right Now" + NL +
        f"*Live from HF Hub API | Tasks: {tasks_label} | Filter: {license_label} | Sorted by downloads*" + NL + NL +
        model_table + NL + NL +
        "### πŸ—ΊοΈ Best Deployment Path" + NL +
        deploy_path + NL + NL +
        "### πŸ’‘ Pro-Tips for Your Use Case" + NL +
        "- " + tips_text + NL + NL +
        how_to
    )

    return summary, gr.update(visible=True)

def handle_lead(name, email, company, infra):
    """Build a pre-filled mailto link for the architecture-review request.

    Args:
        name: Lead's name (used in the confirmation message and email body).
        email: Lead's email; required. An empty or "@"-less value aborts
            with a warning and keeps the mail button hidden.
        company: Company/project name (email subject and body).
        infra: Current-infrastructure choice (email body).

    Returns:
        Tuple of (status_message, mail_button_update) for the two Gradio
        outputs; on success the button is revealed with the mailto link.
    """
    # Minimal sanity check: require a non-empty address containing "@".
    if not email or "@" not in email:
        return gr.update(value="⚠️ Please provide an email."), gr.update(visible=False)

    # URL-encode subject and body so the mailto link survives spaces,
    # newlines and non-ASCII characters.
    subject = urllib.parse.quote(f"Architecture Review Request: {company}")
    body = urllib.parse.quote(
        f"Hi AnkTechsol Team,{NL}{NL}"
        f"I just used your AI Model Selection Wizard and would like a custom architecture PDF and cost estimate for my project.{NL}{NL}"
        f"Name: {name}{NL}"
        f"Company: {company}{NL}"
        f"Current Infrastructure: {infra}{NL}{NL}"
        f"Looking forward to hearing from you!"
    )
    mailto_url = f"mailto:colab@anktechsol.com?subject={subject}&body={body}"

    msg = f"βœ… Thanks {name}! Click the button below to open your mail client and send your request to our architecture team."
    return msg, gr.update(value="Open Mail Client to Send βœ‰οΈ", link=mailto_url, visible=True)

# ---------------------------------------------------------------------------
# Gradio UI wiring. Runs at import time and builds the `demo` Blocks app:
# two input columns (task description + constraints), a submit button that
# calls recommend_model, and a lead-capture section revealed afterwards.
# ---------------------------------------------------------------------------
with gr.Blocks(theme=gr.themes.Soft(primary_hue="orange", secondary_hue="slate"), title="AI Model Picker | AnkTechsol") as demo:
    gr.Markdown("# πŸš€ AI Model Selection Wizard")
    gr.Markdown("**Pick the perfect AI brain for your use case.** This tool queries the HuggingFace Hub live and returns the top trending models for your exact task - no hardcoded lists. By [AnkTechsol](https://anktechsol.com).")
    
    with gr.Row():
        # Left column: what the user wants the model to do. The checkbox
        # labels must match the keys of TASK_MAP and the literals checked
        # inside recommend_model.
        with gr.Column(scale=1):
            gr.Markdown("### πŸ” Step 1: Describe Your Task")
            modality = gr.CheckboxGroup(label="What inputs are you working with?", choices=["Text in -> text out", "Images in (docs, UI, scans)", "Audio in (speech)", "Structured data (tables, logs)", "Code"])
            outputs = gr.CheckboxGroup(label="What output do you need?", choices=["Natural language answer / summary", "Classification / tagging", "Field extraction from text/PDF", "Content generation (copy, emails)", "Scoring / ranking / decision"])
            domains = gr.CheckboxGroup(label="Your Industry / Domain", choices=["General / consumer", "Ecommerce / SaaS", "Finance", "Healthcare", "Legal", "Internal enterprise knowledge"])

        # Right column: operational constraints. The radio values must match
        # the keys of LICENSE_FILTER and the literals in recommend_model.
        with gr.Column(scale=1):
            gr.Markdown("### βš™οΈ Step 2: Set Your Constraints")
            data_sensitivity = gr.Radio(label="Data Privacy Requirements", choices=["Standard SaaS API is fine", "Must stay in a specific cloud region", "Strict: prefer on-prem / VPC only"], value="Standard SaaS API is fine")
            volume = gr.Radio(label="Expected Daily Request Volume", choices=["< 1,000", "1,000 - 100,000", "100,000+"], value="< 1,000")
            latency = gr.Radio(label="Latency Requirement", choices=["< 500 ms (Instant)", "0.5 - 5 s (Standard)", "> 5 s (Batch)"], value="0.5 - 5 s (Standard)")
            context_size = gr.Radio(label="Max Context Window Needed", choices=["< 4K tokens (Short)", "4K - 32K tokens (Medium)", "32K - 200K tokens (Long)"], value="4K - 32K tokens (Medium)")
            customization = gr.CheckboxGroup(label="Customization Needs", choices=["Prompt engineering only", "Style fine-tuning", "RAG + Tool Calling"])

    gr.Markdown("### 🎁 What you'll get:")
    gr.Markdown("- **Top 5 Live Models** from HF Hub matched to your task" + NL +
                "- **Optimized Deployment Path** (Cloud vs On-Prem)" + NL +
                "- **3-Week Implementation Roadmap**" + NL +
                "- **Tailored Cost & Latency Pro-Tips**")

    submit_btn = gr.Button("✨ Fetch Live Models and Generate My Strategy", variant="primary", size="lg")
    gr.Markdown("> πŸ• *Querying HuggingFace Hub live - may take 5-10 seconds. Please wait after clicking.*")
    
    gr.Markdown("---")
    # Report target; filled by recommend_model's first return value.
    output_md = gr.Markdown(label="Your Live AI Strategy")
    
    # Lead-capture section: hidden until recommend_model returns
    # gr.update(visible=True) for it.
    with gr.Column(visible=False) as lead_section:
        gr.Markdown("---")
        gr.Markdown("### πŸ“§ Get Your Custom Architecture PDF")
        gr.Markdown("We'll take your results above and generate a detailed 1-page architecture diagram and cost estimate for your stack.")
        with gr.Row():
            name = gr.Textbox(label="Name", placeholder="Anuj Karn")
            email = gr.Textbox(label="Work Email", placeholder="anuj@anktechsol.com")
        with gr.Row():
            company = gr.Textbox(label="Company / Project", placeholder="AnkTechsol")
            infra = gr.Dropdown(label="Current Infra", choices=["AWS", "GCP", "Azure", "On-Prem", "Other/None"], value="AWS")
        
        lead_btn = gr.Button("πŸ“₯ Generate My Architecture Request", variant="primary")
        lead_status = gr.Markdown("")
        # Hidden until handle_lead succeeds and attaches the mailto link.
        mail_btn = gr.Button("Open Mail Client βœ‰οΈ", visible=False, variant="secondary")

    # Event wiring: input component order must match the recommend_model /
    # handle_lead parameter order.
    submit_btn.click(
        fn=recommend_model,
        inputs=[modality, outputs, domains, data_sensitivity, volume, latency, context_size, customization],
        outputs=[output_md, lead_section]
    )

    lead_btn.click(
        fn=handle_lead,
        inputs=[name, email, company, infra],
        outputs=[lead_status, mail_btn]
    )

    gr.Markdown("---")
    gr.Markdown("### βœ‰οΈ Ready to build?")
    gr.Markdown("Contact us at [**colab@anktechsol.com**](mailto:colab@anktechsol.com) to review these results with our architecture team and map your production roadmap.")

    gr.Markdown("---")
    gr.Markdown("### πŸ› οΈ Why Teams Trust AnkTechsol")
    gr.Markdown("We help startups and enterprises go from AI confusion to production-grade AI systems. Fine-tuning, RAG pipelines, GPU inferencing as a service - we deliver.")
    gr.Markdown("**[Visit anktechsol.com](https://anktechsol.com) | [LinkedIn](https://www.linkedin.com/company/anktechsol)**")

# Launch the app only when run as a script (not when imported).
if __name__ == "__main__":
    demo.launch()