gr8monk3ys committed
Commit 11e4b92 · verified · 1 Parent(s): 211b3cb

Upload folder using huggingface_hub

Files changed (3):
  1. README.md +78 -5
  2. app.py +397 -0
  3. requirements.txt +2 -0
README.md CHANGED
@@ -1,12 +1,85 @@
  ---
  title: Model Selector
- emoji: 📉
- colorFrom: indigo
- colorTo: gray
+ emoji: 🎯
+ colorFrom: yellow
+ colorTo: red
  sdk: gradio
- sdk_version: 6.5.1
+ sdk_version: 5.9.1
+ python_version: "3.10"
  app_file: app.py
  pinned: false
+ license: mit
+ short_description: Find the perfect HuggingFace model for your task
  ---

- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ # Model Selector
+
+ Find the perfect HuggingFace model for your task. Answer a few simple questions and get personalized recommendations with ready-to-use code examples.
+
+ ## Features
+
+ ### 10 Task Categories
+ - **Text Generation** - Chatbots, content writing, code
+ - **Text Classification** - Sentiment, spam, topics
+ - **Question Answering** - Document QA, FAQs
+ - **Translation** - 200+ languages
+ - **Summarization** - Articles, documents
+ - **Image Classification** - Photos, medical images
+ - **Object Detection** - Detect objects in images
+ - **Image Generation** - Create images from text
+ - **Speech Recognition** - Audio to text
+ - **Embeddings** - Semantic search, RAG
+
+ ### Smart Filtering
+ - Filter by model size (tiny to large)
+ - Prioritize by speed, quality, or popularity
+ - Get recommendations tailored to your use case
+
+ ### Ready-to-Use Code
+ Every recommendation includes:
+ - Working Python code example
+ - Direct link to the model
+ - License information
+ - Size/speed tradeoffs
+
+ ## How to Use
+
+ 1. **Select your task** (e.g., Text Generation)
+ 2. **Choose size preference** based on your hardware
+ 3. **Set priority** (speed, quality, or popularity)
+ 4. **Describe your use case** (optional)
+ 5. **Get recommendations** with code examples!
+
+ ## Example Output
+
+ For "Text Generation" with "Small" size preference:
+
+ | Rank | Model | Size | License |
+ |------|-------|------|---------|
+ | 1 | microsoft/phi-3-mini | 3.8B | MIT |
+ | 2 | Qwen/Qwen2.5-3B-Instruct | 3B | Apache |
+ | 3 | mistralai/Mistral-7B | 7B | Apache |
+
+ ## Quick Reference
+
+ | Task | Typical Size | Best For |
+ |------|--------------|----------|
+ | Text Generation | 3B - 70B | Chatbots, content |
+ | Classification | 50M - 300M | Sentiment, spam |
+ | Embeddings | 20M - 100M | Search, RAG |
+ | Speech | 200M - 1.5B | Transcription |
+
+ ## Why Use This Tool?
+
+ - **Save time** - Don't search through thousands of models
+ - **Avoid mistakes** - Get proven, popular models
+ - **Quick start** - Copy-paste code examples
+ - **Right-sized** - Match models to your hardware
+
+ ## License
+
+ MIT
+
+ ## Author
+
+ Built by [Lorenzo Scaturchio](https://huggingface.co/gr8monk3ys)
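The "How to Use" flow above (pick a task, filter by size, sort by priority) can be sketched in plain Python. The model names mirror the example-output table; the `recommend` helper and `size_m` field are illustrative names for this sketch, not the app's actual API:

```python
# Illustrative sketch of the selection flow: filter candidates by a
# parameter-count ceiling (in millions), then rank by the chosen priority.
CANDIDATES = [
    {"name": "microsoft/phi-3-mini", "size_m": 3800, "license": "MIT"},
    {"name": "Qwen/Qwen2.5-3B-Instruct", "size_m": 3000, "license": "Apache"},
    {"name": "mistralai/Mistral-7B", "size_m": 7000, "license": "Apache"},
]

def recommend(max_size_m: int, priority: str) -> list[str]:
    """Filter by size ceiling, then sort by the selected priority."""
    picks = [m for m in CANDIDATES if m["size_m"] <= max_size_m]
    if priority == "Smallest/Fastest":
        picks.sort(key=lambda m: m["size_m"])
    elif priority == "Best Quality":
        picks.sort(key=lambda m: m["size_m"], reverse=True)
    # "Most Popular" keeps the curated order as-is.
    return [m["name"] for m in picks]

print(recommend(5000, "Smallest/Fastest"))
# → ['Qwen/Qwen2.5-3B-Instruct', 'microsoft/phi-3-mini']
```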
app.py ADDED
@@ -0,0 +1,397 @@
+ """
+ Model Selector - Find the right HuggingFace model for your task.
+
+ Answer a few questions and get personalized model recommendations.
+ """
+
+ import gradio as gr
+ from huggingface_hub import HfApi, list_models
+ from typing import Optional
+
+ # ---------------------------------------------------------------------------
+ # Task Categories and Model Recommendations
+ # ---------------------------------------------------------------------------
+
+ TASKS = {
+     "Text Generation": {
+         "id": "text-generation",
+         "description": "Generate text, stories, code, or continue prompts",
+         "use_cases": ["Chatbots", "Content writing", "Code completion", "Story generation"],
+         "top_models": [
+             {"name": "meta-llama/Llama-3.1-8B-Instruct", "size": "8B", "license": "llama3.1"},
+             {"name": "mistralai/Mistral-7B-Instruct-v0.3", "size": "7B", "license": "apache-2.0"},
+             {"name": "Qwen/Qwen2.5-7B-Instruct", "size": "7B", "license": "apache-2.0"},
+             {"name": "google/gemma-2-9b-it", "size": "9B", "license": "gemma"},
+             {"name": "microsoft/phi-3-mini-4k-instruct", "size": "3.8B", "license": "mit"},
+         ]
+     },
+     "Text Classification": {
+         "id": "text-classification",
+         "description": "Classify text into categories (sentiment, topic, intent)",
+         "use_cases": ["Sentiment analysis", "Spam detection", "Topic classification", "Intent detection"],
+         "top_models": [
+             {"name": "distilbert-base-uncased-finetuned-sst-2-english", "size": "67M", "license": "apache-2.0"},
+             {"name": "cardiffnlp/twitter-roberta-base-sentiment-latest", "size": "125M", "license": "mit"},
+             {"name": "facebook/bart-large-mnli", "size": "400M", "license": "mit"},
+             {"name": "MoritzLaworoutedistilbert-base-uncased-sentiment", "size": "67M", "license": "apache-2.0"},
+         ]
+     },
+     "Question Answering": {
+         "id": "question-answering",
+         "description": "Answer questions based on context or knowledge",
+         "use_cases": ["Customer support", "Document QA", "Knowledge retrieval", "FAQ bots"],
+         "top_models": [
+             {"name": "deepset/roberta-base-squad2", "size": "125M", "license": "cc-by-4.0"},
+             {"name": "distilbert-base-cased-distilled-squad", "size": "67M", "license": "apache-2.0"},
+             {"name": "google/flan-t5-base", "size": "250M", "license": "apache-2.0"},
+             {"name": "Intel/dynamic_tinybert", "size": "15M", "license": "apache-2.0"},
+         ]
+     },
+     "Translation": {
+         "id": "translation",
+         "description": "Translate text between languages",
+         "use_cases": ["Multilingual apps", "Document translation", "Real-time translation"],
+         "top_models": [
+             {"name": "facebook/nllb-200-distilled-600M", "size": "600M", "license": "cc-by-nc-4.0"},
+             {"name": "Helsinki-NLP/opus-mt-en-de", "size": "74M", "license": "apache-2.0"},
+             {"name": "google/madlad400-3b-mt", "size": "3B", "license": "apache-2.0"},
+             {"name": "facebook/mbart-large-50-many-to-many-mmt", "size": "611M", "license": "mit"},
+         ]
+     },
+     "Summarization": {
+         "id": "summarization",
+         "description": "Summarize long documents or articles",
+         "use_cases": ["News summarization", "Document condensing", "Meeting notes", "Research papers"],
+         "top_models": [
+             {"name": "facebook/bart-large-cnn", "size": "400M", "license": "mit"},
+             {"name": "google/pegasus-xsum", "size": "568M", "license": "apache-2.0"},
+             {"name": "philschmid/bart-large-cnn-samsum", "size": "400M", "license": "mit"},
+             {"name": "google/flan-t5-large", "size": "780M", "license": "apache-2.0"},
+         ]
+     },
+     "Image Classification": {
+         "id": "image-classification",
+         "description": "Classify images into categories",
+         "use_cases": ["Product categorization", "Medical imaging", "Quality control", "Content moderation"],
+         "top_models": [
+             {"name": "google/vit-base-patch16-224", "size": "86M", "license": "apache-2.0"},
+             {"name": "microsoft/resnet-50", "size": "25M", "license": "apache-2.0"},
+             {"name": "facebook/convnext-base-224", "size": "88M", "license": "apache-2.0"},
+             {"name": "timm/efficientnet_b0.ra_in1k", "size": "5M", "license": "apache-2.0"},
+         ]
+     },
+     "Object Detection": {
+         "id": "object-detection",
+         "description": "Detect and locate objects in images",
+         "use_cases": ["Autonomous vehicles", "Security cameras", "Inventory management", "Sports analytics"],
+         "top_models": [
+             {"name": "facebook/detr-resnet-50", "size": "41M", "license": "apache-2.0"},
+             {"name": "hustvl/yolos-tiny", "size": "6M", "license": "apache-2.0"},
+             {"name": "microsoft/table-transformer-detection", "size": "42M", "license": "mit"},
+             {"name": "facebook/detr-resnet-101", "size": "60M", "license": "apache-2.0"},
+         ]
+     },
+     "Image Generation": {
+         "id": "text-to-image",
+         "description": "Generate images from text descriptions",
+         "use_cases": ["Art creation", "Product visualization", "Marketing content", "Game assets"],
+         "top_models": [
+             {"name": "stabilityai/stable-diffusion-xl-base-1.0", "size": "6.9B", "license": "openrail++"},
+             {"name": "black-forest-labs/FLUX.1-schnell", "size": "12B", "license": "apache-2.0"},
+             {"name": "runwayml/stable-diffusion-v1-5", "size": "1B", "license": "creativeml-openrail-m"},
+             {"name": "stabilityai/sdxl-turbo", "size": "6.9B", "license": "openrail++"},
+         ]
+     },
+     "Speech Recognition": {
+         "id": "automatic-speech-recognition",
+         "description": "Convert speech to text",
+         "use_cases": ["Transcription", "Voice commands", "Meeting notes", "Accessibility"],
+         "top_models": [
+             {"name": "openai/whisper-large-v3", "size": "1.5B", "license": "apache-2.0"},
+             {"name": "openai/whisper-medium", "size": "769M", "license": "apache-2.0"},
+             {"name": "openai/whisper-small", "size": "244M", "license": "apache-2.0"},
+             {"name": "facebook/wav2vec2-base-960h", "size": "95M", "license": "apache-2.0"},
+         ]
+     },
+     "Embeddings": {
+         "id": "feature-extraction",
+         "description": "Generate embeddings for semantic search and similarity",
+         "use_cases": ["Semantic search", "Recommendation systems", "Clustering", "RAG systems"],
+         "top_models": [
+             {"name": "sentence-transformers/all-MiniLM-L6-v2", "size": "22M", "license": "apache-2.0"},
+             {"name": "sentence-transformers/all-mpnet-base-v2", "size": "109M", "license": "apache-2.0"},
+             {"name": "BAAI/bge-small-en-v1.5", "size": "33M", "license": "mit"},
+             {"name": "intfloat/e5-small-v2", "size": "33M", "license": "mit"},
+         ]
+     },
+ }
+
+ SIZE_PREFERENCES = {
+     "Tiny (< 100M)": {"min": 0, "max": 100},
+     "Small (100M - 500M)": {"min": 100, "max": 500},
+     "Medium (500M - 2B)": {"min": 500, "max": 2000},
+     "Large (2B - 10B)": {"min": 2000, "max": 10000},
+     "Any size": {"min": 0, "max": 100000},
+ }
+
+ # ---------------------------------------------------------------------------
+ # Core Functions
+ # ---------------------------------------------------------------------------
+
+ def get_recommendations(
+     task: str,
+     size_pref: str,
+     priority: str,
+     use_case: str
+ ) -> tuple[str, str]:
+     """Get model recommendations based on user preferences."""
+
+     if task not in TASKS:
+         return "Please select a task.", ""
+
+     task_info = TASKS[task]
+     models = task_info["top_models"]
+
+     # Filter by size if preference is set
+     size_range = SIZE_PREFERENCES.get(size_pref, SIZE_PREFERENCES["Any size"])
+
+     def parse_size(size_str):
+         """Parse size string to millions."""
+         size_str = size_str.upper()
+         if 'B' in size_str:
+             return float(size_str.replace('B', '')) * 1000
+         elif 'M' in size_str:
+             return float(size_str.replace('M', ''))
+         return 0
+
+     if size_pref != "Any size":
+         models = [m for m in models if size_range["min"] <= parse_size(m["size"]) <= size_range["max"]]
+
+     if not models:
+         return "No models match your size preference. Try 'Any size'.", ""
+
+     # Sort by priority
+     if priority == "Smallest/Fastest":
+         models = sorted(models, key=lambda x: parse_size(x["size"]))
+     elif priority == "Most Popular":
+         # Keep original order (already sorted by popularity)
+         pass
+     elif priority == "Best Quality":
+         # Larger models tend to be higher quality
+         models = sorted(models, key=lambda x: parse_size(x["size"]), reverse=True)
+
+     # Build recommendation output
+     recs = []
+     recs.append(f"## Recommendations for: {task}\n")
+     recs.append(f"*{task_info['description']}*\n")
+
+     if use_case:
+         recs.append(f"**Your use case:** {use_case}\n")
+
+     recs.append("---\n")
+
+     for i, model in enumerate(models[:4], 1):
+         recs.append(f"### {i}. {model['name']}")
+         recs.append(f"- **Size:** {model['size']} parameters")
+         recs.append(f"- **License:** {model['license']}")
+         recs.append(f"- **Link:** [View on HuggingFace](https://huggingface.co/{model['name']})")
+         recs.append("")
+
+     # Build code example
+     code = generate_code_example(task, models[0] if models else None)
+
+     return "\n".join(recs), code
+
+
+ def generate_code_example(task: str, model: Optional[dict]) -> str:
+     """Generate code example for using the recommended model."""
+
+     if not model:
+         return ""
+
+     model_name = model["name"]
+
+     code_templates = {
+         "Text Generation": f'''```python
+ from transformers import pipeline
+
+ generator = pipeline("text-generation", model="{model_name}")
+
+ result = generator(
+     "Write a story about a robot:",
+     max_length=100,
+     num_return_sequences=1
+ )
+ print(result[0]["generated_text"])
+ ```''',
+
+         "Text Classification": f'''```python
+ from transformers import pipeline
+
+ classifier = pipeline("text-classification", model="{model_name}")
+
+ result = classifier("I love this product! It's amazing!")
+ print(result)  # [{{'label': 'POSITIVE', 'score': 0.99}}]
+ ```''',
+
+         "Question Answering": f'''```python
+ from transformers import pipeline
+
+ qa = pipeline("question-answering", model="{model_name}")
+
+ result = qa(
+     question="What is the capital of France?",
+     context="France is a country in Europe. Paris is its capital city."
+ )
+ print(result["answer"])  # Paris
+ ```''',
+
+         "Translation": f'''```python
+ from transformers import pipeline
+
+ translator = pipeline("translation", model="{model_name}")
+
+ result = translator("Hello, how are you?")
+ print(result[0]["translation_text"])
+ ```''',
+
+         "Summarization": f'''```python
+ from transformers import pipeline
+
+ summarizer = pipeline("summarization", model="{model_name}")
+
+ long_text = """Your long article text here..."""
+ result = summarizer(long_text, max_length=130, min_length=30)
+ print(result[0]["summary_text"])
+ ```''',
+
+         "Image Classification": f'''```python
+ from transformers import pipeline
+
+ classifier = pipeline("image-classification", model="{model_name}")
+
+ result = classifier("path/to/image.jpg")
+ print(result)  # [{{'label': 'cat', 'score': 0.95}}]
+ ```''',
+
+         "Speech Recognition": f'''```python
+ from transformers import pipeline
+
+ transcriber = pipeline("automatic-speech-recognition", model="{model_name}")
+
+ result = transcriber("audio.mp3")
+ print(result["text"])
+ ```''',
+
+         "Embeddings": f'''```python
+ from sentence_transformers import SentenceTransformer
+
+ model = SentenceTransformer("{model_name}")
+
+ sentences = ["This is a sentence", "This is another sentence"]
+ embeddings = model.encode(sentences)
+ print(embeddings.shape)  # (2, 384)
+ ```''',
+     }
+
+     return code_templates.get(task, f'''```python
+ from transformers import pipeline
+
+ pipe = pipeline("{TASKS[task]['id']}", model="{model_name}")
+ result = pipe("Your input here")
+ print(result)
+ ```''')
+
+
+ # ---------------------------------------------------------------------------
+ # Gradio Interface
+ # ---------------------------------------------------------------------------
+
+ with gr.Blocks(title="Model Selector", theme=gr.themes.Soft()) as demo:
+     gr.Markdown("""
+     # Model Selector
+
+     Find the perfect HuggingFace model for your task. Answer a few questions
+     and get personalized recommendations with code examples.
+     """)
+
+     with gr.Row():
+         with gr.Column(scale=1):
+             task_select = gr.Dropdown(
+                 choices=list(TASKS.keys()),
+                 label="What do you want to do?",
+                 value="Text Generation"
+             )
+
+             task_description = gr.Markdown(
+                 value=f"*{TASKS['Text Generation']['description']}*"
+             )
+
+             size_select = gr.Dropdown(
+                 choices=list(SIZE_PREFERENCES.keys()),
+                 label="Model size preference?",
+                 value="Any size",
+                 info="Smaller = faster, larger = higher quality"
+             )
+
+             priority_select = gr.Radio(
+                 choices=["Most Popular", "Smallest/Fastest", "Best Quality"],
+                 label="What matters most?",
+                 value="Most Popular"
+             )
+
+             use_case = gr.Textbox(
+                 label="Describe your use case (optional)",
+                 placeholder="e.g., Customer support chatbot for e-commerce"
+             )
+
+             recommend_btn = gr.Button("Get Recommendations", variant="primary", size="lg")
+
+         with gr.Column(scale=1):
+             recommendations = gr.Markdown(label="Recommendations")
+             code_example = gr.Markdown(label="Code Example")
+
+             # Use cases display
+             use_cases_display = gr.Markdown(
+                 value=f"**Common use cases:** {', '.join(TASKS['Text Generation']['use_cases'])}"
+             )
+
+     # Event handlers
+     def update_task_info(task):
+         desc = f"*{TASKS[task]['description']}*"
+         uses = f"**Common use cases:** {', '.join(TASKS[task]['use_cases'])}"
+         return desc, uses
+
+     task_select.change(
+         fn=update_task_info,
+         inputs=[task_select],
+         outputs=[task_description, use_cases_display]
+     )
+
+     recommend_btn.click(
+         fn=get_recommendations,
+         inputs=[task_select, size_select, priority_select, use_case],
+         outputs=[recommendations, code_example]
+     )
+
+     gr.Markdown("""
+     ---
+
+     ### Quick Reference
+
+     | Task | Best For | Typical Size |
+     |------|----------|--------------|
+     | Text Generation | Chatbots, content | 3B - 70B |
+     | Text Classification | Sentiment, topics | 50M - 300M |
+     | Embeddings | Search, RAG | 20M - 100M |
+     | Speech Recognition | Transcription | 200M - 1.5B |
+     | Image Generation | Art, visualization | 1B - 12B |
+
+     ---
+
+     Built by [Lorenzo Scaturchio](https://huggingface.co/gr8monk3ys)
+     """)
+
+
+ if __name__ == "__main__":
+     demo.launch()
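The size filter in `get_recommendations` hinges on `parse_size`, which normalizes strings like "3.8B" or "67M" to millions of parameters. Extracted here as a standalone snippet for quick sanity checks (the body mirrors the nested helper in `app.py` above, with a float return annotation added):

```python
def parse_size(size_str: str) -> float:
    """Convert a parameter-count string ("7B", "67M") to millions."""
    size_str = size_str.upper()
    if "B" in size_str:
        return float(size_str.replace("B", "")) * 1000  # billions -> millions
    if "M" in size_str:
        return float(size_str.replace("M", ""))
    return 0.0  # unparseable sizes sort first and pass no minimum filter

print(parse_size("7B"))   # 7000.0
print(parse_size("67M"))  # 67.0
```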
requirements.txt ADDED
@@ -0,0 +1,2 @@
+ gradio>=5.9.1
+ huggingface_hub>=0.20.0