Spaces:
Sleeping
Sleeping
| """ | |
| Model Selector - Find the right HuggingFace model for your task. | |
| Answer a few questions and get personalized model recommendations. | |
| """ | |
| import gradio as gr | |
| from huggingface_hub import HfApi, list_models | |
| from typing import Optional | |
# ---------------------------------------------------------------------------
# Task Categories and Model Recommendations
# ---------------------------------------------------------------------------
# Curated catalogue of supported tasks. Each entry holds:
#   id          - the HuggingFace Hub pipeline tag for the task
#   description - one-line summary shown in the UI
#   use_cases   - example applications shown in the UI
#   top_models  - hand-picked models, listed in order of popularity; sizes
#                 are parameter counts ("67M" = 67 million, "8B" = 8 billion)
TASKS = {
    "Text Generation": {
        "id": "text-generation",
        "description": "Generate text, stories, code, or continue prompts",
        "use_cases": ["Chatbots", "Content writing", "Code completion", "Story generation"],
        "top_models": [
            {"name": "meta-llama/Llama-3.1-8B-Instruct", "size": "8B", "license": "llama3.1"},
            {"name": "mistralai/Mistral-7B-Instruct-v0.3", "size": "7B", "license": "apache-2.0"},
            {"name": "Qwen/Qwen2.5-7B-Instruct", "size": "7B", "license": "apache-2.0"},
            {"name": "google/gemma-2-9b-it", "size": "9B", "license": "gemma"},
            {"name": "microsoft/phi-3-mini-4k-instruct", "size": "3.8B", "license": "mit"},
        ]
    },
    "Text Classification": {
        "id": "text-classification",
        "description": "Classify text into categories (sentiment, topic, intent)",
        "use_cases": ["Sentiment analysis", "Spam detection", "Topic classification", "Intent detection"],
        "top_models": [
            {"name": "distilbert-base-uncased-finetuned-sst-2-english", "size": "67M", "license": "apache-2.0"},
            {"name": "cardiffnlp/twitter-roberta-base-sentiment-latest", "size": "125M", "license": "mit"},
            {"name": "facebook/bart-large-mnli", "size": "400M", "license": "mit"},
            # NOTE(review): the previous entry here was a corrupted repo id
            # ("MoritzLaworoutedistilbert-base-uncased-sentiment" — not a valid
            # org/name); replaced with a well-known public sentiment model.
            {"name": "nlptown/bert-base-multilingual-uncased-sentiment", "size": "167M", "license": "mit"},
        ]
    },
    "Question Answering": {
        "id": "question-answering",
        "description": "Answer questions based on context or knowledge",
        "use_cases": ["Customer support", "Document QA", "Knowledge retrieval", "FAQ bots"],
        "top_models": [
            {"name": "deepset/roberta-base-squad2", "size": "125M", "license": "cc-by-4.0"},
            {"name": "distilbert-base-cased-distilled-squad", "size": "67M", "license": "apache-2.0"},
            {"name": "google/flan-t5-base", "size": "250M", "license": "apache-2.0"},
            {"name": "Intel/dynamic_tinybert", "size": "15M", "license": "apache-2.0"},
        ]
    },
    "Translation": {
        "id": "translation",
        "description": "Translate text between languages",
        "use_cases": ["Multilingual apps", "Document translation", "Real-time translation"],
        "top_models": [
            {"name": "facebook/nllb-200-distilled-600M", "size": "600M", "license": "cc-by-nc-4.0"},
            {"name": "Helsinki-NLP/opus-mt-en-de", "size": "74M", "license": "apache-2.0"},
            {"name": "google/madlad400-3b-mt", "size": "3B", "license": "apache-2.0"},
            {"name": "facebook/mbart-large-50-many-to-many-mmt", "size": "611M", "license": "mit"},
        ]
    },
    "Summarization": {
        "id": "summarization",
        "description": "Summarize long documents or articles",
        "use_cases": ["News summarization", "Document condensing", "Meeting notes", "Research papers"],
        "top_models": [
            {"name": "facebook/bart-large-cnn", "size": "400M", "license": "mit"},
            {"name": "google/pegasus-xsum", "size": "568M", "license": "apache-2.0"},
            {"name": "philschmid/bart-large-cnn-samsum", "size": "400M", "license": "mit"},
            {"name": "google/flan-t5-large", "size": "780M", "license": "apache-2.0"},
        ]
    },
    "Image Classification": {
        "id": "image-classification",
        "description": "Classify images into categories",
        "use_cases": ["Product categorization", "Medical imaging", "Quality control", "Content moderation"],
        "top_models": [
            {"name": "google/vit-base-patch16-224", "size": "86M", "license": "apache-2.0"},
            {"name": "microsoft/resnet-50", "size": "25M", "license": "apache-2.0"},
            {"name": "facebook/convnext-base-224", "size": "88M", "license": "apache-2.0"},
            {"name": "timm/efficientnet_b0.ra_in1k", "size": "5M", "license": "apache-2.0"},
        ]
    },
    "Object Detection": {
        "id": "object-detection",
        "description": "Detect and locate objects in images",
        "use_cases": ["Autonomous vehicles", "Security cameras", "Inventory management", "Sports analytics"],
        "top_models": [
            {"name": "facebook/detr-resnet-50", "size": "41M", "license": "apache-2.0"},
            {"name": "hustvl/yolos-tiny", "size": "6M", "license": "apache-2.0"},
            {"name": "microsoft/table-transformer-detection", "size": "42M", "license": "mit"},
            {"name": "facebook/detr-resnet-101", "size": "60M", "license": "apache-2.0"},
        ]
    },
    "Image Generation": {
        "id": "text-to-image",
        "description": "Generate images from text descriptions",
        "use_cases": ["Art creation", "Product visualization", "Marketing content", "Game assets"],
        "top_models": [
            {"name": "stabilityai/stable-diffusion-xl-base-1.0", "size": "6.9B", "license": "openrail++"},
            {"name": "black-forest-labs/FLUX.1-schnell", "size": "12B", "license": "apache-2.0"},
            {"name": "runwayml/stable-diffusion-v1-5", "size": "1B", "license": "creativeml-openrail-m"},
            {"name": "stabilityai/sdxl-turbo", "size": "6.9B", "license": "openrail++"},
        ]
    },
    "Speech Recognition": {
        "id": "automatic-speech-recognition",
        "description": "Convert speech to text",
        "use_cases": ["Transcription", "Voice commands", "Meeting notes", "Accessibility"],
        "top_models": [
            {"name": "openai/whisper-large-v3", "size": "1.5B", "license": "apache-2.0"},
            {"name": "openai/whisper-medium", "size": "769M", "license": "apache-2.0"},
            {"name": "openai/whisper-small", "size": "244M", "license": "apache-2.0"},
            {"name": "facebook/wav2vec2-base-960h", "size": "95M", "license": "apache-2.0"},
        ]
    },
    "Embeddings": {
        "id": "feature-extraction",
        "description": "Generate embeddings for semantic search and similarity",
        "use_cases": ["Semantic search", "Recommendation systems", "Clustering", "RAG systems"],
        "top_models": [
            {"name": "sentence-transformers/all-MiniLM-L6-v2", "size": "22M", "license": "apache-2.0"},
            {"name": "sentence-transformers/all-mpnet-base-v2", "size": "109M", "license": "apache-2.0"},
            {"name": "BAAI/bge-small-en-v1.5", "size": "33M", "license": "mit"},
            {"name": "intfloat/e5-small-v2", "size": "33M", "license": "mit"},
        ]
    },
}
# Size buckets the user can pick in the UI. "min"/"max" are parameter counts
# expressed in millions — the same unit parse_size() produces when it converts
# a model's "size" label (e.g. "8B" -> 8000, "67M" -> 67).
SIZE_PREFERENCES = {
    "Tiny (< 100M)": {"min": 0, "max": 100},
    "Small (100M - 500M)": {"min": 100, "max": 500},
    "Medium (500M - 2B)": {"min": 500, "max": 2000},
    "Large (2B - 10B)": {"min": 2000, "max": 10000},
    # "Any size" is also the fallback bucket when the selection is unknown.
    "Any size": {"min": 0, "max": 100000},
}
# ---------------------------------------------------------------------------
# Core Functions
# ---------------------------------------------------------------------------
def get_recommendations(
    task: str,
    size_pref: str,
    priority: str,
    use_case: str
) -> tuple[str, str]:
    """Build a markdown recommendation list and a code snippet for *task*.

    Args:
        task: Display name of the task (a key of ``TASKS``).
        size_pref: A key of ``SIZE_PREFERENCES`` (or "Any size").
        priority: One of "Most Popular", "Smallest/Fastest", "Best Quality".
        use_case: Optional free text echoed back into the output.

    Returns:
        ``(recommendations_markdown, code_example_markdown)``.
    """
    if task not in TASKS:
        return "Please select a task.", ""

    info = TASKS[task]

    def _to_millions(label: str) -> float:
        # "8B" -> 8000.0, "67M" -> 67.0; unrecognized labels sort first (0).
        normalized = label.upper()
        if "B" in normalized:
            return float(normalized.replace("B", "")) * 1000
        if "M" in normalized:
            return float(normalized.replace("M", ""))
        return 0

    # Work on a copy so sorting never mutates the shared TASKS catalogue.
    candidates = list(info["top_models"])

    if size_pref != "Any size":
        bounds = SIZE_PREFERENCES.get(size_pref, SIZE_PREFERENCES["Any size"])
        candidates = [
            m for m in candidates
            if bounds["min"] <= _to_millions(m["size"]) <= bounds["max"]
        ]
    if not candidates:
        return "No models match your size preference. Try 'Any size'.", ""

    if priority == "Smallest/Fastest":
        candidates.sort(key=lambda m: _to_millions(m["size"]))
    elif priority == "Best Quality":
        # Larger models tend to be higher quality.
        candidates.sort(key=lambda m: _to_millions(m["size"]), reverse=True)
    # "Most Popular" keeps the curated order untouched.

    # Assemble the markdown output.
    lines = [
        f"## Recommendations for: {task}\n",
        f"*{info['description']}*\n",
    ]
    if use_case:
        lines.append(f"**Your use case:** {use_case}\n")
    lines.append("---\n")

    for rank, entry in enumerate(candidates[:4], 1):
        lines.extend([
            f"### {rank}. {entry['name']}",
            f"- **Size:** {entry['size']} parameters",
            f"- **License:** {entry['license']}",
            f"- **Link:** [View on HuggingFace](https://huggingface.co/{entry['name']})",
            "",
        ])

    snippet = generate_code_example(task, candidates[0] if candidates else None)
    return "\n".join(lines), snippet
def generate_code_example(task: str, model: Optional[dict]) -> str:
    """Return a markdown-fenced Python snippet showing how to run *model*.

    Args:
        task: Display name of the task (normally a key of ``TASKS``).
        model: Model record with at least a ``"name"`` key, or ``None``.

    Returns:
        A fenced ```` ```python ```` block, or ``""`` when no model was given.
    """
    if not model:
        return ""
    model_name = model["name"]
    code_templates = {
        "Text Generation": f'''```python
from transformers import pipeline
generator = pipeline("text-generation", model="{model_name}")
result = generator(
    "Write a story about a robot:",
    max_length=100,
    num_return_sequences=1
)
print(result[0]["generated_text"])
```''',
        "Text Classification": f'''```python
from transformers import pipeline
classifier = pipeline("text-classification", model="{model_name}")
result = classifier("I love this product! It's amazing!")
print(result)  # [{{'label': 'POSITIVE', 'score': 0.99}}]
```''',
        "Question Answering": f'''```python
from transformers import pipeline
qa = pipeline("question-answering", model="{model_name}")
result = qa(
    question="What is the capital of France?",
    context="France is a country in Europe. Paris is its capital city."
)
print(result["answer"])  # Paris
```''',
        "Translation": f'''```python
from transformers import pipeline
translator = pipeline("translation", model="{model_name}")
result = translator("Hello, how are you?")
print(result[0]["translation_text"])
```''',
        "Summarization": f'''```python
from transformers import pipeline
summarizer = pipeline("summarization", model="{model_name}")
long_text = """Your long article text here..."""
result = summarizer(long_text, max_length=130, min_length=30)
print(result[0]["summary_text"])
```''',
        "Image Classification": f'''```python
from transformers import pipeline
classifier = pipeline("image-classification", model="{model_name}")
result = classifier("path/to/image.jpg")
print(result)  # [{{'label': 'cat', 'score': 0.95}}]
```''',
        # Object detection previously fell through to the generic text
        # pipeline example; show an image input instead.
        "Object Detection": f'''```python
from transformers import pipeline
detector = pipeline("object-detection", model="{model_name}")
result = detector("path/to/image.jpg")
print(result)  # [{{'label': 'cat', 'score': 0.98, 'box': {{...}}}}]
```''',
        # Text-to-image models are loaded via diffusers, not the generic
        # transformers pipeline the old fallback suggested.
        "Image Generation": f'''```python
from diffusers import AutoPipelineForText2Image
pipe = AutoPipelineForText2Image.from_pretrained("{model_name}")
image = pipe("An astronaut riding a horse on Mars").images[0]
image.save("output.png")
```''',
        "Speech Recognition": f'''```python
from transformers import pipeline
transcriber = pipeline("automatic-speech-recognition", model="{model_name}")
result = transcriber("audio.mp3")
print(result["text"])
```''',
        "Embeddings": f'''```python
from sentence_transformers import SentenceTransformer
model = SentenceTransformer("{model_name}")
sentences = ["This is a sentence", "This is another sentence"]
embeddings = model.encode(sentences)
print(embeddings.shape)  # (2, 384)
```''',
    }
    if task in code_templates:
        return code_templates[task]
    # Generic fallback. Use .get() so an unknown task name cannot raise
    # KeyError (the original indexed TASKS[task] directly); fall back to the
    # raw task string as the pipeline tag.
    task_id = TASKS.get(task, {}).get("id", task)
    return f'''```python
from transformers import pipeline
pipe = pipeline("{task_id}", model="{model_name}")
result = pipe("Your input here")
print(result)
```'''
# ---------------------------------------------------------------------------
# Gradio Interface
# ---------------------------------------------------------------------------
with gr.Blocks(title="Model Selector", theme=gr.themes.Soft()) as demo:
    gr.Markdown("""
    # Model Selector
    Find the perfect HuggingFace model for your task. Answer a few questions
    and get personalized recommendations with code examples.
    """)
    with gr.Row():
        with gr.Column(scale=1):
            # Question 1: which task category the user is working on.
            task_select = gr.Dropdown(
                choices=list(TASKS.keys()),
                label="What do you want to do?",
                value="Text Generation"
            )
            # Blurb for the selected task; kept in sync by update_task_info.
            task_description = gr.Markdown(
                value=f"*{TASKS['Text Generation']['description']}*"
            )
            # Question 2: preferred parameter-count bucket.
            size_select = gr.Dropdown(
                choices=list(SIZE_PREFERENCES.keys()),
                label="Model size preference?",
                value="Any size",
                info="Smaller = faster, larger = higher quality"
            )
            # Question 3: how the curated list should be ordered.
            priority_select = gr.Radio(
                choices=["Most Popular", "Smallest/Fastest", "Best Quality"],
                label="What matters most?",
                value="Most Popular"
            )
            # Optional free text; echoed back into the recommendation output.
            use_case = gr.Textbox(
                label="Describe your use case (optional)",
                placeholder="e.g., Customer support chatbot for e-commerce"
            )
            recommend_btn = gr.Button("Get Recommendations", variant="primary", size="lg")
        with gr.Column(scale=1):
            # Output panes: markdown recommendations plus a runnable snippet.
            recommendations = gr.Markdown(label="Recommendations")
            code_example = gr.Markdown(label="Code Example")
    # Use cases display
    use_cases_display = gr.Markdown(
        value=f"**Common use cases:** {', '.join(TASKS['Text Generation']['use_cases'])}"
    )
    # Event handlers
    def update_task_info(task):
        """Refresh the description and use-case blurbs when the task changes."""
        desc = f"*{TASKS[task]['description']}*"
        uses = f"**Common use cases:** {', '.join(TASKS[task]['use_cases'])}"
        return desc, uses
    task_select.change(
        fn=update_task_info,
        inputs=[task_select],
        outputs=[task_description, use_cases_display]
    )
    recommend_btn.click(
        fn=get_recommendations,
        inputs=[task_select, size_select, priority_select, use_case],
        outputs=[recommendations, code_example]
    )
    # Static footer: quick-reference table and attribution.
    gr.Markdown("""
    ---
    ### Quick Reference
    | Task | Best For | Typical Size |
    |------|----------|--------------|
    | Text Generation | Chatbots, content | 3B - 70B |
    | Text Classification | Sentiment, topics | 50M - 300M |
    | Embeddings | Search, RAG | 20M - 100M |
    | Speech Recognition | Transcription | 200M - 1.5B |
    | Image Generation | Art, visualization | 1B - 12B |
    ---
    Built by [Lorenzo Scaturchio](https://huggingface.co/gr8monk3ys)
    """)
if __name__ == "__main__":
    demo.launch()