"""
Model Selection Helper for LLM API

This script helps users choose the right model based on their requirements.
"""
|
|
|
|
|
import os
import sys
import textwrap
from typing import Any, Dict, List
|
|
|
|
|
|
|
|
# Catalogue of selectable models, keyed by the short model id used on the
# command line. Every entry carries the same ten fields:
#   name            - model identifier: an "org/model" id for "transformers"
#                     entries, a local .gguf path for "llama_cpp" entries
#   type            - backend name, "transformers" or "llama_cpp"
#   context_window  - context size in tokens
#   prompt_format   - name of the prompt template family
#   description     - one-line human-readable summary
#   size_mb         - size in megabytes
#   speed_rating    - score out of 10
#   quality_rating  - score out of 10
#   stop_sequences  - list of marker strings (presumably where generation is
#                     cut off; the consumer is not visible in this file)
#   parameters      - parameter count as display text, e.g. "7B"
MODEL_CONFIGS: Dict[str, Dict[str, Any]] = {
    "phi-2": {
        "name": "microsoft/phi-2",
        "type": "transformers",
        "context_window": 2048,
        "prompt_format": "phi",
        "description": "Microsoft Phi-2 (2.7B) - Excellent reasoning and coding",
        "size_mb": 1700,
        "speed_rating": 9,
        "quality_rating": 9,
        "stop_sequences": ["<|endoftext|>", "Human:", "Assistant:"],
        "parameters": "2.7B",
    },
    "tinyllama": {
        "name": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
        "type": "transformers",
        "context_window": 2048,
        "prompt_format": "llama",
        "description": "TinyLlama 1.1B - Ultra-lightweight and fast",
        "size_mb": 700,
        "speed_rating": 10,
        "quality_rating": 7,
        "stop_sequences": ["[INST]", "[/INST]", "</s>"],
        "parameters": "1.1B",
    },
    "qwen2.5-3b": {
        "name": "Qwen/Qwen2.5-3B-Instruct",
        "type": "transformers",
        "context_window": 32768,
        "prompt_format": "qwen",
        "description": "Qwen2.5 3B - Excellent multilingual support",
        "size_mb": 2000,
        "speed_rating": 8,
        "quality_rating": 8,
        "stop_sequences": ["<|endoftext|>", "<|im_end|>"],
        "parameters": "3B",
    },
    "gemma-2b": {
        "name": "google/gemma-2b-it",
        "type": "transformers",
        "context_window": 8192,
        "prompt_format": "gemma",
        "description": "Google Gemma 2B - Good balance of speed and quality",
        "size_mb": 1500,
        "speed_rating": 8,
        "quality_rating": 7,
        "stop_sequences": ["<end_of_turn>", "<start_of_turn>"],
        "parameters": "2B",
    },
    "llama-2-7b": {
        "name": "models/llama-2-7b-chat.gguf",
        "type": "llama_cpp",
        "context_window": 4096,
        "prompt_format": "llama",
        "description": "LLaMA 2 7B Chat - Balanced performance",
        "size_mb": 4000,
        "speed_rating": 6,
        "quality_rating": 8,
        "stop_sequences": ["[INST]", "[/INST]", "</s>"],
        "parameters": "7B",
    },
    "mistral-7b": {
        "name": "mistralai/Mistral-7B-Instruct-v0.2",
        "type": "transformers",
        "context_window": 32768,
        "prompt_format": "mistral",
        "description": "Mistral 7B - Excellent performance",
        "size_mb": 4000,
        "speed_rating": 6,
        "quality_rating": 9,
        "stop_sequences": ["</s>", "[INST]", "[/INST]"],
        "parameters": "7B",
    },
    "llama-2-13b": {
        "name": "models/llama-2-13b-chat.gguf",
        "type": "llama_cpp",
        "context_window": 4096,
        "prompt_format": "llama",
        "description": "LLaMA 2 13B Chat - High quality",
        "size_mb": 8000,
        "speed_rating": 4,
        "quality_rating": 9,
        "stop_sequences": ["[INST]", "[/INST]", "</s>"],
        "parameters": "13B",
    },
}
|
|
|
|
|
|
|
|
def print_model_table() -> None:
    """Print every entry of ``MODEL_CONFIGS`` as a fixed-width text table.

    One row is written to stdout per model, in the dictionary's own order,
    showing its id, parameter count, size, speed and quality ratings, backend
    type and context window. Returns nothing.
    """
    rule_width = 120  # width of the "=" / "-" separator lines

    # NOTE(review): the heading emoji was restored from mis-decoded bytes;
    # confirm the intended glyph.
    print("\n📊 Available Models:")
    print("=" * rule_width)
    print(
        f"{'Model ID':<15} {'Parameters':<10} {'Size (MB)':<10} "
        f"{'Speed':<6} {'Quality':<8} {'Type':<12} {'Context':<8}"
    )
    print("-" * rule_width)

    # Column widths below mirror the header so the rows line up under it.
    for model_id, config in MODEL_CONFIGS.items():
        print(
            f"{model_id:<15} {config['parameters']:<10} {config['size_mb']:<10} "
            f"{config['speed_rating']:<6} {config['quality_rating']:<8} "
            f"{config['type']:<12} {config['context_window']:<8}"
        )

    print("=" * rule_width)
|
|
|
|
|
|
|
|
def print_model_details(model_id: str) -> None:
    """Print every configured field of one model to stdout.

    Args:
        model_id: Key into ``MODEL_CONFIGS`` (for example ``"phi-2"``).

    If ``model_id`` is not a key of ``MODEL_CONFIGS`` a "not found" message is
    printed instead and the function returns without raising.
    """
    # NOTE(review): the emoji in this function were restored from mis-decoded
    # bytes; confirm the intended glyphs.
    if model_id not in MODEL_CONFIGS:
        print(f"❌ Model '{model_id}' not found!")
        return

    config = MODEL_CONFIGS[model_id]
    print(f"\n📋 Model Details: {model_id}")
    print("=" * 50)
    print(f"Description: {config['description']}")
    print(f"Parameters: {config['parameters']}")
    print(f"Size: {config['size_mb']} MB")
    print(f"Speed Rating: {config['speed_rating']}/10")
    print(f"Quality Rating: {config['quality_rating']}/10")
    print(f"Type: {config['type']}")
    print(f"Context Window: {config['context_window']} tokens")
    print(f"Prompt Format: {config['prompt_format']}")
    print(f"Stop Sequences: {config['stop_sequences']}")
|
|
|
|
|
|
|
|
def get_recommendations(use_case: str = "general") -> List[str]:
    """Return the recommended model ids for a use case, best first.

    Args:
        use_case: One of ``"speed"``, ``"quality"``, ``"balanced"``,
            ``"coding"``, ``"multilingual"`` or ``"general"``. Matching
            ignores letter case and surrounding whitespace.

    Returns:
        A list of three model ids. Any unrecognised use case yields the
        ``"general"`` list rather than raising.
    """
    recommendations = {
        "speed": ["tinyllama", "phi-2", "gemma-2b"],
        "quality": ["mistral-7b", "llama-2-13b", "qwen2.5-3b"],
        "balanced": ["phi-2", "qwen2.5-3b", "llama-2-7b"],
        "coding": ["phi-2", "qwen2.5-3b", "mistral-7b"],
        "multilingual": ["qwen2.5-3b", "mistral-7b", "llama-2-7b"],
        "general": ["phi-2", "qwen2.5-3b", "llama-2-7b"],
    }

    # Normalise the key so that input such as "Coding" selects the coding list
    # instead of silently falling through to the general one.
    key = use_case.strip().lower()
    return recommendations.get(key, recommendations["general"])
|
|
|
|
|
|
|
|
def print_recommendations(use_case: str = "general") -> None:
    """Print a numbered list of the models recommended for a use case.

    Args:
        use_case: Passed unchanged to ``get_recommendations``, which alone
            decides which model ids are listed. The heading echoes this value
            exactly as given.

    Raises:
        KeyError: If a recommended id is missing from ``MODEL_CONFIGS``.
    """
    recs = get_recommendations(use_case)

    # NOTE(review): the heading emoji was restored from mis-decoded bytes;
    # confirm the intended glyph.
    print(f"\n🎯 Recommendations for {use_case} use case:")
    print("=" * 50)

    for rank, model_id in enumerate(recs, 1):
        config = MODEL_CONFIGS[model_id]
        print(f"{rank}. {model_id} ({config['parameters']}) - {config['description']}")
        # Three-space indent lines the ratings up under the text after "N. ".
        print(
            f"   Speed: {config['speed_rating']}/10, "
            f"Quality: {config['quality_rating']}/10, "
            f"Size: {config['size_mb']}MB"
        )
|
|
|
|
|
|
|
|
def main() -> None:
    """Dispatch the model selector's command-line sub-commands.

    Reads ``sys.argv`` directly. With no arguments the usage text is printed.
    The command word is matched case-insensitively:

    * ``list`` - print the table of all models.
    * ``details <model_id>`` - print every field of one model.
    * ``recommend <use_case>`` - print the recommended models for a use case.
    * ``set <model_id>`` - check the id and print how to export ``MODEL_NAME``.

    Anything else, including ``details``/``recommend``/``set`` with the wrong
    number of arguments, prints an "Invalid command" message. Nothing is
    returned and no exit status is set.
    """
    # NOTE(review): the emoji in this function were restored from mis-decoded
    # bytes; confirm the intended glyphs.
    if len(sys.argv) == 1:
        print(textwrap.dedent("""
            🎯 LLM Model Selector

            Usage:
              python model_selector.py list                    # List all models
              python model_selector.py details <model_id>      # Show model details
              python model_selector.py recommend <use_case>    # Get recommendations
              python model_selector.py set <model_id>          # Set model for API

            Use cases:
              speed, quality, balanced, coding, multilingual, general

            Examples:
              python model_selector.py list
              python model_selector.py details phi-2
              python model_selector.py recommend coding
              python model_selector.py set phi-2
            """))
        return

    command = sys.argv[1].lower()
    args = sys.argv[2:]

    if command == "list":
        print_model_table()
    elif command == "details" and len(args) == 1:
        print_model_details(args[0])
    elif command == "recommend" and len(args) == 1:
        print_recommendations(args[0])
    elif command == "set" and len(args) == 1:
        model_id = args[0]
        if model_id in MODEL_CONFIGS:
            # os.environ only affects this process and its children; the
            # calling shell is unchanged, which is why the export/launch
            # commands are printed for the user to run.
            os.environ["MODEL_NAME"] = model_id
            print(f"✅ Model set to: {model_id}")
            print(f"📝 Run: export MODEL_NAME={model_id}")
            print(f"🚀 Or start server with: MODEL_NAME={model_id} uvicorn app.main:app --reload")
        else:
            print(f"❌ Model '{model_id}' not found!")
            print("Use 'python model_selector.py list' to see available models")
    else:
        print("❌ Invalid command. Use 'python model_selector.py' for help.")
|
|
|
|
|
|
|
|
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
|
|