File size: 2,544 Bytes
6ea58d5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
# Model upgrade options for better long-form content interpretation

# OPTION 1: Llama 3.1 70B (Best Quality - if you have compute)
# Spec sheet for the 70B option; values are human-readable display strings
# except `fits_on_a100`, which is a boolean flag.
LLAMA_70B = dict(
    model_name="meta-llama/Llama-3.1-70B-Instruct",
    context_window="128K tokens",
    quality="Excellent - best for complex content",
    speed="Moderate (2-4x slower than 7B)",
    memory_required="~35GB VRAM",
    fits_on_a100=True,
    upgrade_difficulty="Easy - just change model name",
)

# OPTION 2: Qwen2.5-32B (Best Balance)
# Spec sheet for the Qwen option; same schema as the other OPTION dicts.
QWEN_32B = dict(
    model_name="Qwen/Qwen2.5-32B-Instruct",
    context_window="128K tokens",
    quality="Excellent - specialized for reasoning",
    speed="Fast (1.5-2x slower than 7B)",
    memory_required="~16GB VRAM",
    fits_on_a100=True,
    upgrade_difficulty="Easy - just change model name",
)

# OPTION 3: Llama 3.1 8B (Easy Upgrade)
# Spec sheet for the drop-in 8B option; same schema as the other OPTION dicts.
LLAMA_8B = dict(
    model_name="meta-llama/Llama-3.1-8B-Instruct",
    context_window="128K tokens",
    quality="Very good - better than Mistral-7B",
    speed="Fast (similar to current)",
    memory_required="~8GB VRAM",
    fits_on_a100=True,
    upgrade_difficulty="Trivial - just change model name",
)

# OPTION 4: Claude 3.5 Sonnet via API (Best Overall)
# Spec sheet for the API-hosted option. NOTE: unlike the local OPTION dicts,
# this one has a "cost" key and no "fits_on_a100" flag (no local VRAM needed).
CLAUDE_API = dict(
    model_name="claude-3-5-sonnet-20241022",
    context_window="200K tokens",
    quality="Excellent - best for nuanced questions",
    speed="Very fast via API",
    memory_required="0GB (API-based)",
    cost="$3 per million input tokens",
    upgrade_difficulty="Medium - requires API integration",
)

def get_recommended_upgrade():
    """Return the recommended upgrade option for each priority.

    Returns:
        dict: maps a priority label ("best_quality", "best_balance",
        "easiest_upgrade", "best_overall") to the corresponding OPTION
        spec dict defined at module level.
    """
    # Return the mapping directly; no intermediate variable needed.
    return {
        "best_quality": LLAMA_70B,
        "best_balance": QWEN_32B,
        "easiest_upgrade": LLAMA_8B,
        "best_overall": CLAUDE_API,
    }

# Context window comparison
# Display-name -> context-window string; built by pairing parallel tuples.
CONTEXT_COMPARISON = dict(
    zip(
        (
            "Current Mistral-7B",
            "Llama 3.1 8B",
            "Llama 3.1 70B",
            "Qwen2.5-32B",
            "Claude 3.5 Sonnet",
        ),
        (
            "32K tokens",
            "128K tokens (4x more)",
            "128K tokens (4x more)",
            "128K tokens (4x more)",
            "200K tokens (6x more)",
        ),
    )
)

# Performance for long-form content
# Display-name -> qualitative note; built by pairing parallel tuples.
LONG_FORM_PERFORMANCE = dict(
    zip(
        (
            "Mistral-7B",
            "Llama 3.1 8B",
            "Qwen2.5-32B",
            "Llama 3.1 70B",
            "Claude 3.5 Sonnet",
        ),
        (
            "Good for simple questions",
            "Better reasoning, longer context",
            "Excellent reasoning, great for complex content",
            "Superior understanding, best for nuanced questions",
            "Best overall, excellent at context understanding",
        ),
    )
)