"""Model upgrade options for better long-form content interpretation.

Catalog of candidate replacement models (local and API-based) with their
context windows, quality/speed trade-offs, and memory requirements.
"""
# Model upgrade options for better long-form content interpretation

# OPTION 1: Llama 3.1 70B (Best Quality - if you have compute)
# Informational record: all values are human-readable descriptions, except
# fits_on_a100 which is a boolean flag.
LLAMA_70B = {
    "model_name": "meta-llama/Llama-3.1-70B-Instruct",
    "context_window": "128K tokens",
    "quality": "Excellent - best for complex content",
    "speed": "Moderate (2-4x slower than 7B)",
    "memory_required": "~35GB VRAM",
    "fits_on_a100": True,
    "upgrade_difficulty": "Easy - just change model name",
}
# OPTION 2: Qwen2.5-32B (Best Balance)
# Same schema as the other local-model option dicts in this module.
QWEN_32B = {
    "model_name": "Qwen/Qwen2.5-32B-Instruct",
    "context_window": "128K tokens",
    "quality": "Excellent - specialized for reasoning",
    "speed": "Fast (1.5-2x slower than 7B)",
    "memory_required": "~16GB VRAM",
    "fits_on_a100": True,
    "upgrade_difficulty": "Easy - just change model name",
}
# OPTION 3: Llama 3.1 8B (Easy Upgrade)
# Drop-in replacement for the current 7B model: similar speed, longer context.
LLAMA_8B = {
    "model_name": "meta-llama/Llama-3.1-8B-Instruct",
    "context_window": "128K tokens",
    "quality": "Very good - better than Mistral-7B",
    "speed": "Fast (similar to current)",
    "memory_required": "~8GB VRAM",
    "fits_on_a100": True,
    "upgrade_difficulty": "Trivial - just change model name",
}
# OPTION 4: Claude 3.5 Sonnet via API (Best Overall)
# NOTE: unlike the local-model dicts above, this one has a "cost" key and no
# "fits_on_a100" flag, since it runs remotely (0GB local memory).
CLAUDE_API = {
    "model_name": "claude-3-5-sonnet-20241022",
    "context_window": "200K tokens",
    "quality": "Excellent - best for nuanced questions",
    "speed": "Very fast via API",
    "memory_required": "0GB (API-based)",
    "cost": "$3 per million input tokens",
    "upgrade_difficulty": "Medium - requires API integration",
}
def get_recommended_upgrade():
    """Return the model upgrade options keyed by priority.

    Returns:
        dict: Maps a priority label ("best_quality", "best_balance",
        "easiest_upgrade", "best_overall") to the corresponding
        module-level model-option dict.
    """
    return {
        "best_quality": LLAMA_70B,
        "best_balance": QWEN_32B,
        "easiest_upgrade": LLAMA_8B,
        "best_overall": CLAUDE_API,
    }
# Context window comparison
# Keys are display names; values describe each model's context size relative
# to the current 32K-token Mistral-7B baseline.
CONTEXT_COMPARISON = {
    "Current Mistral-7B": "32K tokens",
    "Llama 3.1 8B": "128K tokens (4x more)",
    "Llama 3.1 70B": "128K tokens (4x more)",
    "Qwen2.5-32B": "128K tokens (4x more)",
    "Claude 3.5 Sonnet": "200K tokens (6x more)",
}
# Performance for long-form content
# One-line qualitative summary per model of long-form comprehension ability.
LONG_FORM_PERFORMANCE = {
    "Mistral-7B": "Good for simple questions",
    "Llama 3.1 8B": "Better reasoning, longer context",
    "Qwen2.5-32B": "Excellent reasoning, great for complex content",
    "Llama 3.1 70B": "Superior understanding, best for nuanced questions",
    "Claude 3.5 Sonnet": "Best overall, excellent at context understanding",
}