[ { "model_name": "gpt-4o", "provider": "openai", "input_cost_per_1k": 0.0025, "output_cost_per_1k": 0.010, "context_window": 128000, "reasoning_score": 0.92, "coding_score": 0.93, "math_score": 0.90, "instruction_following_score": 0.95, "latency_score": 0.70, "max_complexity": 1.0, "notes": "Flagship multimodal model. Best quality/cost for hard tasks." }, { "model_name": "gpt-4o-mini", "provider": "openai", "input_cost_per_1k": 0.000150, "output_cost_per_1k": 0.000600, "context_window": 128000, "reasoning_score": 0.78, "coding_score": 0.78, "math_score": 0.72, "instruction_following_score": 0.85, "latency_score": 0.90, "max_complexity": 0.75, "notes": "Cost-efficient small model for lightweight tasks." }, { "model_name": "gpt-3.5-turbo", "provider": "openai", "input_cost_per_1k": 0.0005, "output_cost_per_1k": 0.0015, "context_window": 16385, "reasoning_score": 0.62, "coding_score": 0.65, "math_score": 0.55, "instruction_following_score": 0.75, "latency_score": 0.92, "max_complexity": 0.60, "notes": "Legacy fast model. Good for simple chat tasks." }, { "model_name": "claude-3-5-haiku-20241022", "provider": "anthropic", "input_cost_per_1k": 0.00080, "output_cost_per_1k": 0.00400, "context_window": 200000, "reasoning_score": 0.78, "coding_score": 0.80, "math_score": 0.75, "instruction_following_score": 0.85, "latency_score": 0.92, "max_complexity": 0.75, "notes": "Fast, affordable Anthropic model for everyday tasks." }, { "model_name": "claude-3-5-sonnet-20241022", "provider": "anthropic", "input_cost_per_1k": 0.003, "output_cost_per_1k": 0.015, "context_window": 200000, "reasoning_score": 0.93, "coding_score": 0.95, "math_score": 0.88, "instruction_following_score": 0.96, "latency_score": 0.75, "max_complexity": 1.0, "notes": "Top-tier coding and reasoning model from Anthropic." 
}, { "model_name": "claude-3-haiku-20240307", "provider": "anthropic", "input_cost_per_1k": 0.00025, "output_cost_per_1k": 0.00125, "context_window": 200000, "reasoning_score": 0.65, "coding_score": 0.65, "math_score": 0.60, "instruction_following_score": 0.75, "latency_score": 0.95, "max_complexity": 0.60, "notes": "Cheapest Anthropic model. Good for classification, summarization." }, { "model_name": "gemini-1.5-flash", "provider": "google", "input_cost_per_1k": 0.000075, "output_cost_per_1k": 0.000300, "context_window": 1000000, "reasoning_score": 0.74, "coding_score": 0.74, "math_score": 0.70, "instruction_following_score": 0.78, "latency_score": 0.88, "max_complexity": 0.72, "notes": "Extremely cheap and fast. Long context support." }, { "model_name": "gemini-1.5-pro", "provider": "google", "input_cost_per_1k": 0.00125, "output_cost_per_1k": 0.005, "context_window": 2000000, "reasoning_score": 0.88, "coding_score": 0.87, "math_score": 0.85, "instruction_following_score": 0.90, "latency_score": 0.72, "max_complexity": 0.95, "notes": "Massive context window. Great for long-doc analysis." }, { "model_name": "mistral-small-latest", "provider": "mistral", "input_cost_per_1k": 0.001, "output_cost_per_1k": 0.003, "context_window": 32000, "reasoning_score": 0.68, "coding_score": 0.70, "math_score": 0.62, "instruction_following_score": 0.75, "latency_score": 0.88, "max_complexity": 0.65, "notes": "Cost-effective European model." }, { "model_name": "mistral-large-latest", "provider": "mistral", "input_cost_per_1k": 0.003, "output_cost_per_1k": 0.009, "context_window": 128000, "reasoning_score": 0.85, "coding_score": 0.84, "math_score": 0.80, "instruction_following_score": 0.88, "latency_score": 0.75, "max_complexity": 0.90, "notes": "Strong European flagship model." 
}, { "model_name": "llama3.2:3b", "provider": "ollama", "input_cost_per_1k": 0.0, "output_cost_per_1k": 0.0, "context_window": 128000, "reasoning_score": 0.50, "coding_score": 0.48, "math_score": 0.42, "instruction_following_score": 0.60, "latency_score": 0.95, "max_complexity": 0.45, "notes": "Free local model. Use for simple/private tasks." }, { "model_name": "llama3.1:8b", "provider": "ollama", "input_cost_per_1k": 0.0, "output_cost_per_1k": 0.0, "context_window": 128000, "reasoning_score": 0.65, "coding_score": 0.64, "math_score": 0.58, "instruction_following_score": 0.72, "latency_score": 0.85, "max_complexity": 0.62, "notes": "Free local model with decent reasoning." }, { "model_name": "llama3.1:70b", "provider": "ollama", "input_cost_per_1k": 0.0, "output_cost_per_1k": 0.0, "context_window": 128000, "reasoning_score": 0.82, "coding_score": 0.82, "math_score": 0.78, "instruction_following_score": 0.85, "latency_score": 0.55, "max_complexity": 0.85, "notes": "Free local large model. Needs beefy hardware." }, { "model_name": "deepseek-chat", "provider": "deepseek", "input_cost_per_1k": 0.00014, "output_cost_per_1k": 0.00028, "context_window": 64000, "reasoning_score": 0.88, "coding_score": 0.90, "math_score": 0.92, "instruction_following_score": 0.85, "latency_score": 0.72, "max_complexity": 0.92, "notes": "Exceptional value model, especially strong in math and code." } ]