metaviiii committed on
Commit ·
41bc296
1
Parent(s): 8befed1
Update: May 2026 model comparison - comprehensive rewrite
Browse files
README.md
CHANGED
|
@@ -1,221 +1,176 @@
|
|
| 1 |
---
|
| 2 |
license: mit
|
| 3 |
tags:
|
| 4 |
-
-
|
| 5 |
-
- crazyrouter
|
| 6 |
- model-comparison
|
| 7 |
- benchmark
|
| 8 |
-
-
|
| 9 |
-
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
language:
|
| 11 |
- en
|
| 12 |
- zh
|
| 13 |
---
|
| 14 |
|
| 15 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
|
| 17 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
|
| 19 |
-
|
| 20 |
|
| 21 |
-
|
| 22 |
|
| 23 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
|
| 25 |
-
|
| 26 |
-
from openai import OpenAI
|
| 27 |
-
import time
|
| 28 |
|
| 29 |
-
|
| 30 |
-
base_url="https://crazyrouter.com/v1",
|
| 31 |
-
api_key="sk-your-crazyrouter-key"
|
| 32 |
-
)
|
| 33 |
|
| 34 |
-
|
| 35 |
-
"gpt-4o",
|
| 36 |
-
"gpt-4o-mini",
|
| 37 |
-
"claude-sonnet-4-20250514",
|
| 38 |
-
"claude-haiku-3.5",
|
| 39 |
-
"gemini-2.0-flash",
|
| 40 |
-
"deepseek-chat",
|
| 41 |
-
"deepseek-reasoner",
|
| 42 |
-
]
|
| 43 |
-
|
| 44 |
-
PROMPT = "Explain the difference between TCP and UDP in exactly 3 sentences."
|
| 45 |
-
|
| 46 |
-
print(f"Prompt: {PROMPT}\n")
|
| 47 |
-
print("=" * 60)
|
| 48 |
-
|
| 49 |
-
for model in MODELS:
|
| 50 |
-
try:
|
| 51 |
-
start = time.time()
|
| 52 |
-
response = client.chat.completions.create(
|
| 53 |
-
model=model,
|
| 54 |
-
messages=[{"role": "user", "content": PROMPT}],
|
| 55 |
-
max_tokens=200
|
| 56 |
-
)
|
| 57 |
-
elapsed = time.time() - start
|
| 58 |
-
content = response.choices[0].message.content
|
| 59 |
-
tokens = response.usage.total_tokens
|
| 60 |
-
|
| 61 |
-
print(f"\nπ€ {model}")
|
| 62 |
-
print(f"β±οΈ {elapsed:.2f}s | π {tokens} tokens")
|
| 63 |
-
print(f"π¬ {content}")
|
| 64 |
-
print("-" * 60)
|
| 65 |
-
except Exception as e:
|
| 66 |
-
print(f"\nβ {model}: {e}")
|
| 67 |
-
print("-" * 60)
|
| 68 |
-
```
|
| 69 |
-
|
| 70 |
-
---
|
| 71 |
-
|
| 72 |
-
## Benchmark: Speed Test
|
| 73 |
|
| 74 |
```python
|
| 75 |
-
import time
|
| 76 |
from openai import OpenAI
|
| 77 |
|
|
|
|
| 78 |
client = OpenAI(
|
| 79 |
-
|
| 80 |
-
|
| 81 |
)
|
| 82 |
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
for _ in range(runs):
|
| 86 |
-
start = time.time()
|
| 87 |
-
client.chat.completions.create(
|
| 88 |
-
model=model,
|
| 89 |
-
messages=[{"role": "user", "content": prompt}],
|
| 90 |
-
max_tokens=100
|
| 91 |
-
)
|
| 92 |
-
times.append(time.time() - start)
|
| 93 |
-
avg = sum(times) / len(times)
|
| 94 |
-
return avg
|
| 95 |
-
|
| 96 |
-
models = ["gpt-4o-mini", "claude-haiku-3.5", "gemini-2.0-flash", "deepseek-chat"]
|
| 97 |
-
prompt = "What is 2+2? Reply with just the number."
|
| 98 |
-
|
| 99 |
-
print("Speed Benchmark (avg of 3 runs)")
|
| 100 |
-
print("=" * 40)
|
| 101 |
-
for m in models:
|
| 102 |
-
avg = benchmark(m, prompt)
|
| 103 |
-
print(f"{m:30s} {avg:.2f}s")
|
| 104 |
-
```
|
| 105 |
-
|
| 106 |
-
---
|
| 107 |
-
|
| 108 |
-
## Coding Comparison
|
| 109 |
-
|
| 110 |
-
```python
|
| 111 |
-
CODING_PROMPT = """Write a Python function that:
|
| 112 |
-
1. Takes a list of integers
|
| 113 |
-
2. Returns the longest increasing subsequence
|
| 114 |
-
3. Include type hints and a docstring
|
| 115 |
-
"""
|
| 116 |
-
|
| 117 |
-
CODING_MODELS = [
|
| 118 |
-
"gpt-4o",
|
| 119 |
"claude-sonnet-4-20250514",
|
|
|
|
|
|
|
| 120 |
"deepseek-chat",
|
| 121 |
-
"gemini-2.0-flash",
|
| 122 |
]
|
| 123 |
|
| 124 |
-
for model in
|
| 125 |
response = client.chat.completions.create(
|
| 126 |
model=model,
|
| 127 |
-
messages=[{"role": "user", "content":
|
| 128 |
-
max_tokens=500
|
| 129 |
)
|
| 130 |
-
print(f"
|
| 131 |
-
print(f"π€ {model}")
|
| 132 |
-
print(f"{'='*60}")
|
| 133 |
-
print(response.choices[0].message.content)
|
| 134 |
```
|
| 135 |
|
| 136 |
-
|
| 137 |
|
| 138 |
-
##
|
| 139 |
|
| 140 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 141 |
|
| 142 |
-
|
| 143 |
-
REASONING_PROMPT = """A farmer has 17 sheep. All but 9 die. How many sheep are left?
|
| 144 |
-
Think step by step."""
|
| 145 |
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
]
|
| 152 |
|
| 153 |
-
for
|
| 154 |
-
response = client.chat.completions.create(
|
| 155 |
-
model=model,
|
| 156 |
-
messages=[{"role": "user", "content": REASONING_PROMPT}],
|
| 157 |
-
max_tokens=300
|
| 158 |
-
)
|
| 159 |
-
print(f"\nπ€ {model}: {response.choices[0].message.content[:200]}")
|
| 160 |
-
```
|
| 161 |
|
| 162 |
-
|
| 163 |
|
| 164 |
-
|
|
|
|
|
|
|
| 165 |
|
| 166 |
-
|
| 167 |
-
# Approximate pricing per 1M tokens (input/output)
|
| 168 |
-
PRICING = {
|
| 169 |
-
"gpt-4o": {"input": 2.50, "output": 10.00},
|
| 170 |
-
"gpt-4o-mini": {"input": 0.15, "output": 0.60},
|
| 171 |
-
"claude-sonnet-4-20250514": {"input": 3.00, "output": 15.00},
|
| 172 |
-
"claude-haiku-3.5": {"input": 0.80, "output": 4.00},
|
| 173 |
-
"gemini-2.0-flash": {"input": 0.10, "output": 0.40},
|
| 174 |
-
"deepseek-chat": {"input": 0.14, "output": 0.28},
|
| 175 |
-
}
|
| 176 |
-
|
| 177 |
-
def estimate_cost(model, input_tokens, output_tokens):
|
| 178 |
-
p = PRICING.get(model, {"input": 0, "output": 0})
|
| 179 |
-
return (input_tokens * p["input"] + output_tokens * p["output"]) / 1_000_000
|
| 180 |
-
|
| 181 |
-
# Example: 1000 requests, avg 500 input + 200 output tokens each
|
| 182 |
-
requests = 1000
|
| 183 |
-
input_tok = 500
|
| 184 |
-
output_tok = 200
|
| 185 |
-
|
| 186 |
-
print(f"Cost estimate for {requests} requests ({input_tok} in / {output_tok} out tokens each):\n")
|
| 187 |
-
for model, price in PRICING.items():
|
| 188 |
-
cost = requests * estimate_cost(model, input_tok, output_tok)
|
| 189 |
-
print(f" {model:30s} ${cost:.4f}")
|
| 190 |
-
```
|
| 191 |
-
|
| 192 |
-
---
|
| 193 |
-
|
| 194 |
-
## When to Use Which Model
|
| 195 |
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
|
| 199 |
-
| Complex analysis | `gpt-4o` or `claude-sonnet-4-20250514` | Best reasoning |
|
| 200 |
-
| Coding | `deepseek-chat` or `claude-sonnet-4-20250514` | Strong code generation |
|
| 201 |
-
| Long documents | `gemini-2.0-flash` | 1M token context |
|
| 202 |
-
| Math/Logic | `deepseek-reasoner` or `o3-mini` | Chain-of-thought |
|
| 203 |
-
| Budget tasks | `deepseek-chat` | $0.14/1M input |
|
| 204 |
-
| Speed critical | `gemini-2.0-flash` | Fastest response |
|
| 205 |
|
| 206 |
---
|
| 207 |
|
| 208 |
-
|
| 209 |
-
|
| 210 |
-
π [Crazyrouter Demo on Hugging Face](https://huggingface.co/spaces/xujfcn/Crazyrouter-Demo) β switch models in real-time
|
| 211 |
-
|
| 212 |
-
---
|
| 213 |
-
|
| 214 |
-
## Links
|
| 215 |
-
|
| 216 |
-
- π [Crazyrouter](https://crazyrouter.com/?utm_source=huggingface&utm_medium=tutorial&utm_campaign=dev_community)
|
| 217 |
-
- π [Getting Started](https://huggingface.co/xujfcn/Crazyrouter-Getting-Started)
|
| 218 |
-
- π [LangChain Guide](https://huggingface.co/xujfcn/Crazyrouter-LangChain-Guide)
|
| 219 |
-
- π° [Pricing](https://huggingface.co/spaces/xujfcn/Crazyrouter-Pricing)
|
| 220 |
-
- π¬ [Telegram](https://t.me/crazyrouter)
|
| 221 |
-
- π¦ [Twitter @metaviiii](https://twitter.com/metaviiii)
|
|
|
|
| 1 |
---
|
| 2 |
license: mit
|
| 3 |
tags:
|
| 4 |
+
- llm
|
|
|
|
| 5 |
- model-comparison
|
| 6 |
- benchmark
|
| 7 |
+
- claude
|
| 8 |
+
- gpt
|
| 9 |
+
- gemini
|
| 10 |
+
- deepseek
|
| 11 |
+
- ai-models
|
| 12 |
+
- "2026"
|
| 13 |
language:
|
| 14 |
- en
|
| 15 |
- zh
|
| 16 |
---
|
| 17 |
|
| 18 |
+
# Top AI Models Comparison — May 2026
|
| 19 |
+
|
| 20 |
+
A practical, up-to-date comparison of the best large language models available via API as of **May 4, 2026**. Focused on real-world performance, pricing, and use-case fit — not just benchmark scores.
|
| 21 |
+
|
| 22 |
+
> **Last updated: 2026-05-04** | Contributions welcome via PR
|
| 23 |
+
|
| 24 |
+
## Model Overview
|
| 25 |
+
|
| 26 |
+
| Model | Provider | Context Window | Input Price (per 1M tokens) | Output Price (per 1M tokens) | Strengths |
|
| 27 |
+
|-------|----------|---------------|----------------------------|-----------------------------|-----------|
|
| 28 |
+
| **Claude 4 Sonnet** | Anthropic | 200K | $3.00 | $15.00 | Best overall coding + reasoning, extended thinking |
|
| 29 |
+
| **Claude 3.7 Sonnet** | Anthropic | 200K | $3.00 | $15.00 | Excellent balance of speed and quality |
|
| 30 |
+
| **Claude 3.5 Haiku** | Anthropic | 200K | $0.80 | $4.00 | Fast and cheap, great for high-volume tasks |
|
| 31 |
+
| **GPT-4.1** | OpenAI | 1M | $2.00 | $8.00 | Large context, strong instruction following |
|
| 32 |
+
| **GPT-4.1 mini** | OpenAI | 1M | $0.40 | $1.60 | Budget-friendly, good for simple tasks |
|
| 33 |
+
| **GPT-4o** | OpenAI | 128K | $2.50 | $10.00 | Multimodal (text + image + audio) |
|
| 34 |
+
| **Gemini 2.5 Pro** | Google | 1M | $1.25 / $2.50 | $10.00 | Huge context, strong reasoning + thinking |
|
| 35 |
+
| **Gemini 2.5 Flash** | Google | 1M | $0.15 | $0.60 / $3.50 | Extremely fast and cheap |
|
| 36 |
+
| **DeepSeek V3** | DeepSeek | 128K | $0.27 | $1.10 | Best value for money, strong coding |
|
| 37 |
+
| **DeepSeek R1** | DeepSeek | 128K | $0.55 | $2.19 | Deep reasoning with chain-of-thought |
|
| 38 |
+
| **Llama 4 Maverick** | Meta | 1M | Varies | Varies | Open-weight, self-hostable |
|
| 39 |
+
| **Qwen3 235B** | Alibaba | 128K | Varies | Varies | Top open-source, hybrid thinking |
|
| 40 |
+
|
| 41 |
+
> 💡 Prices are official API rates. Third-party providers often offer 20-50% discounts.
|
| 42 |
+
|
| 43 |
+
## 🎯 Best Model by Use Case
|
| 44 |
+
|
| 45 |
+
### Coding & Development
|
| 46 |
+
| Task | Recommended | Why |
|
| 47 |
+
|------|------------|-----|
|
| 48 |
+
| Complex refactoring | Claude 4 Sonnet | Best code understanding and generation |
|
| 49 |
+
| Quick code completion | Claude 3.5 Haiku | Fast, accurate, low cost |
|
| 50 |
+
| Debugging | Claude 4 Sonnet / GPT-4.1 | Strong reasoning about code logic |
|
| 51 |
+
| Code review | Claude 3.7 Sonnet | Good balance of depth and speed |
|
| 52 |
+
|
| 53 |
+
### Writing & Content
|
| 54 |
+
| Task | Recommended | Why |
|
| 55 |
+
|------|------------|-----|
|
| 56 |
+
| Long-form articles | Claude 4 Sonnet | Natural writing style, follows instructions well |
|
| 57 |
+
| Translation | Gemini 2.5 Pro | Strong multilingual capabilities |
|
| 58 |
+
| Summarization | Gemini 2.5 Flash | Fast, cheap, handles long docs |
|
| 59 |
+
| Creative writing | Claude 4 Sonnet | Most natural and nuanced output |
|
| 60 |
+
|
| 61 |
+
### Data & Analysis
|
| 62 |
+
| Task | Recommended | Why |
|
| 63 |
+
|------|------------|-----|
|
| 64 |
+
| Data extraction | GPT-4.1 | Reliable structured output, large context |
|
| 65 |
+
| Math / Logic | DeepSeek R1 | Deep chain-of-thought reasoning |
|
| 66 |
+
| Research analysis | Gemini 2.5 Pro | 1M context for large document sets |
|
| 67 |
+
| Classification | Gemini 2.5 Flash / GPT-4.1 mini | Cheap and fast for high volume |
|
| 68 |
+
|
| 69 |
+
### Multimodal
|
| 70 |
+
| Task | Recommended | Why |
|
| 71 |
+
|------|------------|-----|
|
| 72 |
+
| Image understanding | GPT-4o / Gemini 2.5 Pro | Native vision capabilities |
|
| 73 |
+
| Document OCR | Gemini 2.5 Pro | Handles PDFs and scanned docs well |
|
| 74 |
+
| Audio transcription | GPT-4o | Native audio input support |
|
| 75 |
+
|
| 76 |
+
## ⚡ Speed vs Quality Tiers
|
| 77 |
|
| 78 |
+
```
|
| 79 |
+
Tier 1 — Maximum Quality (slower, higher cost)
|
| 80 |
+
├── Claude 4 Sonnet (extended thinking)
|
| 81 |
+
├── Gemini 2.5 Pro (thinking mode)
|
| 82 |
+
└── DeepSeek R1
|
| 83 |
+
|
| 84 |
+
Tier 2 — Balanced (good quality, reasonable speed)
|
| 85 |
+
├── Claude 3.7 Sonnet
|
| 86 |
+
├── GPT-4.1
|
| 87 |
+
└── GPT-4o
|
| 88 |
+
|
| 89 |
+
Tier 3 — Fast & Cheap (high throughput)
|
| 90 |
+
├── Claude 3.5 Haiku
|
| 91 |
+
├── Gemini 2.5 Flash
|
| 92 |
+
├── GPT-4.1 mini
|
| 93 |
+
└── DeepSeek V3
|
| 94 |
+
```
|
| 95 |
|
| 96 |
+
## 💰 Cost Efficiency Ranking
|
| 97 |
|
| 98 |
+
For typical workloads (mixed input/output), approximate cost per 1M total tokens:
|
| 99 |
|
| 100 |
+
| Rank | Model | ~Cost per 1M tokens | Quality |
|
| 101 |
+
|------|-------|---------------------|---------|
|
| 102 |
+
| 1 | Gemini 2.5 Flash | ~$0.40 | Good |
|
| 103 |
+
| 2 | GPT-4.1 mini | ~$1.00 | Good |
|
| 104 |
+
| 3 | DeepSeek V3 | ~$0.70 | Very Good |
|
| 105 |
+
| 4 | Claude 3.5 Haiku | ~$2.40 | Very Good |
|
| 106 |
+
| 5 | DeepSeek R1 | ~$1.40 | Excellent (reasoning) |
|
| 107 |
+
| 6 | GPT-4.1 | ~$5.00 | Excellent |
|
| 108 |
+
| 7 | Gemini 2.5 Pro | ~$6.00 | Excellent |
|
| 109 |
+
| 8 | Claude 4 Sonnet | ~$9.00 | Top tier |
|
| 110 |
|
| 111 |
+
## 🔧 Quick Start: Access All Models with One API
|
|
|
|
|
|
|
| 112 |
|
| 113 |
+
Instead of managing separate API keys for each provider, you can use an API gateway to access all models through a single OpenAI-compatible endpoint.
|
|
|
|
|
|
|
|
|
|
| 114 |
|
| 115 |
+
**Example with Python (OpenAI SDK):**
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 116 |
|
| 117 |
```python
|
|
|
|
| 118 |
from openai import OpenAI
|
| 119 |
|
| 120 |
+
# Works with any OpenAI-compatible gateway
|
| 121 |
client = OpenAI(
|
| 122 |
+
api_key="your-api-key",
|
| 123 |
+
base_url="https://your-gateway.com/v1"
|
| 124 |
)
|
| 125 |
|
| 126 |
+
# Switch models by just changing the model name
|
| 127 |
+
models = [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 128 |
"claude-sonnet-4-20250514",
|
| 129 |
+
"gpt-4.1",
|
| 130 |
+
"gemini-2.5-pro-preview-05-06",
|
| 131 |
"deepseek-chat",
|
|
|
|
| 132 |
]
|
| 133 |
|
| 134 |
+
for model in models:
|
| 135 |
response = client.chat.completions.create(
|
| 136 |
model=model,
|
| 137 |
+
messages=[{"role": "user", "content": "Explain quicksort in 3 sentences"}],
|
|
|
|
| 138 |
)
|
| 139 |
+
print(f"{model}: {response.choices[0].message.content[:100]}...")
|
|
|
|
|
|
|
|
|
|
| 140 |
```
|
| 141 |
|
| 142 |
+
**Popular API gateways:** [Crazyrouter](https://crazyrouter.com), [OpenRouter](https://openrouter.ai), [AIHubMix](https://aihubmix.com)
|
| 143 |
|
| 144 |
+
## Key Trends — May 2026
|
| 145 |
|
| 146 |
+
1. **Extended thinking is mainstream** — Claude 4 Sonnet, Gemini 2.5 Pro, and DeepSeek R1 all support chain-of-thought reasoning modes
|
| 147 |
+
2. **1M+ context is the new normal** — GPT-4.1, Gemini 2.5, and Llama 4 all support 1M tokens
|
| 148 |
+
3. **Open-source closing the gap** — Qwen3, Llama 4, and DeepSeek V3 rival proprietary models
|
| 149 |
+
4. **Prices keep dropping** — Flash/mini tiers make AI accessible for high-volume production use
|
| 150 |
+
5. **Multimodal expanding** — Vision, audio, and video understanding are becoming standard features
|
| 151 |
|
| 152 |
+
## Methodology
|
|
|
|
|
|
|
| 153 |
|
| 154 |
+
This comparison is based on:
|
| 155 |
+
- Official API documentation and pricing pages
|
| 156 |
+
- Public benchmarks (LMSYS Chatbot Arena, LiveBench, SWE-bench)
|
| 157 |
+
- Community feedback and real-world usage reports
|
| 158 |
+
- Our own testing across coding, writing, and analysis tasks
|
|
|
|
| 159 |
|
| 160 |
+
We update this guide monthly. Prices and capabilities change frequently — always check the provider's official docs for the latest info.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 161 |
|
| 162 |
+
## 🤝 Contributing
|
| 163 |
|
| 164 |
+
Found outdated info or want to add a model? PRs are welcome! Please include:
|
| 165 |
+
- Source link for any pricing or capability claims
|
| 166 |
+
- Date of verification
|
| 167 |
|
| 168 |
+
## Related Resources
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 169 |
|
| 170 |
+
- [LMSYS Chatbot Arena](https://chat.lmsys.org/) — Live model rankings by human preference
|
| 171 |
+
- [LiveBench](https://livebench.ai/) — Contamination-free LLM benchmark
|
| 172 |
+
- [Artificial Analysis](https://artificialanalysis.ai/) — Speed and pricing tracker
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 173 |
|
| 174 |
---
|
| 175 |
|
| 176 |
+
⭐ Star this repo if you find it useful — it helps others discover it!
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|