PinkSky / server /model_ranking.py
FreshPixels's picture
Rename model_ranking.py to server/model_ranking.py
5174f6f verified
Raw
History Blame Contribute Delete
8.6 kB
"""Все модели NVIDIA с ранжированием"""
from .config import HF_FALLBACK_MODEL
# Registry of selectable models, keyed by a short alias.
#
# Every entry carries the same fields:
#   endpoint            - model identifier string (the fallback entry takes
#                         its value from HF_FALLBACK_MODEL)
#   context_window      - integer context size
#   max_tokens          - integer output cap
#   coding_rank         - integer rank for coding
#   speed_rank          - integer rank for speed
#   reasoning_rank      - integer rank for reasoning
#   cost_per_1k_input   - numeric cost per 1k input units
#   cost_per_1k_output  - numeric cost per 1k output units
#   tags                - list of free-form string labels
#
# NOTE(review): lower rank numbers appear to mean "better" (tier-1 entries
# hold coding ranks 1-5, the fallback holds 50) - confirm against the code
# that consumes this table.
# NOTE(review): rank values are not unique. speed_rank 1, 5, 8 and 10 are
# each used by two entries, and "glm" shares coding_rank 9 with
# "mistral-medium-3.5". No entry has reasoning_rank 21. Consumers that sort
# by rank should not assume a strict ordering.
# NOTE(review): currency and token units for the cost fields are not stated
# here - confirm before using them for billing estimates.
MODEL_RANKING = {
    # ------------------------------------------------------------------
    # Tier 1: elite coding
    # ------------------------------------------------------------------
    "deepseek-v4-pro": {
        "endpoint": "deepseek-ai/deepseek-v4-pro",
        "context_window": 64000,
        "max_tokens": 8000,
        "coding_rank": 1,
        "speed_rank": 15,
        "reasoning_rank": 1,
        "cost_per_1k_input": 0.001,
        "cost_per_1k_output": 0.005,
        "tags": ["elite", "coding", "reasoning", "math", "cheap"],
    },
    # NOTE(review): tagged "long_context" although context_window is 32000,
    # smaller than untagged entries - confirm which value is intended.
    "kimi-k2.6": {
        "endpoint": "moonshotai/kimi-k2.6",
        "context_window": 32000,
        "max_tokens": 8000,
        "coding_rank": 2,
        "speed_rank": 12,
        "reasoning_rank": 2,
        "cost_per_1k_input": 0.003,
        "cost_per_1k_output": 0.015,
        "tags": ["elite", "coding", "reasoning", "long_context"],
    },
    "qwen3.5-397b": {
        "endpoint": "qwen/qwen3.5-397b-a17b",
        "context_window": 128000,
        "max_tokens": 8000,
        "coding_rank": 3,
        "speed_rank": 18,
        "reasoning_rank": 3,
        "cost_per_1k_input": 0.002,
        "cost_per_1k_output": 0.008,
        "tags": ["elite", "coding", "long_context", "chinese"],
    },
    "mistral-large-3": {
        "endpoint": "mistralai/mistral-large-3-675b-instruct-2512",
        "context_window": 128000,
        "max_tokens": 8000,
        "coding_rank": 4,
        "speed_rank": 14,
        "reasoning_rank": 4,
        "cost_per_1k_input": 0.002,
        "cost_per_1k_output": 0.010,
        "tags": ["elite", "coding", "multilingual", "long_context"],
    },
    "gpt-oss-120b": {
        "endpoint": "openai/gpt-oss-120b",
        "context_window": 128000,
        "max_tokens": 8000,
        "coding_rank": 5,
        "speed_rank": 20,
        "reasoning_rank": 5,
        "cost_per_1k_input": 0.003,
        "cost_per_1k_output": 0.012,
        "tags": ["elite", "coding", "reasoning", "openai"],
    },
    # ------------------------------------------------------------------
    # Tier 2: strong coding
    # ------------------------------------------------------------------
    "deepseek-v4-flash": {
        "endpoint": "deepseek-ai/deepseek-v4-flash",
        "context_window": 32000,
        "max_tokens": 8000,
        "coding_rank": 6,
        "speed_rank": 8,
        "reasoning_rank": 8,
        "cost_per_1k_input": 0.0005,
        "cost_per_1k_output": 0.002,
        "tags": ["strong", "coding", "fast", "cheap"],
    },
    "llama-4-maverick": {
        "endpoint": "meta/llama-4-maverick-17b-128e-instruct",
        "context_window": 128000,
        "max_tokens": 8000,
        "coding_rank": 7,
        "speed_rank": 10,
        "reasoning_rank": 7,
        "cost_per_1k_input": 0.001,
        "cost_per_1k_output": 0.004,
        "tags": ["strong", "coding", "meta", "long_context"],
    },
    "nemotron-3-super": {
        "endpoint": "nvidia/nemotron-3-super-120b-a12b",
        "context_window": 128000,
        "max_tokens": 8000,
        "coding_rank": 8,
        "speed_rank": 16,
        "reasoning_rank": 6,
        "cost_per_1k_input": 0.002,
        "cost_per_1k_output": 0.008,
        "tags": ["strong", "coding", "nvidia", "reasoning"],
    },
    "mistral-medium-3.5": {
        "endpoint": "mistralai/mistral-medium-3.5-128b",
        "context_window": 64000,
        "max_tokens": 8000,
        "coding_rank": 9,
        "speed_rank": 11,
        "reasoning_rank": 10,
        "cost_per_1k_input": 0.001,
        "cost_per_1k_output": 0.005,
        "tags": ["strong", "coding", "mistral", "balanced"],
    },
    "dracarys-llama-70b": {
        "endpoint": "abacusai/dracarys-llama-3.1-70b-instruct",
        "context_window": 32000,
        "max_tokens": 8000,
        "coding_rank": 10,
        "speed_rank": 13,
        "reasoning_rank": 11,
        "cost_per_1k_input": 0.001,
        "cost_per_1k_output": 0.004,
        "tags": ["strong", "coding", "roleplay", "creative"],
    },
    "llama-3.3-70b": {
        "endpoint": "meta/llama-3.3-70b-instruct",
        "context_window": 32000,
        "max_tokens": 8000,
        "coding_rank": 11,
        "speed_rank": 9,
        "reasoning_rank": 12,
        "cost_per_1k_input": 0.0005,
        "cost_per_1k_output": 0.002,
        "tags": ["strong", "coding", "meta", "fast", "cheap"],
    },
    "nemotron-super-49b": {
        "endpoint": "nvidia/llama-3.3-nemotron-super-49b-v1.5",
        "context_window": 32000,
        "max_tokens": 8000,
        "coding_rank": 12,
        "speed_rank": 7,
        "reasoning_rank": 13,
        "cost_per_1k_input": 0.0005,
        "cost_per_1k_output": 0.002,
        "tags": ["strong", "coding", "nvidia", "fast", "cheap"],
    },
    # ------------------------------------------------------------------
    # Tier 3: good coding
    # ------------------------------------------------------------------
    "step-3.7-flash": {
        "endpoint": "stepfun-ai/step-3.7-flash",
        "context_window": 32000,
        "max_tokens": 8000,
        "coding_rank": 13,
        "speed_rank": 6,
        "reasoning_rank": 14,
        "cost_per_1k_input": 0.0005,
        "cost_per_1k_output": 0.002,
        "tags": ["good", "coding", "fast", "chinese", "cheap"],
    },
    "mistral-small-4": {
        "endpoint": "mistralai/mistral-small-4-119b-2603",
        "context_window": 32000,
        "max_tokens": 8000,
        "coding_rank": 14,
        "speed_rank": 5,
        "reasoning_rank": 15,
        "cost_per_1k_input": 0.0005,
        "cost_per_1k_output": 0.002,
        "tags": ["good", "coding", "fast", "mistral", "cheap"],
    },
    "minimax-m2.7": {
        "endpoint": "minimaxai/minimax-m2.7",
        "context_window": 32000,
        "max_tokens": 8000,
        "coding_rank": 15,
        "speed_rank": 4,
        "reasoning_rank": 16,
        "cost_per_1k_input": 0.0005,
        "cost_per_1k_output": 0.002,
        "tags": ["good", "coding", "fast", "chinese", "cheap"],
    },
    "nemotron-super-49b-v1": {
        "endpoint": "nvidia/llama-3.3-nemotron-super-49b-v1",
        "context_window": 32000,
        "max_tokens": 8000,
        "coding_rank": 16,
        "speed_rank": 17,
        "reasoning_rank": 17,
        "cost_per_1k_input": 0.0005,
        "cost_per_1k_output": 0.002,
        "tags": ["good", "coding", "nvidia", "cheap"],
    },
    "llama-3.2-90b-vision": {
        "endpoint": "meta/llama-3.2-90b-vision-instruct",
        "context_window": 32000,
        "max_tokens": 8000,
        "coding_rank": 17,
        "speed_rank": 19,
        "reasoning_rank": 18,
        "cost_per_1k_input": 0.001,
        "cost_per_1k_output": 0.004,
        "tags": ["good", "coding", "vision", "multimodal", "meta"],
    },
    # ------------------------------------------------------------------
    # Tier 4: fast / light
    # ------------------------------------------------------------------
    "nemotron-nano-12b": {
        "endpoint": "nvidia/nemotron-nano-12b-v2-vl",
        "context_window": 16000,
        "max_tokens": 4000,
        "coding_rank": 18,
        "speed_rank": 2,
        "reasoning_rank": 22,
        "cost_per_1k_input": 0.0001,
        "cost_per_1k_output": 0.0005,
        "tags": ["light", "fast", "vision", "nvidia", "cheap"],
    },
    "nemotron-3-nano-30b": {
        "endpoint": "nvidia/nemotron-3-nano-30b-a3b",
        "context_window": 32000,
        "max_tokens": 8000,
        "coding_rank": 19,
        "speed_rank": 3,
        "reasoning_rank": 19,
        "cost_per_1k_input": 0.0002,
        "cost_per_1k_output": 0.001,
        "tags": ["light", "fast", "nvidia", "cheap"],
    },
    "nemotron-nano-9b": {
        "endpoint": "nvidia/nvidia-nemotron-nano-9b-v2",
        "context_window": 16000,
        "max_tokens": 4000,
        "coding_rank": 20,
        "speed_rank": 1,
        "reasoning_rank": 23,
        "cost_per_1k_input": 0.0001,
        "cost_per_1k_output": 0.0005,
        "tags": ["light", "fastest", "nvidia", "cheap"],
    },
    "nemotron-content-safety": {
        "endpoint": "nvidia/nemotron-content-safety-reasoning-4b",
        "context_window": 8000,
        "max_tokens": 2000,
        "coding_rank": 21,
        "speed_rank": 1,
        "reasoning_rank": 24,
        "cost_per_1k_input": 0.0001,
        "cost_per_1k_output": 0.0005,
        "tags": ["light", "fastest", "safety", "nvidia", "cheap"],
    },
    # ------------------------------------------------------------------
    # Tier 5: specialized
    # ------------------------------------------------------------------
    "nemotron-3-nano-omni": {
        "endpoint": "nvidia/nemotron-3-nano-omni-30b-a3b-reasoning",
        "context_window": 32000,
        "max_tokens": 8000,
        "coding_rank": 22,
        "speed_rank": 5,
        "reasoning_rank": 20,
        "cost_per_1k_input": 0.0002,
        "cost_per_1k_output": 0.001,
        "tags": [
            "specialized",
            "omni",
            "multimodal",
            "reasoning",
            "nvidia",
            "cheap",
        ],
    },
    "diffusiongemma": {
        "endpoint": "google/diffusiongemma-26b-a4b-it",
        "context_window": 16000,
        "max_tokens": 4000,
        "coding_rank": 23,
        "speed_rank": 10,
        "reasoning_rank": 25,
        "cost_per_1k_input": 0.001,
        "cost_per_1k_output": 0.004,
        "tags": ["specialized", "image", "diffusion", "google"],
    },
    # ------------------------------------------------------------------
    # Legacy
    # ------------------------------------------------------------------
    "glm": {
        "endpoint": "z-ai/glm-5.1",
        "context_window": 32000,
        "max_tokens": 8000,
        "coding_rank": 9,
        "speed_rank": 8,
        "reasoning_rank": 9,
        "cost_per_1k_input": 0.002,
        "cost_per_1k_output": 0.008,
        "tags": ["legacy", "coding", "fast", "chinese"],
    },
    # Fallback entry: endpoint comes from configuration, costs are zero and
    # all three ranks are 50, the largest values in the table.
    "hf_fallback": {
        "endpoint": HF_FALLBACK_MODEL,
        "context_window": 32000,
        "max_tokens": 8000,
        "coding_rank": 50,
        "speed_rank": 50,
        "reasoning_rank": 50,
        "cost_per_1k_input": 0,
        "cost_per_1k_output": 0,
        "tags": ["fallback", "free", "hf"],
    },
}