Spaces:

FreshPixels
/

PinkSky

Runtime error

App Files Files Community

PinkSky / server /model_ranking.py

FreshPixels

Rename model_ranking.py to server/model_ranking.py

5174f6f verified 13 days ago

Raw

History Blame Contribute Delete

8.6 kB

	"""Все модели NVIDIA с ранжированием"""

	from .config import HF_FALLBACK_MODEL

	# Catalog of selectable models, keyed by a short alias.
	#
	# Every entry is a plain dict with the same nine keys:
	#   endpoint            - model identifier string (the "hf_fallback" entry
	#                         takes it from HF_FALLBACK_MODEL instead of a literal)
	#   context_window      - presumably the context size in tokens; TODO confirm
	#   max_tokens          - presumably the output token cap; TODO confirm
	#   coding_rank / speed_rank / reasoning_rank
	#                       - integer positions; lower looks better (tier 1 holds
	#                         coding ranks 1-5, "fastest"-tagged entries have
	#                         speed_rank 1) - verify against the consumer
	#   cost_per_1k_input / cost_per_1k_output
	#                       - numeric costs; currency/unit not stated here
	#   tags                - list of free-form string labels
	#
	# NOTE(review): rank values are not unique. speed_rank 1, 5, 8 and 10 and
	# coding_rank 9 each appear on two entries, and reasoning_rank skips 21, so
	# any consumer sorting on a single rank depends on a tie-break (e.g. the
	# insertion order below). Confirm this is intended.
	# NOTE(review): "kimi-k2.6" is tagged "long_context" with a 32000 window,
	# while some 128000-window entries lack that tag - confirm which is right.
	MODEL_RANKING = {
	    # ---- Tier 1: elite coding ----
	    "deepseek-v4-pro": {
	        "endpoint": "deepseek-ai/deepseek-v4-pro",
	        "context_window": 64000,
	        "max_tokens": 8000,
	        "coding_rank": 1,
	        "speed_rank": 15,
	        "reasoning_rank": 1,
	        "cost_per_1k_input": 0.001,
	        "cost_per_1k_output": 0.005,
	        "tags": ["elite", "coding", "reasoning", "math", "cheap"],
	    },
	    "kimi-k2.6": {
	        "endpoint": "moonshotai/kimi-k2.6",
	        "context_window": 32000,
	        "max_tokens": 8000,
	        "coding_rank": 2,
	        "speed_rank": 12,
	        "reasoning_rank": 2,
	        "cost_per_1k_input": 0.003,
	        "cost_per_1k_output": 0.015,
	        "tags": ["elite", "coding", "reasoning", "long_context"],
	    },
	    "qwen3.5-397b": {
	        "endpoint": "qwen/qwen3.5-397b-a17b",
	        "context_window": 128000,
	        "max_tokens": 8000,
	        "coding_rank": 3,
	        "speed_rank": 18,
	        "reasoning_rank": 3,
	        "cost_per_1k_input": 0.002,
	        "cost_per_1k_output": 0.008,
	        "tags": ["elite", "coding", "long_context", "chinese"],
	    },
	    "mistral-large-3": {
	        "endpoint": "mistralai/mistral-large-3-675b-instruct-2512",
	        "context_window": 128000,
	        "max_tokens": 8000,
	        "coding_rank": 4,
	        "speed_rank": 14,
	        "reasoning_rank": 4,
	        "cost_per_1k_input": 0.002,
	        "cost_per_1k_output": 0.010,
	        "tags": ["elite", "coding", "multilingual", "long_context"],
	    },
	    "gpt-oss-120b": {
	        "endpoint": "openai/gpt-oss-120b",
	        "context_window": 128000,
	        "max_tokens": 8000,
	        "coding_rank": 5,
	        "speed_rank": 20,
	        "reasoning_rank": 5,
	        "cost_per_1k_input": 0.003,
	        "cost_per_1k_output": 0.012,
	        "tags": ["elite", "coding", "reasoning", "openai"],
	    },

	    # ---- Tier 2: strong coding ----
	    "deepseek-v4-flash": {
	        "endpoint": "deepseek-ai/deepseek-v4-flash",
	        "context_window": 32000,
	        "max_tokens": 8000,
	        "coding_rank": 6,
	        "speed_rank": 8,
	        "reasoning_rank": 8,
	        "cost_per_1k_input": 0.0005,
	        "cost_per_1k_output": 0.002,
	        "tags": ["strong", "coding", "fast", "cheap"],
	    },
	    "llama-4-maverick": {
	        "endpoint": "meta/llama-4-maverick-17b-128e-instruct",
	        "context_window": 128000,
	        "max_tokens": 8000,
	        "coding_rank": 7,
	        "speed_rank": 10,
	        "reasoning_rank": 7,
	        "cost_per_1k_input": 0.001,
	        "cost_per_1k_output": 0.004,
	        "tags": ["strong", "coding", "meta", "long_context"],
	    },
	    "nemotron-3-super": {
	        "endpoint": "nvidia/nemotron-3-super-120b-a12b",
	        "context_window": 128000,
	        "max_tokens": 8000,
	        "coding_rank": 8,
	        "speed_rank": 16,
	        "reasoning_rank": 6,
	        "cost_per_1k_input": 0.002,
	        "cost_per_1k_output": 0.008,
	        "tags": ["strong", "coding", "nvidia", "reasoning"],
	    },
	    "mistral-medium-3.5": {
	        "endpoint": "mistralai/mistral-medium-3.5-128b",
	        "context_window": 64000,
	        "max_tokens": 8000,
	        "coding_rank": 9,
	        "speed_rank": 11,
	        "reasoning_rank": 10,
	        "cost_per_1k_input": 0.001,
	        "cost_per_1k_output": 0.005,
	        "tags": ["strong", "coding", "mistral", "balanced"],
	    },
	    "dracarys-llama-70b": {
	        "endpoint": "abacusai/dracarys-llama-3.1-70b-instruct",
	        "context_window": 32000,
	        "max_tokens": 8000,
	        "coding_rank": 10,
	        "speed_rank": 13,
	        "reasoning_rank": 11,
	        "cost_per_1k_input": 0.001,
	        "cost_per_1k_output": 0.004,
	        "tags": ["strong", "coding", "roleplay", "creative"],
	    },
	    "llama-3.3-70b": {
	        "endpoint": "meta/llama-3.3-70b-instruct",
	        "context_window": 32000,
	        "max_tokens": 8000,
	        "coding_rank": 11,
	        "speed_rank": 9,
	        "reasoning_rank": 12,
	        "cost_per_1k_input": 0.0005,
	        "cost_per_1k_output": 0.002,
	        "tags": ["strong", "coding", "meta", "fast", "cheap"],
	    },
	    "nemotron-super-49b": {
	        "endpoint": "nvidia/llama-3.3-nemotron-super-49b-v1.5",
	        "context_window": 32000,
	        "max_tokens": 8000,
	        "coding_rank": 12,
	        "speed_rank": 7,
	        "reasoning_rank": 13,
	        "cost_per_1k_input": 0.0005,
	        "cost_per_1k_output": 0.002,
	        "tags": ["strong", "coding", "nvidia", "fast", "cheap"],
	    },

	    # ---- Tier 3: good coding ----
	    "step-3.7-flash": {
	        "endpoint": "stepfun-ai/step-3.7-flash",
	        "context_window": 32000,
	        "max_tokens": 8000,
	        "coding_rank": 13,
	        "speed_rank": 6,
	        "reasoning_rank": 14,
	        "cost_per_1k_input": 0.0005,
	        "cost_per_1k_output": 0.002,
	        "tags": ["good", "coding", "fast", "chinese", "cheap"],
	    },
	    "mistral-small-4": {
	        "endpoint": "mistralai/mistral-small-4-119b-2603",
	        "context_window": 32000,
	        "max_tokens": 8000,
	        "coding_rank": 14,
	        "speed_rank": 5,
	        "reasoning_rank": 15,
	        "cost_per_1k_input": 0.0005,
	        "cost_per_1k_output": 0.002,
	        "tags": ["good", "coding", "fast", "mistral", "cheap"],
	    },
	    "minimax-m2.7": {
	        "endpoint": "minimaxai/minimax-m2.7",
	        "context_window": 32000,
	        "max_tokens": 8000,
	        "coding_rank": 15,
	        "speed_rank": 4,
	        "reasoning_rank": 16,
	        "cost_per_1k_input": 0.0005,
	        "cost_per_1k_output": 0.002,
	        "tags": ["good", "coding", "fast", "chinese", "cheap"],
	    },
	    "nemotron-super-49b-v1": {
	        "endpoint": "nvidia/llama-3.3-nemotron-super-49b-v1",
	        "context_window": 32000,
	        "max_tokens": 8000,
	        "coding_rank": 16,
	        "speed_rank": 17,
	        "reasoning_rank": 17,
	        "cost_per_1k_input": 0.0005,
	        "cost_per_1k_output": 0.002,
	        "tags": ["good", "coding", "nvidia", "cheap"],
	    },
	    "llama-3.2-90b-vision": {
	        "endpoint": "meta/llama-3.2-90b-vision-instruct",
	        "context_window": 32000,
	        "max_tokens": 8000,
	        "coding_rank": 17,
	        "speed_rank": 19,
	        "reasoning_rank": 18,
	        "cost_per_1k_input": 0.001,
	        "cost_per_1k_output": 0.004,
	        "tags": ["good", "coding", "vision", "multimodal", "meta"],
	    },

	    # ---- Tier 4: fast / light ----
	    "nemotron-nano-12b": {
	        "endpoint": "nvidia/nemotron-nano-12b-v2-vl",
	        "context_window": 16000,
	        "max_tokens": 4000,
	        "coding_rank": 18,
	        "speed_rank": 2,
	        "reasoning_rank": 22,
	        "cost_per_1k_input": 0.0001,
	        "cost_per_1k_output": 0.0005,
	        "tags": ["light", "fast", "vision", "nvidia", "cheap"],
	    },
	    "nemotron-3-nano-30b": {
	        "endpoint": "nvidia/nemotron-3-nano-30b-a3b",
	        "context_window": 32000,
	        "max_tokens": 8000,
	        "coding_rank": 19,
	        "speed_rank": 3,
	        "reasoning_rank": 19,
	        "cost_per_1k_input": 0.0002,
	        "cost_per_1k_output": 0.001,
	        "tags": ["light", "fast", "nvidia", "cheap"],
	    },
	    "nemotron-nano-9b": {
	        "endpoint": "nvidia/nvidia-nemotron-nano-9b-v2",
	        "context_window": 16000,
	        "max_tokens": 4000,
	        "coding_rank": 20,
	        "speed_rank": 1,
	        "reasoning_rank": 23,
	        "cost_per_1k_input": 0.0001,
	        "cost_per_1k_output": 0.0005,
	        "tags": ["light", "fastest", "nvidia", "cheap"],
	    },
	    "nemotron-content-safety": {
	        "endpoint": "nvidia/nemotron-content-safety-reasoning-4b",
	        "context_window": 8000,
	        "max_tokens": 2000,
	        "coding_rank": 21,
	        "speed_rank": 1,
	        "reasoning_rank": 24,
	        "cost_per_1k_input": 0.0001,
	        "cost_per_1k_output": 0.0005,
	        "tags": ["light", "fastest", "safety", "nvidia", "cheap"],
	    },

	    # ---- Tier 5: specialized ----
	    "nemotron-3-nano-omni": {
	        "endpoint": "nvidia/nemotron-3-nano-omni-30b-a3b-reasoning",
	        "context_window": 32000,
	        "max_tokens": 8000,
	        "coding_rank": 22,
	        "speed_rank": 5,
	        "reasoning_rank": 20,
	        "cost_per_1k_input": 0.0002,
	        "cost_per_1k_output": 0.001,
	        "tags": ["specialized", "omni", "multimodal", "reasoning", "nvidia", "cheap"],
	    },
	    "diffusiongemma": {
	        "endpoint": "google/diffusiongemma-26b-a4b-it",
	        "context_window": 16000,
	        "max_tokens": 4000,
	        "coding_rank": 23,
	        "speed_rank": 10,
	        "reasoning_rank": 25,
	        "cost_per_1k_input": 0.001,
	        "cost_per_1k_output": 0.004,
	        "tags": ["specialized", "image", "diffusion", "google"],
	    },

	    # ---- Legacy ----
	    "glm": {
	        "endpoint": "z-ai/glm-5.1",
	        "context_window": 32000,
	        "max_tokens": 8000,
	        "coding_rank": 9,
	        "speed_rank": 8,
	        "reasoning_rank": 9,
	        "cost_per_1k_input": 0.002,
	        "cost_per_1k_output": 0.008,
	        "tags": ["legacy", "coding", "fast", "chinese"],
	    },
	    # Endpoint comes from the project config rather than a literal; all three
	    # ranks are 50, higher than any other entry, and both costs are zero.
	    "hf_fallback": {
	        "endpoint": HF_FALLBACK_MODEL,
	        "context_window": 32000,
	        "max_tokens": 8000,
	        "coding_rank": 50,
	        "speed_rank": 50,
	        "reasoning_rank": 50,
	        "cost_per_1k_input": 0,
	        "cost_per_1k_output": 0,
	        "tags": ["fallback", "free", "hf"],
	    },
	}