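# NOTE(review): the text below looks like page-status residue rather than
# module code -- confirm and drop:
#   Spaces: Runtime error / Runtime error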
"""All NVIDIA models with ranking."""

from .config import HF_FALLBACK_MODEL

# Registry of model profiles, keyed by a short model alias.
#
# Every profile carries the same keys:
#   endpoint        - model identifier string; the "hf_fallback" entry takes
#                     its value from HF_FALLBACK_MODEL instead of a literal.
#   context_window  - integer; presumably the context size in tokens
#                     (TODO confirm against the caller).
#   max_tokens      - integer; presumably the generation cap in tokens
#                     (TODO confirm against the caller).
#   coding_rank, speed_rank, reasoning_rank
#                   - integer ranks. Lower appears to mean better: the TIER 1
#                     entries hold coding ranks 1-5 and the entries tagged
#                     "fastest" hold speed rank 1.
#   cost_per_1k_input, cost_per_1k_output
#                   - numeric cost; presumably per 1,000 tokens, currency not
#                     stated in this file.
#   tags            - list of free-form string labels.
#
# NOTE(review): ranks are not unique across entries -- confirm whether the
# consumers rely on uniqueness before sorting or selecting by rank:
#   * coding_rank 9 is used by both "mistral-medium-3.5" and "glm";
#   * speed_rank 1 is used by "nemotron-nano-9b" and "nemotron-content-safety",
#     5 by "mistral-small-4" and "nemotron-3-nano-omni",
#     8 by "deepseek-v4-flash" and "glm",
#     10 by "llama-4-maverick" and "diffusiongemma";
#   * reasoning_rank 21 is not assigned to any entry.
MODEL_RANKING = {
    # TIER 1: ELITE CODING
    "deepseek-v4-pro": {
        "endpoint": "deepseek-ai/deepseek-v4-pro",
        "context_window": 64000, "max_tokens": 8000,
        "coding_rank": 1, "speed_rank": 15, "reasoning_rank": 1,
        "cost_per_1k_input": 0.001, "cost_per_1k_output": 0.005,
        "tags": ["elite", "coding", "reasoning", "math", "cheap"],
    },
    "kimi-k2.6": {
        "endpoint": "moonshotai/kimi-k2.6",
        "context_window": 32000, "max_tokens": 8000,
        "coding_rank": 2, "speed_rank": 12, "reasoning_rank": 2,
        "cost_per_1k_input": 0.003, "cost_per_1k_output": 0.015,
        "tags": ["elite", "coding", "reasoning", "long_context"],
    },
    "qwen3.5-397b": {
        "endpoint": "qwen/qwen3.5-397b-a17b",
        "context_window": 128000, "max_tokens": 8000,
        "coding_rank": 3, "speed_rank": 18, "reasoning_rank": 3,
        "cost_per_1k_input": 0.002, "cost_per_1k_output": 0.008,
        "tags": ["elite", "coding", "long_context", "chinese"],
    },
    "mistral-large-3": {
        "endpoint": "mistralai/mistral-large-3-675b-instruct-2512",
        "context_window": 128000, "max_tokens": 8000,
        "coding_rank": 4, "speed_rank": 14, "reasoning_rank": 4,
        "cost_per_1k_input": 0.002, "cost_per_1k_output": 0.010,
        "tags": ["elite", "coding", "multilingual", "long_context"],
    },
    "gpt-oss-120b": {
        "endpoint": "openai/gpt-oss-120b",
        "context_window": 128000, "max_tokens": 8000,
        "coding_rank": 5, "speed_rank": 20, "reasoning_rank": 5,
        "cost_per_1k_input": 0.003, "cost_per_1k_output": 0.012,
        "tags": ["elite", "coding", "reasoning", "openai"],
    },
    # TIER 2: STRONG CODING
    "deepseek-v4-flash": {
        "endpoint": "deepseek-ai/deepseek-v4-flash",
        "context_window": 32000, "max_tokens": 8000,
        "coding_rank": 6, "speed_rank": 8, "reasoning_rank": 8,
        "cost_per_1k_input": 0.0005, "cost_per_1k_output": 0.002,
        "tags": ["strong", "coding", "fast", "cheap"],
    },
    "llama-4-maverick": {
        "endpoint": "meta/llama-4-maverick-17b-128e-instruct",
        "context_window": 128000, "max_tokens": 8000,
        "coding_rank": 7, "speed_rank": 10, "reasoning_rank": 7,
        "cost_per_1k_input": 0.001, "cost_per_1k_output": 0.004,
        "tags": ["strong", "coding", "meta", "long_context"],
    },
    "nemotron-3-super": {
        "endpoint": "nvidia/nemotron-3-super-120b-a12b",
        "context_window": 128000, "max_tokens": 8000,
        "coding_rank": 8, "speed_rank": 16, "reasoning_rank": 6,
        "cost_per_1k_input": 0.002, "cost_per_1k_output": 0.008,
        "tags": ["strong", "coding", "nvidia", "reasoning"],
    },
    "mistral-medium-3.5": {
        "endpoint": "mistralai/mistral-medium-3.5-128b",
        "context_window": 64000, "max_tokens": 8000,
        "coding_rank": 9, "speed_rank": 11, "reasoning_rank": 10,
        "cost_per_1k_input": 0.001, "cost_per_1k_output": 0.005,
        "tags": ["strong", "coding", "mistral", "balanced"],
    },
    "dracarys-llama-70b": {
        "endpoint": "abacusai/dracarys-llama-3.1-70b-instruct",
        "context_window": 32000, "max_tokens": 8000,
        "coding_rank": 10, "speed_rank": 13, "reasoning_rank": 11,
        "cost_per_1k_input": 0.001, "cost_per_1k_output": 0.004,
        "tags": ["strong", "coding", "roleplay", "creative"],
    },
    "llama-3.3-70b": {
        "endpoint": "meta/llama-3.3-70b-instruct",
        "context_window": 32000, "max_tokens": 8000,
        "coding_rank": 11, "speed_rank": 9, "reasoning_rank": 12,
        "cost_per_1k_input": 0.0005, "cost_per_1k_output": 0.002,
        "tags": ["strong", "coding", "meta", "fast", "cheap"],
    },
    "nemotron-super-49b": {
        "endpoint": "nvidia/llama-3.3-nemotron-super-49b-v1.5",
        "context_window": 32000, "max_tokens": 8000,
        "coding_rank": 12, "speed_rank": 7, "reasoning_rank": 13,
        "cost_per_1k_input": 0.0005, "cost_per_1k_output": 0.002,
        "tags": ["strong", "coding", "nvidia", "fast", "cheap"],
    },
    # TIER 3: GOOD CODING
    "step-3.7-flash": {
        "endpoint": "stepfun-ai/step-3.7-flash",
        "context_window": 32000, "max_tokens": 8000,
        "coding_rank": 13, "speed_rank": 6, "reasoning_rank": 14,
        "cost_per_1k_input": 0.0005, "cost_per_1k_output": 0.002,
        "tags": ["good", "coding", "fast", "chinese", "cheap"],
    },
    "mistral-small-4": {
        "endpoint": "mistralai/mistral-small-4-119b-2603",
        "context_window": 32000, "max_tokens": 8000,
        "coding_rank": 14, "speed_rank": 5, "reasoning_rank": 15,
        "cost_per_1k_input": 0.0005, "cost_per_1k_output": 0.002,
        "tags": ["good", "coding", "fast", "mistral", "cheap"],
    },
    "minimax-m2.7": {
        "endpoint": "minimaxai/minimax-m2.7",
        "context_window": 32000, "max_tokens": 8000,
        "coding_rank": 15, "speed_rank": 4, "reasoning_rank": 16,
        "cost_per_1k_input": 0.0005, "cost_per_1k_output": 0.002,
        "tags": ["good", "coding", "fast", "chinese", "cheap"],
    },
    "nemotron-super-49b-v1": {
        "endpoint": "nvidia/llama-3.3-nemotron-super-49b-v1",
        "context_window": 32000, "max_tokens": 8000,
        "coding_rank": 16, "speed_rank": 17, "reasoning_rank": 17,
        "cost_per_1k_input": 0.0005, "cost_per_1k_output": 0.002,
        "tags": ["good", "coding", "nvidia", "cheap"],
    },
    "llama-3.2-90b-vision": {
        "endpoint": "meta/llama-3.2-90b-vision-instruct",
        "context_window": 32000, "max_tokens": 8000,
        "coding_rank": 17, "speed_rank": 19, "reasoning_rank": 18,
        "cost_per_1k_input": 0.001, "cost_per_1k_output": 0.004,
        "tags": ["good", "coding", "vision", "multimodal", "meta"],
    },
    # TIER 4: FAST / LIGHT
    "nemotron-nano-12b": {
        "endpoint": "nvidia/nemotron-nano-12b-v2-vl",
        "context_window": 16000, "max_tokens": 4000,
        "coding_rank": 18, "speed_rank": 2, "reasoning_rank": 22,
        "cost_per_1k_input": 0.0001, "cost_per_1k_output": 0.0005,
        "tags": ["light", "fast", "vision", "nvidia", "cheap"],
    },
    "nemotron-3-nano-30b": {
        "endpoint": "nvidia/nemotron-3-nano-30b-a3b",
        "context_window": 32000, "max_tokens": 8000,
        "coding_rank": 19, "speed_rank": 3, "reasoning_rank": 19,
        "cost_per_1k_input": 0.0002, "cost_per_1k_output": 0.001,
        "tags": ["light", "fast", "nvidia", "cheap"],
    },
    "nemotron-nano-9b": {
        "endpoint": "nvidia/nvidia-nemotron-nano-9b-v2",
        "context_window": 16000, "max_tokens": 4000,
        "coding_rank": 20, "speed_rank": 1, "reasoning_rank": 23,
        "cost_per_1k_input": 0.0001, "cost_per_1k_output": 0.0005,
        "tags": ["light", "fastest", "nvidia", "cheap"],
    },
    "nemotron-content-safety": {
        "endpoint": "nvidia/nemotron-content-safety-reasoning-4b",
        "context_window": 8000, "max_tokens": 2000,
        "coding_rank": 21, "speed_rank": 1, "reasoning_rank": 24,
        "cost_per_1k_input": 0.0001, "cost_per_1k_output": 0.0005,
        "tags": ["light", "fastest", "safety", "nvidia", "cheap"],
    },
    # TIER 5: SPECIALIZED
    "nemotron-3-nano-omni": {
        "endpoint": "nvidia/nemotron-3-nano-omni-30b-a3b-reasoning",
        "context_window": 32000, "max_tokens": 8000,
        "coding_rank": 22, "speed_rank": 5, "reasoning_rank": 20,
        "cost_per_1k_input": 0.0002, "cost_per_1k_output": 0.001,
        "tags": ["specialized", "omni", "multimodal", "reasoning", "nvidia", "cheap"],
    },
    "diffusiongemma": {
        "endpoint": "google/diffusiongemma-26b-a4b-it",
        "context_window": 16000, "max_tokens": 4000,
        "coding_rank": 23, "speed_rank": 10, "reasoning_rank": 25,
        "cost_per_1k_input": 0.001, "cost_per_1k_output": 0.004,
        "tags": ["specialized", "image", "diffusion", "google"],
    },
    # LEGACY
    "glm": {
        "endpoint": "z-ai/glm-5.1",
        "context_window": 32000, "max_tokens": 8000,
        # NOTE(review): coding_rank 9 and speed_rank 8 collide with
        # "mistral-medium-3.5" and "deepseek-v4-flash" respectively.
        "coding_rank": 9, "speed_rank": 8, "reasoning_rank": 9,
        "cost_per_1k_input": 0.002, "cost_per_1k_output": 0.008,
        "tags": ["legacy", "coding", "fast", "chinese"],
    },
    "hf_fallback": {
        # Endpoint is supplied by the package config, not hard-coded here.
        "endpoint": HF_FALLBACK_MODEL,
        "context_window": 32000, "max_tokens": 8000,
        # Rank 50 on every axis places this entry after all others above.
        "coding_rank": 50, "speed_rank": 50, "reasoning_rank": 50,
        "cost_per_1k_input": 0, "cost_per_1k_output": 0,
        "tags": ["fallback", "free", "hf"],
    },
}