"""All NVIDIA models with ranking."""

from .config import HF_FALLBACK_MODEL


def _entry(endpoint, context_window, max_tokens, *, ranks, cost, tags):
    """Build one ``MODEL_RANKING`` record.

    Args:
        endpoint: Model identifier string stored under ``"endpoint"``.
        context_window: Value stored under ``"context_window"``.
        max_tokens: Value stored under ``"max_tokens"``.
        ranks: ``(coding_rank, speed_rank, reasoning_rank)`` triple.
        cost: ``(cost_per_1k_input, cost_per_1k_output)`` pair.
        tags: List of tag strings, stored as-is (not copied).

    Returns:
        A dict with the nine keys every record in ``MODEL_RANKING`` carries,
        in the fixed order endpoint, context_window, max_tokens, coding_rank,
        speed_rank, reasoning_rank, cost_per_1k_input, cost_per_1k_output,
        tags.
    """
    coding_rank, speed_rank, reasoning_rank = ranks
    cost_in, cost_out = cost
    return {
        "endpoint": endpoint,
        "context_window": context_window,
        "max_tokens": max_tokens,
        "coding_rank": coding_rank,
        "speed_rank": speed_rank,
        "reasoning_rank": reasoning_rank,
        "cost_per_1k_input": cost_in,
        "cost_per_1k_output": cost_out,
        "tags": tags,
    }


# Registry of known models keyed by short alias.
#
# NOTE(review): ranks look like "lower is better" (tier-1 entries hold
# coding ranks 1-5 and the fallback uses 50) -- confirm against the consumer.
# NOTE(review): rank values are not unique: speed_rank 1, 5, 8 and 10 and
# coding_rank 9 each appear twice -- confirm that ties are intended.
# NOTE(review): cost units/currency are not stated in this file -- presumably
# per 1k tokens; verify.
MODEL_RANKING = {
    # TIER 1: ELITE CODING
    "deepseek-v4-pro": _entry(
        "deepseek-ai/deepseek-v4-pro", 64000, 8000,
        ranks=(1, 15, 1),
        cost=(0.001, 0.005),
        tags=["elite", "coding", "reasoning", "math", "cheap"],
    ),
    "kimi-k2.6": _entry(
        "moonshotai/kimi-k2.6", 32000, 8000,
        ranks=(2, 12, 2),
        cost=(0.003, 0.015),
        tags=["elite", "coding", "reasoning", "long_context"],
    ),
    "qwen3.5-397b": _entry(
        "qwen/qwen3.5-397b-a17b", 128000, 8000,
        ranks=(3, 18, 3),
        cost=(0.002, 0.008),
        tags=["elite", "coding", "long_context", "chinese"],
    ),
    "mistral-large-3": _entry(
        "mistralai/mistral-large-3-675b-instruct-2512", 128000, 8000,
        ranks=(4, 14, 4),
        cost=(0.002, 0.010),
        tags=["elite", "coding", "multilingual", "long_context"],
    ),
    "gpt-oss-120b": _entry(
        "openai/gpt-oss-120b", 128000, 8000,
        ranks=(5, 20, 5),
        cost=(0.003, 0.012),
        tags=["elite", "coding", "reasoning", "openai"],
    ),

    # TIER 2: STRONG CODING
    "deepseek-v4-flash": _entry(
        "deepseek-ai/deepseek-v4-flash", 32000, 8000,
        ranks=(6, 8, 8),
        cost=(0.0005, 0.002),
        tags=["strong", "coding", "fast", "cheap"],
    ),
    "llama-4-maverick": _entry(
        "meta/llama-4-maverick-17b-128e-instruct", 128000, 8000,
        ranks=(7, 10, 7),
        cost=(0.001, 0.004),
        tags=["strong", "coding", "meta", "long_context"],
    ),
    "nemotron-3-super": _entry(
        "nvidia/nemotron-3-super-120b-a12b", 128000, 8000,
        ranks=(8, 16, 6),
        cost=(0.002, 0.008),
        tags=["strong", "coding", "nvidia", "reasoning"],
    ),
    "mistral-medium-3.5": _entry(
        "mistralai/mistral-medium-3.5-128b", 64000, 8000,
        ranks=(9, 11, 10),
        cost=(0.001, 0.005),
        tags=["strong", "coding", "mistral", "balanced"],
    ),
    "dracarys-llama-70b": _entry(
        "abacusai/dracarys-llama-3.1-70b-instruct", 32000, 8000,
        ranks=(10, 13, 11),
        cost=(0.001, 0.004),
        tags=["strong", "coding", "roleplay", "creative"],
    ),
    "llama-3.3-70b": _entry(
        "meta/llama-3.3-70b-instruct", 32000, 8000,
        ranks=(11, 9, 12),
        cost=(0.0005, 0.002),
        tags=["strong", "coding", "meta", "fast", "cheap"],
    ),
    "nemotron-super-49b": _entry(
        "nvidia/llama-3.3-nemotron-super-49b-v1.5", 32000, 8000,
        ranks=(12, 7, 13),
        cost=(0.0005, 0.002),
        tags=["strong", "coding", "nvidia", "fast", "cheap"],
    ),

    # TIER 3: GOOD CODING
    "step-3.7-flash": _entry(
        "stepfun-ai/step-3.7-flash", 32000, 8000,
        ranks=(13, 6, 14),
        cost=(0.0005, 0.002),
        tags=["good", "coding", "fast", "chinese", "cheap"],
    ),
    "mistral-small-4": _entry(
        "mistralai/mistral-small-4-119b-2603", 32000, 8000,
        ranks=(14, 5, 15),
        cost=(0.0005, 0.002),
        tags=["good", "coding", "fast", "mistral", "cheap"],
    ),
    "minimax-m2.7": _entry(
        "minimaxai/minimax-m2.7", 32000, 8000,
        ranks=(15, 4, 16),
        cost=(0.0005, 0.002),
        tags=["good", "coding", "fast", "chinese", "cheap"],
    ),
    "nemotron-super-49b-v1": _entry(
        "nvidia/llama-3.3-nemotron-super-49b-v1", 32000, 8000,
        ranks=(16, 17, 17),
        cost=(0.0005, 0.002),
        tags=["good", "coding", "nvidia", "cheap"],
    ),
    "llama-3.2-90b-vision": _entry(
        "meta/llama-3.2-90b-vision-instruct", 32000, 8000,
        ranks=(17, 19, 18),
        cost=(0.001, 0.004),
        tags=["good", "coding", "vision", "multimodal", "meta"],
    ),

    # TIER 4: FAST / LIGHT
    "nemotron-nano-12b": _entry(
        "nvidia/nemotron-nano-12b-v2-vl", 16000, 4000,
        ranks=(18, 2, 22),
        cost=(0.0001, 0.0005),
        tags=["light", "fast", "vision", "nvidia", "cheap"],
    ),
    "nemotron-3-nano-30b": _entry(
        "nvidia/nemotron-3-nano-30b-a3b", 32000, 8000,
        ranks=(19, 3, 19),
        cost=(0.0002, 0.001),
        tags=["light", "fast", "nvidia", "cheap"],
    ),
    "nemotron-nano-9b": _entry(
        "nvidia/nvidia-nemotron-nano-9b-v2", 16000, 4000,
        ranks=(20, 1, 23),
        cost=(0.0001, 0.0005),
        tags=["light", "fastest", "nvidia", "cheap"],
    ),
    "nemotron-content-safety": _entry(
        "nvidia/nemotron-content-safety-reasoning-4b", 8000, 2000,
        ranks=(21, 1, 24),
        cost=(0.0001, 0.0005),
        tags=["light", "fastest", "safety", "nvidia", "cheap"],
    ),

    # TIER 5: SPECIALIZED
    "nemotron-3-nano-omni": _entry(
        "nvidia/nemotron-3-nano-omni-30b-a3b-reasoning", 32000, 8000,
        ranks=(22, 5, 20),
        cost=(0.0002, 0.001),
        tags=["specialized", "omni", "multimodal", "reasoning", "nvidia", "cheap"],
    ),
    "diffusiongemma": _entry(
        "google/diffusiongemma-26b-a4b-it", 16000, 4000,
        ranks=(23, 10, 25),
        cost=(0.001, 0.004),
        tags=["specialized", "image", "diffusion", "google"],
    ),

    # LEGACY
    "glm": _entry(
        "z-ai/glm-5.1", 32000, 8000,
        ranks=(9, 8, 9),
        cost=(0.002, 0.008),
        tags=["legacy", "coding", "fast", "chinese"],
    ),
    # Endpoint comes from project config rather than a literal.
    "hf_fallback": _entry(
        HF_FALLBACK_MODEL, 32000, 8000,
        ranks=(50, 50, 50),
        cost=(0, 0),
        tags=["fallback", "free", "hf"],
    ),
}