File size: 1,475 Bytes
9c60174
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
# Registry of supported models.
#
# Each key is the short alias used to pick a model; each value is a 2-tuple
# ``(model_or_deployment_id, api_version)``. Most entries leave ``api_version``
# as the empty string (presumably because the backend does not need one --
# confirm against the client code that consumes this table).
model_zoo = {
    # --- OpenAI / Azure-hosted: (deployment string, api_version) ---
    "gpt-5": ("gpt-5_2025-08-07", "2024-12-01-preview"),
    "gpt-4.1-azure": ("gpt-4.1_2025-04-14", "2025-04-01-preview"),
    "gpt-4o": ("gpt-4o_2024-11-20", "2024-10-21"),
    "gpt-4o-mini": ("gpt-4o-mini", ""),
    "gpt-5-openai": ("gpt-5", ""),
    "gpt-5-mini-openai": ("gpt-5-mini", ""),

    # --- vLLM-hosted, reached through an OpenAI-compatible server ---
    "Qwen3-30B-A3B-Instruct-2507": ("Qwen/Qwen3-30B-A3B-Instruct-2507", ""),
    "Qwen3-VL-30B-A3B-Instruct": ("Qwen3-VL-30B-A3B-Instruct", ""),

    # --- Anthropic, direct Anthropic API (uses ANTHROPIC_API_KEY) ---
    "claude-opus-4-6": ("claude-opus-4-6", ""),
    "claude-sonnet-4-6": ("claude-sonnet-4-6", ""),

    # --- Anthropic / DeepSeek behind an OpenAI-compatible LiteLLM proxy ---
    # Uses LITELLM_API_KEY; selected by main.py's --tritonai flag.
    "claude-opus-4-6-tritonai": ("us.anthropic.claude-opus-4-6-v1", ""),
    "claude-sonnet-4-6-tritonai": ("us.anthropic.claude-sonnet-4-6-v1", ""),
    "deepseek-r1-tritonai": ("us.deepseek.r1-v1:0", ""),

    # --- OpenAI-compatible inference API (uses NV_API_KEY) ---
    "gpt-5.1": ("openai/openai/gpt-5.1", ""),
    "gpt-5.2": ("openai/openai/gpt-5.2", ""),
    "gpt-5.5": ("openai/openai/gpt-5.5", ""),
    "gpt-4.1": ("us/azure/openai/gpt-4.1", ""),
    "Qwen3.5-397B-A17B": ("nvidia/qwen/qwen3-5-397b-a17b", ""),
    "Kimi-K2.6": ("nvidia/moonshotai/kimi-k2.6", ""),
}