---
# LiteLLM proxy configuration.
# Credentials and the master key are referenced via os.environ/... indirection,
# never stored inline.

model_list:
  # Azure Responses-API deployment: gpt-5.5 (full model_info with costs/limits).
  - model_name: azure_gpt-5.5
    litellm_params:
      model: azure/azure_gpt-5.5
      api_base: os.environ/AZURE_RESPONSES_OPENAI_BASE_URL
      api_version: preview
      api_key: os.environ/AZURE_RESPONSES_OPENAI_API_KEY
      stream_timeout: 4000
      merge_reasoning_content_in_choices: true
      drop_params: true
      # Parameters the Responses API rejects; dropped before forwarding.
      additional_drop_params:
        - stream_options
        - temperature
        - top_p
        - presence_penalty
        - frequency_penalty
        - logprobs
        - top_logprobs
        - logit_bias
        - max_tokens
      reasoning:
        summary: detailed
        # verbosity: low
    model_info:
      mode: responses
      background: true
      supports_reasoning: true
      max_input_tokens: 128000
      max_output_tokens: 64000
      supports_tool_choice: true
      supports_vision: true
      supports_response_schema: true
      supports_prompt_caching: true
      input_cost_per_token: 0.00000125
      output_cost_per_token: 0.000010

  # Azure Responses-API deployment: gpt-5.3-codex (auto reasoning summary).
  - model_name: azure_gpt-5.3-codex
    litellm_params:
      model: azure/azure_gpt-5.3-codex
      api_base: os.environ/AZURE_RESPONSES_OPENAI_BASE_URL
      api_version: preview
      api_key: os.environ/AZURE_RESPONSES_OPENAI_API_KEY
      merge_reasoning_content_in_choices: true
      drop_params: true
      additional_drop_params:
        - stream_options
        - temperature
        - top_p
        - presence_penalty
        - frequency_penalty
        - logprobs
        - top_logprobs
        - logit_bias
        - max_tokens
      reasoning:
        summary: auto
    model_info:
      mode: responses
      background: true
      supports_reasoning: true

  # Azure Responses-API deployment: gpt-5.4-pro (detailed reasoning summary).
  - model_name: azure_gpt-5.4-pro
    litellm_params:
      model: azure/azure_gpt-5.4-pro
      api_base: os.environ/AZURE_RESPONSES_OPENAI_BASE_URL
      api_version: preview
      api_key: os.environ/AZURE_RESPONSES_OPENAI_API_KEY
      merge_reasoning_content_in_choices: true
      drop_params: true
      additional_drop_params:
        - stream_options
        - temperature
        - top_p
        - presence_penalty
        - frequency_penalty
        - logprobs
        - top_logprobs
        - logit_bias
        - max_tokens
      reasoning:
        summary: detailed
    model_info:
      mode: responses
      background: true
      supports_reasoning: true

# --------------Other Settings--------------------
litellm_settings:
  # Networking settings
  request_timeout: 4000  # (int) llm request timeout in seconds. Raise Timeout error if call takes longer than 10s. Sets litellm.request_timeout
  num_retries: 0
  # fallbacks: [{ "gemini-1.5-pro": ["gemini-1.5-flash"] }]
  allowed_fails: 1  # cooldown model if it fails > 1 call in a minute.
  cooldown_time: 30  # how long to cooldown model if fails/min > allowed_fails
  drop_params: true

general_settings:
  master_key: os.environ/MASTER_KEY  # sk-1234 # [OPTIONAL] Only use this if you require all calls to contain this key (Authorization: Bearer sk-1234)

# router_settings:
#   fallbacks:
#     [
#       { "or/gemini-2.0-flash-exp": ["gg1/gemini-2.0-flash-exp"] },
#       { "gpt-3.5-turbo": ["gemini-1.5-flash"] },
#     ]
#   model_group_alias: { "gpt-4": "gemini-1.5-pro" }
#   routing_strategy: simple-shuffle