# LiteLLM proxy config.yaml
model_list:
  - model_name: azure_gpt-5.5
    litellm_params:
      model: azure/azure_gpt-5.5
      api_base: os.environ/AZURE_RESPONSES_OPENAI_BASE_URL
      api_version: preview
      api_key: os.environ/AZURE_RESPONSES_OPENAI_API_KEY
      stream_timeout: 4000
      merge_reasoning_content_in_choices: true
      drop_params: true
      additional_drop_params: ["stream_options", "temperature", "top_p", "presence_penalty", "frequency_penalty", "logprobs", "top_logprobs", "logit_bias", "max_tokens"]
      reasoning:
        summary: detailed
      # verbosity: low
    model_info:
      mode: responses
      background: true
      supports_reasoning: true
      max_input_tokens: 128000
      max_output_tokens: 64000
      supports_tool_choice: true
      supports_vision: true
      supports_response_schema: true
      supports_prompt_caching: true
      input_cost_per_token: 0.00000125
      output_cost_per_token: 0.000010
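  # litellm resolves "os.environ/<VAR>" values from the proxy's environment at
  # startup; a sketch of the exports this entry expects (shell, values are
  # placeholders):
  #   export AZURE_RESPONSES_OPENAI_BASE_URL="https://<resource>.openai.azure.com"
  #   export AZURE_RESPONSES_OPENAI_API_KEY="<key>"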
  - model_name: azure_gpt-5.3-codex
    litellm_params:
      model: azure/azure_gpt-5.3-codex
      api_base: os.environ/AZURE_RESPONSES_OPENAI_BASE_URL
      api_version: preview
      api_key: os.environ/AZURE_RESPONSES_OPENAI_API_KEY
      merge_reasoning_content_in_choices: true
      drop_params: true
      additional_drop_params: ["stream_options", "temperature", "top_p", "presence_penalty", "frequency_penalty", "logprobs", "top_logprobs", "logit_bias", "max_tokens"]
      reasoning:
        summary: auto
    model_info:
      mode: responses
      background: true
      supports_reasoning: true
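  # Clients select a deployment by its model_name. A minimal request sketch,
  # assuming the proxy runs on its default port 4000 (mode: responses maps to
  # the proxy's /v1/responses endpoint):
  #   curl http://localhost:4000/v1/responses \
  #     -H "Authorization: Bearer $MASTER_KEY" \
  #     -H "Content-Type: application/json" \
  #     -d '{"model": "azure_gpt-5.3-codex", "input": "hello"}'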
  - model_name: azure_gpt-5.4-pro
    litellm_params:
      model: azure/azure_gpt-5.4-pro
      api_base: os.environ/AZURE_RESPONSES_OPENAI_BASE_URL
      api_version: preview
      api_key: os.environ/AZURE_RESPONSES_OPENAI_API_KEY
      merge_reasoning_content_in_choices: true
      drop_params: true
      additional_drop_params: ["stream_options", "temperature", "top_p", "presence_penalty", "frequency_penalty", "logprobs", "top_logprobs", "logit_bias", "max_tokens"]
      reasoning:
        summary: detailed
    model_info:
      mode: responses
      background: true
      supports_reasoning: true
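  # The per-token costs on azure_gpt-5.5 above feed the proxy's spend tracking:
  # at 0.00000125 per input token and 0.000010 per output token, 1M input
  # tokens cost $1.25 and 1M output tokens $10.00. The other two entries omit
  # these fields, so their spend falls back to litellm's built-in pricing map
  # (if the model is known there).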
# -------------- Other Settings --------------------
litellm_settings:
  # Networking settings
  request_timeout: 4000 # (int) LLM request timeout in seconds; raises a Timeout error if a call takes longer than 4000s. Sets litellm.request_timeout
  num_retries: 0
  # fallbacks: [{ "gemini-1.5-pro": ["gemini-1.5-flash"] }]
  allowed_fails: 1 # cooldown a model if it fails more than 1 call in a minute
  cooldown_time: 30 # seconds to keep a model in cooldown once fails/min exceeds allowed_fails
  drop_params: true
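# drop_params silently strips OpenAI params the target provider does not
# accept instead of erroring; the per-model additional_drop_params lists above
# extend this, so a client sending e.g. temperature or max_tokens to these
# Responses deployments has them removed rather than getting a 400 from Azure.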
general_settings:
  master_key: os.environ/MASTER_KEY # [OPTIONAL] Require every call to present this key (Authorization: Bearer <master_key>)
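# A quick auth check against the running proxy (sketch; host/port assumed):
#   curl http://localhost:4000/v1/models -H "Authorization: Bearer $MASTER_KEY"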
# router_settings:
#   fallbacks:
#     [
#       { "or/gemini-2.0-flash-exp": ["gg1/gemini-2.0-flash-exp"] },
#       { "gpt-3.5-turbo": ["gemini-1.5-flash"] },
#     ]
#   model_group_alias: { "gpt-4": "gemini-1.5-pro" }
#   routing_strategy: simple-shuffle