# litellm-proxy / litellm-config-auto.yaml
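#
# How this file is typically used (illustrative; exact flags depend on your litellm version):
#   export LITELLM_MASTER_KEY=sk-...    # any value you choose; clients send it as their API key
#   export OPENROUTER_API_KEY=...       # plus GOOGLE_API_KEY and HF_TOKEN for the Gemini / Hugging Face entries
#   litellm --config litellm-config-auto.yaml --port 4000
# Values written as "os.environ/<VAR>" below are read from the corresponding environment variables.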
general_settings:
master_key: "os.environ/LITELLM_MASTER_KEY"
litellm_settings:
drop_params: True
modify_params: True
additional_drop_params: ["messages[*].prefix"]
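# The callbacks below use LiteLLM's "<module>.<instance>" format for custom hooks: a local
# trim_messages_hook.py (not part of this file) must be importable by the proxy, typically sitting
# next to this config, and must expose proxy_handler_instance and error_handler_instance
# (CustomLogger subclasses); presumably it trims long message histories before dispatch.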
callbacks: ["trim_messages_hook.proxy_handler_instance", "trim_messages_hook.error_handler_instance"]
set_verbose: False
request_timeout: 300
model_list:
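# Deployments that share a model_name form one routing group: the router load-balances and fails
# over among them. Within a group, lower "order" values are tried first, and the model_info token
# limits feed the pre-call context-window checks enabled under router_settings.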
- model_name: gpt-4o-mini # Cursor sees this and lets it pass
litellm_params:
model: openrouter/qwen/qwen3-coder:free # What you actually want to use
api_key: "os.environ/OPENROUTER_API_KEY"
- model_name: my-free-models
litellm_params:
model: openrouter/qwen/qwen3-coder:free
api_key: "os.environ/OPENROUTER_API_KEY"
order: 1
model_info:
max_input_tokens: 128000
max_tokens: 8192
- model_name: my-free-models
litellm_params:
model: openrouter/mistralai/mistral-small-3.1-24b-instruct:free
api_key: "os.environ/OPENROUTER_API_KEY"
order: 1
model_info:
max_input_tokens: 128000
max_tokens: 8192
- model_name: my-free-models
litellm_params:
model: openrouter/google/gemma-3-27b-it:free
api_key: "os.environ/OPENROUTER_API_KEY"
order: 1
model_info:
max_input_tokens: 128000
max_tokens: 8192
- model_name: my-free-models
litellm_params:
model: openrouter/stepfun/step-3.5-flash:free
api_key: "os.environ/OPENROUTER_API_KEY"
order: 1
model_info:
max_input_tokens: 128000
max_tokens: 4096
- model_name: my-free-models
litellm_params:
model: openrouter/qwen/qwen3-4b:free
api_key: "os.environ/OPENROUTER_API_KEY"
order: 2
model_info:
max_input_tokens: 32768
max_tokens: 4096
- model_name: my-free-models
litellm_params:
model: openrouter/z-ai/glm-4.5-air:free
api_key: "os.environ/OPENROUTER_API_KEY"
order: 2
model_info:
max_input_tokens: 128000
max_tokens: 4096
- model_name: my-free-models
litellm_params:
model: openrouter/nvidia/nemotron-3-nano-30b-a3b:free
api_key: "os.environ/OPENROUTER_API_KEY"
order: 2
model_info:
max_input_tokens: 128000
max_tokens: 4096
- model_name: my-free-models
litellm_params:
model: openrouter/tngtech/tng-r1t-chimera:free
api_key: "os.environ/OPENROUTER_API_KEY"
order: 2
model_info:
max_input_tokens: 128000
max_tokens: 4096
- model_name: my-free-models
litellm_params:
model: openrouter/nvidia/nemotron-nano-12b-v2-vl:free
api_key: "os.environ/OPENROUTER_API_KEY"
order: 2
model_info:
max_input_tokens: 128000
max_tokens: 4096
- model_name: my-free-models
litellm_params:
model: openrouter/arcee-ai/trinity-mini:free
api_key: "os.environ/OPENROUTER_API_KEY"
order: 2
model_info:
max_input_tokens: 128000
max_tokens: 4096
- model_name: my-free-models
litellm_params:
model: openrouter/nvidia/nemotron-nano-9b-v2:free
api_key: "os.environ/OPENROUTER_API_KEY"
order: 2
model_info:
max_input_tokens: 128000
max_tokens: 4096
- model_name: my-free-models
litellm_params:
model: openrouter/upstage/solar-pro-3:free
api_key: "os.environ/OPENROUTER_API_KEY"
order: 2
model_info:
max_input_tokens: 128000
max_tokens: 8192
- model_name: my-free-coders-new
litellm_params:
model: gemini/gemini-3-flash-preview
api_key: "os.environ/GOOGLE_API_KEY"
order: 1
model_info:
max_input_tokens: 1048576
max_tokens: 65535
- model_name: my-free-coders-new
litellm_params:
model: gemini/gemini-2.5-flash
api_key: "os.environ/GOOGLE_API_KEY"
order: 1
model_info:
max_input_tokens: 1048576
max_tokens: 65535
- model_name: my-free-coders-new
litellm_params:
model: gemini/gemini-2.5-pro
api_key: "os.environ/GOOGLE_API_KEY"
order: 1
model_info:
max_input_tokens: 1048576
max_tokens: 65535
- model_name: my-free-coders-new
litellm_params:
model: openrouter/openai/gpt-oss-20b:free
api_key: "os.environ/OPENROUTER_API_KEY"
order: 2
model_info:
max_input_tokens: 128000
max_tokens: 8192
- model_name: my-free-coders-new
litellm_params:
model: openrouter/meta-llama/llama-3.3-70b-instruct:free
api_key: "os.environ/OPENROUTER_API_KEY"
order: 2
model_info:
max_input_tokens: 128000
max_tokens: 8192
- model_name: my-free-coders-new
litellm_params:
model: openrouter/qwen/qwen3-next-80b-a3b-instruct:free
api_key: "os.environ/OPENROUTER_API_KEY"
order: 2
model_info:
max_input_tokens: 262144
max_tokens: 8192
- model_name: my-free-coders-new
litellm_params:
model: openrouter/nvidia/nemotron-3-nano-30b-a3b:free
api_key: "os.environ/OPENROUTER_API_KEY"
order: 2
model_info:
max_input_tokens: 128000
max_tokens: 4096
- model_name: my-free-coders
litellm_params:
model: openrouter/openai/gpt-oss-120b:free
api_key: "os.environ/OPENROUTER_API_KEY"
order: 1
model_info:
max_input_tokens: 128000
max_tokens: 8192
- model_name: my-free-coders
litellm_params:
model: openrouter/arcee-ai/trinity-large-preview:free
api_key: "os.environ/OPENROUTER_API_KEY"
order: 2
model_info:
max_input_tokens: 128000
max_tokens: 4096
- model_name: my-paid-coders
litellm_params:
model: openrouter/openai/gpt-oss-20b
api_key: "os.environ/OPENROUTER_API_KEY"
order: 1
model_info:
max_input_tokens: 128000
max_tokens: 8192
- model_name: my-paid-coders
litellm_params:
model: openrouter/nousresearch/deephermes-3-mistral-24b-preview
api_key: "os.environ/OPENROUTER_API_KEY"
order: 2
model_info:
max_input_tokens: 32000
max_tokens: 8192
- model_name: my-paid-coders
litellm_params:
model: openrouter/qwen/qwen-2.5-7b-instruct
api_key: "os.environ/OPENROUTER_API_KEY"
order: 2
model_info:
max_input_tokens: 32768
max_tokens: 8192
- model_name: my-paid-coders
litellm_params:
model: openrouter/mistralai/ministral-3b
api_key: "os.environ/OPENROUTER_API_KEY"
order: 2
model_info:
max_input_tokens: 128000
max_tokens: 4096
- model_name: my-paid-coders
litellm_params:
model: openrouter/meta-llama/llama-3.1-8b-instruct
api_key: "os.environ/OPENROUTER_API_KEY"
order: 2
model_info:
max_input_tokens: 128000
max_tokens: 4096
- model_name: my-paid-coders
litellm_params:
model: openrouter/meta-llama/llama-3-8b-instruct
api_key: "os.environ/OPENROUTER_API_KEY"
order: 2
model_info:
max_input_tokens: 8192
max_tokens: 4096
- model_name: my-hf-models
litellm_params:
model: huggingface/meta-llama/Llama-3.3-70B-Instruct
api_key: "os.environ/HF_TOKEN"
order: 1
model_info:
max_input_tokens: 128000
max_tokens: 8192
- model_name: my-hf-models
litellm_params:
model: huggingface/together/deepseek-ai/DeepSeek-R1
api_key: "os.environ/HF_TOKEN"
order: 2
model_info:
max_input_tokens: 128000
max_tokens: 32768
- model_name: my-large-context
litellm_params:
model: openrouter/meta-llama/llama-3.3-70b-instruct:free
api_key: "os.environ/OPENROUTER_API_KEY"
order: 1
model_info:
max_input_tokens: 128000
max_tokens: 8192
- model_name: my-large-context
litellm_params:
model: openrouter/qwen/qwen3-next-80b-a3b-instruct:free
api_key: "os.environ/OPENROUTER_API_KEY"
order: 2
model_info:
max_input_tokens: 262144
max_tokens: 8192
router_settings:
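# model_group_alias lets clients keep a model name they already send: requests for
# "grok-code-fast-1" are transparently routed to the my-free-coders-new group.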
model_group_alias:
"grok-code-fast-1": "my-free-coders-new"
routing_strategy: latency-based-routing
enable_pre_call_checks: true
num_retries: 3
allowed_fails: 3
cooldown_time: 30
retry_after: 10
timeout: 600
stream_timeout: 300
max_fallbacks: 10
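# default_fallbacks is the catch-all chain used when the requested group has no entry in the
# per-group fallbacks lists further down.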
default_fallbacks:
- my-large-context
- my-free-models
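# retry_policy sets how many times each error class is retried; allowed_fails_policy sets how many
# failures of each class a deployment may accumulate before being cooled down for cooldown_time
# seconds. The very high BadRequest/NotFound/RateLimit allowances keep free-tier deployments from
# being benched over routine 4xx responses.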
retry_policy:
AuthenticationErrorRetries: 3
TimeoutErrorRetries: 3
RateLimitErrorRetries: 3
ContentPolicyViolationErrorRetries: 4
InternalServerErrorRetries: 6
allowed_fails_policy:
BadRequestErrorAllowedFails: 1000
NotFoundErrorAllowedFails: 1000
AuthenticationErrorAllowedFails: 10
TimeoutErrorAllowedFails: 12
RateLimitErrorAllowedFails: 10000
ContentPolicyViolationErrorAllowedFails: 15
InternalServerErrorAllowedFails: 20
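# Per-group fallback chains, tried in order when the primary group errors. context_window_fallbacks
# and content_policy_fallbacks use the same structure but fire specifically on context-length and
# content-filter errors.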
fallbacks:
- my-free-coders-new:
- my-large-context
- my-free-models
- my-hf-models
- my-free-coders:
- my-free-coders-new
- my-large-context
- my-free-models
- my-hf-models
- my-paid-coders:
- my-free-coders-new
- my-large-context
- my-free-models
- my-hf-models
- my-free-models:
- my-large-context
- my-hf-models
- my-hf-models:
- my-large-context
- my-free-models
- my-large-context:
- my-free-models
- my-hf-models
context_window_fallbacks:
- my-free-coders-new:
- my-large-context
- my-free-models
- my-hf-models
- my-free-coders:
- my-free-coders-new
- my-large-context
- my-free-models
- my-hf-models
- my-paid-coders:
- my-free-coders-new
- my-large-context
- my-free-models
- my-hf-models
- my-free-models:
- my-large-context
- my-hf-models
- my-hf-models:
- my-large-context
- my-free-models
- my-large-context:
- my-free-models
- my-hf-models
content_policy_fallbacks:
- my-free-coders-new:
- my-large-context
- my-free-models
- my-hf-models
- my-free-coders:
- my-free-coders-new
- my-large-context
- my-free-models
- my-hf-models
- my-paid-coders:
- my-free-coders-new
- my-large-context
- my-free-models
- my-hf-models
- my-free-models:
- my-large-context
- my-hf-models
- my-hf-models:
- my-large-context
- my-free-models
- my-large-context:
- my-free-models
- my-hf-models
default_litellm_params:
# Lower default to reduce output token usage; clients can override with max_tokens in the request
max_tokens: 2048