Spaces:
Paused
Paused
| // Last updated: 2025-01-15 | |
| // Thanks to AgentOps - https://github.com/AgentOps-AI/tokencost | |
| // 291 kb | |
| export const modelPrices = { | |
| "gpt-4": { | |
| max_tokens: 4096, | |
| max_input_tokens: 8192, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 3e-5, | |
| output_cost_per_token: 6e-5, | |
| litellm_provider: "openai", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_prompt_caching: true, | |
| supports_system_messages: true, | |
| }, | |
| "gpt-4o": { | |
| max_tokens: 16384, | |
| max_input_tokens: 128000, | |
| max_output_tokens: 16384, | |
| input_cost_per_token: 2.5e-6, | |
| output_cost_per_token: 1e-5, | |
| input_cost_per_token_batches: 1.25e-6, | |
| output_cost_per_token_batches: 5e-6, | |
| cache_read_input_token_cost: 1.25e-6, | |
| litellm_provider: "openai", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_parallel_function_calling: true, | |
| supports_response_schema: true, | |
| supports_vision: true, | |
| supports_prompt_caching: true, | |
| supports_system_messages: true, | |
| }, | |
| "gpt-4o-audio-preview": { | |
| max_tokens: 16384, | |
| max_input_tokens: 128000, | |
| max_output_tokens: 16384, | |
| input_cost_per_token: 2.5e-6, | |
| input_cost_per_audio_token: 0.0001, | |
| output_cost_per_token: 1e-5, | |
| output_cost_per_audio_token: 0.0002, | |
| litellm_provider: "openai", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_parallel_function_calling: true, | |
| supports_audio_input: true, | |
| supports_audio_output: true, | |
| supports_system_messages: true, | |
| }, | |
| "gpt-4o-audio-preview-2024-10-01": { | |
| max_tokens: 16384, | |
| max_input_tokens: 128000, | |
| max_output_tokens: 16384, | |
| input_cost_per_token: 2.5e-6, | |
| input_cost_per_audio_token: 0.0001, | |
| output_cost_per_token: 1e-5, | |
| output_cost_per_audio_token: 0.0002, | |
| litellm_provider: "openai", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_parallel_function_calling: true, | |
| supports_audio_input: true, | |
| supports_audio_output: true, | |
| supports_system_messages: true, | |
| }, | |
| "gpt-4o-mini": { | |
| max_tokens: 16384, | |
| max_input_tokens: 128000, | |
| max_output_tokens: 16384, | |
| input_cost_per_token: 1.5e-7, | |
| output_cost_per_token: 6e-7, | |
| input_cost_per_token_batches: 7.5e-8, | |
| output_cost_per_token_batches: 3e-7, | |
| cache_read_input_token_cost: 7.5e-8, | |
| litellm_provider: "openai", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_parallel_function_calling: true, | |
| supports_response_schema: true, | |
| supports_vision: true, | |
| supports_prompt_caching: true, | |
| supports_system_messages: true, | |
| }, | |
| "gpt-4o-mini-2024-07-18": { | |
| max_tokens: 16384, | |
| max_input_tokens: 128000, | |
| max_output_tokens: 16384, | |
| input_cost_per_token: 1.5e-7, | |
| output_cost_per_token: 6e-7, | |
| input_cost_per_token_batches: 7.5e-8, | |
| output_cost_per_token_batches: 3e-7, | |
| cache_read_input_token_cost: 7.5e-8, | |
| litellm_provider: "openai", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_parallel_function_calling: true, | |
| supports_response_schema: true, | |
| supports_vision: true, | |
| supports_prompt_caching: true, | |
| supports_system_messages: true, | |
| }, | |
| "o1-mini": { | |
| max_tokens: 65536, | |
| max_input_tokens: 128000, | |
| max_output_tokens: 65536, | |
| input_cost_per_token: 3e-6, | |
| output_cost_per_token: 1.2e-5, | |
| cache_read_input_token_cost: 1.5e-6, | |
| litellm_provider: "openai", | |
| mode: "chat", | |
| supports_vision: true, | |
| supports_prompt_caching: true, | |
| }, | |
| "o1-mini-2024-09-12": { | |
| max_tokens: 65536, | |
| max_input_tokens: 128000, | |
| max_output_tokens: 65536, | |
| input_cost_per_token: 3e-6, | |
| output_cost_per_token: 1.2e-5, | |
| cache_read_input_token_cost: 1.5e-6, | |
| litellm_provider: "openai", | |
| mode: "chat", | |
| supports_vision: true, | |
| supports_prompt_caching: true, | |
| }, | |
| "o1-preview": { | |
| max_tokens: 32768, | |
| max_input_tokens: 128000, | |
| max_output_tokens: 32768, | |
| input_cost_per_token: 1.5e-5, | |
| output_cost_per_token: 6e-5, | |
| cache_read_input_token_cost: 7.5e-6, | |
| litellm_provider: "openai", | |
| mode: "chat", | |
| supports_vision: true, | |
| supports_prompt_caching: true, | |
| }, | |
| "o1-preview-2024-09-12": { | |
| max_tokens: 32768, | |
| max_input_tokens: 128000, | |
| max_output_tokens: 32768, | |
| input_cost_per_token: 1.5e-5, | |
| output_cost_per_token: 6e-5, | |
| cache_read_input_token_cost: 7.5e-6, | |
| litellm_provider: "openai", | |
| mode: "chat", | |
| supports_vision: true, | |
| supports_prompt_caching: true, | |
| }, | |
| "chatgpt-4o-latest": { | |
| max_tokens: 4096, | |
| max_input_tokens: 128000, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 5e-6, | |
| output_cost_per_token: 1.5e-5, | |
| litellm_provider: "openai", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_parallel_function_calling: true, | |
| supports_vision: true, | |
| supports_prompt_caching: true, | |
| supports_system_messages: true, | |
| }, | |
| "gpt-4o-2024-05-13": { | |
| max_tokens: 4096, | |
| max_input_tokens: 128000, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 5e-6, | |
| output_cost_per_token: 1.5e-5, | |
| input_cost_per_token_batches: 2.5e-6, | |
| output_cost_per_token_batches: 7.5e-6, | |
| litellm_provider: "openai", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_parallel_function_calling: true, | |
| supports_vision: true, | |
| supports_prompt_caching: true, | |
| supports_system_messages: true, | |
| }, | |
| "gpt-4o-2024-08-06": { | |
| max_tokens: 16384, | |
| max_input_tokens: 128000, | |
| max_output_tokens: 16384, | |
| input_cost_per_token: 2.5e-6, | |
| output_cost_per_token: 1e-5, | |
| input_cost_per_token_batches: 1.25e-6, | |
| output_cost_per_token_batches: 5e-6, | |
| cache_read_input_token_cost: 1.25e-6, | |
| litellm_provider: "openai", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_parallel_function_calling: true, | |
| supports_response_schema: true, | |
| supports_vision: true, | |
| supports_prompt_caching: true, | |
| supports_system_messages: true, | |
| }, | |
| "gpt-4-turbo-preview": { | |
| max_tokens: 4096, | |
| max_input_tokens: 128000, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 1e-5, | |
| output_cost_per_token: 3e-5, | |
| litellm_provider: "openai", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_parallel_function_calling: true, | |
| supports_prompt_caching: true, | |
| supports_system_messages: true, | |
| }, | |
| "gpt-4-0314": { | |
| max_tokens: 4096, | |
| max_input_tokens: 8192, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 3e-5, | |
| output_cost_per_token: 6e-5, | |
| litellm_provider: "openai", | |
| mode: "chat", | |
| supports_prompt_caching: true, | |
| supports_system_messages: true, | |
| }, | |
| "gpt-4-0613": { | |
| max_tokens: 4096, | |
| max_input_tokens: 8192, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 3e-5, | |
| output_cost_per_token: 6e-5, | |
| litellm_provider: "openai", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_prompt_caching: true, | |
| supports_system_messages: true, | |
| }, | |
| "gpt-4-32k": { | |
| max_tokens: 4096, | |
| max_input_tokens: 32768, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 6e-5, | |
| output_cost_per_token: 0.00012, | |
| litellm_provider: "openai", | |
| mode: "chat", | |
| supports_prompt_caching: true, | |
| supports_system_messages: true, | |
| }, | |
| "gpt-4-32k-0314": { | |
| max_tokens: 4096, | |
| max_input_tokens: 32768, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 6e-5, | |
| output_cost_per_token: 0.00012, | |
| litellm_provider: "openai", | |
| mode: "chat", | |
| supports_prompt_caching: true, | |
| supports_system_messages: true, | |
| }, | |
| "gpt-4-32k-0613": { | |
| max_tokens: 4096, | |
| max_input_tokens: 32768, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 6e-5, | |
| output_cost_per_token: 0.00012, | |
| litellm_provider: "openai", | |
| mode: "chat", | |
| supports_prompt_caching: true, | |
| supports_system_messages: true, | |
| }, | |
| "gpt-4-turbo": { | |
| max_tokens: 4096, | |
| max_input_tokens: 128000, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 1e-5, | |
| output_cost_per_token: 3e-5, | |
| litellm_provider: "openai", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_parallel_function_calling: true, | |
| supports_vision: true, | |
| supports_prompt_caching: true, | |
| supports_system_messages: true, | |
| }, | |
| "gpt-4-turbo-2024-04-09": { | |
| max_tokens: 4096, | |
| max_input_tokens: 128000, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 1e-5, | |
| output_cost_per_token: 3e-5, | |
| litellm_provider: "openai", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_parallel_function_calling: true, | |
| supports_vision: true, | |
| supports_prompt_caching: true, | |
| supports_system_messages: true, | |
| }, | |
| "gpt-4-1106-preview": { | |
| max_tokens: 4096, | |
| max_input_tokens: 128000, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 1e-5, | |
| output_cost_per_token: 3e-5, | |
| litellm_provider: "openai", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_parallel_function_calling: true, | |
| supports_prompt_caching: true, | |
| supports_system_messages: true, | |
| }, | |
| "gpt-4-0125-preview": { | |
| max_tokens: 4096, | |
| max_input_tokens: 128000, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 1e-5, | |
| output_cost_per_token: 3e-5, | |
| litellm_provider: "openai", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_parallel_function_calling: true, | |
| supports_prompt_caching: true, | |
| supports_system_messages: true, | |
| }, | |
| "gpt-4-vision-preview": { | |
| max_tokens: 4096, | |
| max_input_tokens: 128000, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 1e-5, | |
| output_cost_per_token: 3e-5, | |
| litellm_provider: "openai", | |
| mode: "chat", | |
| supports_vision: true, | |
| supports_prompt_caching: true, | |
| supports_system_messages: true, | |
| }, | |
| "gpt-4-1106-vision-preview": { | |
| max_tokens: 4096, | |
| max_input_tokens: 128000, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 1e-5, | |
| output_cost_per_token: 3e-5, | |
| litellm_provider: "openai", | |
| mode: "chat", | |
| supports_vision: true, | |
| supports_prompt_caching: true, | |
| supports_system_messages: true, | |
| }, | |
| "gpt-3.5-turbo": { | |
| max_tokens: 4097, | |
| max_input_tokens: 16385, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 1.5e-6, | |
| output_cost_per_token: 2e-6, | |
| litellm_provider: "openai", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_prompt_caching: true, | |
| supports_system_messages: true, | |
| }, | |
| "gpt-3.5-turbo-0301": { | |
| max_tokens: 4097, | |
| max_input_tokens: 4097, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 1.5e-6, | |
| output_cost_per_token: 2e-6, | |
| litellm_provider: "openai", | |
| mode: "chat", | |
| supports_prompt_caching: true, | |
| supports_system_messages: true, | |
| }, | |
| "gpt-3.5-turbo-0613": { | |
| max_tokens: 4097, | |
| max_input_tokens: 4097, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 1.5e-6, | |
| output_cost_per_token: 2e-6, | |
| litellm_provider: "openai", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_prompt_caching: true, | |
| supports_system_messages: true, | |
| }, | |
| "gpt-3.5-turbo-1106": { | |
| max_tokens: 16385, | |
| max_input_tokens: 16385, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 1e-6, | |
| output_cost_per_token: 2e-6, | |
| litellm_provider: "openai", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_parallel_function_calling: true, | |
| supports_prompt_caching: true, | |
| supports_system_messages: true, | |
| }, | |
| "gpt-3.5-turbo-0125": { | |
| max_tokens: 16385, | |
| max_input_tokens: 16385, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 5e-7, | |
| output_cost_per_token: 1.5e-6, | |
| litellm_provider: "openai", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_parallel_function_calling: true, | |
| supports_prompt_caching: true, | |
| supports_system_messages: true, | |
| }, | |
| "gpt-3.5-turbo-16k": { | |
| max_tokens: 16385, | |
| max_input_tokens: 16385, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 3e-6, | |
| output_cost_per_token: 4e-6, | |
| litellm_provider: "openai", | |
| mode: "chat", | |
| supports_prompt_caching: true, | |
| supports_system_messages: true, | |
| }, | |
| "gpt-3.5-turbo-16k-0613": { | |
| max_tokens: 16385, | |
| max_input_tokens: 16385, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 3e-6, | |
| output_cost_per_token: 4e-6, | |
| litellm_provider: "openai", | |
| mode: "chat", | |
| supports_prompt_caching: true, | |
| supports_system_messages: true, | |
| }, | |
| "ft:gpt-3.5-turbo": { | |
| max_tokens: 4096, | |
| max_input_tokens: 16385, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 3e-6, | |
| output_cost_per_token: 6e-6, | |
| input_cost_per_token_batches: 1.5e-6, | |
| output_cost_per_token_batches: 3e-6, | |
| litellm_provider: "openai", | |
| mode: "chat", | |
| supports_system_messages: true, | |
| }, | |
| "ft:gpt-3.5-turbo-0125": { | |
| max_tokens: 4096, | |
| max_input_tokens: 16385, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 3e-6, | |
| output_cost_per_token: 6e-6, | |
| litellm_provider: "openai", | |
| mode: "chat", | |
| supports_system_messages: true, | |
| }, | |
| "ft:gpt-3.5-turbo-1106": { | |
| max_tokens: 4096, | |
| max_input_tokens: 16385, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 3e-6, | |
| output_cost_per_token: 6e-6, | |
| litellm_provider: "openai", | |
| mode: "chat", | |
| supports_system_messages: true, | |
| }, | |
| "ft:gpt-3.5-turbo-0613": { | |
| max_tokens: 4096, | |
| max_input_tokens: 4096, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 3e-6, | |
| output_cost_per_token: 6e-6, | |
| litellm_provider: "openai", | |
| mode: "chat", | |
| supports_system_messages: true, | |
| }, | |
| "ft:gpt-4-0613": { | |
| max_tokens: 4096, | |
| max_input_tokens: 8192, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 3e-5, | |
| output_cost_per_token: 6e-5, | |
| litellm_provider: "openai", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| source: | |
| "OpenAI needs to add pricing for this ft model, will be updated when added by OpenAI. Defaulting to base model pricing", | |
| supports_system_messages: true, | |
| }, | |
| "ft:gpt-4o-2024-08-06": { | |
| max_tokens: 16384, | |
| max_input_tokens: 128000, | |
| max_output_tokens: 16384, | |
| input_cost_per_token: 3.75e-6, | |
| output_cost_per_token: 1.5e-5, | |
| input_cost_per_token_batches: 1.875e-6, | |
| output_cost_per_token_batches: 7.5e-6, | |
| litellm_provider: "openai", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_parallel_function_calling: true, | |
| supports_response_schema: true, | |
| supports_vision: true, | |
| supports_system_messages: true, | |
| }, | |
| "ft:gpt-4o-mini-2024-07-18": { | |
| max_tokens: 16384, | |
| max_input_tokens: 128000, | |
| max_output_tokens: 16384, | |
| input_cost_per_token: 3e-7, | |
| output_cost_per_token: 1.2e-6, | |
| input_cost_per_token_batches: 1.5e-7, | |
| output_cost_per_token_batches: 6e-7, | |
| cache_read_input_token_cost: 1.5e-7, | |
| litellm_provider: "openai", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_parallel_function_calling: true, | |
| supports_response_schema: true, | |
| supports_vision: true, | |
| supports_prompt_caching: true, | |
| supports_system_messages: true, | |
| }, | |
| "ft:davinci-002": { | |
| max_tokens: 16384, | |
| max_input_tokens: 16384, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 2e-6, | |
| output_cost_per_token: 2e-6, | |
| input_cost_per_token_batches: 1e-6, | |
| output_cost_per_token_batches: 1e-6, | |
| litellm_provider: "text-completion-openai", | |
| mode: "completion", | |
| }, | |
| "ft:babbage-002": { | |
| max_tokens: 16384, | |
| max_input_tokens: 16384, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 4e-7, | |
| output_cost_per_token: 4e-7, | |
| input_cost_per_token_batches: 2e-7, | |
| output_cost_per_token_batches: 2e-7, | |
| litellm_provider: "text-completion-openai", | |
| mode: "completion", | |
| }, | |
| "text-embedding-3-large": { | |
| max_tokens: 8191, | |
| max_input_tokens: 8191, | |
| output_vector_size: 3072, | |
| input_cost_per_token: 1.3e-7, | |
| output_cost_per_token: 0.0, | |
| input_cost_per_token_batches: 6.5e-8, | |
| output_cost_per_token_batches: 0.0, | |
| litellm_provider: "openai", | |
| mode: "embedding", | |
| }, | |
| "text-embedding-3-small": { | |
| max_tokens: 8191, | |
| max_input_tokens: 8191, | |
| output_vector_size: 1536, | |
| input_cost_per_token: 2e-8, | |
| output_cost_per_token: 0.0, | |
| input_cost_per_token_batches: 1e-8, | |
| output_cost_per_token_batches: 0.0, | |
| litellm_provider: "openai", | |
| mode: "embedding", | |
| }, | |
| "text-embedding-ada-002": { | |
| max_tokens: 8191, | |
| max_input_tokens: 8191, | |
| output_vector_size: 1536, | |
| input_cost_per_token: 1e-7, | |
| output_cost_per_token: 0.0, | |
| litellm_provider: "openai", | |
| mode: "embedding", | |
| }, | |
| "text-embedding-ada-002-v2": { | |
| max_tokens: 8191, | |
| max_input_tokens: 8191, | |
| input_cost_per_token: 1e-7, | |
| output_cost_per_token: 0.0, | |
| input_cost_per_token_batches: 5e-8, | |
| output_cost_per_token_batches: 0.0, | |
| litellm_provider: "openai", | |
| mode: "embedding", | |
| }, | |
| "text-moderation-stable": { | |
| max_tokens: 32768, | |
| max_input_tokens: 32768, | |
| max_output_tokens: 0, | |
| input_cost_per_token: 0.0, | |
| output_cost_per_token: 0.0, | |
| litellm_provider: "openai", | |
| mode: "moderations", | |
| }, | |
| "text-moderation-007": { | |
| max_tokens: 32768, | |
| max_input_tokens: 32768, | |
| max_output_tokens: 0, | |
| input_cost_per_token: 0.0, | |
| output_cost_per_token: 0.0, | |
| litellm_provider: "openai", | |
| mode: "moderations", | |
| }, | |
| "text-moderation-latest": { | |
| max_tokens: 32768, | |
| max_input_tokens: 32768, | |
| max_output_tokens: 0, | |
| input_cost_per_token: 0.0, | |
| output_cost_per_token: 0.0, | |
| litellm_provider: "openai", | |
| mode: "moderations", | |
| }, | |
| "256-x-256/dall-e-2": { | |
| mode: "image_generation", | |
| input_cost_per_pixel: 2.4414e-7, | |
| output_cost_per_pixel: 0.0, | |
| litellm_provider: "openai", | |
| }, | |
| "512-x-512/dall-e-2": { | |
| mode: "image_generation", | |
| input_cost_per_pixel: 6.86e-8, | |
| output_cost_per_pixel: 0.0, | |
| litellm_provider: "openai", | |
| }, | |
| "1024-x-1024/dall-e-2": { | |
| mode: "image_generation", | |
| input_cost_per_pixel: 1.9e-8, | |
| output_cost_per_pixel: 0.0, | |
| litellm_provider: "openai", | |
| }, | |
| "hd/1024-x-1792/dall-e-3": { | |
| mode: "image_generation", | |
| input_cost_per_pixel: 6.539e-8, | |
| output_cost_per_pixel: 0.0, | |
| litellm_provider: "openai", | |
| }, | |
| "hd/1792-x-1024/dall-e-3": { | |
| mode: "image_generation", | |
| input_cost_per_pixel: 6.539e-8, | |
| output_cost_per_pixel: 0.0, | |
| litellm_provider: "openai", | |
| }, | |
| "hd/1024-x-1024/dall-e-3": { | |
| mode: "image_generation", | |
| input_cost_per_pixel: 7.629e-8, | |
| output_cost_per_pixel: 0.0, | |
| litellm_provider: "openai", | |
| }, | |
| "standard/1024-x-1792/dall-e-3": { | |
| mode: "image_generation", | |
| input_cost_per_pixel: 4.359e-8, | |
| output_cost_per_pixel: 0.0, | |
| litellm_provider: "openai", | |
| }, | |
| "standard/1792-x-1024/dall-e-3": { | |
| mode: "image_generation", | |
| input_cost_per_pixel: 4.359e-8, | |
| output_cost_per_pixel: 0.0, | |
| litellm_provider: "openai", | |
| }, | |
| "standard/1024-x-1024/dall-e-3": { | |
| mode: "image_generation", | |
| input_cost_per_pixel: 3.81469e-8, | |
| output_cost_per_pixel: 0.0, | |
| litellm_provider: "openai", | |
| }, | |
| "whisper-1": { | |
| mode: "audio_transcription", | |
| input_cost_per_second: 0, | |
| output_cost_per_second: 0.0001, | |
| litellm_provider: "openai", | |
| }, | |
| "tts-1": { | |
| mode: "audio_speech", | |
| input_cost_per_character: 1.5e-5, | |
| litellm_provider: "openai", | |
| }, | |
| "tts-1-hd": { | |
| mode: "audio_speech", | |
| input_cost_per_character: 3e-5, | |
| litellm_provider: "openai", | |
| }, | |
| "azure/tts-1": { | |
| mode: "audio_speech", | |
| input_cost_per_character: 1.5e-5, | |
| litellm_provider: "azure", | |
| }, | |
| "azure/tts-1-hd": { | |
| mode: "audio_speech", | |
| input_cost_per_character: 3e-5, | |
| litellm_provider: "azure", | |
| }, | |
| "azure/whisper-1": { | |
| mode: "audio_transcription", | |
| input_cost_per_second: 0, | |
| output_cost_per_second: 0.0001, | |
| litellm_provider: "azure", | |
| }, | |
| "azure/o1-mini": { | |
| max_tokens: 65536, | |
| max_input_tokens: 128000, | |
| max_output_tokens: 65536, | |
| input_cost_per_token: 3e-6, | |
| output_cost_per_token: 1.2e-5, | |
| cache_read_input_token_cost: 1.5e-6, | |
| litellm_provider: "azure", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_parallel_function_calling: true, | |
| supports_vision: false, | |
| supports_prompt_caching: true, | |
| }, | |
| "azure/o1-mini-2024-09-12": { | |
| max_tokens: 65536, | |
| max_input_tokens: 128000, | |
| max_output_tokens: 65536, | |
| input_cost_per_token: 3e-6, | |
| output_cost_per_token: 1.2e-5, | |
| cache_read_input_token_cost: 1.5e-6, | |
| litellm_provider: "azure", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_parallel_function_calling: true, | |
| supports_vision: false, | |
| supports_prompt_caching: true, | |
| }, | |
| "azure/o1-preview": { | |
| max_tokens: 32768, | |
| max_input_tokens: 128000, | |
| max_output_tokens: 32768, | |
| input_cost_per_token: 1.5e-5, | |
| output_cost_per_token: 6e-5, | |
| cache_read_input_token_cost: 7.5e-6, | |
| litellm_provider: "azure", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_parallel_function_calling: true, | |
| supports_vision: false, | |
| supports_prompt_caching: true, | |
| }, | |
| "azure/o1-preview-2024-09-12": { | |
| max_tokens: 32768, | |
| max_input_tokens: 128000, | |
| max_output_tokens: 32768, | |
| input_cost_per_token: 1.5e-5, | |
| output_cost_per_token: 6e-5, | |
| cache_read_input_token_cost: 7.5e-6, | |
| litellm_provider: "azure", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_parallel_function_calling: true, | |
| supports_vision: false, | |
| supports_prompt_caching: true, | |
| }, | |
| "azure/gpt-4o": { | |
| max_tokens: 4096, | |
| max_input_tokens: 128000, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 5e-6, | |
| output_cost_per_token: 1.5e-5, | |
| cache_read_input_token_cost: 1.25e-6, | |
| litellm_provider: "azure", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_parallel_function_calling: true, | |
| supports_vision: true, | |
| supports_prompt_caching: true, | |
| }, | |
| "azure/gpt-4o-2024-08-06": { | |
| max_tokens: 16384, | |
| max_input_tokens: 128000, | |
| max_output_tokens: 16384, | |
| input_cost_per_token: 2.75e-6, | |
| output_cost_per_token: 1.1e-5, | |
| litellm_provider: "azure", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_parallel_function_calling: true, | |
| supports_response_schema: true, | |
| supports_vision: true, | |
| supports_prompt_caching: true, | |
| }, | |
| "azure/gpt-4o-2024-05-13": { | |
| max_tokens: 4096, | |
| max_input_tokens: 128000, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 5e-6, | |
| output_cost_per_token: 1.5e-5, | |
| litellm_provider: "azure", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_parallel_function_calling: true, | |
| supports_vision: true, | |
| supports_prompt_caching: true, | |
| }, | |
| "azure/global-standard/gpt-4o-2024-08-06": { | |
| max_tokens: 16384, | |
| max_input_tokens: 128000, | |
| max_output_tokens: 16384, | |
| input_cost_per_token: 2.5e-6, | |
| output_cost_per_token: 1e-5, | |
| litellm_provider: "azure", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_parallel_function_calling: true, | |
| supports_response_schema: true, | |
| supports_vision: true, | |
| supports_prompt_caching: true, | |
| }, | |
| "azure/global-standard/gpt-4o-mini": { | |
| max_tokens: 16384, | |
| max_input_tokens: 128000, | |
| max_output_tokens: 16384, | |
| input_cost_per_token: 1.5e-7, | |
| output_cost_per_token: 6e-7, | |
| litellm_provider: "azure", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_parallel_function_calling: true, | |
| supports_response_schema: true, | |
| supports_vision: true, | |
| }, | |
| "azure/gpt-4o-mini": { | |
| max_tokens: 16384, | |
| max_input_tokens: 128000, | |
| max_output_tokens: 16384, | |
| input_cost_per_token: 1.65e-7, | |
| output_cost_per_token: 6.6e-7, | |
| cache_read_input_token_cost: 7.5e-8, | |
| litellm_provider: "azure", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_parallel_function_calling: true, | |
| supports_response_schema: true, | |
| supports_vision: true, | |
| supports_prompt_caching: true, | |
| }, | |
| "azure/gpt-4-turbo-2024-04-09": { | |
| max_tokens: 4096, | |
| max_input_tokens: 128000, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 1e-5, | |
| output_cost_per_token: 3e-5, | |
| litellm_provider: "azure", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_parallel_function_calling: true, | |
| supports_vision: true, | |
| }, | |
| "azure/gpt-4-0125-preview": { | |
| max_tokens: 4096, | |
| max_input_tokens: 128000, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 1e-5, | |
| output_cost_per_token: 3e-5, | |
| litellm_provider: "azure", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_parallel_function_calling: true, | |
| }, | |
| "azure/gpt-4-1106-preview": { | |
| max_tokens: 4096, | |
| max_input_tokens: 128000, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 1e-5, | |
| output_cost_per_token: 3e-5, | |
| litellm_provider: "azure", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_parallel_function_calling: true, | |
| }, | |
| "azure/gpt-4-0613": { | |
| max_tokens: 4096, | |
| max_input_tokens: 8192, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 3e-5, | |
| output_cost_per_token: 6e-5, | |
| litellm_provider: "azure", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| }, | |
| "azure/gpt-4-32k-0613": { | |
| max_tokens: 4096, | |
| max_input_tokens: 32768, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 6e-5, | |
| output_cost_per_token: 0.00012, | |
| litellm_provider: "azure", | |
| mode: "chat", | |
| }, | |
| "azure/gpt-4-32k": { | |
| max_tokens: 4096, | |
| max_input_tokens: 32768, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 6e-5, | |
| output_cost_per_token: 0.00012, | |
| litellm_provider: "azure", | |
| mode: "chat", | |
| }, | |
| "azure/gpt-4": { | |
| max_tokens: 4096, | |
| max_input_tokens: 8192, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 3e-5, | |
| output_cost_per_token: 6e-5, | |
| litellm_provider: "azure", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| }, | |
| "azure/gpt-4-turbo": { | |
| max_tokens: 4096, | |
| max_input_tokens: 128000, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 1e-5, | |
| output_cost_per_token: 3e-5, | |
| litellm_provider: "azure", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_parallel_function_calling: true, | |
| }, | |
| "azure/gpt-4-turbo-vision-preview": { | |
| max_tokens: 4096, | |
| max_input_tokens: 128000, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 1e-5, | |
| output_cost_per_token: 3e-5, | |
| litellm_provider: "azure", | |
| mode: "chat", | |
| supports_vision: true, | |
| }, | |
| "azure/gpt-35-turbo-16k-0613": { | |
| max_tokens: 4096, | |
| max_input_tokens: 16385, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 3e-6, | |
| output_cost_per_token: 4e-6, | |
| litellm_provider: "azure", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| }, | |
| "azure/gpt-35-turbo-1106": { | |
| max_tokens: 4096, | |
| max_input_tokens: 16384, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 1e-6, | |
| output_cost_per_token: 2e-6, | |
| litellm_provider: "azure", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_parallel_function_calling: true, | |
| }, | |
| "azure/gpt-35-turbo-0613": { | |
| max_tokens: 4097, | |
| max_input_tokens: 4097, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 1.5e-6, | |
| output_cost_per_token: 2e-6, | |
| litellm_provider: "azure", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_parallel_function_calling: true, | |
| }, | |
| "azure/gpt-35-turbo-0301": { | |
| max_tokens: 4097, | |
| max_input_tokens: 4097, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 2e-7, | |
| output_cost_per_token: 2e-6, | |
| litellm_provider: "azure", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_parallel_function_calling: true, | |
| }, | |
| "azure/gpt-35-turbo-0125": { | |
| max_tokens: 4096, | |
| max_input_tokens: 16384, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 5e-7, | |
| output_cost_per_token: 1.5e-6, | |
| litellm_provider: "azure", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_parallel_function_calling: true, | |
| }, | |
| "azure/gpt-35-turbo-16k": { | |
| max_tokens: 4096, | |
| max_input_tokens: 16385, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 3e-6, | |
| output_cost_per_token: 4e-6, | |
| litellm_provider: "azure", | |
| mode: "chat", | |
| }, | |
| "azure/gpt-35-turbo": { | |
| max_tokens: 4096, | |
| max_input_tokens: 4097, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 5e-7, | |
| output_cost_per_token: 1.5e-6, | |
| litellm_provider: "azure", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| }, | |
| "azure/gpt-3.5-turbo-instruct-0914": { | |
| max_tokens: 4097, | |
| max_input_tokens: 4097, | |
| input_cost_per_token: 1.5e-6, | |
| output_cost_per_token: 2e-6, | |
| litellm_provider: "azure_text", | |
| mode: "completion", | |
| }, | |
| "azure/gpt-35-turbo-instruct": { | |
| max_tokens: 4097, | |
| max_input_tokens: 4097, | |
| input_cost_per_token: 1.5e-6, | |
| output_cost_per_token: 2e-6, | |
| litellm_provider: "azure_text", | |
| mode: "completion", | |
| }, | |
| "azure/gpt-35-turbo-instruct-0914": { | |
| max_tokens: 4097, | |
| max_input_tokens: 4097, | |
| input_cost_per_token: 1.5e-6, | |
| output_cost_per_token: 2e-6, | |
| litellm_provider: "azure_text", | |
| mode: "completion", | |
| }, | |
| "azure/mistral-large-latest": { | |
| max_tokens: 32000, | |
| max_input_tokens: 32000, | |
| input_cost_per_token: 8e-6, | |
| output_cost_per_token: 2.4e-5, | |
| litellm_provider: "azure", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| }, | |
| "azure/mistral-large-2402": { | |
| max_tokens: 32000, | |
| max_input_tokens: 32000, | |
| input_cost_per_token: 8e-6, | |
| output_cost_per_token: 2.4e-5, | |
| litellm_provider: "azure", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| }, | |
| "azure/command-r-plus": { | |
| max_tokens: 4096, | |
| max_input_tokens: 128000, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 3e-6, | |
| output_cost_per_token: 1.5e-5, | |
| litellm_provider: "azure", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| }, | |
| "azure/ada": { | |
| max_tokens: 8191, | |
| max_input_tokens: 8191, | |
| input_cost_per_token: 1e-7, | |
| output_cost_per_token: 0.0, | |
| litellm_provider: "azure", | |
| mode: "embedding", | |
| }, | |
| "azure/text-embedding-ada-002": { | |
| max_tokens: 8191, | |
| max_input_tokens: 8191, | |
| input_cost_per_token: 1e-7, | |
| output_cost_per_token: 0.0, | |
| litellm_provider: "azure", | |
| mode: "embedding", | |
| }, | |
| "azure/text-embedding-3-large": { | |
| max_tokens: 8191, | |
| max_input_tokens: 8191, | |
| input_cost_per_token: 1.3e-7, | |
| output_cost_per_token: 0.0, | |
| litellm_provider: "azure", | |
| mode: "embedding", | |
| }, | |
| "azure/text-embedding-3-small": { | |
| max_tokens: 8191, | |
| max_input_tokens: 8191, | |
| input_cost_per_token: 2e-8, | |
| output_cost_per_token: 0.0, | |
| litellm_provider: "azure", | |
| mode: "embedding", | |
| }, | |
| "azure/standard/1024-x-1024/dall-e-3": { | |
| input_cost_per_pixel: 3.81469e-8, | |
| output_cost_per_token: 0.0, | |
| litellm_provider: "azure", | |
| mode: "image_generation", | |
| }, | |
| "azure/hd/1024-x-1024/dall-e-3": { | |
| input_cost_per_pixel: 7.629e-8, | |
| output_cost_per_token: 0.0, | |
| litellm_provider: "azure", | |
| mode: "image_generation", | |
| }, | |
| "azure/standard/1024-x-1792/dall-e-3": { | |
| input_cost_per_pixel: 4.359e-8, | |
| output_cost_per_token: 0.0, | |
| litellm_provider: "azure", | |
| mode: "image_generation", | |
| }, | |
| "azure/standard/1792-x-1024/dall-e-3": { | |
| input_cost_per_pixel: 4.359e-8, | |
| output_cost_per_token: 0.0, | |
| litellm_provider: "azure", | |
| mode: "image_generation", | |
| }, | |
| "azure/hd/1024-x-1792/dall-e-3": { | |
| input_cost_per_pixel: 6.539e-8, | |
| output_cost_per_token: 0.0, | |
| litellm_provider: "azure", | |
| mode: "image_generation", | |
| }, | |
| "azure/hd/1792-x-1024/dall-e-3": { | |
| input_cost_per_pixel: 6.539e-8, | |
| output_cost_per_token: 0.0, | |
| litellm_provider: "azure", | |
| mode: "image_generation", | |
| }, | |
| "azure/standard/1024-x-1024/dall-e-2": { | |
| input_cost_per_pixel: 0.0, | |
| output_cost_per_token: 0.0, | |
| litellm_provider: "azure", | |
| mode: "image_generation", | |
| }, | |
| "azure_ai/jamba-instruct": { | |
| max_tokens: 4096, | |
| max_input_tokens: 70000, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 5e-7, | |
| output_cost_per_token: 7e-7, | |
| litellm_provider: "azure_ai", | |
| mode: "chat", | |
| }, | |
| "azure_ai/mistral-large": { | |
| max_tokens: 8191, | |
| max_input_tokens: 32000, | |
| max_output_tokens: 8191, | |
| input_cost_per_token: 4e-6, | |
| output_cost_per_token: 1.2e-5, | |
| litellm_provider: "azure_ai", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| }, | |
| "azure_ai/mistral-small": { | |
| max_tokens: 8191, | |
| max_input_tokens: 32000, | |
| max_output_tokens: 8191, | |
| input_cost_per_token: 1e-6, | |
| output_cost_per_token: 3e-6, | |
| litellm_provider: "azure_ai", | |
| supports_function_calling: true, | |
| mode: "chat", | |
| }, | |
| "azure_ai/Meta-Llama-3-70B-Instruct": { | |
| max_tokens: 2048, | |
| max_input_tokens: 8192, | |
| max_output_tokens: 2048, | |
| input_cost_per_token: 1.1e-6, | |
| output_cost_per_token: 3.7e-7, | |
| litellm_provider: "azure_ai", | |
| mode: "chat", | |
| }, | |
| "azure_ai/Meta-Llama-3.1-8B-Instruct": { | |
| max_tokens: 2048, | |
| max_input_tokens: 128000, | |
| max_output_tokens: 2048, | |
| input_cost_per_token: 3e-7, | |
| output_cost_per_token: 6.1e-7, | |
| litellm_provider: "azure_ai", | |
| mode: "chat", | |
| source: | |
| "https://azuremarketplace.microsoft.com/en-us/marketplace/apps/metagenai.meta-llama-3-1-8b-instruct-offer?tab=PlansAndPrice", | |
| }, | |
| "azure_ai/Meta-Llama-3.1-70B-Instruct": { | |
| max_tokens: 2048, | |
| max_input_tokens: 128000, | |
| max_output_tokens: 2048, | |
| input_cost_per_token: 2.68e-6, | |
| output_cost_per_token: 3.54e-6, | |
| litellm_provider: "azure_ai", | |
| mode: "chat", | |
| source: | |
| "https://azuremarketplace.microsoft.com/en-us/marketplace/apps/metagenai.meta-llama-3-1-70b-instruct-offer?tab=PlansAndPrice", | |
| }, | |
| "azure_ai/Meta-Llama-3.1-405B-Instruct": { | |
| max_tokens: 2048, | |
| max_input_tokens: 128000, | |
| max_output_tokens: 2048, | |
| input_cost_per_token: 5.33e-6, | |
| output_cost_per_token: 1.6e-5, | |
| litellm_provider: "azure_ai", | |
| mode: "chat", | |
| source: | |
| "https://azuremarketplace.microsoft.com/en-us/marketplace/apps/metagenai.meta-llama-3-1-405b-instruct-offer?tab=PlansAndPrice", | |
| }, | |
| "azure_ai/cohere-rerank-v3-multilingual": { | |
| max_tokens: 4096, | |
| max_input_tokens: 4096, | |
| max_output_tokens: 4096, | |
| max_query_tokens: 2048, | |
| input_cost_per_token: 0.0, | |
| input_cost_per_query: 0.002, | |
| output_cost_per_token: 0.0, | |
| litellm_provider: "azure_ai", | |
| mode: "rerank", | |
| }, | |
| "azure_ai/cohere-rerank-v3-english": { | |
| max_tokens: 4096, | |
| max_input_tokens: 4096, | |
| max_output_tokens: 4096, | |
| max_query_tokens: 2048, | |
| input_cost_per_token: 0.0, | |
| input_cost_per_query: 0.002, | |
| output_cost_per_token: 0.0, | |
| litellm_provider: "azure_ai", | |
| mode: "rerank", | |
| }, | |
| "azure_ai/Cohere-embed-v3-english": { | |
| max_tokens: 512, | |
| max_input_tokens: 512, | |
| output_vector_size: 1024, | |
| input_cost_per_token: 1e-7, | |
| output_cost_per_token: 0.0, | |
| litellm_provider: "azure_ai", | |
| mode: "embedding", | |
| source: | |
| "https://azuremarketplace.microsoft.com/en-us/marketplace/apps/cohere.cohere-embed-v3-english-offer?tab=PlansAndPrice", | |
| }, | |
| "azure_ai/Cohere-embed-v3-multilingual": { | |
| max_tokens: 512, | |
| max_input_tokens: 512, | |
| output_vector_size: 1024, | |
| input_cost_per_token: 1e-7, | |
| output_cost_per_token: 0.0, | |
| litellm_provider: "azure_ai", | |
| mode: "embedding", | |
| source: | |
| "https://azuremarketplace.microsoft.com/en-us/marketplace/apps/cohere.cohere-embed-v3-english-offer?tab=PlansAndPrice", | |
| }, | |
| "babbage-002": { | |
| max_tokens: 16384, | |
| max_input_tokens: 16384, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 4e-7, | |
| output_cost_per_token: 4e-7, | |
| litellm_provider: "text-completion-openai", | |
| mode: "completion", | |
| }, | |
| "davinci-002": { | |
| max_tokens: 16384, | |
| max_input_tokens: 16384, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 2e-6, | |
| output_cost_per_token: 2e-6, | |
| litellm_provider: "text-completion-openai", | |
| mode: "completion", | |
| }, | |
| "gpt-3.5-turbo-instruct": { | |
| max_tokens: 4096, | |
| max_input_tokens: 8192, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 1.5e-6, | |
| output_cost_per_token: 2e-6, | |
| litellm_provider: "text-completion-openai", | |
| mode: "completion", | |
| }, | |
| "gpt-3.5-turbo-instruct-0914": { | |
| max_tokens: 4097, | |
| max_input_tokens: 8192, | |
| max_output_tokens: 4097, | |
| input_cost_per_token: 1.5e-6, | |
| output_cost_per_token: 2e-6, | |
| litellm_provider: "text-completion-openai", | |
| mode: "completion", | |
| }, | |
| "claude-instant-1": { | |
| max_tokens: 8191, | |
| max_input_tokens: 100000, | |
| max_output_tokens: 8191, | |
| input_cost_per_token: 1.63e-6, | |
| output_cost_per_token: 5.51e-6, | |
| litellm_provider: "anthropic", | |
| mode: "chat", | |
| }, | |
| "mistral/mistral-tiny": { | |
| max_tokens: 8191, | |
| max_input_tokens: 32000, | |
| max_output_tokens: 8191, | |
| input_cost_per_token: 2.5e-7, | |
| output_cost_per_token: 2.5e-7, | |
| litellm_provider: "mistral", | |
| mode: "chat", | |
| supports_assistant_prefill: true, | |
| }, | |
| "mistral/mistral-small": { | |
| max_tokens: 8191, | |
| max_input_tokens: 32000, | |
| max_output_tokens: 8191, | |
| input_cost_per_token: 1e-6, | |
| output_cost_per_token: 3e-6, | |
| litellm_provider: "mistral", | |
| supports_function_calling: true, | |
| mode: "chat", | |
| supports_assistant_prefill: true, | |
| }, | |
| "mistral/mistral-small-latest": { | |
| max_tokens: 8191, | |
| max_input_tokens: 32000, | |
| max_output_tokens: 8191, | |
| input_cost_per_token: 1e-6, | |
| output_cost_per_token: 3e-6, | |
| litellm_provider: "mistral", | |
| supports_function_calling: true, | |
| mode: "chat", | |
| supports_assistant_prefill: true, | |
| }, | |
| "mistral/mistral-medium": { | |
| max_tokens: 8191, | |
| max_input_tokens: 32000, | |
| max_output_tokens: 8191, | |
| input_cost_per_token: 2.7e-6, | |
| output_cost_per_token: 8.1e-6, | |
| litellm_provider: "mistral", | |
| mode: "chat", | |
| supports_assistant_prefill: true, | |
| }, | |
| "mistral/mistral-medium-latest": { | |
| max_tokens: 8191, | |
| max_input_tokens: 32000, | |
| max_output_tokens: 8191, | |
| input_cost_per_token: 2.7e-6, | |
| output_cost_per_token: 8.1e-6, | |
| litellm_provider: "mistral", | |
| mode: "chat", | |
| supports_assistant_prefill: true, | |
| }, | |
| "mistral/mistral-medium-2312": { | |
| max_tokens: 8191, | |
| max_input_tokens: 32000, | |
| max_output_tokens: 8191, | |
| input_cost_per_token: 2.7e-6, | |
| output_cost_per_token: 8.1e-6, | |
| litellm_provider: "mistral", | |
| mode: "chat", | |
| supports_assistant_prefill: true, | |
| }, | |
| "mistral/mistral-large-latest": { | |
| max_tokens: 128000, | |
| max_input_tokens: 128000, | |
| max_output_tokens: 128000, | |
| input_cost_per_token: 2e-6, | |
| output_cost_per_token: 6e-6, | |
| litellm_provider: "mistral", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_assistant_prefill: true, | |
| }, | |
| "mistral/mistral-large-2402": { | |
| max_tokens: 8191, | |
| max_input_tokens: 32000, | |
| max_output_tokens: 8191, | |
| input_cost_per_token: 4e-6, | |
| output_cost_per_token: 1.2e-5, | |
| litellm_provider: "mistral", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_assistant_prefill: true, | |
| }, | |
| "mistral/mistral-large-2407": { | |
| max_tokens: 128000, | |
| max_input_tokens: 128000, | |
| max_output_tokens: 128000, | |
| input_cost_per_token: 3e-6, | |
| output_cost_per_token: 9e-6, | |
| litellm_provider: "mistral", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_assistant_prefill: true, | |
| }, | |
| "mistral/pixtral-12b-2409": { | |
| max_tokens: 128000, | |
| max_input_tokens: 128000, | |
| max_output_tokens: 128000, | |
| input_cost_per_token: 1.5e-7, | |
| output_cost_per_token: 1.5e-7, | |
| litellm_provider: "mistral", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_assistant_prefill: true, | |
| supports_vision: true, | |
| }, | |
| "mistral/open-mistral-7b": { | |
| max_tokens: 8191, | |
| max_input_tokens: 32000, | |
| max_output_tokens: 8191, | |
| input_cost_per_token: 2.5e-7, | |
| output_cost_per_token: 2.5e-7, | |
| litellm_provider: "mistral", | |
| mode: "chat", | |
| supports_assistant_prefill: true, | |
| }, | |
| "mistral/open-mixtral-8x7b": { | |
| max_tokens: 8191, | |
| max_input_tokens: 32000, | |
| max_output_tokens: 8191, | |
| input_cost_per_token: 7e-7, | |
| output_cost_per_token: 7e-7, | |
| litellm_provider: "mistral", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_assistant_prefill: true, | |
| }, | |
| "mistral/open-mixtral-8x22b": { | |
| max_tokens: 8191, | |
| max_input_tokens: 64000, | |
| max_output_tokens: 8191, | |
| input_cost_per_token: 2e-6, | |
| output_cost_per_token: 6e-6, | |
| litellm_provider: "mistral", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_assistant_prefill: true, | |
| }, | |
| "mistral/codestral-latest": { | |
| max_tokens: 8191, | |
| max_input_tokens: 32000, | |
| max_output_tokens: 8191, | |
| input_cost_per_token: 1e-6, | |
| output_cost_per_token: 3e-6, | |
| litellm_provider: "mistral", | |
| mode: "chat", | |
| supports_assistant_prefill: true, | |
| }, | |
| "mistral/codestral-2405": { | |
| max_tokens: 8191, | |
| max_input_tokens: 32000, | |
| max_output_tokens: 8191, | |
| input_cost_per_token: 1e-6, | |
| output_cost_per_token: 3e-6, | |
| litellm_provider: "mistral", | |
| mode: "chat", | |
| supports_assistant_prefill: true, | |
| }, | |
| "mistral/open-mistral-nemo": { | |
| max_tokens: 128000, | |
| max_input_tokens: 128000, | |
| max_output_tokens: 128000, | |
| input_cost_per_token: 3e-7, | |
| output_cost_per_token: 3e-7, | |
| litellm_provider: "mistral", | |
| mode: "chat", | |
| source: "https://mistral.ai/technology/", | |
| supports_assistant_prefill: true, | |
| }, | |
| "mistral/open-mistral-nemo-2407": { | |
| max_tokens: 128000, | |
| max_input_tokens: 128000, | |
| max_output_tokens: 128000, | |
| input_cost_per_token: 3e-7, | |
| output_cost_per_token: 3e-7, | |
| litellm_provider: "mistral", | |
| mode: "chat", | |
| source: "https://mistral.ai/technology/", | |
| supports_assistant_prefill: true, | |
| }, | |
| "mistral/open-codestral-mamba": { | |
| max_tokens: 256000, | |
| max_input_tokens: 256000, | |
| max_output_tokens: 256000, | |
| input_cost_per_token: 2.5e-7, | |
| output_cost_per_token: 2.5e-7, | |
| litellm_provider: "mistral", | |
| mode: "chat", | |
| source: "https://mistral.ai/technology/", | |
| supports_assistant_prefill: true, | |
| }, | |
| "mistral/codestral-mamba-latest": { | |
| max_tokens: 256000, | |
| max_input_tokens: 256000, | |
| max_output_tokens: 256000, | |
| input_cost_per_token: 2.5e-7, | |
| output_cost_per_token: 2.5e-7, | |
| litellm_provider: "mistral", | |
| mode: "chat", | |
| source: "https://mistral.ai/technology/", | |
| supports_assistant_prefill: true, | |
| }, | |
| "mistral/mistral-embed": { | |
| max_tokens: 8192, | |
| max_input_tokens: 8192, | |
| input_cost_per_token: 1e-7, | |
| litellm_provider: "mistral", | |
| mode: "embedding", | |
| }, | |
| "deepseek-chat": { | |
| max_tokens: 4096, | |
| max_input_tokens: 128000, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 1.4e-7, | |
| input_cost_per_token_cache_hit: 1.4e-8, | |
| output_cost_per_token: 2.8e-7, | |
| litellm_provider: "deepseek", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_assistant_prefill: true, | |
| supports_tool_choice: true, | |
| supports_prompt_caching: true, | |
| }, | |
| "codestral/codestral-latest": { | |
| max_tokens: 8191, | |
| max_input_tokens: 32000, | |
| max_output_tokens: 8191, | |
| input_cost_per_token: 0.0, | |
| output_cost_per_token: 0.0, | |
| litellm_provider: "codestral", | |
| mode: "chat", | |
| source: "https://docs.mistral.ai/capabilities/code_generation/", | |
| supports_assistant_prefill: true, | |
| }, | |
| "codestral/codestral-2405": { | |
| max_tokens: 8191, | |
| max_input_tokens: 32000, | |
| max_output_tokens: 8191, | |
| input_cost_per_token: 0.0, | |
| output_cost_per_token: 0.0, | |
| litellm_provider: "codestral", | |
| mode: "chat", | |
| source: "https://docs.mistral.ai/capabilities/code_generation/", | |
| supports_assistant_prefill: true, | |
| }, | |
| "text-completion-codestral/codestral-latest": { | |
| max_tokens: 8191, | |
| max_input_tokens: 32000, | |
| max_output_tokens: 8191, | |
| input_cost_per_token: 0.0, | |
| output_cost_per_token: 0.0, | |
| litellm_provider: "text-completion-codestral", | |
| mode: "completion", | |
| source: "https://docs.mistral.ai/capabilities/code_generation/", | |
| }, | |
| "text-completion-codestral/codestral-2405": { | |
| max_tokens: 8191, | |
| max_input_tokens: 32000, | |
| max_output_tokens: 8191, | |
| input_cost_per_token: 0.0, | |
| output_cost_per_token: 0.0, | |
| litellm_provider: "text-completion-codestral", | |
| mode: "completion", | |
| source: "https://docs.mistral.ai/capabilities/code_generation/", | |
| }, | |
| "deepseek-coder": { | |
| max_tokens: 4096, | |
| max_input_tokens: 128000, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 1.4e-7, | |
| input_cost_per_token_cache_hit: 1.4e-8, | |
| output_cost_per_token: 2.8e-7, | |
| litellm_provider: "deepseek", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_assistant_prefill: true, | |
| supports_tool_choice: true, | |
| supports_prompt_caching: true, | |
| }, | |
| "groq/llama2-70b-4096": { | |
| max_tokens: 4096, | |
| max_input_tokens: 4096, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 7e-7, | |
| output_cost_per_token: 8e-7, | |
| litellm_provider: "groq", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_response_schema: true, | |
| }, | |
| "groq/llama3-8b-8192": { | |
| max_tokens: 8192, | |
| max_input_tokens: 8192, | |
| max_output_tokens: 8192, | |
| input_cost_per_token: 5e-8, | |
| output_cost_per_token: 8e-8, | |
| litellm_provider: "groq", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_response_schema: true, | |
| }, | |
| "groq/llama3-70b-8192": { | |
| max_tokens: 8192, | |
| max_input_tokens: 8192, | |
| max_output_tokens: 8192, | |
| input_cost_per_token: 5.9e-7, | |
| output_cost_per_token: 7.9e-7, | |
| litellm_provider: "groq", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_response_schema: true, | |
| }, | |
| "groq/llama-3.1-8b-instant": { | |
| max_tokens: 8192, | |
| max_input_tokens: 8192, | |
| max_output_tokens: 8192, | |
| input_cost_per_token: 5e-8, | |
| output_cost_per_token: 8e-8, | |
| litellm_provider: "groq", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_response_schema: true, | |
| }, | |
| "groq/llama-3.1-70b-versatile": { | |
| max_tokens: 8192, | |
| max_input_tokens: 8192, | |
| max_output_tokens: 8192, | |
| input_cost_per_token: 5.9e-7, | |
| output_cost_per_token: 7.9e-7, | |
| litellm_provider: "groq", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_response_schema: true, | |
| }, | |
| "groq/llama-3.1-405b-reasoning": { | |
| max_tokens: 8192, | |
| max_input_tokens: 8192, | |
| max_output_tokens: 8192, | |
| input_cost_per_token: 5.9e-7, | |
| output_cost_per_token: 7.9e-7, | |
| litellm_provider: "groq", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_response_schema: true, | |
| }, | |
| "groq/mixtral-8x7b-32768": { | |
| max_tokens: 32768, | |
| max_input_tokens: 32768, | |
| max_output_tokens: 32768, | |
| input_cost_per_token: 2.4e-7, | |
| output_cost_per_token: 2.4e-7, | |
| litellm_provider: "groq", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_response_schema: true, | |
| }, | |
| "groq/gemma-7b-it": { | |
| max_tokens: 8192, | |
| max_input_tokens: 8192, | |
| max_output_tokens: 8192, | |
| input_cost_per_token: 7e-8, | |
| output_cost_per_token: 7e-8, | |
| litellm_provider: "groq", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_response_schema: true, | |
| }, | |
| "groq/gemma2-9b-it": { | |
| max_tokens: 8192, | |
| max_input_tokens: 8192, | |
| max_output_tokens: 8192, | |
| input_cost_per_token: 2e-7, | |
| output_cost_per_token: 2e-7, | |
| litellm_provider: "groq", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_response_schema: true, | |
| }, | |
| "groq/llama3-groq-70b-8192-tool-use-preview": { | |
| max_tokens: 8192, | |
| max_input_tokens: 8192, | |
| max_output_tokens: 8192, | |
| input_cost_per_token: 8.9e-7, | |
| output_cost_per_token: 8.9e-7, | |
| litellm_provider: "groq", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_response_schema: true, | |
| }, | |
| "groq/llama3-groq-8b-8192-tool-use-preview": { | |
| max_tokens: 8192, | |
| max_input_tokens: 8192, | |
| max_output_tokens: 8192, | |
| input_cost_per_token: 1.9e-7, | |
| output_cost_per_token: 1.9e-7, | |
| litellm_provider: "groq", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_response_schema: true, | |
| }, | |
| "cerebras/llama3.1-8b": { | |
| max_tokens: 128000, | |
| max_input_tokens: 128000, | |
| max_output_tokens: 128000, | |
| input_cost_per_token: 1e-7, | |
| output_cost_per_token: 1e-7, | |
| litellm_provider: "cerebras", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| }, | |
| "cerebras/llama3.1-70b": { | |
| max_tokens: 128000, | |
| max_input_tokens: 128000, | |
| max_output_tokens: 128000, | |
| input_cost_per_token: 6e-7, | |
| output_cost_per_token: 6e-7, | |
| litellm_provider: "cerebras", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| }, | |
| "friendliai/mixtral-8x7b-instruct-v0-1": { | |
| max_tokens: 32768, | |
| max_input_tokens: 32768, | |
| max_output_tokens: 32768, | |
| input_cost_per_token: 4e-7, | |
| output_cost_per_token: 4e-7, | |
| litellm_provider: "friendliai", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| }, | |
| "friendliai/meta-llama-3-8b-instruct": { | |
| max_tokens: 8192, | |
| max_input_tokens: 8192, | |
| max_output_tokens: 8192, | |
| input_cost_per_token: 1e-7, | |
| output_cost_per_token: 1e-7, | |
| litellm_provider: "friendliai", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| }, | |
| "friendliai/meta-llama-3-70b-instruct": { | |
| max_tokens: 8192, | |
| max_input_tokens: 8192, | |
| max_output_tokens: 8192, | |
| input_cost_per_token: 8e-7, | |
| output_cost_per_token: 8e-7, | |
| litellm_provider: "friendliai", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| }, | |
| "claude-instant-1.2": { | |
| max_tokens: 8191, | |
| max_input_tokens: 100000, | |
| max_output_tokens: 8191, | |
| input_cost_per_token: 1.63e-7, | |
| output_cost_per_token: 5.51e-7, | |
| litellm_provider: "anthropic", | |
| mode: "chat", | |
| }, | |
| "claude-2": { | |
| max_tokens: 8191, | |
| max_input_tokens: 100000, | |
| max_output_tokens: 8191, | |
| input_cost_per_token: 8e-6, | |
| output_cost_per_token: 2.4e-5, | |
| litellm_provider: "anthropic", | |
| mode: "chat", | |
| }, | |
| "claude-2.1": { | |
| max_tokens: 8191, | |
| max_input_tokens: 200000, | |
| max_output_tokens: 8191, | |
| input_cost_per_token: 8e-6, | |
| output_cost_per_token: 2.4e-5, | |
| litellm_provider: "anthropic", | |
| mode: "chat", | |
| }, | |
| "claude-3-haiku-20240307": { | |
| max_tokens: 4096, | |
| max_input_tokens: 200000, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 2.5e-7, | |
| output_cost_per_token: 1.25e-6, | |
| cache_creation_input_token_cost: 3e-7, | |
| cache_read_input_token_cost: 3e-8, | |
| litellm_provider: "anthropic", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_vision: true, | |
| tool_use_system_prompt_tokens: 264, | |
| supports_assistant_prefill: true, | |
| supports_prompt_caching: true, | |
| supports_response_schema: true, | |
| }, | |
| "claude-3-haiku-latest": { | |
| max_tokens: 4096, | |
| max_input_tokens: 200000, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 2.5e-7, | |
| output_cost_per_token: 1.25e-6, | |
| cache_creation_input_token_cost: 3e-7, | |
| cache_read_input_token_cost: 3e-8, | |
| litellm_provider: "anthropic", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_vision: true, | |
| tool_use_system_prompt_tokens: 264, | |
| supports_assistant_prefill: true, | |
| supports_prompt_caching: true, | |
| }, | |
| "claude-3-opus-20240229": { | |
| max_tokens: 4096, | |
| max_input_tokens: 200000, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 1.5e-5, | |
| output_cost_per_token: 7.5e-5, | |
| cache_creation_input_token_cost: 1.875e-5, | |
| cache_read_input_token_cost: 1.5e-6, | |
| litellm_provider: "anthropic", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_vision: true, | |
| tool_use_system_prompt_tokens: 395, | |
| supports_assistant_prefill: true, | |
| supports_prompt_caching: true, | |
| supports_response_schema: true, | |
| }, | |
| "claude-3-opus-latest": { | |
| max_tokens: 4096, | |
| max_input_tokens: 200000, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 1.5e-5, | |
| output_cost_per_token: 7.5e-5, | |
| cache_creation_input_token_cost: 1.875e-5, | |
| cache_read_input_token_cost: 1.5e-6, | |
| litellm_provider: "anthropic", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_vision: true, | |
| tool_use_system_prompt_tokens: 395, | |
| supports_assistant_prefill: true, | |
| supports_prompt_caching: true, | |
| }, | |
| "claude-3-sonnet-20240229": { | |
| max_tokens: 4096, | |
| max_input_tokens: 200000, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 3e-6, | |
| output_cost_per_token: 1.5e-5, | |
| litellm_provider: "anthropic", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_vision: true, | |
| tool_use_system_prompt_tokens: 159, | |
| supports_assistant_prefill: true, | |
| supports_prompt_caching: true, | |
| supports_response_schema: true, | |
| }, | |
| "claude-3-5-sonnet-20240620": { | |
| max_tokens: 8192, | |
| max_input_tokens: 200000, | |
| max_output_tokens: 8192, | |
| input_cost_per_token: 3e-6, | |
| output_cost_per_token: 1.5e-5, | |
| cache_creation_input_token_cost: 3.75e-6, | |
| cache_read_input_token_cost: 3e-7, | |
| litellm_provider: "anthropic", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_vision: true, | |
| tool_use_system_prompt_tokens: 159, | |
| supports_assistant_prefill: true, | |
| supports_prompt_caching: true, | |
| supports_response_schema: true, | |
| }, | |
| "claude-3-5-sonnet-20241022": { | |
| max_tokens: 8192, | |
| max_input_tokens: 200000, | |
| max_output_tokens: 8192, | |
| input_cost_per_token: 3e-6, | |
| output_cost_per_token: 1.5e-5, | |
| cache_creation_input_token_cost: 3.75e-6, | |
| cache_read_input_token_cost: 3e-7, | |
| litellm_provider: "anthropic", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_vision: true, | |
| tool_use_system_prompt_tokens: 159, | |
| supports_assistant_prefill: true, | |
| supports_pdf_input: true, | |
| supports_prompt_caching: true, | |
| supports_response_schema: true, | |
| }, | |
| "claude-3-5-sonnet-latest": { | |
| max_tokens: 8192, | |
| max_input_tokens: 200000, | |
| max_output_tokens: 8192, | |
| input_cost_per_token: 3e-6, | |
| output_cost_per_token: 1.5e-5, | |
| cache_creation_input_token_cost: 3.75e-6, | |
| cache_read_input_token_cost: 3e-7, | |
| litellm_provider: "anthropic", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_vision: true, | |
| tool_use_system_prompt_tokens: 159, | |
| supports_assistant_prefill: true, | |
| supports_prompt_caching: true, | |
| }, | |
| "text-bison": { | |
| max_tokens: 2048, | |
| max_input_tokens: 8192, | |
| max_output_tokens: 2048, | |
| input_cost_per_character: 2.5e-7, | |
| output_cost_per_character: 5e-7, | |
| litellm_provider: "vertex_ai-text-models", | |
| mode: "completion", | |
| source: | |
| "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", | |
| }, | |
| "text-bison@001": { | |
| max_tokens: 1024, | |
| max_input_tokens: 8192, | |
| max_output_tokens: 1024, | |
| input_cost_per_character: 2.5e-7, | |
| output_cost_per_character: 5e-7, | |
| litellm_provider: "vertex_ai-text-models", | |
| mode: "completion", | |
| source: | |
| "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", | |
| }, | |
| "text-bison@002": { | |
| max_tokens: 1024, | |
| max_input_tokens: 8192, | |
| max_output_tokens: 1024, | |
| input_cost_per_character: 2.5e-7, | |
| output_cost_per_character: 5e-7, | |
| litellm_provider: "vertex_ai-text-models", | |
| mode: "completion", | |
| source: | |
| "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", | |
| }, | |
| "text-bison32k": { | |
| max_tokens: 1024, | |
| max_input_tokens: 8192, | |
| max_output_tokens: 1024, | |
| input_cost_per_token: 1.25e-7, | |
| output_cost_per_token: 1.25e-7, | |
| input_cost_per_character: 2.5e-7, | |
| output_cost_per_character: 5e-7, | |
| litellm_provider: "vertex_ai-text-models", | |
| mode: "completion", | |
| source: | |
| "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", | |
| }, | |
| "text-bison32k@002": { | |
| max_tokens: 1024, | |
| max_input_tokens: 8192, | |
| max_output_tokens: 1024, | |
| input_cost_per_token: 1.25e-7, | |
| output_cost_per_token: 1.25e-7, | |
| input_cost_per_character: 2.5e-7, | |
| output_cost_per_character: 5e-7, | |
| litellm_provider: "vertex_ai-text-models", | |
| mode: "completion", | |
| source: | |
| "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", | |
| }, | |
| "text-unicorn": { | |
| max_tokens: 1024, | |
| max_input_tokens: 8192, | |
| max_output_tokens: 1024, | |
| input_cost_per_token: 1e-5, | |
| output_cost_per_token: 2.8e-5, | |
| litellm_provider: "vertex_ai-text-models", | |
| mode: "completion", | |
| source: | |
| "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", | |
| }, | |
| "text-unicorn@001": { | |
| max_tokens: 1024, | |
| max_input_tokens: 8192, | |
| max_output_tokens: 1024, | |
| input_cost_per_token: 1e-5, | |
| output_cost_per_token: 2.8e-5, | |
| litellm_provider: "vertex_ai-text-models", | |
| mode: "completion", | |
| source: | |
| "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", | |
| }, | |
| "chat-bison": { | |
| max_tokens: 4096, | |
| max_input_tokens: 8192, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 1.25e-7, | |
| output_cost_per_token: 1.25e-7, | |
| input_cost_per_character: 2.5e-7, | |
| output_cost_per_character: 5e-7, | |
| litellm_provider: "vertex_ai-chat-models", | |
| mode: "chat", | |
| source: | |
| "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", | |
| }, | |
| "chat-bison@001": { | |
| max_tokens: 4096, | |
| max_input_tokens: 8192, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 1.25e-7, | |
| output_cost_per_token: 1.25e-7, | |
| input_cost_per_character: 2.5e-7, | |
| output_cost_per_character: 5e-7, | |
| litellm_provider: "vertex_ai-chat-models", | |
| mode: "chat", | |
| source: | |
| "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", | |
| }, | |
| "chat-bison@002": { | |
| max_tokens: 4096, | |
| max_input_tokens: 8192, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 1.25e-7, | |
| output_cost_per_token: 1.25e-7, | |
| input_cost_per_character: 2.5e-7, | |
| output_cost_per_character: 5e-7, | |
| litellm_provider: "vertex_ai-chat-models", | |
| mode: "chat", | |
| source: | |
| "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", | |
| }, | |
| "chat-bison-32k": { | |
| max_tokens: 8192, | |
| max_input_tokens: 32000, | |
| max_output_tokens: 8192, | |
| input_cost_per_token: 1.25e-7, | |
| output_cost_per_token: 1.25e-7, | |
| input_cost_per_character: 2.5e-7, | |
| output_cost_per_character: 5e-7, | |
| litellm_provider: "vertex_ai-chat-models", | |
| mode: "chat", | |
| source: | |
| "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", | |
| }, | |
| "chat-bison-32k@002": { | |
| max_tokens: 8192, | |
| max_input_tokens: 32000, | |
| max_output_tokens: 8192, | |
| input_cost_per_token: 1.25e-7, | |
| output_cost_per_token: 1.25e-7, | |
| input_cost_per_character: 2.5e-7, | |
| output_cost_per_character: 5e-7, | |
| litellm_provider: "vertex_ai-chat-models", | |
| mode: "chat", | |
| source: | |
| "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", | |
| }, | |
| "code-bison": { | |
| max_tokens: 1024, | |
| max_input_tokens: 6144, | |
| max_output_tokens: 1024, | |
| input_cost_per_token: 1.25e-7, | |
| output_cost_per_token: 1.25e-7, | |
| input_cost_per_character: 2.5e-7, | |
| output_cost_per_character: 5e-7, | |
| litellm_provider: "vertex_ai-code-text-models", | |
| mode: "chat", | |
| source: | |
| "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", | |
| }, | |
| "code-bison@001": { | |
| max_tokens: 1024, | |
| max_input_tokens: 6144, | |
| max_output_tokens: 1024, | |
| input_cost_per_token: 1.25e-7, | |
| output_cost_per_token: 1.25e-7, | |
| input_cost_per_character: 2.5e-7, | |
| output_cost_per_character: 5e-7, | |
| litellm_provider: "vertex_ai-code-text-models", | |
| mode: "completion", | |
| source: | |
| "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", | |
| }, | |
| "code-bison@002": { | |
| max_tokens: 1024, | |
| max_input_tokens: 6144, | |
| max_output_tokens: 1024, | |
| input_cost_per_token: 1.25e-7, | |
| output_cost_per_token: 1.25e-7, | |
| input_cost_per_character: 2.5e-7, | |
| output_cost_per_character: 5e-7, | |
| litellm_provider: "vertex_ai-code-text-models", | |
| mode: "completion", | |
| source: | |
| "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", | |
| }, | |
| "code-bison32k": { | |
| max_tokens: 1024, | |
| max_input_tokens: 6144, | |
| max_output_tokens: 1024, | |
| input_cost_per_token: 1.25e-7, | |
| output_cost_per_token: 1.25e-7, | |
| input_cost_per_character: 2.5e-7, | |
| output_cost_per_character: 5e-7, | |
| litellm_provider: "vertex_ai-code-text-models", | |
| mode: "completion", | |
| source: | |
| "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", | |
| }, | |
| "code-bison-32k@002": { | |
| max_tokens: 1024, | |
| max_input_tokens: 6144, | |
| max_output_tokens: 1024, | |
| input_cost_per_token: 1.25e-7, | |
| output_cost_per_token: 1.25e-7, | |
| input_cost_per_character: 2.5e-7, | |
| output_cost_per_character: 5e-7, | |
| litellm_provider: "vertex_ai-code-text-models", | |
| mode: "completion", | |
| source: | |
| "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", | |
| }, | |
| "code-gecko@001": { | |
| max_tokens: 64, | |
| max_input_tokens: 2048, | |
| max_output_tokens: 64, | |
| input_cost_per_token: 1.25e-7, | |
| output_cost_per_token: 1.25e-7, | |
| litellm_provider: "vertex_ai-code-text-models", | |
| mode: "completion", | |
| source: | |
| "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", | |
| }, | |
| "code-gecko@002": { | |
| max_tokens: 64, | |
| max_input_tokens: 2048, | |
| max_output_tokens: 64, | |
| input_cost_per_token: 1.25e-7, | |
| output_cost_per_token: 1.25e-7, | |
| litellm_provider: "vertex_ai-code-text-models", | |
| mode: "completion", | |
| source: | |
| "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", | |
| }, | |
| "code-gecko": { | |
| max_tokens: 64, | |
| max_input_tokens: 2048, | |
| max_output_tokens: 64, | |
| input_cost_per_token: 1.25e-7, | |
| output_cost_per_token: 1.25e-7, | |
| litellm_provider: "vertex_ai-code-text-models", | |
| mode: "completion", | |
| source: | |
| "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", | |
| }, | |
| "code-gecko-latest": { | |
| max_tokens: 64, | |
| max_input_tokens: 2048, | |
| max_output_tokens: 64, | |
| input_cost_per_token: 1.25e-7, | |
| output_cost_per_token: 1.25e-7, | |
| litellm_provider: "vertex_ai-code-text-models", | |
| mode: "completion", | |
| source: | |
| "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", | |
| }, | |
| "codechat-bison@latest": { | |
| max_tokens: 1024, | |
| max_input_tokens: 6144, | |
| max_output_tokens: 1024, | |
| input_cost_per_token: 1.25e-7, | |
| output_cost_per_token: 1.25e-7, | |
| input_cost_per_character: 2.5e-7, | |
| output_cost_per_character: 5e-7, | |
| litellm_provider: "vertex_ai-code-chat-models", | |
| mode: "chat", | |
| source: | |
| "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", | |
| }, | |
| "codechat-bison": { | |
| max_tokens: 1024, | |
| max_input_tokens: 6144, | |
| max_output_tokens: 1024, | |
| input_cost_per_token: 1.25e-7, | |
| output_cost_per_token: 1.25e-7, | |
| input_cost_per_character: 2.5e-7, | |
| output_cost_per_character: 5e-7, | |
| litellm_provider: "vertex_ai-code-chat-models", | |
| mode: "chat", | |
| source: | |
| "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", | |
| }, | |
| "codechat-bison@001": { | |
| max_tokens: 1024, | |
| max_input_tokens: 6144, | |
| max_output_tokens: 1024, | |
| input_cost_per_token: 1.25e-7, | |
| output_cost_per_token: 1.25e-7, | |
| input_cost_per_character: 2.5e-7, | |
| output_cost_per_character: 5e-7, | |
| litellm_provider: "vertex_ai-code-chat-models", | |
| mode: "chat", | |
| source: | |
| "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", | |
| }, | |
| "codechat-bison@002": { | |
| max_tokens: 1024, | |
| max_input_tokens: 6144, | |
| max_output_tokens: 1024, | |
| input_cost_per_token: 1.25e-7, | |
| output_cost_per_token: 1.25e-7, | |
| input_cost_per_character: 2.5e-7, | |
| output_cost_per_character: 5e-7, | |
| litellm_provider: "vertex_ai-code-chat-models", | |
| mode: "chat", | |
| source: | |
| "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", | |
| }, | |
| "codechat-bison-32k": { | |
| max_tokens: 8192, | |
| max_input_tokens: 32000, | |
| max_output_tokens: 8192, | |
| input_cost_per_token: 1.25e-7, | |
| output_cost_per_token: 1.25e-7, | |
| input_cost_per_character: 2.5e-7, | |
| output_cost_per_character: 5e-7, | |
| litellm_provider: "vertex_ai-code-chat-models", | |
| mode: "chat", | |
| source: | |
| "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", | |
| }, | |
| "codechat-bison-32k@002": { | |
| max_tokens: 8192, | |
| max_input_tokens: 32000, | |
| max_output_tokens: 8192, | |
| input_cost_per_token: 1.25e-7, | |
| output_cost_per_token: 1.25e-7, | |
| input_cost_per_character: 2.5e-7, | |
| output_cost_per_character: 5e-7, | |
| litellm_provider: "vertex_ai-code-chat-models", | |
| mode: "chat", | |
| source: | |
| "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", | |
| }, | |
| "gemini-pro": { | |
| max_tokens: 8192, | |
| max_input_tokens: 32760, | |
| max_output_tokens: 8192, | |
| input_cost_per_image: 0.0025, | |
| input_cost_per_video_per_second: 0.002, | |
| input_cost_per_token: 5e-7, | |
| input_cost_per_character: 1.25e-7, | |
| output_cost_per_token: 1.5e-6, | |
| output_cost_per_character: 3.75e-7, | |
| litellm_provider: "vertex_ai-language-models", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| source: "https://cloud.google.com/vertex-ai/generative-ai/pricing", | |
| }, | |
| "gemini-1.0-pro": { | |
| max_tokens: 8192, | |
| max_input_tokens: 32760, | |
| max_output_tokens: 8192, | |
| input_cost_per_image: 0.0025, | |
| input_cost_per_video_per_second: 0.002, | |
| input_cost_per_token: 5e-7, | |
| input_cost_per_character: 1.25e-7, | |
| output_cost_per_token: 1.5e-6, | |
| output_cost_per_character: 3.75e-7, | |
| litellm_provider: "vertex_ai-language-models", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| source: | |
| "https://cloud.google.com/vertex-ai/generative-ai/pricing#google_models", | |
| }, | |
| "gemini-1.0-pro-001": { | |
| max_tokens: 8192, | |
| max_input_tokens: 32760, | |
| max_output_tokens: 8192, | |
| input_cost_per_image: 0.0025, | |
| input_cost_per_video_per_second: 0.002, | |
| input_cost_per_token: 5e-7, | |
| input_cost_per_character: 1.25e-7, | |
| output_cost_per_token: 1.5e-6, | |
| output_cost_per_character: 3.75e-7, | |
| litellm_provider: "vertex_ai-language-models", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| source: | |
| "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", | |
| }, | |
| "gemini-1.0-ultra": { | |
| max_tokens: 8192, | |
| max_input_tokens: 8192, | |
| max_output_tokens: 2048, | |
| input_cost_per_image: 0.0025, | |
| input_cost_per_video_per_second: 0.002, | |
| input_cost_per_token: 5e-7, | |
| input_cost_per_character: 1.25e-7, | |
| output_cost_per_token: 1.5e-6, | |
| output_cost_per_character: 3.75e-7, | |
| litellm_provider: "vertex_ai-language-models", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| source: | |
| "As of Jun, 2024. There is no available doc on vertex ai pricing gemini-1.0-ultra-001. Using gemini-1.0-pro pricing. Got max_tokens info here: https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", | |
| }, | |
| "gemini-1.0-ultra-001": { | |
| max_tokens: 8192, | |
| max_input_tokens: 8192, | |
| max_output_tokens: 2048, | |
| input_cost_per_image: 0.0025, | |
| input_cost_per_video_per_second: 0.002, | |
| input_cost_per_token: 5e-7, | |
| input_cost_per_character: 1.25e-7, | |
| output_cost_per_token: 1.5e-6, | |
| output_cost_per_character: 3.75e-7, | |
| litellm_provider: "vertex_ai-language-models", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| source: | |
| "As of Jun, 2024. There is no available doc on vertex ai pricing gemini-1.0-ultra-001. Using gemini-1.0-pro pricing. Got max_tokens info here: https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", | |
| }, | |
| "gemini-1.0-pro-002": { | |
| max_tokens: 8192, | |
| max_input_tokens: 32760, | |
| max_output_tokens: 8192, | |
| input_cost_per_image: 0.0025, | |
| input_cost_per_video_per_second: 0.002, | |
| input_cost_per_token: 5e-7, | |
| input_cost_per_character: 1.25e-7, | |
| output_cost_per_token: 1.5e-6, | |
| output_cost_per_character: 3.75e-7, | |
| litellm_provider: "vertex_ai-language-models", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| source: | |
| "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", | |
| }, | |
| "gemini-1.5-pro": { | |
| max_tokens: 8192, | |
| max_input_tokens: 2097152, | |
| max_output_tokens: 8192, | |
| input_cost_per_image: 0.00032875, | |
| input_cost_per_audio_per_second: 3.125e-5, | |
| input_cost_per_video_per_second: 0.00032875, | |
| input_cost_per_token: 1.25e-6, | |
| input_cost_per_character: 3.125e-7, | |
| input_cost_per_image_above_128k_tokens: 0.0006575, | |
| input_cost_per_video_per_second_above_128k_tokens: 0.0006575, | |
| input_cost_per_audio_per_second_above_128k_tokens: 6.25e-5, | |
| input_cost_per_token_above_128k_tokens: 2.5e-6, | |
| input_cost_per_character_above_128k_tokens: 6.25e-7, | |
| output_cost_per_token: 5e-6, | |
| output_cost_per_character: 1.25e-6, | |
| output_cost_per_token_above_128k_tokens: 1e-5, | |
| output_cost_per_character_above_128k_tokens: 2.5e-6, | |
| litellm_provider: "vertex_ai-language-models", | |
| mode: "chat", | |
| supports_vision: true, | |
| supports_pdf_input: true, | |
| supports_system_messages: true, | |
| supports_function_calling: true, | |
| supports_tool_choice: true, | |
| supports_response_schema: true, | |
| source: | |
| "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", | |
| }, | |
| "gemini-1.5-pro-002": { | |
| max_tokens: 8192, | |
| max_input_tokens: 2097152, | |
| max_output_tokens: 8192, | |
| input_cost_per_image: 0.00032875, | |
| input_cost_per_audio_per_second: 3.125e-5, | |
| input_cost_per_video_per_second: 0.00032875, | |
| input_cost_per_token: 1.25e-6, | |
| input_cost_per_character: 3.125e-7, | |
| input_cost_per_image_above_128k_tokens: 0.0006575, | |
| input_cost_per_video_per_second_above_128k_tokens: 0.0006575, | |
| input_cost_per_audio_per_second_above_128k_tokens: 6.25e-5, | |
| input_cost_per_token_above_128k_tokens: 2.5e-6, | |
| input_cost_per_character_above_128k_tokens: 6.25e-7, | |
| output_cost_per_token: 5e-6, | |
| output_cost_per_character: 1.25e-6, | |
| output_cost_per_token_above_128k_tokens: 1e-5, | |
| output_cost_per_character_above_128k_tokens: 2.5e-6, | |
| litellm_provider: "vertex_ai-language-models", | |
| mode: "chat", | |
| supports_vision: true, | |
| supports_system_messages: true, | |
| supports_function_calling: true, | |
| supports_tool_choice: true, | |
| supports_response_schema: true, | |
| source: | |
| "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-1.5-pro", | |
| }, | |
| "gemini-1.5-pro-001": { | |
| max_tokens: 8192, | |
| max_input_tokens: 1000000, | |
| max_output_tokens: 8192, | |
| input_cost_per_image: 0.00032875, | |
| input_cost_per_audio_per_second: 3.125e-5, | |
| input_cost_per_video_per_second: 0.00032875, | |
| input_cost_per_token: 1.25e-6, | |
| input_cost_per_character: 3.125e-7, | |
| input_cost_per_image_above_128k_tokens: 0.0006575, | |
| input_cost_per_video_per_second_above_128k_tokens: 0.0006575, | |
| input_cost_per_audio_per_second_above_128k_tokens: 6.25e-5, | |
| input_cost_per_token_above_128k_tokens: 2.5e-6, | |
| input_cost_per_character_above_128k_tokens: 6.25e-7, | |
| output_cost_per_token: 5e-6, | |
| output_cost_per_character: 1.25e-6, | |
| output_cost_per_token_above_128k_tokens: 1e-5, | |
| output_cost_per_character_above_128k_tokens: 2.5e-6, | |
| litellm_provider: "vertex_ai-language-models", | |
| mode: "chat", | |
| supports_vision: true, | |
| supports_system_messages: true, | |
| supports_function_calling: true, | |
| supports_tool_choice: true, | |
| supports_response_schema: true, | |
| source: | |
| "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", | |
| }, | |
| "gemini-1.5-pro-preview-0514": { | |
| max_tokens: 8192, | |
| max_input_tokens: 1000000, | |
| max_output_tokens: 8192, | |
| input_cost_per_image: 0.00032875, | |
| input_cost_per_audio_per_second: 3.125e-5, | |
| input_cost_per_video_per_second: 0.00032875, | |
| input_cost_per_token: 7.8125e-8, | |
| input_cost_per_character: 3.125e-7, | |
| input_cost_per_image_above_128k_tokens: 0.0006575, | |
| input_cost_per_video_per_second_above_128k_tokens: 0.0006575, | |
| input_cost_per_audio_per_second_above_128k_tokens: 6.25e-5, | |
| input_cost_per_token_above_128k_tokens: 1.5625e-7, | |
| input_cost_per_character_above_128k_tokens: 6.25e-7, | |
| output_cost_per_token: 3.125e-7, | |
| output_cost_per_character: 1.25e-6, | |
| output_cost_per_token_above_128k_tokens: 6.25e-7, | |
| output_cost_per_character_above_128k_tokens: 2.5e-6, | |
| litellm_provider: "vertex_ai-language-models", | |
| mode: "chat", | |
| supports_system_messages: true, | |
| supports_function_calling: true, | |
| supports_tool_choice: true, | |
| supports_response_schema: true, | |
| source: | |
| "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", | |
| }, | |
| "gemini-1.5-pro-preview-0215": { | |
| max_tokens: 8192, | |
| max_input_tokens: 1000000, | |
| max_output_tokens: 8192, | |
| input_cost_per_image: 0.00032875, | |
| input_cost_per_audio_per_second: 3.125e-5, | |
| input_cost_per_video_per_second: 0.00032875, | |
| input_cost_per_token: 7.8125e-8, | |
| input_cost_per_character: 3.125e-7, | |
| input_cost_per_image_above_128k_tokens: 0.0006575, | |
| input_cost_per_video_per_second_above_128k_tokens: 0.0006575, | |
| input_cost_per_audio_per_second_above_128k_tokens: 6.25e-5, | |
| input_cost_per_token_above_128k_tokens: 1.5625e-7, | |
| input_cost_per_character_above_128k_tokens: 6.25e-7, | |
| output_cost_per_token: 3.125e-7, | |
| output_cost_per_character: 1.25e-6, | |
| output_cost_per_token_above_128k_tokens: 6.25e-7, | |
| output_cost_per_character_above_128k_tokens: 2.5e-6, | |
| litellm_provider: "vertex_ai-language-models", | |
| mode: "chat", | |
| supports_system_messages: true, | |
| supports_function_calling: true, | |
| supports_tool_choice: true, | |
| supports_response_schema: true, | |
| source: | |
| "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", | |
| }, | |
| "gemini-1.5-pro-preview-0409": { | |
| max_tokens: 8192, | |
| max_input_tokens: 1000000, | |
| max_output_tokens: 8192, | |
| input_cost_per_image: 0.00032875, | |
| input_cost_per_audio_per_second: 3.125e-5, | |
| input_cost_per_video_per_second: 0.00032875, | |
| input_cost_per_token: 7.8125e-8, | |
| input_cost_per_character: 3.125e-7, | |
| input_cost_per_image_above_128k_tokens: 0.0006575, | |
| input_cost_per_video_per_second_above_128k_tokens: 0.0006575, | |
| input_cost_per_audio_per_second_above_128k_tokens: 6.25e-5, | |
| input_cost_per_token_above_128k_tokens: 1.5625e-7, | |
| input_cost_per_character_above_128k_tokens: 6.25e-7, | |
| output_cost_per_token: 3.125e-7, | |
| output_cost_per_character: 1.25e-6, | |
| output_cost_per_token_above_128k_tokens: 6.25e-7, | |
| output_cost_per_character_above_128k_tokens: 2.5e-6, | |
| litellm_provider: "vertex_ai-language-models", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_tool_choice: true, | |
| supports_response_schema: true, | |
| source: | |
| "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", | |
| }, | |
| "gemini-1.5-flash": { | |
| max_tokens: 8192, | |
| max_input_tokens: 1000000, | |
| max_output_tokens: 8192, | |
| max_images_per_prompt: 3000, | |
| max_videos_per_prompt: 10, | |
| max_video_length: 1, | |
| max_audio_length_hours: 8.4, | |
| max_audio_per_prompt: 1, | |
| max_pdf_size_mb: 30, | |
| input_cost_per_image: 2e-5, | |
| input_cost_per_video_per_second: 2e-5, | |
| input_cost_per_audio_per_second: 2e-6, | |
| input_cost_per_token: 7.5e-8, | |
| input_cost_per_character: 1.875e-8, | |
| input_cost_per_token_above_128k_tokens: 1e-6, | |
| input_cost_per_character_above_128k_tokens: 2.5e-7, | |
| input_cost_per_image_above_128k_tokens: 4e-5, | |
| input_cost_per_video_per_second_above_128k_tokens: 4e-5, | |
| input_cost_per_audio_per_second_above_128k_tokens: 4e-6, | |
| output_cost_per_token: 3e-7, | |
| output_cost_per_character: 7.5e-8, | |
| output_cost_per_token_above_128k_tokens: 6e-7, | |
| output_cost_per_character_above_128k_tokens: 1.5e-7, | |
| litellm_provider: "vertex_ai-language-models", | |
| mode: "chat", | |
| supports_system_messages: true, | |
| supports_function_calling: true, | |
| supports_vision: true, | |
| supports_response_schema: true, | |
| source: | |
| "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", | |
| }, | |
| "gemini-1.5-flash-exp-0827": { | |
| max_tokens: 8192, | |
| max_input_tokens: 1000000, | |
| max_output_tokens: 8192, | |
| max_images_per_prompt: 3000, | |
| max_videos_per_prompt: 10, | |
| max_video_length: 1, | |
| max_audio_length_hours: 8.4, | |
| max_audio_per_prompt: 1, | |
| max_pdf_size_mb: 30, | |
| input_cost_per_image: 2e-5, | |
| input_cost_per_video_per_second: 2e-5, | |
| input_cost_per_audio_per_second: 2e-6, | |
| input_cost_per_token: 4.688e-9, | |
| input_cost_per_character: 1.875e-8, | |
| input_cost_per_token_above_128k_tokens: 1e-6, | |
| input_cost_per_character_above_128k_tokens: 2.5e-7, | |
| input_cost_per_image_above_128k_tokens: 4e-5, | |
| input_cost_per_video_per_second_above_128k_tokens: 4e-5, | |
| input_cost_per_audio_per_second_above_128k_tokens: 4e-6, | |
| output_cost_per_token: 4.6875e-9, | |
| output_cost_per_character: 1.875e-8, | |
| output_cost_per_token_above_128k_tokens: 9.375e-9, | |
| output_cost_per_character_above_128k_tokens: 3.75e-8, | |
| litellm_provider: "vertex_ai-language-models", | |
| mode: "chat", | |
| supports_system_messages: true, | |
| supports_function_calling: true, | |
| supports_vision: true, | |
| supports_response_schema: true, | |
| source: | |
| "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", | |
| }, | |
| "gemini-1.5-flash-002": { | |
| max_tokens: 8192, | |
| max_input_tokens: 1048576, | |
| max_output_tokens: 8192, | |
| max_images_per_prompt: 3000, | |
| max_videos_per_prompt: 10, | |
| max_video_length: 1, | |
| max_audio_length_hours: 8.4, | |
| max_audio_per_prompt: 1, | |
| max_pdf_size_mb: 30, | |
| input_cost_per_image: 2e-5, | |
| input_cost_per_video_per_second: 2e-5, | |
| input_cost_per_audio_per_second: 2e-6, | |
| input_cost_per_token: 7.5e-8, | |
| input_cost_per_character: 1.875e-8, | |
| input_cost_per_token_above_128k_tokens: 1e-6, | |
| input_cost_per_character_above_128k_tokens: 2.5e-7, | |
| input_cost_per_image_above_128k_tokens: 4e-5, | |
| input_cost_per_video_per_second_above_128k_tokens: 4e-5, | |
| input_cost_per_audio_per_second_above_128k_tokens: 4e-6, | |
| output_cost_per_token: 3e-7, | |
| output_cost_per_character: 7.5e-8, | |
| output_cost_per_token_above_128k_tokens: 6e-7, | |
| output_cost_per_character_above_128k_tokens: 1.5e-7, | |
| litellm_provider: "vertex_ai-language-models", | |
| mode: "chat", | |
| supports_system_messages: true, | |
| supports_function_calling: true, | |
| supports_vision: true, | |
| supports_response_schema: true, | |
| source: | |
| "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-1.5-flash", | |
| }, | |
| "gemini-1.5-flash-001": { | |
| max_tokens: 8192, | |
| max_input_tokens: 1000000, | |
| max_output_tokens: 8192, | |
| max_images_per_prompt: 3000, | |
| max_videos_per_prompt: 10, | |
| max_video_length: 1, | |
| max_audio_length_hours: 8.4, | |
| max_audio_per_prompt: 1, | |
| max_pdf_size_mb: 30, | |
| input_cost_per_image: 2e-5, | |
| input_cost_per_video_per_second: 2e-5, | |
| input_cost_per_audio_per_second: 2e-6, | |
| input_cost_per_token: 7.5e-8, | |
| input_cost_per_character: 1.875e-8, | |
| input_cost_per_token_above_128k_tokens: 1e-6, | |
| input_cost_per_character_above_128k_tokens: 2.5e-7, | |
| input_cost_per_image_above_128k_tokens: 4e-5, | |
| input_cost_per_video_per_second_above_128k_tokens: 4e-5, | |
| input_cost_per_audio_per_second_above_128k_tokens: 4e-6, | |
| output_cost_per_token: 3e-7, | |
| output_cost_per_character: 7.5e-8, | |
| output_cost_per_token_above_128k_tokens: 6e-7, | |
| output_cost_per_character_above_128k_tokens: 1.5e-7, | |
| litellm_provider: "vertex_ai-language-models", | |
| mode: "chat", | |
| supports_system_messages: true, | |
| supports_function_calling: true, | |
| supports_vision: true, | |
| supports_response_schema: true, | |
| source: | |
| "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", | |
| }, | |
| "gemini-1.5-flash-preview-0514": { | |
| max_tokens: 8192, | |
| max_input_tokens: 1000000, | |
| max_output_tokens: 8192, | |
| max_images_per_prompt: 3000, | |
| max_videos_per_prompt: 10, | |
| max_video_length: 1, | |
| max_audio_length_hours: 8.4, | |
| max_audio_per_prompt: 1, | |
| max_pdf_size_mb: 30, | |
| input_cost_per_image: 2e-5, | |
| input_cost_per_video_per_second: 2e-5, | |
| input_cost_per_audio_per_second: 2e-6, | |
| input_cost_per_token: 7.5e-8, | |
| input_cost_per_character: 1.875e-8, | |
| input_cost_per_token_above_128k_tokens: 1e-6, | |
| input_cost_per_character_above_128k_tokens: 2.5e-7, | |
| input_cost_per_image_above_128k_tokens: 4e-5, | |
| input_cost_per_video_per_second_above_128k_tokens: 4e-5, | |
| input_cost_per_audio_per_second_above_128k_tokens: 4e-6, | |
| output_cost_per_token: 4.6875e-9, | |
| output_cost_per_character: 1.875e-8, | |
| output_cost_per_token_above_128k_tokens: 9.375e-9, | |
| output_cost_per_character_above_128k_tokens: 3.75e-8, | |
| litellm_provider: "vertex_ai-language-models", | |
| mode: "chat", | |
| supports_system_messages: true, | |
| supports_function_calling: true, | |
| supports_vision: true, | |
| source: | |
| "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", | |
| }, | |
| "gemini-pro-experimental": { | |
| max_tokens: 8192, | |
| max_input_tokens: 1000000, | |
| max_output_tokens: 8192, | |
| input_cost_per_token: 0, | |
| output_cost_per_token: 0, | |
| input_cost_per_character: 0, | |
| output_cost_per_character: 0, | |
| litellm_provider: "vertex_ai-language-models", | |
| mode: "chat", | |
| supports_function_calling: false, | |
| supports_tool_choice: true, | |
| source: | |
| "https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/gemini-experimental", | |
| }, | |
| "gemini-flash-experimental": { | |
| max_tokens: 8192, | |
| max_input_tokens: 1000000, | |
| max_output_tokens: 8192, | |
| input_cost_per_token: 0, | |
| output_cost_per_token: 0, | |
| input_cost_per_character: 0, | |
| output_cost_per_character: 0, | |
| litellm_provider: "vertex_ai-language-models", | |
| mode: "chat", | |
| supports_function_calling: false, | |
| supports_tool_choice: true, | |
| source: | |
| "https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/gemini-experimental", | |
| }, | |
| "gemini-pro-vision": { | |
| max_tokens: 2048, | |
| max_input_tokens: 16384, | |
| max_output_tokens: 2048, | |
| max_images_per_prompt: 16, | |
| max_videos_per_prompt: 1, | |
| max_video_length: 2, | |
| input_cost_per_token: 2.5e-7, | |
| output_cost_per_token: 5e-7, | |
| litellm_provider: "vertex_ai-vision-models", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_vision: true, | |
| source: | |
| "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", | |
| }, | |
| "gemini-1.0-pro-vision": { | |
| max_tokens: 2048, | |
| max_input_tokens: 16384, | |
| max_output_tokens: 2048, | |
| max_images_per_prompt: 16, | |
| max_videos_per_prompt: 1, | |
| max_video_length: 2, | |
| input_cost_per_token: 2.5e-7, | |
| output_cost_per_token: 5e-7, | |
| litellm_provider: "vertex_ai-vision-models", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_vision: true, | |
| source: | |
| "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", | |
| }, | |
| "gemini-1.0-pro-vision-001": { | |
| max_tokens: 2048, | |
| max_input_tokens: 16384, | |
| max_output_tokens: 2048, | |
| max_images_per_prompt: 16, | |
| max_videos_per_prompt: 1, | |
| max_video_length: 2, | |
| input_cost_per_token: 2.5e-7, | |
| output_cost_per_token: 5e-7, | |
| litellm_provider: "vertex_ai-vision-models", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_vision: true, | |
| source: | |
| "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", | |
| }, | |
| "medlm-medium": { | |
| max_tokens: 8192, | |
| max_input_tokens: 32768, | |
| max_output_tokens: 8192, | |
| input_cost_per_character: 5e-7, | |
| output_cost_per_character: 1e-6, | |
| litellm_provider: "vertex_ai-language-models", | |
| mode: "chat", | |
| source: | |
| "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", | |
| }, | |
| "medlm-large": { | |
| max_tokens: 1024, | |
| max_input_tokens: 8192, | |
| max_output_tokens: 1024, | |
| input_cost_per_character: 5e-6, | |
| output_cost_per_character: 1.5e-5, | |
| litellm_provider: "vertex_ai-language-models", | |
| mode: "chat", | |
| source: | |
| "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", | |
| }, | |
| "vertex_ai/claude-3-sonnet@20240229": { | |
| max_tokens: 4096, | |
| max_input_tokens: 200000, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 3e-6, | |
| output_cost_per_token: 1.5e-5, | |
| litellm_provider: "vertex_ai-anthropic_models", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_vision: true, | |
| supports_assistant_prefill: true, | |
| }, | |
| "vertex_ai/claude-3-5-sonnet@20240620": { | |
| max_tokens: 8192, | |
| max_input_tokens: 200000, | |
| max_output_tokens: 8192, | |
| input_cost_per_token: 3e-6, | |
| output_cost_per_token: 1.5e-5, | |
| litellm_provider: "vertex_ai-anthropic_models", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_vision: true, | |
| supports_assistant_prefill: true, | |
| }, | |
| "vertex_ai/claude-3-5-sonnet-v2@20241022": { | |
| max_tokens: 8192, | |
| max_input_tokens: 200000, | |
| max_output_tokens: 8192, | |
| input_cost_per_token: 3e-6, | |
| output_cost_per_token: 1.5e-5, | |
| litellm_provider: "vertex_ai-anthropic_models", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_vision: true, | |
| supports_assistant_prefill: true, | |
| }, | |
| "vertex_ai/claude-3-haiku@20240307": { | |
| max_tokens: 4096, | |
| max_input_tokens: 200000, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 2.5e-7, | |
| output_cost_per_token: 1.25e-6, | |
| litellm_provider: "vertex_ai-anthropic_models", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_vision: true, | |
| supports_assistant_prefill: true, | |
| }, | |
| "vertex_ai/claude-3-opus@20240229": { | |
| max_tokens: 4096, | |
| max_input_tokens: 200000, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 1.5e-5, | |
| output_cost_per_token: 7.5e-5, | |
| litellm_provider: "vertex_ai-anthropic_models", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_vision: true, | |
| supports_assistant_prefill: true, | |
| }, | |
| "vertex_ai/meta/llama3-405b-instruct-maas": { | |
| max_tokens: 32000, | |
| max_input_tokens: 32000, | |
| max_output_tokens: 32000, | |
| input_cost_per_token: 0.0, | |
| output_cost_per_token: 0.0, | |
| litellm_provider: "vertex_ai-llama_models", | |
| mode: "chat", | |
| source: | |
| "https://cloud.google.com/vertex-ai/generative-ai/pricing#partner-models", | |
| }, | |
| "vertex_ai/meta/llama3-70b-instruct-maas": { | |
| max_tokens: 32000, | |
| max_input_tokens: 32000, | |
| max_output_tokens: 32000, | |
| input_cost_per_token: 0.0, | |
| output_cost_per_token: 0.0, | |
| litellm_provider: "vertex_ai-llama_models", | |
| mode: "chat", | |
| source: | |
| "https://cloud.google.com/vertex-ai/generative-ai/pricing#partner-models", | |
| }, | |
| "vertex_ai/meta/llama3-8b-instruct-maas": { | |
| max_tokens: 32000, | |
| max_input_tokens: 32000, | |
| max_output_tokens: 32000, | |
| input_cost_per_token: 0.0, | |
| output_cost_per_token: 0.0, | |
| litellm_provider: "vertex_ai-llama_models", | |
| mode: "chat", | |
| source: | |
| "https://cloud.google.com/vertex-ai/generative-ai/pricing#partner-models", | |
| }, | |
| "vertex_ai/meta/llama-3.2-90b-vision-instruct-maas": { | |
| max_tokens: 128000, | |
| max_input_tokens: 128000, | |
| max_output_tokens: 2048, | |
| input_cost_per_token: 0.0, | |
| output_cost_per_token: 0.0, | |
| litellm_provider: "vertex_ai-llama_models", | |
| mode: "chat", | |
| supports_system_messages: true, | |
| supports_vision: true, | |
| source: | |
| "https://console.cloud.google.com/vertex-ai/publishers/meta/model-garden/llama-3.2-90b-vision-instruct-maas", | |
| }, | |
| "vertex_ai/mistral-large@latest": { | |
| max_tokens: 8191, | |
| max_input_tokens: 128000, | |
| max_output_tokens: 8191, | |
| input_cost_per_token: 2e-6, | |
| output_cost_per_token: 6e-6, | |
| litellm_provider: "vertex_ai-mistral_models", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| }, | |
| "vertex_ai/mistral-large@2407": { | |
| max_tokens: 8191, | |
| max_input_tokens: 128000, | |
| max_output_tokens: 8191, | |
| input_cost_per_token: 2e-6, | |
| output_cost_per_token: 6e-6, | |
| litellm_provider: "vertex_ai-mistral_models", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| }, | |
| "vertex_ai/mistral-nemo@latest": { | |
| max_tokens: 128000, | |
| max_input_tokens: 128000, | |
| max_output_tokens: 128000, | |
| input_cost_per_token: 1.5e-7, | |
| output_cost_per_token: 1.5e-7, | |
| litellm_provider: "vertex_ai-mistral_models", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| }, | |
| "vertex_ai/jamba-1.5-mini@001": { | |
| max_tokens: 256000, | |
| max_input_tokens: 256000, | |
| max_output_tokens: 256000, | |
| input_cost_per_token: 2e-7, | |
| output_cost_per_token: 4e-7, | |
| litellm_provider: "vertex_ai-ai21_models", | |
| mode: "chat", | |
| }, | |
| "vertex_ai/jamba-1.5-large@001": { | |
| max_tokens: 256000, | |
| max_input_tokens: 256000, | |
| max_output_tokens: 256000, | |
| input_cost_per_token: 2e-6, | |
| output_cost_per_token: 8e-6, | |
| litellm_provider: "vertex_ai-ai21_models", | |
| mode: "chat", | |
| }, | |
| "vertex_ai/jamba-1.5": { | |
| max_tokens: 256000, | |
| max_input_tokens: 256000, | |
| max_output_tokens: 256000, | |
| input_cost_per_token: 2e-7, | |
| output_cost_per_token: 4e-7, | |
| litellm_provider: "vertex_ai-ai21_models", | |
| mode: "chat", | |
| }, | |
| "vertex_ai/jamba-1.5-mini": { | |
| max_tokens: 256000, | |
| max_input_tokens: 256000, | |
| max_output_tokens: 256000, | |
| input_cost_per_token: 2e-7, | |
| output_cost_per_token: 4e-7, | |
| litellm_provider: "vertex_ai-ai21_models", | |
| mode: "chat", | |
| }, | |
| "vertex_ai/jamba-1.5-large": { | |
| max_tokens: 256000, | |
| max_input_tokens: 256000, | |
| max_output_tokens: 256000, | |
| input_cost_per_token: 2e-6, | |
| output_cost_per_token: 8e-6, | |
| litellm_provider: "vertex_ai-ai21_models", | |
| mode: "chat", | |
| }, | |
| "vertex_ai/mistral-nemo@2407": { | |
| max_tokens: 128000, | |
| max_input_tokens: 128000, | |
| max_output_tokens: 128000, | |
| input_cost_per_token: 3e-6, | |
| output_cost_per_token: 3e-6, | |
| litellm_provider: "vertex_ai-mistral_models", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| }, | |
| "vertex_ai/codestral@latest": { | |
| max_tokens: 128000, | |
| max_input_tokens: 128000, | |
| max_output_tokens: 128000, | |
| input_cost_per_token: 2e-7, | |
| output_cost_per_token: 6e-7, | |
| litellm_provider: "vertex_ai-mistral_models", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| }, | |
| "vertex_ai/codestral@2405": { | |
| max_tokens: 128000, | |
| max_input_tokens: 128000, | |
| max_output_tokens: 128000, | |
| input_cost_per_token: 2e-7, | |
| output_cost_per_token: 6e-7, | |
| litellm_provider: "vertex_ai-mistral_models", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| }, | |
| "vertex_ai/imagegeneration@006": { | |
| output_cost_per_image: 0.02, | |
| litellm_provider: "vertex_ai-image-models", | |
| mode: "image_generation", | |
| source: "https://cloud.google.com/vertex-ai/generative-ai/pricing", | |
| }, | |
| "vertex_ai/imagen-3.0-generate-001": { | |
| output_cost_per_image: 0.04, | |
| litellm_provider: "vertex_ai-image-models", | |
| mode: "image_generation", | |
| source: "https://cloud.google.com/vertex-ai/generative-ai/pricing", | |
| }, | |
| "vertex_ai/imagen-3.0-fast-generate-001": { | |
| output_cost_per_image: 0.02, | |
| litellm_provider: "vertex_ai-image-models", | |
| mode: "image_generation", | |
| source: "https://cloud.google.com/vertex-ai/generative-ai/pricing", | |
| }, | |
| "text-embedding-004": { | |
| max_tokens: 2048, | |
| max_input_tokens: 2048, | |
| output_vector_size: 768, | |
| input_cost_per_character: 2.5e-8, | |
| input_cost_per_token: 1e-7, | |
| output_cost_per_token: 0, | |
| litellm_provider: "vertex_ai-embedding-models", | |
| mode: "embedding", | |
| source: | |
| "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models", | |
| }, | |
| "text-multilingual-embedding-002": { | |
| max_tokens: 2048, | |
| max_input_tokens: 2048, | |
| output_vector_size: 768, | |
| input_cost_per_character: 2.5e-8, | |
| input_cost_per_token: 1e-7, | |
| output_cost_per_token: 0, | |
| litellm_provider: "vertex_ai-embedding-models", | |
| mode: "embedding", | |
| source: | |
| "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models", | |
| }, | |
| "textembedding-gecko": { | |
| max_tokens: 3072, | |
| max_input_tokens: 3072, | |
| output_vector_size: 768, | |
| input_cost_per_character: 2.5e-8, | |
| input_cost_per_token: 1e-7, | |
| output_cost_per_token: 0, | |
| litellm_provider: "vertex_ai-embedding-models", | |
| mode: "embedding", | |
| source: | |
| "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", | |
| }, | |
| "textembedding-gecko-multilingual": { | |
| max_tokens: 3072, | |
| max_input_tokens: 3072, | |
| output_vector_size: 768, | |
| input_cost_per_character: 2.5e-8, | |
| input_cost_per_token: 1e-7, | |
| output_cost_per_token: 0, | |
| litellm_provider: "vertex_ai-embedding-models", | |
| mode: "embedding", | |
| source: | |
| "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", | |
| }, | |
| "textembedding-gecko-multilingual@001": { | |
| max_tokens: 3072, | |
| max_input_tokens: 3072, | |
| output_vector_size: 768, | |
| input_cost_per_character: 2.5e-8, | |
| input_cost_per_token: 1e-7, | |
| output_cost_per_token: 0, | |
| litellm_provider: "vertex_ai-embedding-models", | |
| mode: "embedding", | |
| source: | |
| "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", | |
| }, | |
| "textembedding-gecko@001": { | |
| max_tokens: 3072, | |
| max_input_tokens: 3072, | |
| output_vector_size: 768, | |
| input_cost_per_character: 2.5e-8, | |
| input_cost_per_token: 1e-7, | |
| output_cost_per_token: 0, | |
| litellm_provider: "vertex_ai-embedding-models", | |
| mode: "embedding", | |
| source: | |
| "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", | |
| }, | |
| "textembedding-gecko@003": { | |
| max_tokens: 3072, | |
| max_input_tokens: 3072, | |
| output_vector_size: 768, | |
| input_cost_per_character: 2.5e-8, | |
| input_cost_per_token: 1e-7, | |
| output_cost_per_token: 0, | |
| litellm_provider: "vertex_ai-embedding-models", | |
| mode: "embedding", | |
| source: | |
| "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", | |
| }, | |
| "text-embedding-preview-0409": { | |
| max_tokens: 3072, | |
| max_input_tokens: 3072, | |
| output_vector_size: 768, | |
| input_cost_per_token: 6.25e-9, | |
| input_cost_per_token_batch_requests: 5e-9, | |
| output_cost_per_token: 0, | |
| litellm_provider: "vertex_ai-embedding-models", | |
| mode: "embedding", | |
| source: "https://cloud.google.com/vertex-ai/generative-ai/pricing", | |
| }, | |
| "text-multilingual-embedding-preview-0409": { | |
| max_tokens: 3072, | |
| max_input_tokens: 3072, | |
| output_vector_size: 768, | |
| input_cost_per_token: 6.25e-9, | |
| output_cost_per_token: 0, | |
| litellm_provider: "vertex_ai-embedding-models", | |
| mode: "embedding", | |
| source: | |
| "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", | |
| }, | |
| "palm/chat-bison": { | |
| max_tokens: 4096, | |
| max_input_tokens: 8192, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 1.25e-7, | |
| output_cost_per_token: 1.25e-7, | |
| litellm_provider: "palm", | |
| mode: "chat", | |
| source: | |
| "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", | |
| }, | |
| "palm/chat-bison-001": { | |
| max_tokens: 4096, | |
| max_input_tokens: 8192, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 1.25e-7, | |
| output_cost_per_token: 1.25e-7, | |
| litellm_provider: "palm", | |
| mode: "chat", | |
| source: | |
| "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", | |
| }, | |
| "palm/text-bison": { | |
| max_tokens: 1024, | |
| max_input_tokens: 8192, | |
| max_output_tokens: 1024, | |
| input_cost_per_token: 1.25e-7, | |
| output_cost_per_token: 1.25e-7, | |
| litellm_provider: "palm", | |
| mode: "completion", | |
| source: | |
| "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", | |
| }, | |
| "palm/text-bison-001": { | |
| max_tokens: 1024, | |
| max_input_tokens: 8192, | |
| max_output_tokens: 1024, | |
| input_cost_per_token: 1.25e-7, | |
| output_cost_per_token: 1.25e-7, | |
| litellm_provider: "palm", | |
| mode: "completion", | |
| source: | |
| "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", | |
| }, | |
| "palm/text-bison-safety-off": { | |
| max_tokens: 1024, | |
| max_input_tokens: 8192, | |
| max_output_tokens: 1024, | |
| input_cost_per_token: 1.25e-7, | |
| output_cost_per_token: 1.25e-7, | |
| litellm_provider: "palm", | |
| mode: "completion", | |
| source: | |
| "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", | |
| }, | |
| "palm/text-bison-safety-recitation-off": { | |
| max_tokens: 1024, | |
| max_input_tokens: 8192, | |
| max_output_tokens: 1024, | |
| input_cost_per_token: 1.25e-7, | |
| output_cost_per_token: 1.25e-7, | |
| litellm_provider: "palm", | |
| mode: "completion", | |
| source: | |
| "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", | |
| }, | |
| "gemini/gemini-1.5-flash-002": { | |
| max_tokens: 8192, | |
| max_input_tokens: 1048576, | |
| max_output_tokens: 8192, | |
| max_images_per_prompt: 3000, | |
| max_videos_per_prompt: 10, | |
| max_video_length: 1, | |
| max_audio_length_hours: 8.4, | |
| max_audio_per_prompt: 1, | |
| max_pdf_size_mb: 30, | |
| cache_read_input_token_cost: 1.875e-8, | |
| cache_creation_input_token_cost: 1e-6, | |
| input_cost_per_token: 7.5e-8, | |
| input_cost_per_token_above_128k_tokens: 1.5e-7, | |
| output_cost_per_token: 3e-7, | |
| output_cost_per_token_above_128k_tokens: 6e-7, | |
| litellm_provider: "gemini", | |
| mode: "chat", | |
| supports_system_messages: true, | |
| supports_function_calling: true, | |
| supports_vision: true, | |
| supports_response_schema: true, | |
| supports_prompt_caching: true, | |
| tpm: 4000000, | |
| rpm: 2000, | |
| source: "https://ai.google.dev/pricing", | |
| }, | |
| "gemini/gemini-1.5-flash-001": { | |
| max_tokens: 8192, | |
| max_input_tokens: 1048576, | |
| max_output_tokens: 8192, | |
| max_images_per_prompt: 3000, | |
| max_videos_per_prompt: 10, | |
| max_video_length: 1, | |
| max_audio_length_hours: 8.4, | |
| max_audio_per_prompt: 1, | |
| max_pdf_size_mb: 30, | |
| cache_read_input_token_cost: 1.875e-8, | |
| cache_creation_input_token_cost: 1e-6, | |
| input_cost_per_token: 7.5e-8, | |
| input_cost_per_token_above_128k_tokens: 1.5e-7, | |
| output_cost_per_token: 3e-7, | |
| output_cost_per_token_above_128k_tokens: 6e-7, | |
| litellm_provider: "gemini", | |
| mode: "chat", | |
| supports_system_messages: true, | |
| supports_function_calling: true, | |
| supports_vision: true, | |
| supports_response_schema: true, | |
| supports_prompt_caching: true, | |
| tpm: 4000000, | |
| rpm: 2000, | |
| source: "https://ai.google.dev/pricing", | |
| }, | |
| "gemini/gemini-1.5-flash": { | |
| max_tokens: 8192, | |
| max_input_tokens: 1048576, | |
| max_output_tokens: 8192, | |
| max_images_per_prompt: 3000, | |
| max_videos_per_prompt: 10, | |
| max_video_length: 1, | |
| max_audio_length_hours: 8.4, | |
| max_audio_per_prompt: 1, | |
| max_pdf_size_mb: 30, | |
| input_cost_per_token: 7.5e-8, | |
| input_cost_per_token_above_128k_tokens: 1.5e-7, | |
| output_cost_per_token: 3e-7, | |
| output_cost_per_token_above_128k_tokens: 6e-7, | |
| litellm_provider: "gemini", | |
| mode: "chat", | |
| supports_system_messages: true, | |
| supports_function_calling: true, | |
| supports_vision: true, | |
| supports_response_schema: true, | |
| tpm: 4000000, | |
| rpm: 2000, | |
| source: "https://ai.google.dev/pricing", | |
| }, | |
| "gemini/gemini-1.5-flash-latest": { | |
| max_tokens: 8192, | |
| max_input_tokens: 1048576, | |
| max_output_tokens: 8192, | |
| max_images_per_prompt: 3000, | |
| max_videos_per_prompt: 10, | |
| max_video_length: 1, | |
| max_audio_length_hours: 8.4, | |
| max_audio_per_prompt: 1, | |
| max_pdf_size_mb: 30, | |
| input_cost_per_token: 7.5e-8, | |
| input_cost_per_token_above_128k_tokens: 1.5e-7, | |
| output_cost_per_token: 3e-7, | |
| output_cost_per_token_above_128k_tokens: 6e-7, | |
| litellm_provider: "gemini", | |
| mode: "chat", | |
| supports_system_messages: true, | |
| supports_function_calling: true, | |
| supports_vision: true, | |
| supports_response_schema: true, | |
| supports_prompt_caching: true, | |
| tpm: 4000000, | |
| rpm: 2000, | |
| source: "https://ai.google.dev/pricing", | |
| }, | |
| "gemini/gemini-1.5-flash-8b-exp-0924": { | |
| max_tokens: 8192, | |
| max_input_tokens: 1048576, | |
| max_output_tokens: 8192, | |
| max_images_per_prompt: 3000, | |
| max_videos_per_prompt: 10, | |
| max_video_length: 1, | |
| max_audio_length_hours: 8.4, | |
| max_audio_per_prompt: 1, | |
| max_pdf_size_mb: 30, | |
| input_cost_per_token: 0, | |
| input_cost_per_token_above_128k_tokens: 0, | |
| output_cost_per_token: 0, | |
| output_cost_per_token_above_128k_tokens: 0, | |
| litellm_provider: "gemini", | |
| mode: "chat", | |
| supports_system_messages: true, | |
| supports_function_calling: true, | |
| supports_vision: true, | |
| supports_response_schema: true, | |
| supports_prompt_caching: true, | |
| tpm: 4000000, | |
| rpm: 4000, | |
| source: "https://ai.google.dev/pricing", | |
| }, | |
| "gemini/gemini-1.5-flash-exp-0827": { | |
| max_tokens: 8192, | |
| max_input_tokens: 1048576, | |
| max_output_tokens: 8192, | |
| max_images_per_prompt: 3000, | |
| max_videos_per_prompt: 10, | |
| max_video_length: 1, | |
| max_audio_length_hours: 8.4, | |
| max_audio_per_prompt: 1, | |
| max_pdf_size_mb: 30, | |
| input_cost_per_token: 0, | |
| input_cost_per_token_above_128k_tokens: 0, | |
| output_cost_per_token: 0, | |
| output_cost_per_token_above_128k_tokens: 0, | |
| litellm_provider: "gemini", | |
| mode: "chat", | |
| supports_system_messages: true, | |
| supports_function_calling: true, | |
| supports_vision: true, | |
| supports_response_schema: true, | |
| tpm: 4000000, | |
| rpm: 2000, | |
| source: "https://ai.google.dev/pricing", | |
| }, | |
| "gemini/gemini-1.5-flash-8b-exp-0827": { | |
| max_tokens: 8192, | |
| max_input_tokens: 1000000, | |
| max_output_tokens: 8192, | |
| max_images_per_prompt: 3000, | |
| max_videos_per_prompt: 10, | |
| max_video_length: 1, | |
| max_audio_length_hours: 8.4, | |
| max_audio_per_prompt: 1, | |
| max_pdf_size_mb: 30, | |
| input_cost_per_token: 0, | |
| input_cost_per_token_above_128k_tokens: 0, | |
| output_cost_per_token: 0, | |
| output_cost_per_token_above_128k_tokens: 0, | |
| litellm_provider: "gemini", | |
| mode: "chat", | |
| supports_system_messages: true, | |
| supports_function_calling: true, | |
| supports_vision: true, | |
| supports_response_schema: true, | |
| tpm: 4000000, | |
| rpm: 4000, | |
| source: "https://ai.google.dev/pricing", | |
| }, | |
| "gemini/gemini-pro": { | |
| max_tokens: 8192, | |
| max_input_tokens: 32760, | |
| max_output_tokens: 8192, | |
| input_cost_per_token: 3.5e-7, | |
| input_cost_per_token_above_128k_tokens: 7e-7, | |
| output_cost_per_token: 1.05e-6, | |
| output_cost_per_token_above_128k_tokens: 2.1e-6, | |
| litellm_provider: "gemini", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| rpd: 30000, | |
| tpm: 120000, | |
| rpm: 360, | |
| source: "https://ai.google.dev/gemini-api/docs/models/gemini", | |
| }, | |
| "gemini/gemini-1.5-pro": { | |
| max_tokens: 8192, | |
| max_input_tokens: 2097152, | |
| max_output_tokens: 8192, | |
| input_cost_per_token: 3.5e-6, | |
| input_cost_per_token_above_128k_tokens: 7e-6, | |
| output_cost_per_token: 1.05e-5, | |
| output_cost_per_token_above_128k_tokens: 2.1e-5, | |
| litellm_provider: "gemini", | |
| mode: "chat", | |
| supports_system_messages: true, | |
| supports_function_calling: true, | |
| supports_vision: true, | |
| supports_tool_choice: true, | |
| supports_response_schema: true, | |
| tpm: 4000000, | |
| rpm: 1000, | |
| source: "https://ai.google.dev/pricing", | |
| }, | |
| "gemini/gemini-1.5-pro-002": { | |
| max_tokens: 8192, | |
| max_input_tokens: 2097152, | |
| max_output_tokens: 8192, | |
| input_cost_per_token: 3.5e-6, | |
| input_cost_per_token_above_128k_tokens: 7e-6, | |
| output_cost_per_token: 1.05e-5, | |
| output_cost_per_token_above_128k_tokens: 2.1e-5, | |
| litellm_provider: "gemini", | |
| mode: "chat", | |
| supports_system_messages: true, | |
| supports_function_calling: true, | |
| supports_vision: true, | |
| supports_tool_choice: true, | |
| supports_response_schema: true, | |
| supports_prompt_caching: true, | |
| tpm: 4000000, | |
| rpm: 1000, | |
| source: "https://ai.google.dev/pricing", | |
| }, | |
| "gemini/gemini-1.5-pro-001": { | |
| max_tokens: 8192, | |
| max_input_tokens: 2097152, | |
| max_output_tokens: 8192, | |
| input_cost_per_token: 3.5e-6, | |
| input_cost_per_token_above_128k_tokens: 7e-6, | |
| output_cost_per_token: 1.05e-5, | |
| output_cost_per_token_above_128k_tokens: 2.1e-5, | |
| litellm_provider: "gemini", | |
| mode: "chat", | |
| supports_system_messages: true, | |
| supports_function_calling: true, | |
| supports_vision: true, | |
| supports_tool_choice: true, | |
| supports_response_schema: true, | |
| supports_prompt_caching: true, | |
| tpm: 4000000, | |
| rpm: 1000, | |
| source: "https://ai.google.dev/pricing", | |
| }, | |
| "gemini/gemini-1.5-pro-exp-0801": { | |
| max_tokens: 8192, | |
| max_input_tokens: 2097152, | |
| max_output_tokens: 8192, | |
| input_cost_per_token: 3.5e-6, | |
| input_cost_per_token_above_128k_tokens: 7e-6, | |
| output_cost_per_token: 1.05e-5, | |
| output_cost_per_token_above_128k_tokens: 2.1e-5, | |
| litellm_provider: "gemini", | |
| mode: "chat", | |
| supports_system_messages: true, | |
| supports_function_calling: true, | |
| supports_vision: true, | |
| supports_tool_choice: true, | |
| supports_response_schema: true, | |
| tpm: 4000000, | |
| rpm: 1000, | |
| source: "https://ai.google.dev/pricing", | |
| }, | |
| "gemini/gemini-1.5-pro-exp-0827": { | |
| max_tokens: 8192, | |
| max_input_tokens: 2097152, | |
| max_output_tokens: 8192, | |
| input_cost_per_token: 0, | |
| input_cost_per_token_above_128k_tokens: 0, | |
| output_cost_per_token: 0, | |
| output_cost_per_token_above_128k_tokens: 0, | |
| litellm_provider: "gemini", | |
| mode: "chat", | |
| supports_system_messages: true, | |
| supports_function_calling: true, | |
| supports_vision: true, | |
| supports_tool_choice: true, | |
| supports_response_schema: true, | |
| tpm: 4000000, | |
| rpm: 1000, | |
| source: "https://ai.google.dev/pricing", | |
| }, | |
| "gemini/gemini-1.5-pro-latest": { | |
| max_tokens: 8192, | |
| max_input_tokens: 1048576, | |
| max_output_tokens: 8192, | |
| input_cost_per_token: 3.5e-6, | |
| input_cost_per_token_above_128k_tokens: 7e-6, | |
| output_cost_per_token: 1.05e-6, | |
| output_cost_per_token_above_128k_tokens: 2.1e-5, | |
| litellm_provider: "gemini", | |
| mode: "chat", | |
| supports_system_messages: true, | |
| supports_function_calling: true, | |
| supports_vision: true, | |
| supports_tool_choice: true, | |
| supports_response_schema: true, | |
| tpm: 4000000, | |
| rpm: 1000, | |
| source: "https://ai.google.dev/pricing", | |
| }, | |
| "gemini/gemini-pro-vision": { | |
| max_tokens: 2048, | |
| max_input_tokens: 30720, | |
| max_output_tokens: 2048, | |
| input_cost_per_token: 3.5e-7, | |
| input_cost_per_token_above_128k_tokens: 7e-7, | |
| output_cost_per_token: 1.05e-6, | |
| output_cost_per_token_above_128k_tokens: 2.1e-6, | |
| litellm_provider: "gemini", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_vision: true, | |
| rpd: 30000, | |
| tpm: 120000, | |
| rpm: 360, | |
| source: | |
| "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", | |
| }, | |
| "gemini/gemini-gemma-2-27b-it": { | |
| max_tokens: 8192, | |
| max_output_tokens: 8192, | |
| input_cost_per_token: 3.5e-7, | |
| output_cost_per_token: 1.05e-6, | |
| litellm_provider: "gemini", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_vision: true, | |
| source: | |
| "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", | |
| }, | |
| "gemini/gemini-gemma-2-9b-it": { | |
| max_tokens: 8192, | |
| max_output_tokens: 8192, | |
| input_cost_per_token: 3.5e-7, | |
| output_cost_per_token: 1.05e-6, | |
| litellm_provider: "gemini", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_vision: true, | |
| source: | |
| "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", | |
| }, | |
| "command-r": { | |
| max_tokens: 4096, | |
| max_input_tokens: 128000, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 1.5e-7, | |
| output_cost_per_token: 6e-7, | |
| litellm_provider: "cohere_chat", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| }, | |
| "command-r-08-2024": { | |
| max_tokens: 4096, | |
| max_input_tokens: 128000, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 1.5e-7, | |
| output_cost_per_token: 6e-7, | |
| litellm_provider: "cohere_chat", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| }, | |
| "command-light": { | |
| max_tokens: 4096, | |
| max_input_tokens: 4096, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 3e-7, | |
| output_cost_per_token: 6e-7, | |
| litellm_provider: "cohere_chat", | |
| mode: "chat", | |
| }, | |
| "command-r-plus": { | |
| max_tokens: 4096, | |
| max_input_tokens: 128000, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 2.5e-6, | |
| output_cost_per_token: 1e-5, | |
| litellm_provider: "cohere_chat", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| }, | |
| "command-r-plus-08-2024": { | |
| max_tokens: 4096, | |
| max_input_tokens: 128000, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 2.5e-6, | |
| output_cost_per_token: 1e-5, | |
| litellm_provider: "cohere_chat", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| }, | |
| "command-nightly": { | |
| max_tokens: 4096, | |
| max_input_tokens: 4096, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 1e-6, | |
| output_cost_per_token: 2e-6, | |
| litellm_provider: "cohere", | |
| mode: "completion", | |
| }, | |
| command: { | |
| max_tokens: 4096, | |
| max_input_tokens: 4096, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 1e-6, | |
| output_cost_per_token: 2e-6, | |
| litellm_provider: "cohere", | |
| mode: "completion", | |
| }, | |
| "rerank-english-v3.0": { | |
| max_tokens: 4096, | |
| max_input_tokens: 4096, | |
| max_output_tokens: 4096, | |
| max_query_tokens: 2048, | |
| input_cost_per_token: 0.0, | |
| input_cost_per_query: 0.002, | |
| output_cost_per_token: 0.0, | |
| litellm_provider: "cohere", | |
| mode: "rerank", | |
| }, | |
| "rerank-multilingual-v3.0": { | |
| max_tokens: 4096, | |
| max_input_tokens: 4096, | |
| max_output_tokens: 4096, | |
| max_query_tokens: 2048, | |
| input_cost_per_token: 0.0, | |
| input_cost_per_query: 0.002, | |
| output_cost_per_token: 0.0, | |
| litellm_provider: "cohere", | |
| mode: "rerank", | |
| }, | |
| "rerank-english-v2.0": { | |
| max_tokens: 4096, | |
| max_input_tokens: 4096, | |
| max_output_tokens: 4096, | |
| max_query_tokens: 2048, | |
| input_cost_per_token: 0.0, | |
| input_cost_per_query: 0.002, | |
| output_cost_per_token: 0.0, | |
| litellm_provider: "cohere", | |
| mode: "rerank", | |
| }, | |
| "rerank-multilingual-v2.0": { | |
| max_tokens: 4096, | |
| max_input_tokens: 4096, | |
| max_output_tokens: 4096, | |
| max_query_tokens: 2048, | |
| input_cost_per_token: 0.0, | |
| input_cost_per_query: 0.002, | |
| output_cost_per_token: 0.0, | |
| litellm_provider: "cohere", | |
| mode: "rerank", | |
| }, | |
| "embed-english-v3.0": { | |
| max_tokens: 1024, | |
| max_input_tokens: 1024, | |
| input_cost_per_token: 1e-7, | |
| input_cost_per_image: 0.0001, | |
| output_cost_per_token: 0.0, | |
| litellm_provider: "cohere", | |
| mode: "embedding", | |
| supports_image_input: true, | |
| supports_embedding_image_input: true, | |
| metadata: { | |
| notes: | |
| "'supports_image_input' is a deprecated field. Use 'supports_embedding_image_input' instead.", | |
| }, | |
| }, | |
| "embed-english-light-v3.0": { | |
| max_tokens: 1024, | |
| max_input_tokens: 1024, | |
| input_cost_per_token: 1e-7, | |
| output_cost_per_token: 0.0, | |
| litellm_provider: "cohere", | |
| mode: "embedding", | |
| }, | |
| "embed-multilingual-v3.0": { | |
| max_tokens: 1024, | |
| max_input_tokens: 1024, | |
| input_cost_per_token: 1e-7, | |
| output_cost_per_token: 0.0, | |
| litellm_provider: "cohere", | |
| mode: "embedding", | |
| }, | |
| "embed-english-v2.0": { | |
| max_tokens: 4096, | |
| max_input_tokens: 4096, | |
| input_cost_per_token: 1e-7, | |
| output_cost_per_token: 0.0, | |
| litellm_provider: "cohere", | |
| mode: "embedding", | |
| }, | |
| "embed-english-light-v2.0": { | |
| max_tokens: 1024, | |
| max_input_tokens: 1024, | |
| input_cost_per_token: 1e-7, | |
| output_cost_per_token: 0.0, | |
| litellm_provider: "cohere", | |
| mode: "embedding", | |
| }, | |
| "embed-multilingual-v2.0": { | |
| max_tokens: 768, | |
| max_input_tokens: 768, | |
| input_cost_per_token: 1e-7, | |
| output_cost_per_token: 0.0, | |
| litellm_provider: "cohere", | |
| mode: "embedding", | |
| }, | |
| "replicate/meta/llama-2-13b": { | |
| max_tokens: 4096, | |
| max_input_tokens: 4096, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 1e-7, | |
| output_cost_per_token: 5e-7, | |
| litellm_provider: "replicate", | |
| mode: "chat", | |
| }, | |
| "replicate/meta/llama-2-13b-chat": { | |
| max_tokens: 4096, | |
| max_input_tokens: 4096, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 1e-7, | |
| output_cost_per_token: 5e-7, | |
| litellm_provider: "replicate", | |
| mode: "chat", | |
| }, | |
| "replicate/meta/llama-2-70b": { | |
| max_tokens: 4096, | |
| max_input_tokens: 4096, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 6.5e-7, | |
| output_cost_per_token: 2.75e-6, | |
| litellm_provider: "replicate", | |
| mode: "chat", | |
| }, | |
| "replicate/meta/llama-2-70b-chat": { | |
| max_tokens: 4096, | |
| max_input_tokens: 4096, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 6.5e-7, | |
| output_cost_per_token: 2.75e-6, | |
| litellm_provider: "replicate", | |
| mode: "chat", | |
| }, | |
| "replicate/meta/llama-2-7b": { | |
| max_tokens: 4096, | |
| max_input_tokens: 4096, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 5e-8, | |
| output_cost_per_token: 2.5e-7, | |
| litellm_provider: "replicate", | |
| mode: "chat", | |
| }, | |
| "replicate/meta/llama-2-7b-chat": { | |
| max_tokens: 4096, | |
| max_input_tokens: 4096, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 5e-8, | |
| output_cost_per_token: 2.5e-7, | |
| litellm_provider: "replicate", | |
| mode: "chat", | |
| }, | |
| "replicate/meta/llama-3-70b": { | |
| max_tokens: 8192, | |
| max_input_tokens: 8192, | |
| max_output_tokens: 8192, | |
| input_cost_per_token: 6.5e-7, | |
| output_cost_per_token: 2.75e-6, | |
| litellm_provider: "replicate", | |
| mode: "chat", | |
| }, | |
| "replicate/meta/llama-3-70b-instruct": { | |
| max_tokens: 8192, | |
| max_input_tokens: 8192, | |
| max_output_tokens: 8192, | |
| input_cost_per_token: 6.5e-7, | |
| output_cost_per_token: 2.75e-6, | |
| litellm_provider: "replicate", | |
| mode: "chat", | |
| }, | |
| "replicate/meta/llama-3-8b": { | |
| max_tokens: 8086, | |
| max_input_tokens: 8086, | |
| max_output_tokens: 8086, | |
| input_cost_per_token: 5e-8, | |
| output_cost_per_token: 2.5e-7, | |
| litellm_provider: "replicate", | |
| mode: "chat", | |
| }, | |
| "replicate/meta/llama-3-8b-instruct": { | |
| max_tokens: 8086, | |
| max_input_tokens: 8086, | |
| max_output_tokens: 8086, | |
| input_cost_per_token: 5e-8, | |
| output_cost_per_token: 2.5e-7, | |
| litellm_provider: "replicate", | |
| mode: "chat", | |
| }, | |
| "replicate/mistralai/mistral-7b-v0.1": { | |
| max_tokens: 4096, | |
| max_input_tokens: 4096, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 5e-8, | |
| output_cost_per_token: 2.5e-7, | |
| litellm_provider: "replicate", | |
| mode: "chat", | |
| }, | |
| "replicate/mistralai/mistral-7b-instruct-v0.2": { | |
| max_tokens: 4096, | |
| max_input_tokens: 4096, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 5e-8, | |
| output_cost_per_token: 2.5e-7, | |
| litellm_provider: "replicate", | |
| mode: "chat", | |
| }, | |
| "replicate/mistralai/mixtral-8x7b-instruct-v0.1": { | |
| max_tokens: 4096, | |
| max_input_tokens: 4096, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 3e-7, | |
| output_cost_per_token: 1e-6, | |
| litellm_provider: "replicate", | |
| mode: "chat", | |
| }, | |
| "openrouter/deepseek/deepseek-coder": { | |
| max_tokens: 8192, | |
| max_input_tokens: 66000, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 1.4e-7, | |
| output_cost_per_token: 2.8e-7, | |
| litellm_provider: "openrouter", | |
| supports_prompt_caching: true, | |
| mode: "chat", | |
| }, | |
| "openrouter/microsoft/wizardlm-2-8x22b:nitro": { | |
| max_tokens: 65536, | |
| input_cost_per_token: 1e-6, | |
| output_cost_per_token: 1e-6, | |
| litellm_provider: "openrouter", | |
| mode: "chat", | |
| }, | |
| "openrouter/google/gemini-pro-1.5": { | |
| max_tokens: 8192, | |
| max_input_tokens: 1000000, | |
| max_output_tokens: 8192, | |
| input_cost_per_token: 2.5e-6, | |
| output_cost_per_token: 7.5e-6, | |
| input_cost_per_image: 0.00265, | |
| litellm_provider: "openrouter", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_vision: true, | |
| }, | |
| "openrouter/mistralai/mixtral-8x22b-instruct": { | |
| max_tokens: 65536, | |
| input_cost_per_token: 6.5e-7, | |
| output_cost_per_token: 6.5e-7, | |
| litellm_provider: "openrouter", | |
| mode: "chat", | |
| }, | |
| "openrouter/cohere/command-r-plus": { | |
| max_tokens: 128000, | |
| input_cost_per_token: 3e-6, | |
| output_cost_per_token: 1.5e-5, | |
| litellm_provider: "openrouter", | |
| mode: "chat", | |
| }, | |
| "openrouter/databricks/dbrx-instruct": { | |
| max_tokens: 32768, | |
| input_cost_per_token: 6e-7, | |
| output_cost_per_token: 6e-7, | |
| litellm_provider: "openrouter", | |
| mode: "chat", | |
| }, | |
| "openrouter/anthropic/claude-3-haiku": { | |
| max_tokens: 200000, | |
| input_cost_per_token: 2.5e-7, | |
| output_cost_per_token: 1.25e-6, | |
| input_cost_per_image: 0.0004, | |
| litellm_provider: "openrouter", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_vision: true, | |
| }, | |
| "openrouter/anthropic/claude-3-haiku-20240307": { | |
| max_tokens: 4096, | |
| max_input_tokens: 200000, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 2.5e-7, | |
| output_cost_per_token: 1.25e-6, | |
| litellm_provider: "openrouter", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_vision: true, | |
| tool_use_system_prompt_tokens: 264, | |
| }, | |
| "anthropic/claude-3-5-sonnet-20241022": { | |
| max_tokens: 8192, | |
| max_input_tokens: 200000, | |
| max_output_tokens: 8192, | |
| input_cost_per_token: 3e-6, | |
| output_cost_per_token: 1.5e-5, | |
| cache_creation_input_token_cost: 3.75e-6, | |
| cache_read_input_token_cost: 3e-7, | |
| litellm_provider: "anthropic", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_vision: true, | |
| tool_use_system_prompt_tokens: 159, | |
| supports_assistant_prefill: true, | |
| supports_prompt_caching: true, | |
| }, | |
| "anthropic/claude-3-5-sonnet-latest": { | |
| max_tokens: 8192, | |
| max_input_tokens: 200000, | |
| max_output_tokens: 8192, | |
| input_cost_per_token: 3e-6, | |
| output_cost_per_token: 1.5e-5, | |
| cache_creation_input_token_cost: 3.75e-6, | |
| cache_read_input_token_cost: 3e-7, | |
| litellm_provider: "anthropic", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_vision: true, | |
| tool_use_system_prompt_tokens: 159, | |
| supports_assistant_prefill: true, | |
| supports_prompt_caching: true, | |
| }, | |
| "openrouter/anthropic/claude-3.5-sonnet": { | |
| max_tokens: 8192, | |
| max_input_tokens: 200000, | |
| max_output_tokens: 8192, | |
| input_cost_per_token: 3e-6, | |
| output_cost_per_token: 1.5e-5, | |
| litellm_provider: "openrouter", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_vision: true, | |
| tool_use_system_prompt_tokens: 159, | |
| supports_assistant_prefill: true, | |
| }, | |
| "openrouter/anthropic/claude-3.5-sonnet:beta": { | |
| max_tokens: 8192, | |
| max_input_tokens: 200000, | |
| max_output_tokens: 8192, | |
| input_cost_per_token: 3e-6, | |
| output_cost_per_token: 1.5e-5, | |
| litellm_provider: "openrouter", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_vision: true, | |
| tool_use_system_prompt_tokens: 159, | |
| }, | |
| "openrouter/anthropic/claude-3-sonnet": { | |
| max_tokens: 200000, | |
| input_cost_per_token: 3e-6, | |
| output_cost_per_token: 1.5e-5, | |
| input_cost_per_image: 0.0048, | |
| litellm_provider: "openrouter", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_vision: true, | |
| }, | |
| "openrouter/mistralai/mistral-large": { | |
| max_tokens: 32000, | |
| input_cost_per_token: 8e-6, | |
| output_cost_per_token: 2.4e-5, | |
| litellm_provider: "openrouter", | |
| mode: "chat", | |
| }, | |
| "openrouter/cognitivecomputations/dolphin-mixtral-8x7b": { | |
| max_tokens: 32769, | |
| input_cost_per_token: 5e-7, | |
| output_cost_per_token: 5e-7, | |
| litellm_provider: "openrouter", | |
| mode: "chat", | |
| }, | |
| "openrouter/google/gemini-pro-vision": { | |
| max_tokens: 45875, | |
| input_cost_per_token: 1.25e-7, | |
| output_cost_per_token: 3.75e-7, | |
| input_cost_per_image: 0.0025, | |
| litellm_provider: "openrouter", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_vision: true, | |
| }, | |
| "openrouter/fireworks/firellava-13b": { | |
| max_tokens: 4096, | |
| input_cost_per_token: 2e-7, | |
| output_cost_per_token: 2e-7, | |
| litellm_provider: "openrouter", | |
| mode: "chat", | |
| }, | |
| "openrouter/meta-llama/llama-3-8b-instruct:free": { | |
| max_tokens: 8192, | |
| input_cost_per_token: 0.0, | |
| output_cost_per_token: 0.0, | |
| litellm_provider: "openrouter", | |
| mode: "chat", | |
| }, | |
| "openrouter/meta-llama/llama-3-8b-instruct:extended": { | |
| max_tokens: 16384, | |
| input_cost_per_token: 2.25e-7, | |
| output_cost_per_token: 2.25e-6, | |
| litellm_provider: "openrouter", | |
| mode: "chat", | |
| }, | |
| "openrouter/meta-llama/llama-3-70b-instruct:nitro": { | |
| max_tokens: 8192, | |
| input_cost_per_token: 9e-7, | |
| output_cost_per_token: 9e-7, | |
| litellm_provider: "openrouter", | |
| mode: "chat", | |
| }, | |
| "openrouter/meta-llama/llama-3-70b-instruct": { | |
| max_tokens: 8192, | |
| input_cost_per_token: 5.9e-7, | |
| output_cost_per_token: 7.9e-7, | |
| litellm_provider: "openrouter", | |
| mode: "chat", | |
| }, | |
| "openrouter/openai/o1-mini": { | |
| max_tokens: 65536, | |
| max_input_tokens: 128000, | |
| max_output_tokens: 65536, | |
| input_cost_per_token: 3e-6, | |
| output_cost_per_token: 1.2e-5, | |
| litellm_provider: "openrouter", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_parallel_function_calling: true, | |
| supports_vision: false, | |
| }, | |
| "openrouter/openai/o1-mini-2024-09-12": { | |
| max_tokens: 65536, | |
| max_input_tokens: 128000, | |
| max_output_tokens: 65536, | |
| input_cost_per_token: 3e-6, | |
| output_cost_per_token: 1.2e-5, | |
| litellm_provider: "openrouter", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_parallel_function_calling: true, | |
| supports_vision: false, | |
| }, | |
| "openrouter/openai/o1-preview": { | |
| max_tokens: 32768, | |
| max_input_tokens: 128000, | |
| max_output_tokens: 32768, | |
| input_cost_per_token: 1.5e-5, | |
| output_cost_per_token: 6e-5, | |
| litellm_provider: "openrouter", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_parallel_function_calling: true, | |
| supports_vision: false, | |
| }, | |
| "openrouter/openai/o1-preview-2024-09-12": { | |
| max_tokens: 32768, | |
| max_input_tokens: 128000, | |
| max_output_tokens: 32768, | |
| input_cost_per_token: 1.5e-5, | |
| output_cost_per_token: 6e-5, | |
| litellm_provider: "openrouter", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_parallel_function_calling: true, | |
| supports_vision: false, | |
| }, | |
| "openrouter/openai/gpt-4o": { | |
| max_tokens: 4096, | |
| max_input_tokens: 128000, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 5e-6, | |
| output_cost_per_token: 1.5e-5, | |
| litellm_provider: "openrouter", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_parallel_function_calling: true, | |
| supports_vision: true, | |
| }, | |
| "openrouter/openai/gpt-4o-2024-05-13": { | |
| max_tokens: 4096, | |
| max_input_tokens: 128000, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 5e-6, | |
| output_cost_per_token: 1.5e-5, | |
| litellm_provider: "openrouter", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_parallel_function_calling: true, | |
| supports_vision: true, | |
| }, | |
| "openrouter/openai/gpt-4-vision-preview": { | |
| max_tokens: 130000, | |
| input_cost_per_token: 1e-5, | |
| output_cost_per_token: 3e-5, | |
| input_cost_per_image: 0.01445, | |
| litellm_provider: "openrouter", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_vision: true, | |
| }, | |
| "openrouter/openai/gpt-3.5-turbo": { | |
| max_tokens: 4095, | |
| input_cost_per_token: 1.5e-6, | |
| output_cost_per_token: 2e-6, | |
| litellm_provider: "openrouter", | |
| mode: "chat", | |
| }, | |
| "openrouter/openai/gpt-3.5-turbo-16k": { | |
| max_tokens: 16383, | |
| input_cost_per_token: 3e-6, | |
| output_cost_per_token: 4e-6, | |
| litellm_provider: "openrouter", | |
| mode: "chat", | |
| }, | |
| "openrouter/openai/gpt-4": { | |
| max_tokens: 8192, | |
| input_cost_per_token: 3e-5, | |
| output_cost_per_token: 6e-5, | |
| litellm_provider: "openrouter", | |
| mode: "chat", | |
| }, | |
| "openrouter/anthropic/claude-instant-v1": { | |
| max_tokens: 100000, | |
| max_output_tokens: 8191, | |
| input_cost_per_token: 1.63e-6, | |
| output_cost_per_token: 5.51e-6, | |
| litellm_provider: "openrouter", | |
| mode: "chat", | |
| }, | |
| "openrouter/anthropic/claude-2": { | |
| max_tokens: 100000, | |
| max_output_tokens: 8191, | |
| input_cost_per_token: 1.102e-5, | |
| output_cost_per_token: 3.268e-5, | |
| litellm_provider: "openrouter", | |
| mode: "chat", | |
| }, | |
| "openrouter/anthropic/claude-3-opus": { | |
| max_tokens: 4096, | |
| max_input_tokens: 200000, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 1.5e-5, | |
| output_cost_per_token: 7.5e-5, | |
| litellm_provider: "openrouter", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_vision: true, | |
| tool_use_system_prompt_tokens: 395, | |
| }, | |
| "openrouter/google/palm-2-chat-bison": { | |
| max_tokens: 25804, | |
| input_cost_per_token: 5e-7, | |
| output_cost_per_token: 5e-7, | |
| litellm_provider: "openrouter", | |
| mode: "chat", | |
| }, | |
| "openrouter/google/palm-2-codechat-bison": { | |
| max_tokens: 20070, | |
| input_cost_per_token: 5e-7, | |
| output_cost_per_token: 5e-7, | |
| litellm_provider: "openrouter", | |
| mode: "chat", | |
| }, | |
| "openrouter/meta-llama/llama-2-13b-chat": { | |
| max_tokens: 4096, | |
| input_cost_per_token: 2e-7, | |
| output_cost_per_token: 2e-7, | |
| litellm_provider: "openrouter", | |
| mode: "chat", | |
| }, | |
| "openrouter/meta-llama/llama-2-70b-chat": { | |
| max_tokens: 4096, | |
| input_cost_per_token: 1.5e-6, | |
| output_cost_per_token: 1.5e-6, | |
| litellm_provider: "openrouter", | |
| mode: "chat", | |
| }, | |
| "openrouter/meta-llama/codellama-34b-instruct": { | |
| max_tokens: 8192, | |
| input_cost_per_token: 5e-7, | |
| output_cost_per_token: 5e-7, | |
| litellm_provider: "openrouter", | |
| mode: "chat", | |
| }, | |
| "openrouter/nousresearch/nous-hermes-llama2-13b": { | |
| max_tokens: 4096, | |
| input_cost_per_token: 2e-7, | |
| output_cost_per_token: 2e-7, | |
| litellm_provider: "openrouter", | |
| mode: "chat", | |
| }, | |
| "openrouter/mancer/weaver": { | |
| max_tokens: 8000, | |
| input_cost_per_token: 5.625e-6, | |
| output_cost_per_token: 5.625e-6, | |
| litellm_provider: "openrouter", | |
| mode: "chat", | |
| }, | |
| "openrouter/gryphe/mythomax-l2-13b": { | |
| max_tokens: 8192, | |
| input_cost_per_token: 1.875e-6, | |
| output_cost_per_token: 1.875e-6, | |
| litellm_provider: "openrouter", | |
| mode: "chat", | |
| }, | |
| "openrouter/jondurbin/airoboros-l2-70b-2.1": { | |
| max_tokens: 4096, | |
| input_cost_per_token: 1.3875e-5, | |
| output_cost_per_token: 1.3875e-5, | |
| litellm_provider: "openrouter", | |
| mode: "chat", | |
| }, | |
| "openrouter/undi95/remm-slerp-l2-13b": { | |
| max_tokens: 6144, | |
| input_cost_per_token: 1.875e-6, | |
| output_cost_per_token: 1.875e-6, | |
| litellm_provider: "openrouter", | |
| mode: "chat", | |
| }, | |
| "openrouter/pygmalionai/mythalion-13b": { | |
| max_tokens: 4096, | |
| input_cost_per_token: 1.875e-6, | |
| output_cost_per_token: 1.875e-6, | |
| litellm_provider: "openrouter", | |
| mode: "chat", | |
| }, | |
| "openrouter/mistralai/mistral-7b-instruct": { | |
| max_tokens: 8192, | |
| input_cost_per_token: 1.3e-7, | |
| output_cost_per_token: 1.3e-7, | |
| litellm_provider: "openrouter", | |
| mode: "chat", | |
| }, | |
| "openrouter/mistralai/mistral-7b-instruct:free": { | |
| max_tokens: 8192, | |
| input_cost_per_token: 0.0, | |
| output_cost_per_token: 0.0, | |
| litellm_provider: "openrouter", | |
| mode: "chat", | |
| }, | |
| "j2-ultra": { | |
| max_tokens: 8192, | |
| max_input_tokens: 8192, | |
| max_output_tokens: 8192, | |
| input_cost_per_token: 1.5e-5, | |
| output_cost_per_token: 1.5e-5, | |
| litellm_provider: "ai21", | |
| mode: "completion", | |
| }, | |
| "jamba-1.5-mini@001": { | |
| max_tokens: 256000, | |
| max_input_tokens: 256000, | |
| max_output_tokens: 256000, | |
| input_cost_per_token: 2e-7, | |
| output_cost_per_token: 4e-7, | |
| litellm_provider: "ai21", | |
| mode: "chat", | |
| }, | |
| "jamba-1.5-large@001": { | |
| max_tokens: 256000, | |
| max_input_tokens: 256000, | |
| max_output_tokens: 256000, | |
| input_cost_per_token: 2e-6, | |
| output_cost_per_token: 8e-6, | |
| litellm_provider: "ai21", | |
| mode: "chat", | |
| }, | |
| "jamba-1.5": { | |
| max_tokens: 256000, | |
| max_input_tokens: 256000, | |
| max_output_tokens: 256000, | |
| input_cost_per_token: 2e-7, | |
| output_cost_per_token: 4e-7, | |
| litellm_provider: "ai21", | |
| mode: "chat", | |
| }, | |
| "jamba-1.5-mini": { | |
| max_tokens: 256000, | |
| max_input_tokens: 256000, | |
| max_output_tokens: 256000, | |
| input_cost_per_token: 2e-7, | |
| output_cost_per_token: 4e-7, | |
| litellm_provider: "ai21", | |
| mode: "chat", | |
| }, | |
| "jamba-1.5-large": { | |
| max_tokens: 256000, | |
| max_input_tokens: 256000, | |
| max_output_tokens: 256000, | |
| input_cost_per_token: 2e-6, | |
| output_cost_per_token: 8e-6, | |
| litellm_provider: "ai21", | |
| mode: "chat", | |
| }, | |
| "j2-mid": { | |
| max_tokens: 8192, | |
| max_input_tokens: 8192, | |
| max_output_tokens: 8192, | |
| input_cost_per_token: 1e-5, | |
| output_cost_per_token: 1e-5, | |
| litellm_provider: "ai21", | |
| mode: "completion", | |
| }, | |
| "j2-light": { | |
| max_tokens: 8192, | |
| max_input_tokens: 8192, | |
| max_output_tokens: 8192, | |
| input_cost_per_token: 3e-6, | |
| output_cost_per_token: 3e-6, | |
| litellm_provider: "ai21", | |
| mode: "completion", | |
| }, | |
| dolphin: { | |
| max_tokens: 16384, | |
| max_input_tokens: 16384, | |
| max_output_tokens: 16384, | |
| input_cost_per_token: 5e-7, | |
| output_cost_per_token: 5e-7, | |
| litellm_provider: "nlp_cloud", | |
| mode: "completion", | |
| }, | |
| chatdolphin: { | |
| max_tokens: 16384, | |
| max_input_tokens: 16384, | |
| max_output_tokens: 16384, | |
| input_cost_per_token: 5e-7, | |
| output_cost_per_token: 5e-7, | |
| litellm_provider: "nlp_cloud", | |
| mode: "chat", | |
| }, | |
| "luminous-base": { | |
| max_tokens: 2048, | |
| input_cost_per_token: 3e-5, | |
| output_cost_per_token: 3.3e-5, | |
| litellm_provider: "aleph_alpha", | |
| mode: "completion", | |
| }, | |
| "luminous-base-control": { | |
| max_tokens: 2048, | |
| input_cost_per_token: 3.75e-5, | |
| output_cost_per_token: 4.125e-5, | |
| litellm_provider: "aleph_alpha", | |
| mode: "chat", | |
| }, | |
| "luminous-extended": { | |
| max_tokens: 2048, | |
| input_cost_per_token: 4.5e-5, | |
| output_cost_per_token: 4.95e-5, | |
| litellm_provider: "aleph_alpha", | |
| mode: "completion", | |
| }, | |
| "luminous-extended-control": { | |
| max_tokens: 2048, | |
| input_cost_per_token: 5.625e-5, | |
| output_cost_per_token: 6.1875e-5, | |
| litellm_provider: "aleph_alpha", | |
| mode: "chat", | |
| }, | |
| "luminous-supreme": { | |
| max_tokens: 2048, | |
| input_cost_per_token: 0.000175, | |
| output_cost_per_token: 0.0001925, | |
| litellm_provider: "aleph_alpha", | |
| mode: "completion", | |
| }, | |
| "luminous-supreme-control": { | |
| max_tokens: 2048, | |
| input_cost_per_token: 0.00021875, | |
| output_cost_per_token: 0.000240625, | |
| litellm_provider: "aleph_alpha", | |
| mode: "chat", | |
| }, | |
| "ai21.j2-mid-v1": { | |
| max_tokens: 8191, | |
| max_input_tokens: 8191, | |
| max_output_tokens: 8191, | |
| input_cost_per_token: 1.25e-5, | |
| output_cost_per_token: 1.25e-5, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| }, | |
| "ai21.j2-ultra-v1": { | |
| max_tokens: 8191, | |
| max_input_tokens: 8191, | |
| max_output_tokens: 8191, | |
| input_cost_per_token: 1.88e-5, | |
| output_cost_per_token: 1.88e-5, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| }, | |
| "ai21.jamba-instruct-v1:0": { | |
| max_tokens: 4096, | |
| max_input_tokens: 70000, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 5e-7, | |
| output_cost_per_token: 7e-7, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| supports_system_messages: true, | |
| }, | |
| "amazon.titan-text-lite-v1": { | |
| max_tokens: 4000, | |
| max_input_tokens: 42000, | |
| max_output_tokens: 4000, | |
| input_cost_per_token: 3e-7, | |
| output_cost_per_token: 4e-7, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| }, | |
| "amazon.titan-text-express-v1": { | |
| max_tokens: 8000, | |
| max_input_tokens: 42000, | |
| max_output_tokens: 8000, | |
| input_cost_per_token: 1.3e-6, | |
| output_cost_per_token: 1.7e-6, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| }, | |
| "amazon.titan-text-premier-v1:0": { | |
| max_tokens: 32000, | |
| max_input_tokens: 42000, | |
| max_output_tokens: 32000, | |
| input_cost_per_token: 5e-7, | |
| output_cost_per_token: 1.5e-6, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| }, | |
| "amazon.titan-embed-text-v1": { | |
| max_tokens: 8192, | |
| max_input_tokens: 8192, | |
| output_vector_size: 1536, | |
| input_cost_per_token: 1e-7, | |
| output_cost_per_token: 0.0, | |
| litellm_provider: "bedrock", | |
| mode: "embedding", | |
| }, | |
| "amazon.titan-embed-text-v2:0": { | |
| max_tokens: 8192, | |
| max_input_tokens: 8192, | |
| output_vector_size: 1024, | |
| input_cost_per_token: 2e-7, | |
| output_cost_per_token: 0.0, | |
| litellm_provider: "bedrock", | |
| mode: "embedding", | |
| }, | |
| "mistral.mistral-7b-instruct-v0:2": { | |
| max_tokens: 8191, | |
| max_input_tokens: 32000, | |
| max_output_tokens: 8191, | |
| input_cost_per_token: 1.5e-7, | |
| output_cost_per_token: 2e-7, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| }, | |
| "mistral.mixtral-8x7b-instruct-v0:1": { | |
| max_tokens: 8191, | |
| max_input_tokens: 32000, | |
| max_output_tokens: 8191, | |
| input_cost_per_token: 4.5e-7, | |
| output_cost_per_token: 7e-7, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| }, | |
| "mistral.mistral-large-2402-v1:0": { | |
| max_tokens: 8191, | |
| max_input_tokens: 32000, | |
| max_output_tokens: 8191, | |
| input_cost_per_token: 8e-6, | |
| output_cost_per_token: 2.4e-5, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| }, | |
| "mistral.mistral-large-2407-v1:0": { | |
| max_tokens: 8191, | |
| max_input_tokens: 128000, | |
| max_output_tokens: 8191, | |
| input_cost_per_token: 3e-6, | |
| output_cost_per_token: 9e-6, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| }, | |
| "mistral.mistral-small-2402-v1:0": { | |
| max_tokens: 8191, | |
| max_input_tokens: 32000, | |
| max_output_tokens: 8191, | |
| input_cost_per_token: 1e-6, | |
| output_cost_per_token: 3e-6, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| }, | |
| "bedrock/us-west-2/mistral.mixtral-8x7b-instruct-v0:1": { | |
| max_tokens: 8191, | |
| max_input_tokens: 32000, | |
| max_output_tokens: 8191, | |
| input_cost_per_token: 4.5e-7, | |
| output_cost_per_token: 7e-7, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| }, | |
| "bedrock/us-east-1/mistral.mixtral-8x7b-instruct-v0:1": { | |
| max_tokens: 8191, | |
| max_input_tokens: 32000, | |
| max_output_tokens: 8191, | |
| input_cost_per_token: 4.5e-7, | |
| output_cost_per_token: 7e-7, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| }, | |
| "bedrock/eu-west-3/mistral.mixtral-8x7b-instruct-v0:1": { | |
| max_tokens: 8191, | |
| max_input_tokens: 32000, | |
| max_output_tokens: 8191, | |
| input_cost_per_token: 5.9e-7, | |
| output_cost_per_token: 9.1e-7, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| }, | |
| "bedrock/us-west-2/mistral.mistral-7b-instruct-v0:2": { | |
| max_tokens: 8191, | |
| max_input_tokens: 32000, | |
| max_output_tokens: 8191, | |
| input_cost_per_token: 1.5e-7, | |
| output_cost_per_token: 2e-7, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| }, | |
| "bedrock/us-east-1/mistral.mistral-7b-instruct-v0:2": { | |
| max_tokens: 8191, | |
| max_input_tokens: 32000, | |
| max_output_tokens: 8191, | |
| input_cost_per_token: 1.5e-7, | |
| output_cost_per_token: 2e-7, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| }, | |
| "bedrock/eu-west-3/mistral.mistral-7b-instruct-v0:2": { | |
| max_tokens: 8191, | |
| max_input_tokens: 32000, | |
| max_output_tokens: 8191, | |
| input_cost_per_token: 2e-7, | |
| output_cost_per_token: 2.6e-7, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| }, | |
| "bedrock/us-east-1/mistral.mistral-large-2402-v1:0": { | |
| max_tokens: 8191, | |
| max_input_tokens: 32000, | |
| max_output_tokens: 8191, | |
| input_cost_per_token: 8e-6, | |
| output_cost_per_token: 2.4e-5, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| }, | |
| "bedrock/us-west-2/mistral.mistral-large-2402-v1:0": { | |
| max_tokens: 8191, | |
| max_input_tokens: 32000, | |
| max_output_tokens: 8191, | |
| input_cost_per_token: 8e-6, | |
| output_cost_per_token: 2.4e-5, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| }, | |
| "bedrock/eu-west-3/mistral.mistral-large-2402-v1:0": { | |
| max_tokens: 8191, | |
| max_input_tokens: 32000, | |
| max_output_tokens: 8191, | |
| input_cost_per_token: 1.04e-5, | |
| output_cost_per_token: 3.12e-5, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| }, | |
| "anthropic.claude-3-sonnet-20240229-v1:0": { | |
| max_tokens: 4096, | |
| max_input_tokens: 200000, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 3e-6, | |
| output_cost_per_token: 1.5e-5, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_vision: true, | |
| }, | |
| "anthropic.claude-3-5-sonnet-20240620-v1:0": { | |
| max_tokens: 4096, | |
| max_input_tokens: 200000, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 3e-6, | |
| output_cost_per_token: 1.5e-5, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_vision: true, | |
| }, | |
| "anthropic.claude-3-5-sonnet-20241022-v2:0": { | |
| max_tokens: 8192, | |
| max_input_tokens: 200000, | |
| max_output_tokens: 8192, | |
| input_cost_per_token: 3e-6, | |
| output_cost_per_token: 1.5e-5, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_vision: true, | |
| supports_assistant_prefill: true, | |
| supports_prompt_caching: true, | |
| }, | |
| "anthropic.claude-3-5-sonnet-latest-v2:0": { | |
| max_tokens: 4096, | |
| max_input_tokens: 200000, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 3e-6, | |
| output_cost_per_token: 1.5e-5, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_vision: true, | |
| }, | |
| "anthropic.claude-3-haiku-20240307-v1:0": { | |
| max_tokens: 4096, | |
| max_input_tokens: 200000, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 2.5e-7, | |
| output_cost_per_token: 1.25e-6, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_vision: true, | |
| }, | |
| "anthropic.claude-3-opus-20240229-v1:0": { | |
| max_tokens: 4096, | |
| max_input_tokens: 200000, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 1.5e-5, | |
| output_cost_per_token: 7.5e-5, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_vision: true, | |
| }, | |
| "us.anthropic.claude-3-sonnet-20240229-v1:0": { | |
| max_tokens: 4096, | |
| max_input_tokens: 200000, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 3e-6, | |
| output_cost_per_token: 1.5e-5, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_vision: true, | |
| }, | |
| "us.anthropic.claude-3-5-sonnet-20240620-v1:0": { | |
| max_tokens: 4096, | |
| max_input_tokens: 200000, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 3e-6, | |
| output_cost_per_token: 1.5e-5, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_vision: true, | |
| }, | |
| "us.anthropic.claude-3-5-sonnet-20241022-v2:0": { | |
| max_tokens: 8192, | |
| max_input_tokens: 200000, | |
| max_output_tokens: 8192, | |
| input_cost_per_token: 3e-6, | |
| output_cost_per_token: 1.5e-5, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_vision: true, | |
| supports_assistant_prefill: true, | |
| }, | |
| "us.anthropic.claude-3-haiku-20240307-v1:0": { | |
| max_tokens: 4096, | |
| max_input_tokens: 200000, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 2.5e-7, | |
| output_cost_per_token: 1.25e-6, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_vision: true, | |
| }, | |
| "us.anthropic.claude-3-opus-20240229-v1:0": { | |
| max_tokens: 4096, | |
| max_input_tokens: 200000, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 1.5e-5, | |
| output_cost_per_token: 7.5e-5, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_vision: true, | |
| }, | |
| "eu.anthropic.claude-3-sonnet-20240229-v1:0": { | |
| max_tokens: 4096, | |
| max_input_tokens: 200000, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 3e-6, | |
| output_cost_per_token: 1.5e-5, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_vision: true, | |
| }, | |
| "eu.anthropic.claude-3-5-sonnet-20240620-v1:0": { | |
| max_tokens: 4096, | |
| max_input_tokens: 200000, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 3e-6, | |
| output_cost_per_token: 1.5e-5, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_vision: true, | |
| }, | |
| "eu.anthropic.claude-3-5-sonnet-20241022-v2:0": { | |
| max_tokens: 8192, | |
| max_input_tokens: 200000, | |
| max_output_tokens: 8192, | |
| input_cost_per_token: 3e-6, | |
| output_cost_per_token: 1.5e-5, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_vision: true, | |
| supports_assistant_prefill: true, | |
| }, | |
| "eu.anthropic.claude-3-haiku-20240307-v1:0": { | |
| max_tokens: 4096, | |
| max_input_tokens: 200000, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 2.5e-7, | |
| output_cost_per_token: 1.25e-6, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_vision: true, | |
| }, | |
| "eu.anthropic.claude-3-opus-20240229-v1:0": { | |
| max_tokens: 4096, | |
| max_input_tokens: 200000, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 1.5e-5, | |
| output_cost_per_token: 7.5e-5, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_vision: true, | |
| }, | |
| "anthropic.claude-v1": { | |
| max_tokens: 8191, | |
| max_input_tokens: 100000, | |
| max_output_tokens: 8191, | |
| input_cost_per_token: 8e-6, | |
| output_cost_per_token: 2.4e-5, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| }, | |
| "bedrock/us-east-1/anthropic.claude-v1": { | |
| max_tokens: 8191, | |
| max_input_tokens: 100000, | |
| max_output_tokens: 8191, | |
| input_cost_per_token: 8e-6, | |
| output_cost_per_token: 2.4e-5, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| }, | |
| "bedrock/us-west-2/anthropic.claude-v1": { | |
| max_tokens: 8191, | |
| max_input_tokens: 100000, | |
| max_output_tokens: 8191, | |
| input_cost_per_token: 8e-6, | |
| output_cost_per_token: 2.4e-5, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| }, | |
| "bedrock/ap-northeast-1/anthropic.claude-v1": { | |
| max_tokens: 8191, | |
| max_input_tokens: 100000, | |
| max_output_tokens: 8191, | |
| input_cost_per_token: 8e-6, | |
| output_cost_per_token: 2.4e-5, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| }, | |
| "bedrock/ap-northeast-1/1-month-commitment/anthropic.claude-v1": { | |
| max_tokens: 8191, | |
| max_input_tokens: 100000, | |
| max_output_tokens: 8191, | |
| input_cost_per_second: 0.0455, | |
| output_cost_per_second: 0.0455, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| }, | |
| "bedrock/ap-northeast-1/6-month-commitment/anthropic.claude-v1": { | |
| max_tokens: 8191, | |
| max_input_tokens: 100000, | |
| max_output_tokens: 8191, | |
| input_cost_per_second: 0.02527, | |
| output_cost_per_second: 0.02527, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| }, | |
| "bedrock/eu-central-1/anthropic.claude-v1": { | |
| max_tokens: 8191, | |
| max_input_tokens: 100000, | |
| max_output_tokens: 8191, | |
| input_cost_per_token: 8e-6, | |
| output_cost_per_token: 2.4e-5, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| }, | |
| "bedrock/eu-central-1/1-month-commitment/anthropic.claude-v1": { | |
| max_tokens: 8191, | |
| max_input_tokens: 100000, | |
| max_output_tokens: 8191, | |
| input_cost_per_second: 0.0415, | |
| output_cost_per_second: 0.0415, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| }, | |
| "bedrock/eu-central-1/6-month-commitment/anthropic.claude-v1": { | |
| max_tokens: 8191, | |
| max_input_tokens: 100000, | |
| max_output_tokens: 8191, | |
| input_cost_per_second: 0.02305, | |
| output_cost_per_second: 0.02305, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| }, | |
| "bedrock/us-east-1/1-month-commitment/anthropic.claude-v1": { | |
| max_tokens: 8191, | |
| max_input_tokens: 100000, | |
| max_output_tokens: 8191, | |
| input_cost_per_second: 0.0175, | |
| output_cost_per_second: 0.0175, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| }, | |
| "bedrock/us-east-1/6-month-commitment/anthropic.claude-v1": { | |
| max_tokens: 8191, | |
| max_input_tokens: 100000, | |
| max_output_tokens: 8191, | |
| input_cost_per_second: 0.00972, | |
| output_cost_per_second: 0.00972, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| }, | |
| "bedrock/us-west-2/1-month-commitment/anthropic.claude-v1": { | |
| max_tokens: 8191, | |
| max_input_tokens: 100000, | |
| max_output_tokens: 8191, | |
| input_cost_per_second: 0.0175, | |
| output_cost_per_second: 0.0175, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| }, | |
| "bedrock/us-west-2/6-month-commitment/anthropic.claude-v1": { | |
| max_tokens: 8191, | |
| max_input_tokens: 100000, | |
| max_output_tokens: 8191, | |
| input_cost_per_second: 0.00972, | |
| output_cost_per_second: 0.00972, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| }, | |
| "anthropic.claude-v2": { | |
| max_tokens: 8191, | |
| max_input_tokens: 100000, | |
| max_output_tokens: 8191, | |
| input_cost_per_token: 8e-6, | |
| output_cost_per_token: 2.4e-5, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| }, | |
| "bedrock/us-east-1/anthropic.claude-v2": { | |
| max_tokens: 8191, | |
| max_input_tokens: 100000, | |
| max_output_tokens: 8191, | |
| input_cost_per_token: 8e-6, | |
| output_cost_per_token: 2.4e-5, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| }, | |
| "bedrock/us-west-2/anthropic.claude-v2": { | |
| max_tokens: 8191, | |
| max_input_tokens: 100000, | |
| max_output_tokens: 8191, | |
| input_cost_per_token: 8e-6, | |
| output_cost_per_token: 2.4e-5, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| }, | |
| "bedrock/ap-northeast-1/anthropic.claude-v2": { | |
| max_tokens: 8191, | |
| max_input_tokens: 100000, | |
| max_output_tokens: 8191, | |
| input_cost_per_token: 8e-6, | |
| output_cost_per_token: 2.4e-5, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| }, | |
| "bedrock/ap-northeast-1/1-month-commitment/anthropic.claude-v2": { | |
| max_tokens: 8191, | |
| max_input_tokens: 100000, | |
| max_output_tokens: 8191, | |
| input_cost_per_second: 0.0455, | |
| output_cost_per_second: 0.0455, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| }, | |
| "bedrock/ap-northeast-1/6-month-commitment/anthropic.claude-v2": { | |
| max_tokens: 8191, | |
| max_input_tokens: 100000, | |
| max_output_tokens: 8191, | |
| input_cost_per_second: 0.02527, | |
| output_cost_per_second: 0.02527, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| }, | |
| "bedrock/eu-central-1/anthropic.claude-v2": { | |
| max_tokens: 8191, | |
| max_input_tokens: 100000, | |
| max_output_tokens: 8191, | |
| input_cost_per_token: 8e-6, | |
| output_cost_per_token: 2.4e-5, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| }, | |
| "bedrock/eu-central-1/1-month-commitment/anthropic.claude-v2": { | |
| max_tokens: 8191, | |
| max_input_tokens: 100000, | |
| max_output_tokens: 8191, | |
| input_cost_per_second: 0.0415, | |
| output_cost_per_second: 0.0415, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| }, | |
| "bedrock/eu-central-1/6-month-commitment/anthropic.claude-v2": { | |
| max_tokens: 8191, | |
| max_input_tokens: 100000, | |
| max_output_tokens: 8191, | |
| input_cost_per_second: 0.02305, | |
| output_cost_per_second: 0.02305, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| }, | |
| "bedrock/us-east-1/1-month-commitment/anthropic.claude-v2": { | |
| max_tokens: 8191, | |
| max_input_tokens: 100000, | |
| max_output_tokens: 8191, | |
| input_cost_per_second: 0.0175, | |
| output_cost_per_second: 0.0175, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| }, | |
| "bedrock/us-east-1/6-month-commitment/anthropic.claude-v2": { | |
| max_tokens: 8191, | |
| max_input_tokens: 100000, | |
| max_output_tokens: 8191, | |
| input_cost_per_second: 0.00972, | |
| output_cost_per_second: 0.00972, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| }, | |
| "bedrock/us-west-2/1-month-commitment/anthropic.claude-v2": { | |
| max_tokens: 8191, | |
| max_input_tokens: 100000, | |
| max_output_tokens: 8191, | |
| input_cost_per_second: 0.0175, | |
| output_cost_per_second: 0.0175, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| }, | |
| "bedrock/us-west-2/6-month-commitment/anthropic.claude-v2": { | |
| max_tokens: 8191, | |
| max_input_tokens: 100000, | |
| max_output_tokens: 8191, | |
| input_cost_per_second: 0.00972, | |
| output_cost_per_second: 0.00972, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| }, | |
| "anthropic.claude-v2:1": { | |
| max_tokens: 8191, | |
| max_input_tokens: 100000, | |
| max_output_tokens: 8191, | |
| input_cost_per_token: 8e-6, | |
| output_cost_per_token: 2.4e-5, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| }, | |
| "bedrock/us-east-1/anthropic.claude-v2:1": { | |
| max_tokens: 8191, | |
| max_input_tokens: 100000, | |
| max_output_tokens: 8191, | |
| input_cost_per_token: 8e-6, | |
| output_cost_per_token: 2.4e-5, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| }, | |
| "bedrock/us-west-2/anthropic.claude-v2:1": { | |
| max_tokens: 8191, | |
| max_input_tokens: 100000, | |
| max_output_tokens: 8191, | |
| input_cost_per_token: 8e-6, | |
| output_cost_per_token: 2.4e-5, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| }, | |
| "bedrock/ap-northeast-1/anthropic.claude-v2:1": { | |
| max_tokens: 8191, | |
| max_input_tokens: 100000, | |
| max_output_tokens: 8191, | |
| input_cost_per_token: 8e-6, | |
| output_cost_per_token: 2.4e-5, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| }, | |
| "bedrock/ap-northeast-1/1-month-commitment/anthropic.claude-v2:1": { | |
| max_tokens: 8191, | |
| max_input_tokens: 100000, | |
| max_output_tokens: 8191, | |
| input_cost_per_second: 0.0455, | |
| output_cost_per_second: 0.0455, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| }, | |
| "bedrock/ap-northeast-1/6-month-commitment/anthropic.claude-v2:1": { | |
| max_tokens: 8191, | |
| max_input_tokens: 100000, | |
| max_output_tokens: 8191, | |
| input_cost_per_second: 0.02527, | |
| output_cost_per_second: 0.02527, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| }, | |
| "bedrock/eu-central-1/anthropic.claude-v2:1": { | |
| max_tokens: 8191, | |
| max_input_tokens: 100000, | |
| max_output_tokens: 8191, | |
| input_cost_per_token: 8e-6, | |
| output_cost_per_token: 2.4e-5, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| }, | |
| "bedrock/eu-central-1/1-month-commitment/anthropic.claude-v2:1": { | |
| max_tokens: 8191, | |
| max_input_tokens: 100000, | |
| max_output_tokens: 8191, | |
| input_cost_per_second: 0.0415, | |
| output_cost_per_second: 0.0415, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| }, | |
| "bedrock/eu-central-1/6-month-commitment/anthropic.claude-v2:1": { | |
| max_tokens: 8191, | |
| max_input_tokens: 100000, | |
| max_output_tokens: 8191, | |
| input_cost_per_second: 0.02305, | |
| output_cost_per_second: 0.02305, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| }, | |
| "bedrock/us-east-1/1-month-commitment/anthropic.claude-v2:1": { | |
| max_tokens: 8191, | |
| max_input_tokens: 100000, | |
| max_output_tokens: 8191, | |
| input_cost_per_second: 0.0175, | |
| output_cost_per_second: 0.0175, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| }, | |
| "bedrock/us-east-1/6-month-commitment/anthropic.claude-v2:1": { | |
| max_tokens: 8191, | |
| max_input_tokens: 100000, | |
| max_output_tokens: 8191, | |
| input_cost_per_second: 0.00972, | |
| output_cost_per_second: 0.00972, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| }, | |
| "bedrock/us-west-2/1-month-commitment/anthropic.claude-v2:1": { | |
| max_tokens: 8191, | |
| max_input_tokens: 100000, | |
| max_output_tokens: 8191, | |
| input_cost_per_second: 0.0175, | |
| output_cost_per_second: 0.0175, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| }, | |
| "bedrock/us-west-2/6-month-commitment/anthropic.claude-v2:1": { | |
| max_tokens: 8191, | |
| max_input_tokens: 100000, | |
| max_output_tokens: 8191, | |
| input_cost_per_second: 0.00972, | |
| output_cost_per_second: 0.00972, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| }, | |
| "anthropic.claude-instant-v1": { | |
| max_tokens: 8191, | |
| max_input_tokens: 100000, | |
| max_output_tokens: 8191, | |
| input_cost_per_token: 1.63e-6, | |
| output_cost_per_token: 5.51e-6, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| }, | |
| "bedrock/us-east-1/anthropic.claude-instant-v1": { | |
| max_tokens: 8191, | |
| max_input_tokens: 100000, | |
| max_output_tokens: 8191, | |
| input_cost_per_token: 8e-7, | |
| output_cost_per_token: 2.4e-6, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| }, | |
| "bedrock/us-east-1/1-month-commitment/anthropic.claude-instant-v1": { | |
| max_tokens: 8191, | |
| max_input_tokens: 100000, | |
| max_output_tokens: 8191, | |
| input_cost_per_second: 0.011, | |
| output_cost_per_second: 0.011, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| }, | |
| "bedrock/us-east-1/6-month-commitment/anthropic.claude-instant-v1": { | |
| max_tokens: 8191, | |
| max_input_tokens: 100000, | |
| max_output_tokens: 8191, | |
| input_cost_per_second: 0.00611, | |
| output_cost_per_second: 0.00611, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| }, | |
| "bedrock/us-west-2/1-month-commitment/anthropic.claude-instant-v1": { | |
| max_tokens: 8191, | |
| max_input_tokens: 100000, | |
| max_output_tokens: 8191, | |
| input_cost_per_second: 0.011, | |
| output_cost_per_second: 0.011, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| }, | |
| "bedrock/us-west-2/6-month-commitment/anthropic.claude-instant-v1": { | |
| max_tokens: 8191, | |
| max_input_tokens: 100000, | |
| max_output_tokens: 8191, | |
| input_cost_per_second: 0.00611, | |
| output_cost_per_second: 0.00611, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| }, | |
| "bedrock/us-west-2/anthropic.claude-instant-v1": { | |
| max_tokens: 8191, | |
| max_input_tokens: 100000, | |
| max_output_tokens: 8191, | |
| input_cost_per_token: 8e-7, | |
| output_cost_per_token: 2.4e-6, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| }, | |
| "bedrock/ap-northeast-1/anthropic.claude-instant-v1": { | |
| max_tokens: 8191, | |
| max_input_tokens: 100000, | |
| max_output_tokens: 8191, | |
| input_cost_per_token: 2.23e-6, | |
| output_cost_per_token: 7.55e-6, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| }, | |
| "bedrock/ap-northeast-1/1-month-commitment/anthropic.claude-instant-v1": { | |
| max_tokens: 8191, | |
| max_input_tokens: 100000, | |
| max_output_tokens: 8191, | |
| input_cost_per_second: 0.01475, | |
| output_cost_per_second: 0.01475, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| }, | |
| "bedrock/ap-northeast-1/6-month-commitment/anthropic.claude-instant-v1": { | |
| max_tokens: 8191, | |
| max_input_tokens: 100000, | |
| max_output_tokens: 8191, | |
| input_cost_per_second: 0.008194, | |
| output_cost_per_second: 0.008194, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| }, | |
| "bedrock/eu-central-1/anthropic.claude-instant-v1": { | |
| max_tokens: 8191, | |
| max_input_tokens: 100000, | |
| max_output_tokens: 8191, | |
| input_cost_per_token: 2.48e-6, | |
| output_cost_per_token: 8.38e-6, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| }, | |
| "bedrock/eu-central-1/1-month-commitment/anthropic.claude-instant-v1": { | |
| max_tokens: 8191, | |
| max_input_tokens: 100000, | |
| max_output_tokens: 8191, | |
| input_cost_per_second: 0.01635, | |
| output_cost_per_second: 0.01635, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| }, | |
| "bedrock/eu-central-1/6-month-commitment/anthropic.claude-instant-v1": { | |
| max_tokens: 8191, | |
| max_input_tokens: 100000, | |
| max_output_tokens: 8191, | |
| input_cost_per_second: 0.009083, | |
| output_cost_per_second: 0.009083, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| }, | |
| "cohere.command-text-v14": { | |
| max_tokens: 4096, | |
| max_input_tokens: 4096, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 1.5e-6, | |
| output_cost_per_token: 2e-6, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| }, | |
| "bedrock/*/1-month-commitment/cohere.command-text-v14": { | |
| max_tokens: 4096, | |
| max_input_tokens: 4096, | |
| max_output_tokens: 4096, | |
| input_cost_per_second: 0.011, | |
| output_cost_per_second: 0.011, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| }, | |
| "bedrock/*/6-month-commitment/cohere.command-text-v14": { | |
| max_tokens: 4096, | |
| max_input_tokens: 4096, | |
| max_output_tokens: 4096, | |
| input_cost_per_second: 0.0066027, | |
| output_cost_per_second: 0.0066027, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| }, | |
| "cohere.command-light-text-v14": { | |
| max_tokens: 4096, | |
| max_input_tokens: 4096, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 3e-7, | |
| output_cost_per_token: 6e-7, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| }, | |
| "bedrock/*/1-month-commitment/cohere.command-light-text-v14": { | |
| max_tokens: 4096, | |
| max_input_tokens: 4096, | |
| max_output_tokens: 4096, | |
| input_cost_per_second: 0.001902, | |
| output_cost_per_second: 0.001902, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| }, | |
| "bedrock/*/6-month-commitment/cohere.command-light-text-v14": { | |
| max_tokens: 4096, | |
| max_input_tokens: 4096, | |
| max_output_tokens: 4096, | |
| input_cost_per_second: 0.0011416, | |
| output_cost_per_second: 0.0011416, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| }, | |
| "cohere.command-r-plus-v1:0": { | |
| max_tokens: 4096, | |
| max_input_tokens: 128000, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 3e-6, | |
| output_cost_per_token: 1.5e-5, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| }, | |
| "cohere.command-r-v1:0": { | |
| max_tokens: 4096, | |
| max_input_tokens: 128000, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 5e-7, | |
| output_cost_per_token: 1.5e-6, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| }, | |
| "cohere.embed-english-v3": { | |
| max_tokens: 512, | |
| max_input_tokens: 512, | |
| input_cost_per_token: 1e-7, | |
| output_cost_per_token: 0.0, | |
| litellm_provider: "bedrock", | |
| mode: "embedding", | |
| }, | |
| "cohere.embed-multilingual-v3": { | |
| max_tokens: 512, | |
| max_input_tokens: 512, | |
| input_cost_per_token: 1e-7, | |
| output_cost_per_token: 0.0, | |
| litellm_provider: "bedrock", | |
| mode: "embedding", | |
| }, | |
| "meta.llama2-13b-chat-v1": { | |
| max_tokens: 4096, | |
| max_input_tokens: 4096, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 7.5e-7, | |
| output_cost_per_token: 1e-6, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| }, | |
| "meta.llama2-70b-chat-v1": { | |
| max_tokens: 4096, | |
| max_input_tokens: 4096, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 1.95e-6, | |
| output_cost_per_token: 2.56e-6, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| }, | |
| "meta.llama3-8b-instruct-v1:0": { | |
| max_tokens: 8192, | |
| max_input_tokens: 8192, | |
| max_output_tokens: 8192, | |
| input_cost_per_token: 3e-7, | |
| output_cost_per_token: 6e-7, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| }, | |
| "bedrock/us-east-1/meta.llama3-8b-instruct-v1:0": { | |
| max_tokens: 8192, | |
| max_input_tokens: 8192, | |
| max_output_tokens: 8192, | |
| input_cost_per_token: 3e-7, | |
| output_cost_per_token: 6e-7, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| }, | |
| "bedrock/us-west-1/meta.llama3-8b-instruct-v1:0": { | |
| max_tokens: 8192, | |
| max_input_tokens: 8192, | |
| max_output_tokens: 8192, | |
| input_cost_per_token: 3e-7, | |
| output_cost_per_token: 6e-7, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| }, | |
| "bedrock/ap-south-1/meta.llama3-8b-instruct-v1:0": { | |
| max_tokens: 8192, | |
| max_input_tokens: 8192, | |
| max_output_tokens: 8192, | |
| input_cost_per_token: 3.6e-7, | |
| output_cost_per_token: 7.2e-7, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| }, | |
| "bedrock/ca-central-1/meta.llama3-8b-instruct-v1:0": { | |
| max_tokens: 8192, | |
| max_input_tokens: 8192, | |
| max_output_tokens: 8192, | |
| input_cost_per_token: 3.5e-7, | |
| output_cost_per_token: 6.9e-7, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| }, | |
| "bedrock/eu-west-1/meta.llama3-8b-instruct-v1:0": { | |
| max_tokens: 8192, | |
| max_input_tokens: 8192, | |
| max_output_tokens: 8192, | |
| input_cost_per_token: 3.2e-7, | |
| output_cost_per_token: 6.5e-7, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| }, | |
| "bedrock/eu-west-2/meta.llama3-8b-instruct-v1:0": { | |
| max_tokens: 8192, | |
| max_input_tokens: 8192, | |
| max_output_tokens: 8192, | |
| input_cost_per_token: 3.9e-7, | |
| output_cost_per_token: 7.8e-7, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| }, | |
| "bedrock/sa-east-1/meta.llama3-8b-instruct-v1:0": { | |
| max_tokens: 8192, | |
| max_input_tokens: 8192, | |
| max_output_tokens: 8192, | |
| input_cost_per_token: 5e-7, | |
| output_cost_per_token: 1.01e-6, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| }, | |
| "meta.llama3-70b-instruct-v1:0": { | |
| max_tokens: 8192, | |
| max_input_tokens: 8192, | |
| max_output_tokens: 8192, | |
| input_cost_per_token: 2.65e-6, | |
| output_cost_per_token: 3.5e-6, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| }, | |
| "bedrock/us-east-1/meta.llama3-70b-instruct-v1:0": { | |
| max_tokens: 8192, | |
| max_input_tokens: 8192, | |
| max_output_tokens: 8192, | |
| input_cost_per_token: 2.65e-6, | |
| output_cost_per_token: 3.5e-6, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| }, | |
| "bedrock/us-west-1/meta.llama3-70b-instruct-v1:0": { | |
| max_tokens: 8192, | |
| max_input_tokens: 8192, | |
| max_output_tokens: 8192, | |
| input_cost_per_token: 2.65e-6, | |
| output_cost_per_token: 3.5e-6, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| }, | |
| "bedrock/ap-south-1/meta.llama3-70b-instruct-v1:0": { | |
| max_tokens: 8192, | |
| max_input_tokens: 8192, | |
| max_output_tokens: 8192, | |
| input_cost_per_token: 3.18e-6, | |
| output_cost_per_token: 4.2e-6, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| }, | |
| "bedrock/ca-central-1/meta.llama3-70b-instruct-v1:0": { | |
| max_tokens: 8192, | |
| max_input_tokens: 8192, | |
| max_output_tokens: 8192, | |
| input_cost_per_token: 3.05e-6, | |
| output_cost_per_token: 4.03e-6, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| }, | |
| "bedrock/eu-west-1/meta.llama3-70b-instruct-v1:0": { | |
| max_tokens: 8192, | |
| max_input_tokens: 8192, | |
| max_output_tokens: 8192, | |
| input_cost_per_token: 2.86e-6, | |
| output_cost_per_token: 3.78e-6, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| }, | |
| "bedrock/eu-west-2/meta.llama3-70b-instruct-v1:0": { | |
| max_tokens: 8192, | |
| max_input_tokens: 8192, | |
| max_output_tokens: 8192, | |
| input_cost_per_token: 3.45e-6, | |
| output_cost_per_token: 4.55e-6, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| }, | |
| "bedrock/sa-east-1/meta.llama3-70b-instruct-v1:0": { | |
| max_tokens: 8192, | |
| max_input_tokens: 8192, | |
| max_output_tokens: 8192, | |
| input_cost_per_token: 4.45e-6, | |
| output_cost_per_token: 5.88e-6, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| }, | |
| "meta.llama3-1-8b-instruct-v1:0": { | |
| max_tokens: 128000, | |
| max_input_tokens: 128000, | |
| max_output_tokens: 2048, | |
| input_cost_per_token: 2.2e-7, | |
| output_cost_per_token: 2.2e-7, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_tool_choice: false, | |
| }, | |
| "meta.llama3-1-70b-instruct-v1:0": { | |
| max_tokens: 128000, | |
| max_input_tokens: 128000, | |
| max_output_tokens: 2048, | |
| input_cost_per_token: 9.9e-7, | |
| output_cost_per_token: 9.9e-7, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_tool_choice: false, | |
| }, | |
| "meta.llama3-1-405b-instruct-v1:0": { | |
| max_tokens: 128000, | |
| max_input_tokens: 128000, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 5.32e-6, | |
| output_cost_per_token: 1.6e-5, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_tool_choice: false, | |
| }, | |
| "meta.llama3-2-1b-instruct-v1:0": { | |
| max_tokens: 128000, | |
| max_input_tokens: 128000, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 1e-7, | |
| output_cost_per_token: 1e-7, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_tool_choice: false, | |
| }, | |
| "us.meta.llama3-2-1b-instruct-v1:0": { | |
| max_tokens: 128000, | |
| max_input_tokens: 128000, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 1e-7, | |
| output_cost_per_token: 1e-7, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_tool_choice: false, | |
| }, | |
| "eu.meta.llama3-2-1b-instruct-v1:0": { | |
| max_tokens: 128000, | |
| max_input_tokens: 128000, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 1.3e-7, | |
| output_cost_per_token: 1.3e-7, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_tool_choice: false, | |
| }, | |
| "meta.llama3-2-3b-instruct-v1:0": { | |
| max_tokens: 128000, | |
| max_input_tokens: 128000, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 1.5e-7, | |
| output_cost_per_token: 1.5e-7, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_tool_choice: false, | |
| }, | |
| "us.meta.llama3-2-3b-instruct-v1:0": { | |
| max_tokens: 128000, | |
| max_input_tokens: 128000, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 1.5e-7, | |
| output_cost_per_token: 1.5e-7, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_tool_choice: false, | |
| }, | |
| "eu.meta.llama3-2-3b-instruct-v1:0": { | |
| max_tokens: 128000, | |
| max_input_tokens: 128000, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 1.9e-7, | |
| output_cost_per_token: 1.9e-7, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_tool_choice: false, | |
| }, | |
| "meta.llama3-2-11b-instruct-v1:0": { | |
| max_tokens: 128000, | |
| max_input_tokens: 128000, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 3.5e-7, | |
| output_cost_per_token: 3.5e-7, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_tool_choice: false, | |
| }, | |
| "us.meta.llama3-2-11b-instruct-v1:0": { | |
| max_tokens: 128000, | |
| max_input_tokens: 128000, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 3.5e-7, | |
| output_cost_per_token: 3.5e-7, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_tool_choice: false, | |
| }, | |
| "meta.llama3-2-90b-instruct-v1:0": { | |
| max_tokens: 128000, | |
| max_input_tokens: 128000, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 2e-6, | |
| output_cost_per_token: 2e-6, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_tool_choice: false, | |
| }, | |
| "us.meta.llama3-2-90b-instruct-v1:0": { | |
| max_tokens: 128000, | |
| max_input_tokens: 128000, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 2e-6, | |
| output_cost_per_token: 2e-6, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_tool_choice: false, | |
| }, | |
| "512-x-512/50-steps/stability.stable-diffusion-xl-v0": { | |
| max_tokens: 77, | |
| max_input_tokens: 77, | |
| output_cost_per_image: 0.018, | |
| litellm_provider: "bedrock", | |
| mode: "image_generation", | |
| }, | |
| "512-x-512/max-steps/stability.stable-diffusion-xl-v0": { | |
| max_tokens: 77, | |
| max_input_tokens: 77, | |
| output_cost_per_image: 0.036, | |
| litellm_provider: "bedrock", | |
| mode: "image_generation", | |
| }, | |
| "max-x-max/50-steps/stability.stable-diffusion-xl-v0": { | |
| max_tokens: 77, | |
| max_input_tokens: 77, | |
| output_cost_per_image: 0.036, | |
| litellm_provider: "bedrock", | |
| mode: "image_generation", | |
| }, | |
| "max-x-max/max-steps/stability.stable-diffusion-xl-v0": { | |
| max_tokens: 77, | |
| max_input_tokens: 77, | |
| output_cost_per_image: 0.072, | |
| litellm_provider: "bedrock", | |
| mode: "image_generation", | |
| }, | |
| "1024-x-1024/50-steps/stability.stable-diffusion-xl-v1": { | |
| max_tokens: 77, | |
| max_input_tokens: 77, | |
| output_cost_per_image: 0.04, | |
| litellm_provider: "bedrock", | |
| mode: "image_generation", | |
| }, | |
| "1024-x-1024/max-steps/stability.stable-diffusion-xl-v1": { | |
| max_tokens: 77, | |
| max_input_tokens: 77, | |
| output_cost_per_image: 0.08, | |
| litellm_provider: "bedrock", | |
| mode: "image_generation", | |
| }, | |
| "sagemaker/meta-textgeneration-llama-2-7b": { | |
| max_tokens: 4096, | |
| max_input_tokens: 4096, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 0.0, | |
| output_cost_per_token: 0.0, | |
| litellm_provider: "sagemaker", | |
| mode: "completion", | |
| }, | |
| "sagemaker/meta-textgeneration-llama-2-7b-f": { | |
| max_tokens: 4096, | |
| max_input_tokens: 4096, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 0.0, | |
| output_cost_per_token: 0.0, | |
| litellm_provider: "sagemaker", | |
| mode: "chat", | |
| }, | |
| "sagemaker/meta-textgeneration-llama-2-13b": { | |
| max_tokens: 4096, | |
| max_input_tokens: 4096, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 0.0, | |
| output_cost_per_token: 0.0, | |
| litellm_provider: "sagemaker", | |
| mode: "completion", | |
| }, | |
| "sagemaker/meta-textgeneration-llama-2-13b-f": { | |
| max_tokens: 4096, | |
| max_input_tokens: 4096, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 0.0, | |
| output_cost_per_token: 0.0, | |
| litellm_provider: "sagemaker", | |
| mode: "chat", | |
| }, | |
| "sagemaker/meta-textgeneration-llama-2-70b": { | |
| max_tokens: 4096, | |
| max_input_tokens: 4096, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 0.0, | |
| output_cost_per_token: 0.0, | |
| litellm_provider: "sagemaker", | |
| mode: "completion", | |
| }, | |
| "sagemaker/meta-textgeneration-llama-2-70b-b-f": { | |
| max_tokens: 4096, | |
| max_input_tokens: 4096, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 0.0, | |
| output_cost_per_token: 0.0, | |
| litellm_provider: "sagemaker", | |
| mode: "chat", | |
| }, | |
| "together-ai-up-to-4b": { | |
| input_cost_per_token: 1e-7, | |
| output_cost_per_token: 1e-7, | |
| litellm_provider: "together_ai", | |
| mode: "chat", | |
| }, | |
| "together-ai-4.1b-8b": { | |
| input_cost_per_token: 2e-7, | |
| output_cost_per_token: 2e-7, | |
| litellm_provider: "together_ai", | |
| mode: "chat", | |
| }, | |
| "together-ai-8.1b-21b": { | |
| max_tokens: 1000, | |
| input_cost_per_token: 3e-7, | |
| output_cost_per_token: 3e-7, | |
| litellm_provider: "together_ai", | |
| mode: "chat", | |
| }, | |
| "together-ai-21.1b-41b": { | |
| input_cost_per_token: 8e-7, | |
| output_cost_per_token: 8e-7, | |
| litellm_provider: "together_ai", | |
| mode: "chat", | |
| }, | |
| "together-ai-41.1b-80b": { | |
| input_cost_per_token: 9e-7, | |
| output_cost_per_token: 9e-7, | |
| litellm_provider: "together_ai", | |
| mode: "chat", | |
| }, | |
| "together-ai-81.1b-110b": { | |
| input_cost_per_token: 1.8e-6, | |
| output_cost_per_token: 1.8e-6, | |
| litellm_provider: "together_ai", | |
| mode: "chat", | |
| }, | |
| "together-ai-embedding-up-to-150m": { | |
| input_cost_per_token: 8e-9, | |
| output_cost_per_token: 0.0, | |
| litellm_provider: "together_ai", | |
| mode: "embedding", | |
| }, | |
| "together-ai-embedding-151m-to-350m": { | |
| input_cost_per_token: 1.6e-8, | |
| output_cost_per_token: 0.0, | |
| litellm_provider: "together_ai", | |
| mode: "embedding", | |
| }, | |
| "together_ai/mistralai/Mixtral-8x7B-Instruct-v0.1": { | |
| input_cost_per_token: 6e-7, | |
| output_cost_per_token: 6e-7, | |
| litellm_provider: "together_ai", | |
| supports_function_calling: true, | |
| supports_parallel_function_calling: true, | |
| supports_response_schema: true, | |
| mode: "chat", | |
| }, | |
| "together_ai/mistralai/Mistral-7B-Instruct-v0.1": { | |
| litellm_provider: "together_ai", | |
| supports_function_calling: true, | |
| supports_parallel_function_calling: true, | |
| supports_response_schema: true, | |
| mode: "chat", | |
| }, | |
| "together_ai/togethercomputer/CodeLlama-34b-Instruct": { | |
| litellm_provider: "together_ai", | |
| supports_function_calling: true, | |
| supports_parallel_function_calling: true, | |
| mode: "chat", | |
| }, | |
| "ollama/codegemma": { | |
| max_tokens: 8192, | |
| max_input_tokens: 8192, | |
| max_output_tokens: 8192, | |
| input_cost_per_token: 0.0, | |
| output_cost_per_token: 0.0, | |
| litellm_provider: "ollama", | |
| mode: "completion", | |
| }, | |
| "ollama/codegeex4": { | |
| max_tokens: 32768, | |
| max_input_tokens: 32768, | |
| max_output_tokens: 8192, | |
| input_cost_per_token: 0.0, | |
| output_cost_per_token: 0.0, | |
| litellm_provider: "ollama", | |
| mode: "chat", | |
| supports_function_calling: false, | |
| }, | |
| "ollama/deepseek-coder-v2-instruct": { | |
| max_tokens: 32768, | |
| max_input_tokens: 32768, | |
| max_output_tokens: 8192, | |
| input_cost_per_token: 0.0, | |
| output_cost_per_token: 0.0, | |
| litellm_provider: "ollama", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| }, | |
| "ollama/deepseek-coder-v2-base": { | |
| max_tokens: 8192, | |
| max_input_tokens: 8192, | |
| max_output_tokens: 8192, | |
| input_cost_per_token: 0.0, | |
| output_cost_per_token: 0.0, | |
| litellm_provider: "ollama", | |
| mode: "completion", | |
| supports_function_calling: true, | |
| }, | |
| "ollama/deepseek-coder-v2-lite-instruct": { | |
| max_tokens: 32768, | |
| max_input_tokens: 32768, | |
| max_output_tokens: 8192, | |
| input_cost_per_token: 0.0, | |
| output_cost_per_token: 0.0, | |
| litellm_provider: "ollama", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| }, | |
| "ollama/deepseek-coder-v2-lite-base": { | |
| max_tokens: 8192, | |
| max_input_tokens: 8192, | |
| max_output_tokens: 8192, | |
| input_cost_per_token: 0.0, | |
| output_cost_per_token: 0.0, | |
| litellm_provider: "ollama", | |
| mode: "completion", | |
| supports_function_calling: true, | |
| }, | |
| "ollama/internlm2_5-20b-chat": { | |
| max_tokens: 32768, | |
| max_input_tokens: 32768, | |
| max_output_tokens: 8192, | |
| input_cost_per_token: 0.0, | |
| output_cost_per_token: 0.0, | |
| litellm_provider: "ollama", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| }, | |
| "ollama/llama2": { | |
| max_tokens: 4096, | |
| max_input_tokens: 4096, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 0.0, | |
| output_cost_per_token: 0.0, | |
| litellm_provider: "ollama", | |
| mode: "chat", | |
| }, | |
| "ollama/llama2:7b": { | |
| max_tokens: 4096, | |
| max_input_tokens: 4096, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 0.0, | |
| output_cost_per_token: 0.0, | |
| litellm_provider: "ollama", | |
| mode: "chat", | |
| }, | |
| "ollama/llama2:13b": { | |
| max_tokens: 4096, | |
| max_input_tokens: 4096, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 0.0, | |
| output_cost_per_token: 0.0, | |
| litellm_provider: "ollama", | |
| mode: "chat", | |
| }, | |
| "ollama/llama2:70b": { | |
| max_tokens: 4096, | |
| max_input_tokens: 4096, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 0.0, | |
| output_cost_per_token: 0.0, | |
| litellm_provider: "ollama", | |
| mode: "chat", | |
| }, | |
| "ollama/llama2-uncensored": { | |
| max_tokens: 4096, | |
| max_input_tokens: 4096, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 0.0, | |
| output_cost_per_token: 0.0, | |
| litellm_provider: "ollama", | |
| mode: "completion", | |
| }, | |
| "ollama/llama3": { | |
| max_tokens: 8192, | |
| max_input_tokens: 8192, | |
| max_output_tokens: 8192, | |
| input_cost_per_token: 0.0, | |
| output_cost_per_token: 0.0, | |
| litellm_provider: "ollama", | |
| mode: "chat", | |
| }, | |
| "ollama/llama3:8b": { | |
| max_tokens: 8192, | |
| max_input_tokens: 8192, | |
| max_output_tokens: 8192, | |
| input_cost_per_token: 0.0, | |
| output_cost_per_token: 0.0, | |
| litellm_provider: "ollama", | |
| mode: "chat", | |
| }, | |
| "ollama/llama3:70b": { | |
| max_tokens: 8192, | |
| max_input_tokens: 8192, | |
| max_output_tokens: 8192, | |
| input_cost_per_token: 0.0, | |
| output_cost_per_token: 0.0, | |
| litellm_provider: "ollama", | |
| mode: "chat", | |
| }, | |
| "ollama/llama3.1": { | |
| max_tokens: 32768, | |
| max_input_tokens: 8192, | |
| max_output_tokens: 8192, | |
| input_cost_per_token: 0.0, | |
| output_cost_per_token: 0.0, | |
| litellm_provider: "ollama", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| }, | |
| "ollama/mistral-large-instruct-2407": { | |
| max_tokens: 65536, | |
| max_input_tokens: 65536, | |
| max_output_tokens: 8192, | |
| input_cost_per_token: 0.0, | |
| output_cost_per_token: 0.0, | |
| litellm_provider: "ollama", | |
| mode: "chat", | |
| }, | |
| "ollama/mistral": { | |
| max_tokens: 8192, | |
| max_input_tokens: 8192, | |
| max_output_tokens: 8192, | |
| input_cost_per_token: 0.0, | |
| output_cost_per_token: 0.0, | |
| litellm_provider: "ollama", | |
| mode: "completion", | |
| }, | |
| "ollama/mistral-7B-Instruct-v0.1": { | |
| max_tokens: 8192, | |
| max_input_tokens: 8192, | |
| max_output_tokens: 8192, | |
| input_cost_per_token: 0.0, | |
| output_cost_per_token: 0.0, | |
| litellm_provider: "ollama", | |
| mode: "chat", | |
| }, | |
| "ollama/mistral-7B-Instruct-v0.2": { | |
| max_tokens: 32768, | |
| max_input_tokens: 32768, | |
| max_output_tokens: 32768, | |
| input_cost_per_token: 0.0, | |
| output_cost_per_token: 0.0, | |
| litellm_provider: "ollama", | |
| mode: "chat", | |
| }, | |
| "ollama/mixtral-8x7B-Instruct-v0.1": { | |
| max_tokens: 32768, | |
| max_input_tokens: 32768, | |
| max_output_tokens: 32768, | |
| input_cost_per_token: 0.0, | |
| output_cost_per_token: 0.0, | |
| litellm_provider: "ollama", | |
| mode: "chat", | |
| }, | |
| "ollama/mixtral-8x22B-Instruct-v0.1": { | |
| max_tokens: 65536, | |
| max_input_tokens: 65536, | |
| max_output_tokens: 65536, | |
| input_cost_per_token: 0.0, | |
| output_cost_per_token: 0.0, | |
| litellm_provider: "ollama", | |
| mode: "chat", | |
| }, | |
| "ollama/codellama": { | |
| max_tokens: 4096, | |
| max_input_tokens: 4096, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 0.0, | |
| output_cost_per_token: 0.0, | |
| litellm_provider: "ollama", | |
| mode: "completion", | |
| }, | |
| "ollama/orca-mini": { | |
| max_tokens: 4096, | |
| max_input_tokens: 4096, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 0.0, | |
| output_cost_per_token: 0.0, | |
| litellm_provider: "ollama", | |
| mode: "completion", | |
| }, | |
| "ollama/vicuna": { | |
| max_tokens: 2048, | |
| max_input_tokens: 2048, | |
| max_output_tokens: 2048, | |
| input_cost_per_token: 0.0, | |
| output_cost_per_token: 0.0, | |
| litellm_provider: "ollama", | |
| mode: "completion", | |
| }, | |
| "deepinfra/lizpreciatior/lzlv_70b_fp16_hf": { | |
| max_tokens: 4096, | |
| max_input_tokens: 4096, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 7e-7, | |
| output_cost_per_token: 9e-7, | |
| litellm_provider: "deepinfra", | |
| mode: "chat", | |
| }, | |
| "deepinfra/Gryphe/MythoMax-L2-13b": { | |
| max_tokens: 4096, | |
| max_input_tokens: 4096, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 2.2e-7, | |
| output_cost_per_token: 2.2e-7, | |
| litellm_provider: "deepinfra", | |
| mode: "chat", | |
| }, | |
| "deepinfra/mistralai/Mistral-7B-Instruct-v0.1": { | |
| max_tokens: 8191, | |
| max_input_tokens: 32768, | |
| max_output_tokens: 8191, | |
| input_cost_per_token: 1.3e-7, | |
| output_cost_per_token: 1.3e-7, | |
| litellm_provider: "deepinfra", | |
| mode: "chat", | |
| }, | |
| "deepinfra/meta-llama/Llama-2-70b-chat-hf": { | |
| max_tokens: 4096, | |
| max_input_tokens: 4096, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 7e-7, | |
| output_cost_per_token: 9e-7, | |
| litellm_provider: "deepinfra", | |
| mode: "chat", | |
| }, | |
| "deepinfra/cognitivecomputations/dolphin-2.6-mixtral-8x7b": { | |
| max_tokens: 8191, | |
| max_input_tokens: 32768, | |
| max_output_tokens: 8191, | |
| input_cost_per_token: 2.7e-7, | |
| output_cost_per_token: 2.7e-7, | |
| litellm_provider: "deepinfra", | |
| mode: "chat", | |
| }, | |
| "deepinfra/codellama/CodeLlama-34b-Instruct-hf": { | |
| max_tokens: 4096, | |
| max_input_tokens: 4096, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 6e-7, | |
| output_cost_per_token: 6e-7, | |
| litellm_provider: "deepinfra", | |
| mode: "chat", | |
| }, | |
| "deepinfra/deepinfra/mixtral": { | |
| max_tokens: 4096, | |
| max_input_tokens: 32000, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 2.7e-7, | |
| output_cost_per_token: 2.7e-7, | |
| litellm_provider: "deepinfra", | |
| mode: "completion", | |
| }, | |
| "deepinfra/Phind/Phind-CodeLlama-34B-v2": { | |
| max_tokens: 4096, | |
| max_input_tokens: 16384, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 6e-7, | |
| output_cost_per_token: 6e-7, | |
| litellm_provider: "deepinfra", | |
| mode: "chat", | |
| }, | |
| "deepinfra/mistralai/Mixtral-8x7B-Instruct-v0.1": { | |
| max_tokens: 8191, | |
| max_input_tokens: 32768, | |
| max_output_tokens: 8191, | |
| input_cost_per_token: 2.7e-7, | |
| output_cost_per_token: 2.7e-7, | |
| litellm_provider: "deepinfra", | |
| mode: "chat", | |
| }, | |
| "deepinfra/deepinfra/airoboros-70b": { | |
| max_tokens: 4096, | |
| max_input_tokens: 4096, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 7e-7, | |
| output_cost_per_token: 9e-7, | |
| litellm_provider: "deepinfra", | |
| mode: "chat", | |
| }, | |
| "deepinfra/01-ai/Yi-34B-Chat": { | |
| max_tokens: 4096, | |
| max_input_tokens: 4096, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 6e-7, | |
| output_cost_per_token: 6e-7, | |
| litellm_provider: "deepinfra", | |
| mode: "chat", | |
| }, | |
| "deepinfra/01-ai/Yi-6B-200K": { | |
| max_tokens: 4096, | |
| max_input_tokens: 200000, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 1.3e-7, | |
| output_cost_per_token: 1.3e-7, | |
| litellm_provider: "deepinfra", | |
| mode: "completion", | |
| }, | |
| "deepinfra/jondurbin/airoboros-l2-70b-gpt4-1.4.1": { | |
| max_tokens: 4096, | |
| max_input_tokens: 4096, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 7e-7, | |
| output_cost_per_token: 9e-7, | |
| litellm_provider: "deepinfra", | |
| mode: "chat", | |
| }, | |
| "deepinfra/meta-llama/Llama-2-13b-chat-hf": { | |
| max_tokens: 4096, | |
| max_input_tokens: 4096, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 2.2e-7, | |
| output_cost_per_token: 2.2e-7, | |
| litellm_provider: "deepinfra", | |
| mode: "chat", | |
| }, | |
| "deepinfra/amazon/MistralLite": { | |
| max_tokens: 8191, | |
| max_input_tokens: 32768, | |
| max_output_tokens: 8191, | |
| input_cost_per_token: 2e-7, | |
| output_cost_per_token: 2e-7, | |
| litellm_provider: "deepinfra", | |
| mode: "chat", | |
| }, | |
| "deepinfra/meta-llama/Llama-2-7b-chat-hf": { | |
| max_tokens: 4096, | |
| max_input_tokens: 4096, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 1.3e-7, | |
| output_cost_per_token: 1.3e-7, | |
| litellm_provider: "deepinfra", | |
| mode: "chat", | |
| }, | |
| "deepinfra/meta-llama/Meta-Llama-3-8B-Instruct": { | |
| max_tokens: 8191, | |
| max_input_tokens: 8191, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 8e-8, | |
| output_cost_per_token: 8e-8, | |
| litellm_provider: "deepinfra", | |
| mode: "chat", | |
| }, | |
| "deepinfra/meta-llama/Meta-Llama-3-70B-Instruct": { | |
| max_tokens: 8191, | |
| max_input_tokens: 8191, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 5.9e-7, | |
| output_cost_per_token: 7.9e-7, | |
| litellm_provider: "deepinfra", | |
| mode: "chat", | |
| }, | |
| "deepinfra/01-ai/Yi-34B-200K": { | |
| max_tokens: 4096, | |
| max_input_tokens: 200000, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 6e-7, | |
| output_cost_per_token: 6e-7, | |
| litellm_provider: "deepinfra", | |
| mode: "completion", | |
| }, | |
| "deepinfra/openchat/openchat_3.5": { | |
| max_tokens: 4096, | |
| max_input_tokens: 4096, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 1.3e-7, | |
| output_cost_per_token: 1.3e-7, | |
| litellm_provider: "deepinfra", | |
| mode: "chat", | |
| }, | |
| "perplexity/codellama-34b-instruct": { | |
| max_tokens: 16384, | |
| max_input_tokens: 16384, | |
| max_output_tokens: 16384, | |
| input_cost_per_token: 3.5e-7, | |
| output_cost_per_token: 1.4e-6, | |
| litellm_provider: "perplexity", | |
| mode: "chat", | |
| }, | |
| "perplexity/codellama-70b-instruct": { | |
| max_tokens: 16384, | |
| max_input_tokens: 16384, | |
| max_output_tokens: 16384, | |
| input_cost_per_token: 7e-7, | |
| output_cost_per_token: 2.8e-6, | |
| litellm_provider: "perplexity", | |
| mode: "chat", | |
| }, | |
| "perplexity/llama-3.1-70b-instruct": { | |
| max_tokens: 131072, | |
| max_input_tokens: 131072, | |
| max_output_tokens: 131072, | |
| input_cost_per_token: 1e-6, | |
| output_cost_per_token: 1e-6, | |
| litellm_provider: "perplexity", | |
| mode: "chat", | |
| }, | |
| "perplexity/llama-3.1-8b-instruct": { | |
| max_tokens: 131072, | |
| max_input_tokens: 131072, | |
| max_output_tokens: 131072, | |
| input_cost_per_token: 2e-7, | |
| output_cost_per_token: 2e-7, | |
| litellm_provider: "perplexity", | |
| mode: "chat", | |
| }, | |
| "perplexity/llama-3.1-sonar-huge-128k-online": { | |
| max_tokens: 127072, | |
| max_input_tokens: 127072, | |
| max_output_tokens: 127072, | |
| input_cost_per_token: 5e-6, | |
| output_cost_per_token: 5e-6, | |
| litellm_provider: "perplexity", | |
| mode: "chat", | |
| }, | |
| "perplexity/llama-3.1-sonar-large-128k-online": { | |
| max_tokens: 127072, | |
| max_input_tokens: 127072, | |
| max_output_tokens: 127072, | |
| input_cost_per_token: 1e-6, | |
| output_cost_per_token: 1e-6, | |
| litellm_provider: "perplexity", | |
| mode: "chat", | |
| }, | |
| "perplexity/llama-3.1-sonar-large-128k-chat": { | |
| max_tokens: 131072, | |
| max_input_tokens: 131072, | |
| max_output_tokens: 131072, | |
| input_cost_per_token: 1e-6, | |
| output_cost_per_token: 1e-6, | |
| litellm_provider: "perplexity", | |
| mode: "chat", | |
| }, | |
| "perplexity/llama-3.1-sonar-small-128k-chat": { | |
| max_tokens: 131072, | |
| max_input_tokens: 131072, | |
| max_output_tokens: 131072, | |
| input_cost_per_token: 2e-7, | |
| output_cost_per_token: 2e-7, | |
| litellm_provider: "perplexity", | |
| mode: "chat", | |
| }, | |
| "perplexity/llama-3.1-sonar-small-128k-online": { | |
| max_tokens: 127072, | |
| max_input_tokens: 127072, | |
| max_output_tokens: 127072, | |
| input_cost_per_token: 2e-7, | |
| output_cost_per_token: 2e-7, | |
| litellm_provider: "perplexity", | |
| mode: "chat", | |
| }, | |
| "perplexity/pplx-7b-chat": { | |
| max_tokens: 8192, | |
| max_input_tokens: 8192, | |
| max_output_tokens: 8192, | |
| input_cost_per_token: 7e-8, | |
| output_cost_per_token: 2.8e-7, | |
| litellm_provider: "perplexity", | |
| mode: "chat", | |
| }, | |
| "perplexity/pplx-70b-chat": { | |
| max_tokens: 4096, | |
| max_input_tokens: 4096, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 7e-7, | |
| output_cost_per_token: 2.8e-6, | |
| litellm_provider: "perplexity", | |
| mode: "chat", | |
| }, | |
| "perplexity/pplx-7b-online": { | |
| max_tokens: 4096, | |
| max_input_tokens: 4096, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 0.0, | |
| output_cost_per_token: 2.8e-7, | |
| input_cost_per_request: 0.005, | |
| litellm_provider: "perplexity", | |
| mode: "chat", | |
| }, | |
| "perplexity/pplx-70b-online": { | |
| max_tokens: 4096, | |
| max_input_tokens: 4096, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 0.0, | |
| output_cost_per_token: 2.8e-6, | |
| input_cost_per_request: 0.005, | |
| litellm_provider: "perplexity", | |
| mode: "chat", | |
| }, | |
| "perplexity/llama-2-70b-chat": { | |
| max_tokens: 4096, | |
| max_input_tokens: 4096, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 7e-7, | |
| output_cost_per_token: 2.8e-6, | |
| litellm_provider: "perplexity", | |
| mode: "chat", | |
| }, | |
| "perplexity/mistral-7b-instruct": { | |
| max_tokens: 4096, | |
| max_input_tokens: 4096, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 7e-8, | |
| output_cost_per_token: 2.8e-7, | |
| litellm_provider: "perplexity", | |
| mode: "chat", | |
| }, | |
| "perplexity/mixtral-8x7b-instruct": { | |
| max_tokens: 4096, | |
| max_input_tokens: 4096, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 7e-8, | |
| output_cost_per_token: 2.8e-7, | |
| litellm_provider: "perplexity", | |
| mode: "chat", | |
| }, | |
| "perplexity/sonar-small-chat": { | |
| max_tokens: 16384, | |
| max_input_tokens: 16384, | |
| max_output_tokens: 16384, | |
| input_cost_per_token: 7e-8, | |
| output_cost_per_token: 2.8e-7, | |
| litellm_provider: "perplexity", | |
| mode: "chat", | |
| }, | |
| "perplexity/sonar-small-online": { | |
| max_tokens: 12000, | |
| max_input_tokens: 12000, | |
| max_output_tokens: 12000, | |
| input_cost_per_token: 0, | |
| output_cost_per_token: 2.8e-7, | |
| input_cost_per_request: 0.005, | |
| litellm_provider: "perplexity", | |
| mode: "chat", | |
| }, | |
| "perplexity/sonar-medium-chat": { | |
| max_tokens: 16384, | |
| max_input_tokens: 16384, | |
| max_output_tokens: 16384, | |
| input_cost_per_token: 6e-7, | |
| output_cost_per_token: 1.8e-6, | |
| litellm_provider: "perplexity", | |
| mode: "chat", | |
| }, | |
| "perplexity/sonar-medium-online": { | |
| max_tokens: 12000, | |
| max_input_tokens: 12000, | |
| max_output_tokens: 12000, | |
| input_cost_per_token: 0, | |
| output_cost_per_token: 1.8e-6, | |
| input_cost_per_request: 0.005, | |
| litellm_provider: "perplexity", | |
| mode: "chat", | |
| }, | |
| "fireworks_ai/accounts/fireworks/models/llama-v3p2-1b-instruct": { | |
| max_tokens: 16384, | |
| max_input_tokens: 16384, | |
| max_output_tokens: 16384, | |
| input_cost_per_token: 1e-7, | |
| output_cost_per_token: 1e-7, | |
| litellm_provider: "fireworks_ai", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_response_schema: true, | |
| source: "https://fireworks.ai/pricing", | |
| }, | |
| "fireworks_ai/accounts/fireworks/models/llama-v3p2-3b-instruct": { | |
| max_tokens: 16384, | |
| max_input_tokens: 16384, | |
| max_output_tokens: 16384, | |
| input_cost_per_token: 1e-7, | |
| output_cost_per_token: 1e-7, | |
| litellm_provider: "fireworks_ai", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_response_schema: true, | |
| source: "https://fireworks.ai/pricing", | |
| }, | |
| "fireworks_ai/accounts/fireworks/models/llama-v3p2-11b-vision-instruct": { | |
| max_tokens: 16384, | |
| max_input_tokens: 16384, | |
| max_output_tokens: 16384, | |
| input_cost_per_token: 2e-7, | |
| output_cost_per_token: 2e-7, | |
| litellm_provider: "fireworks_ai", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_vision: true, | |
| supports_response_schema: true, | |
| source: "https://fireworks.ai/pricing", | |
| }, | |
| "accounts/fireworks/models/llama-v3p2-90b-vision-instruct": { | |
| max_tokens: 16384, | |
| max_input_tokens: 16384, | |
| max_output_tokens: 16384, | |
| input_cost_per_token: 9e-7, | |
| output_cost_per_token: 9e-7, | |
| litellm_provider: "fireworks_ai", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_vision: true, | |
| supports_response_schema: true, | |
| source: "https://fireworks.ai/pricing", | |
| }, | |
| "fireworks_ai/accounts/fireworks/models/firefunction-v2": { | |
| max_tokens: 8192, | |
| max_input_tokens: 8192, | |
| max_output_tokens: 8192, | |
| input_cost_per_token: 9e-7, | |
| output_cost_per_token: 9e-7, | |
| litellm_provider: "fireworks_ai", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_response_schema: true, | |
| source: "https://fireworks.ai/pricing", | |
| }, | |
| "fireworks_ai/accounts/fireworks/models/mixtral-8x22b-instruct-hf": { | |
| max_tokens: 65536, | |
| max_input_tokens: 65536, | |
| max_output_tokens: 65536, | |
| input_cost_per_token: 1.2e-6, | |
| output_cost_per_token: 1.2e-6, | |
| litellm_provider: "fireworks_ai", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_response_schema: true, | |
| source: "https://fireworks.ai/pricing", | |
| }, | |
| "fireworks_ai/accounts/fireworks/models/qwen2-72b-instruct": { | |
| max_tokens: 32768, | |
| max_input_tokens: 32768, | |
| max_output_tokens: 32768, | |
| input_cost_per_token: 9e-7, | |
| output_cost_per_token: 9e-7, | |
| litellm_provider: "fireworks_ai", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_response_schema: true, | |
| source: "https://fireworks.ai/pricing", | |
| }, | |
| "fireworks_ai/accounts/fireworks/models/yi-large": { | |
| max_tokens: 32768, | |
| max_input_tokens: 32768, | |
| max_output_tokens: 32768, | |
| input_cost_per_token: 3e-6, | |
| output_cost_per_token: 3e-6, | |
| litellm_provider: "fireworks_ai", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_response_schema: true, | |
| source: "https://fireworks.ai/pricing", | |
| }, | |
| "fireworks_ai/accounts/fireworks/models/deepseek-coder-v2-instruct": { | |
| max_tokens: 65536, | |
| max_input_tokens: 65536, | |
| max_output_tokens: 8192, | |
| input_cost_per_token: 1.2e-6, | |
| output_cost_per_token: 1.2e-6, | |
| litellm_provider: "fireworks_ai", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_response_schema: true, | |
| source: "https://fireworks.ai/pricing", | |
| }, | |
| "fireworks_ai/nomic-ai/nomic-embed-text-v1.5": { | |
| max_tokens: 8192, | |
| max_input_tokens: 8192, | |
| input_cost_per_token: 8e-9, | |
| output_cost_per_token: 0.0, | |
| litellm_provider: "fireworks_ai-embedding-models", | |
| mode: "embedding", | |
| source: "https://fireworks.ai/pricing", | |
| }, | |
| "fireworks_ai/nomic-ai/nomic-embed-text-v1": { | |
| max_tokens: 8192, | |
| max_input_tokens: 8192, | |
| input_cost_per_token: 8e-9, | |
| output_cost_per_token: 0.0, | |
| litellm_provider: "fireworks_ai-embedding-models", | |
| mode: "embedding", | |
| source: "https://fireworks.ai/pricing", | |
| }, | |
| "fireworks_ai/WhereIsAI/UAE-Large-V1": { | |
| max_tokens: 512, | |
| max_input_tokens: 512, | |
| input_cost_per_token: 1.6e-8, | |
| output_cost_per_token: 0.0, | |
| litellm_provider: "fireworks_ai-embedding-models", | |
| mode: "embedding", | |
| source: "https://fireworks.ai/pricing", | |
| }, | |
| "fireworks_ai/thenlper/gte-large": { | |
| max_tokens: 512, | |
| max_input_tokens: 512, | |
| input_cost_per_token: 1.6e-8, | |
| output_cost_per_token: 0.0, | |
| litellm_provider: "fireworks_ai-embedding-models", | |
| mode: "embedding", | |
| source: "https://fireworks.ai/pricing", | |
| }, | |
| "fireworks_ai/thenlper/gte-base": { | |
| max_tokens: 512, | |
| max_input_tokens: 512, | |
| input_cost_per_token: 8e-9, | |
| output_cost_per_token: 0.0, | |
| litellm_provider: "fireworks_ai-embedding-models", | |
| mode: "embedding", | |
| source: "https://fireworks.ai/pricing", | |
| }, | |
| "fireworks-ai-up-to-16b": { | |
| input_cost_per_token: 2e-7, | |
| output_cost_per_token: 2e-7, | |
| litellm_provider: "fireworks_ai", | |
| }, | |
| "fireworks-ai-16.1b-to-80b": { | |
| input_cost_per_token: 9e-7, | |
| output_cost_per_token: 9e-7, | |
| litellm_provider: "fireworks_ai", | |
| }, | |
| "fireworks-ai-moe-up-to-56b": { | |
| input_cost_per_token: 5e-7, | |
| output_cost_per_token: 5e-7, | |
| litellm_provider: "fireworks_ai", | |
| }, | |
| "fireworks-ai-56b-to-176b": { | |
| input_cost_per_token: 1.2e-6, | |
| output_cost_per_token: 1.2e-6, | |
| litellm_provider: "fireworks_ai", | |
| }, | |
| "fireworks-ai-default": { | |
| input_cost_per_token: 0.0, | |
| output_cost_per_token: 0.0, | |
| litellm_provider: "fireworks_ai", | |
| }, | |
| "fireworks-ai-embedding-up-to-150m": { | |
| input_cost_per_token: 8e-9, | |
| output_cost_per_token: 0.0, | |
| litellm_provider: "fireworks_ai-embedding-models", | |
| }, | |
| "fireworks-ai-embedding-150m-to-350m": { | |
| input_cost_per_token: 1.6e-8, | |
| output_cost_per_token: 0.0, | |
| litellm_provider: "fireworks_ai-embedding-models", | |
| }, | |
| "anyscale/mistralai/Mistral-7B-Instruct-v0.1": { | |
| max_tokens: 16384, | |
| max_input_tokens: 16384, | |
| max_output_tokens: 16384, | |
| input_cost_per_token: 1.5e-7, | |
| output_cost_per_token: 1.5e-7, | |
| litellm_provider: "anyscale", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| source: | |
| "https://docs.anyscale.com/preview/endpoints/text-generation/supported-models/mistralai-Mistral-7B-Instruct-v0.1", | |
| }, | |
| "anyscale/mistralai/Mixtral-8x7B-Instruct-v0.1": { | |
| max_tokens: 16384, | |
| max_input_tokens: 16384, | |
| max_output_tokens: 16384, | |
| input_cost_per_token: 1.5e-7, | |
| output_cost_per_token: 1.5e-7, | |
| litellm_provider: "anyscale", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| source: | |
| "https://docs.anyscale.com/preview/endpoints/text-generation/supported-models/mistralai-Mixtral-8x7B-Instruct-v0.1", | |
| }, | |
| "anyscale/mistralai/Mixtral-8x22B-Instruct-v0.1": { | |
| max_tokens: 65536, | |
| max_input_tokens: 65536, | |
| max_output_tokens: 65536, | |
| input_cost_per_token: 9e-7, | |
| output_cost_per_token: 9e-7, | |
| litellm_provider: "anyscale", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| source: | |
| "https://docs.anyscale.com/preview/endpoints/text-generation/supported-models/mistralai-Mixtral-8x22B-Instruct-v0.1", | |
| }, | |
| "anyscale/HuggingFaceH4/zephyr-7b-beta": { | |
| max_tokens: 16384, | |
| max_input_tokens: 16384, | |
| max_output_tokens: 16384, | |
| input_cost_per_token: 1.5e-7, | |
| output_cost_per_token: 1.5e-7, | |
| litellm_provider: "anyscale", | |
| mode: "chat", | |
| }, | |
| "anyscale/google/gemma-7b-it": { | |
| max_tokens: 8192, | |
| max_input_tokens: 8192, | |
| max_output_tokens: 8192, | |
| input_cost_per_token: 1.5e-7, | |
| output_cost_per_token: 1.5e-7, | |
| litellm_provider: "anyscale", | |
| mode: "chat", | |
| source: | |
| "https://docs.anyscale.com/preview/endpoints/text-generation/supported-models/google-gemma-7b-it", | |
| }, | |
| "anyscale/meta-llama/Llama-2-7b-chat-hf": { | |
| max_tokens: 4096, | |
| max_input_tokens: 4096, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 1.5e-7, | |
| output_cost_per_token: 1.5e-7, | |
| litellm_provider: "anyscale", | |
| mode: "chat", | |
| }, | |
| "anyscale/meta-llama/Llama-2-13b-chat-hf": { | |
| max_tokens: 4096, | |
| max_input_tokens: 4096, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 2.5e-7, | |
| output_cost_per_token: 2.5e-7, | |
| litellm_provider: "anyscale", | |
| mode: "chat", | |
| }, | |
| "anyscale/meta-llama/Llama-2-70b-chat-hf": { | |
| max_tokens: 4096, | |
| max_input_tokens: 4096, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 1e-6, | |
| output_cost_per_token: 1e-6, | |
| litellm_provider: "anyscale", | |
| mode: "chat", | |
| }, | |
| "anyscale/codellama/CodeLlama-34b-Instruct-hf": { | |
| max_tokens: 4096, | |
| max_input_tokens: 4096, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 1e-6, | |
| output_cost_per_token: 1e-6, | |
| litellm_provider: "anyscale", | |
| mode: "chat", | |
| }, | |
| "anyscale/codellama/CodeLlama-70b-Instruct-hf": { | |
| max_tokens: 4096, | |
| max_input_tokens: 4096, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 1e-6, | |
| output_cost_per_token: 1e-6, | |
| litellm_provider: "anyscale", | |
| mode: "chat", | |
| source: | |
| "https://docs.anyscale.com/preview/endpoints/text-generation/supported-models/codellama-CodeLlama-70b-Instruct-hf", | |
| }, | |
| "anyscale/meta-llama/Meta-Llama-3-8B-Instruct": { | |
| max_tokens: 8192, | |
| max_input_tokens: 8192, | |
| max_output_tokens: 8192, | |
| input_cost_per_token: 1.5e-7, | |
| output_cost_per_token: 1.5e-7, | |
| litellm_provider: "anyscale", | |
| mode: "chat", | |
| source: | |
| "https://docs.anyscale.com/preview/endpoints/text-generation/supported-models/meta-llama-Meta-Llama-3-8B-Instruct", | |
| }, | |
| "anyscale/meta-llama/Meta-Llama-3-70B-Instruct": { | |
| max_tokens: 8192, | |
| max_input_tokens: 8192, | |
| max_output_tokens: 8192, | |
| input_cost_per_token: 1e-6, | |
| output_cost_per_token: 1e-6, | |
| litellm_provider: "anyscale", | |
| mode: "chat", | |
| source: | |
| "https://docs.anyscale.com/preview/endpoints/text-generation/supported-models/meta-llama-Meta-Llama-3-70B-Instruct", | |
| }, | |
| "cloudflare/@cf/meta/llama-2-7b-chat-fp16": { | |
| max_tokens: 3072, | |
| max_input_tokens: 3072, | |
| max_output_tokens: 3072, | |
| input_cost_per_token: 1.923e-6, | |
| output_cost_per_token: 1.923e-6, | |
| litellm_provider: "cloudflare", | |
| mode: "chat", | |
| }, | |
| "cloudflare/@cf/meta/llama-2-7b-chat-int8": { | |
| max_tokens: 2048, | |
| max_input_tokens: 2048, | |
| max_output_tokens: 2048, | |
| input_cost_per_token: 1.923e-6, | |
| output_cost_per_token: 1.923e-6, | |
| litellm_provider: "cloudflare", | |
| mode: "chat", | |
| }, | |
| "cloudflare/@cf/mistral/mistral-7b-instruct-v0.1": { | |
| max_tokens: 8192, | |
| max_input_tokens: 8192, | |
| max_output_tokens: 8192, | |
| input_cost_per_token: 1.923e-6, | |
| output_cost_per_token: 1.923e-6, | |
| litellm_provider: "cloudflare", | |
| mode: "chat", | |
| }, | |
| "cloudflare/@hf/thebloke/codellama-7b-instruct-awq": { | |
| max_tokens: 4096, | |
| max_input_tokens: 4096, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 1.923e-6, | |
| output_cost_per_token: 1.923e-6, | |
| litellm_provider: "cloudflare", | |
| mode: "chat", | |
| }, | |
| "voyage/voyage-01": { | |
| max_tokens: 4096, | |
| max_input_tokens: 4096, | |
| input_cost_per_token: 1e-7, | |
| output_cost_per_token: 0.0, | |
| litellm_provider: "voyage", | |
| mode: "embedding", | |
| }, | |
| "voyage/voyage-lite-01": { | |
| max_tokens: 4096, | |
| max_input_tokens: 4096, | |
| input_cost_per_token: 1e-7, | |
| output_cost_per_token: 0.0, | |
| litellm_provider: "voyage", | |
| mode: "embedding", | |
| }, | |
| "voyage/voyage-large-2": { | |
| max_tokens: 16000, | |
| max_input_tokens: 16000, | |
| input_cost_per_token: 1.2e-7, | |
| output_cost_per_token: 0.0, | |
| litellm_provider: "voyage", | |
| mode: "embedding", | |
| }, | |
| "voyage/voyage-law-2": { | |
| max_tokens: 16000, | |
| max_input_tokens: 16000, | |
| input_cost_per_token: 1.2e-7, | |
| output_cost_per_token: 0.0, | |
| litellm_provider: "voyage", | |
| mode: "embedding", | |
| }, | |
| "voyage/voyage-code-2": { | |
| max_tokens: 16000, | |
| max_input_tokens: 16000, | |
| input_cost_per_token: 1.2e-7, | |
| output_cost_per_token: 0.0, | |
| litellm_provider: "voyage", | |
| mode: "embedding", | |
| }, | |
| "voyage/voyage-2": { | |
| max_tokens: 4000, | |
| max_input_tokens: 4000, | |
| input_cost_per_token: 1e-7, | |
| output_cost_per_token: 0.0, | |
| litellm_provider: "voyage", | |
| mode: "embedding", | |
| }, | |
| "voyage/voyage-lite-02-instruct": { | |
| max_tokens: 4000, | |
| max_input_tokens: 4000, | |
| input_cost_per_token: 1e-7, | |
| output_cost_per_token: 0.0, | |
| litellm_provider: "voyage", | |
| mode: "embedding", | |
| }, | |
| "voyage/voyage-finance-2": { | |
| max_tokens: 32000, | |
| max_input_tokens: 32000, | |
| input_cost_per_token: 1.2e-7, | |
| output_cost_per_token: 0.0, | |
| litellm_provider: "voyage", | |
| mode: "embedding", | |
| }, | |
| "databricks/databricks-meta-llama-3-1-405b-instruct": { | |
| max_tokens: 128000, | |
| max_input_tokens: 128000, | |
| max_output_tokens: 128000, | |
| input_cost_per_token: 5e-6, | |
| input_dbu_cost_per_token: 7.1429e-5, | |
| output_cost_per_token: 1.500002e-5, | |
| output_db_cost_per_token: 0.000214286, | |
| litellm_provider: "databricks", | |
| mode: "chat", | |
| source: | |
| "https://www.databricks.com/product/pricing/foundation-model-serving", | |
| metadata: { | |
| notes: | |
| "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation.", | |
| }, | |
| }, | |
| "databricks/databricks-meta-llama-3-1-70b-instruct": { | |
| max_tokens: 128000, | |
| max_input_tokens: 128000, | |
| max_output_tokens: 128000, | |
| input_cost_per_token: 1.00002e-6, | |
| input_dbu_cost_per_token: 1.4286e-5, | |
| output_cost_per_token: 2.99999e-6, | |
| output_dbu_cost_per_token: 4.2857e-5, | |
| litellm_provider: "databricks", | |
| mode: "chat", | |
| source: | |
| "https://www.databricks.com/product/pricing/foundation-model-serving", | |
| metadata: { | |
| notes: | |
| "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation.", | |
| }, | |
| }, | |
| "databricks/databricks-dbrx-instruct": { | |
| max_tokens: 32768, | |
| max_input_tokens: 32768, | |
| max_output_tokens: 32768, | |
| input_cost_per_token: 7.4998e-7, | |
| input_dbu_cost_per_token: 1.0714e-5, | |
| output_cost_per_token: 2.24901e-6, | |
| output_dbu_cost_per_token: 3.2143e-5, | |
| litellm_provider: "databricks", | |
| mode: "chat", | |
| source: | |
| "https://www.databricks.com/product/pricing/foundation-model-serving", | |
| metadata: { | |
| notes: | |
| "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation.", | |
| }, | |
| }, | |
| "databricks/databricks-meta-llama-3-70b-instruct": { | |
| max_tokens: 128000, | |
| max_input_tokens: 128000, | |
| max_output_tokens: 128000, | |
| input_cost_per_token: 1.00002e-6, | |
| input_dbu_cost_per_token: 1.4286e-5, | |
| output_cost_per_token: 2.99999e-6, | |
| output_dbu_cost_per_token: 4.2857e-5, | |
| litellm_provider: "databricks", | |
| mode: "chat", | |
| source: | |
| "https://www.databricks.com/product/pricing/foundation-model-serving", | |
| metadata: { | |
| notes: | |
| "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation.", | |
| }, | |
| }, | |
| "databricks/databricks-llama-2-70b-chat": { | |
| max_tokens: 4096, | |
| max_input_tokens: 4096, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 5.0001e-7, | |
| input_dbu_cost_per_token: 7.143e-6, | |
| output_cost_per_token: 1.5e-6, | |
| output_dbu_cost_per_token: 2.1429e-5, | |
| litellm_provider: "databricks", | |
| mode: "chat", | |
| source: | |
| "https://www.databricks.com/product/pricing/foundation-model-serving", | |
| metadata: { | |
| notes: | |
| "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation.", | |
| }, | |
| }, | |
| "databricks/databricks-mixtral-8x7b-instruct": { | |
| max_tokens: 4096, | |
| max_input_tokens: 4096, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 5.0001e-7, | |
| input_dbu_cost_per_token: 7.143e-6, | |
| output_cost_per_token: 9.9902e-7, | |
| output_dbu_cost_per_token: 1.4286e-5, | |
| litellm_provider: "databricks", | |
| mode: "chat", | |
| source: | |
| "https://www.databricks.com/product/pricing/foundation-model-serving", | |
| metadata: { | |
| notes: | |
| "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation.", | |
| }, | |
| }, | |
| "databricks/databricks-mpt-30b-instruct": { | |
| max_tokens: 8192, | |
| max_input_tokens: 8192, | |
| max_output_tokens: 8192, | |
| input_cost_per_token: 9.9902e-7, | |
| input_dbu_cost_per_token: 1.4286e-5, | |
| output_cost_per_token: 9.9902e-7, | |
| output_dbu_cost_per_token: 1.4286e-5, | |
| litellm_provider: "databricks", | |
| mode: "chat", | |
| source: | |
| "https://www.databricks.com/product/pricing/foundation-model-serving", | |
| metadata: { | |
| notes: | |
| "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation.", | |
| }, | |
| }, | |
| "databricks/databricks-mpt-7b-instruct": { | |
| max_tokens: 8192, | |
| max_input_tokens: 8192, | |
| max_output_tokens: 8192, | |
| input_cost_per_token: 5.0001e-7, | |
| input_dbu_cost_per_token: 7.143e-6, | |
| output_cost_per_token: 0.0, | |
| output_dbu_cost_per_token: 0.0, | |
| litellm_provider: "databricks", | |
| mode: "chat", | |
| source: | |
| "https://www.databricks.com/product/pricing/foundation-model-serving", | |
| metadata: { | |
| notes: | |
| "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation.", | |
| }, | |
| }, | |
| "databricks/databricks-bge-large-en": { | |
| max_tokens: 512, | |
| max_input_tokens: 512, | |
| output_vector_size: 1024, | |
| input_cost_per_token: 1.0003e-7, | |
| input_dbu_cost_per_token: 1.429e-6, | |
| output_cost_per_token: 0.0, | |
| output_dbu_cost_per_token: 0.0, | |
| litellm_provider: "databricks", | |
| mode: "embedding", | |
| source: | |
| "https://www.databricks.com/product/pricing/foundation-model-serving", | |
| metadata: { | |
| notes: | |
| "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation.", | |
| }, | |
| }, | |
| "databricks/databricks-gte-large-en": { | |
| max_tokens: 8192, | |
| max_input_tokens: 8192, | |
| output_vector_size: 1024, | |
| input_cost_per_token: 1.2999e-7, | |
| input_dbu_cost_per_token: 1.857e-6, | |
| output_cost_per_token: 0.0, | |
| output_dbu_cost_per_token: 0.0, | |
| litellm_provider: "databricks", | |
| mode: "embedding", | |
| source: | |
| "https://www.databricks.com/product/pricing/foundation-model-serving", | |
| metadata: { | |
| notes: | |
| "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation.", | |
| }, | |
| }, | |
| "azure/gpt-4o-mini-2024-07-18": { | |
| max_tokens: 16384, | |
| max_input_tokens: 128000, | |
| max_output_tokens: 16384, | |
| input_cost_per_token: 1.65e-7, | |
| output_cost_per_token: 6.6e-7, | |
| cache_read_input_token_cost: 7.5e-8, | |
| litellm_provider: "azure", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_parallel_function_calling: true, | |
| supports_response_schema: true, | |
| supports_vision: true, | |
| supports_prompt_caching: true, | |
| }, | |
| "amazon.titan-embed-image-v1": { | |
| max_tokens: 128, | |
| max_input_tokens: 128, | |
| output_vector_size: 1024, | |
| input_cost_per_token: 8e-7, | |
| input_cost_per_image: 6e-5, | |
| output_cost_per_token: 0.0, | |
| litellm_provider: "bedrock", | |
| supports_image_input: true, | |
| supports_embedding_image_input: true, | |
| mode: "embedding", | |
| source: | |
| "https://us-east-1.console.aws.amazon.com/bedrock/home?region=us-east-1#/providers?model=amazon.titan-image-generator-v1", | |
| metadata: { | |
| notes: | |
| "'supports_image_input' is a deprecated field. Use 'supports_embedding_image_input' instead.", | |
| }, | |
| }, | |
| "azure_ai/mistral-large-2407": { | |
| max_tokens: 4096, | |
| max_input_tokens: 128000, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 2e-6, | |
| output_cost_per_token: 6e-6, | |
| litellm_provider: "azure_ai", | |
| supports_function_calling: true, | |
| mode: "chat", | |
| source: | |
| "https://azuremarketplace.microsoft.com/en/marketplace/apps/000-000.mistral-ai-large-2407-offer?tab=Overview", | |
| }, | |
| "azure_ai/ministral-3b": { | |
| max_tokens: 4096, | |
| max_input_tokens: 128000, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 4e-8, | |
| output_cost_per_token: 4e-8, | |
| litellm_provider: "azure_ai", | |
| supports_function_calling: true, | |
| mode: "chat", | |
| source: | |
| "https://azuremarketplace.microsoft.com/en/marketplace/apps/000-000.ministral-3b-2410-offer?tab=Overview", | |
| }, | |
| "azure_ai/Llama-3.2-11B-Vision-Instruct": { | |
| max_tokens: 2048, | |
| max_input_tokens: 128000, | |
| max_output_tokens: 2048, | |
| input_cost_per_token: 3.7e-7, | |
| output_cost_per_token: 3.7e-7, | |
| litellm_provider: "azure_ai", | |
| supports_function_calling: true, | |
| supports_vision: true, | |
| mode: "chat", | |
| source: | |
| "https://azuremarketplace.microsoft.com/en/marketplace/apps/metagenai.meta-llama-3-2-11b-vision-instruct-offer?tab=Overview", | |
| }, | |
| "azure_ai/Llama-3.2-90B-Vision-Instruct": { | |
| max_tokens: 2048, | |
| max_input_tokens: 128000, | |
| max_output_tokens: 2048, | |
| input_cost_per_token: 2.04e-6, | |
| output_cost_per_token: 2.04e-6, | |
| litellm_provider: "azure_ai", | |
| supports_function_calling: true, | |
| supports_vision: true, | |
| mode: "chat", | |
| source: | |
| "https://azuremarketplace.microsoft.com/en/marketplace/apps/metagenai.meta-llama-3-2-90b-vision-instruct-offer?tab=Overview", | |
| }, | |
| "azure_ai/Phi-3.5-mini-instruct": { | |
| max_tokens: 4096, | |
| max_input_tokens: 128000, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 1.3e-7, | |
| output_cost_per_token: 5.2e-7, | |
| litellm_provider: "azure_ai", | |
| mode: "chat", | |
| supports_vision: false, | |
| source: "https://azure.microsoft.com/en-us/pricing/details/phi-3/", | |
| }, | |
| "azure_ai/Phi-3.5-vision-instruct": { | |
| max_tokens: 4096, | |
| max_input_tokens: 128000, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 1.3e-7, | |
| output_cost_per_token: 5.2e-7, | |
| litellm_provider: "azure_ai", | |
| mode: "chat", | |
| supports_vision: true, | |
| source: "https://azure.microsoft.com/en-us/pricing/details/phi-3/", | |
| }, | |
| "azure_ai/Phi-3.5-MoE-instruct": { | |
| max_tokens: 4096, | |
| max_input_tokens: 128000, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 1.6e-7, | |
| output_cost_per_token: 6.4e-7, | |
| litellm_provider: "azure_ai", | |
| mode: "chat", | |
| supports_vision: false, | |
| source: "https://azure.microsoft.com/en-us/pricing/details/phi-3/", | |
| }, | |
| "azure_ai/Phi-3-mini-4k-instruct": { | |
| max_tokens: 4096, | |
| max_input_tokens: 4096, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 1.3e-7, | |
| output_cost_per_token: 5.2e-7, | |
| litellm_provider: "azure_ai", | |
| mode: "chat", | |
| supports_vision: false, | |
| source: "https://azure.microsoft.com/en-us/pricing/details/phi-3/", | |
| }, | |
| "azure_ai/Phi-3-mini-128k-instruct": { | |
| max_tokens: 4096, | |
| max_input_tokens: 128000, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 1.3e-7, | |
| output_cost_per_token: 5.2e-7, | |
| litellm_provider: "azure_ai", | |
| mode: "chat", | |
| supports_vision: false, | |
| source: "https://azure.microsoft.com/en-us/pricing/details/phi-3/", | |
| }, | |
| "azure_ai/Phi-3-small-8k-instruct": { | |
| max_tokens: 4096, | |
| max_input_tokens: 8192, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 1.5e-7, | |
| output_cost_per_token: 6e-7, | |
| litellm_provider: "azure_ai", | |
| mode: "chat", | |
| supports_vision: false, | |
| source: "https://azure.microsoft.com/en-us/pricing/details/phi-3/", | |
| }, | |
| "azure_ai/Phi-3-small-128k-instruct": { | |
| max_tokens: 4096, | |
| max_input_tokens: 128000, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 1.5e-7, | |
| output_cost_per_token: 6e-7, | |
| litellm_provider: "azure_ai", | |
| mode: "chat", | |
| supports_vision: false, | |
| source: "https://azure.microsoft.com/en-us/pricing/details/phi-3/", | |
| }, | |
| "azure_ai/Phi-3-medium-4k-instruct": { | |
| max_tokens: 4096, | |
| max_input_tokens: 4096, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 1.7e-7, | |
| output_cost_per_token: 6.8e-7, | |
| litellm_provider: "azure_ai", | |
| mode: "chat", | |
| supports_vision: false, | |
| source: "https://azure.microsoft.com/en-us/pricing/details/phi-3/", | |
| }, | |
| "azure_ai/Phi-3-medium-128k-instruct": { | |
| max_tokens: 4096, | |
| max_input_tokens: 128000, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 1.7e-7, | |
| output_cost_per_token: 6.8e-7, | |
| litellm_provider: "azure_ai", | |
| mode: "chat", | |
| supports_vision: false, | |
| source: "https://azure.microsoft.com/en-us/pricing/details/phi-3/", | |
| }, | |
| "xai/grok-beta": { | |
| max_tokens: 131072, | |
| max_input_tokens: 131072, | |
| max_output_tokens: 131072, | |
| input_cost_per_token: 5e-6, | |
| output_cost_per_token: 1.5e-5, | |
| litellm_provider: "xai", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_vision: true, | |
| }, | |
| "claude-3-5-haiku-20241022": { | |
| max_tokens: 8192, | |
| max_input_tokens: 200000, | |
| max_output_tokens: 8192, | |
| input_cost_per_token: 1e-6, | |
| output_cost_per_token: 5e-6, | |
| cache_creation_input_token_cost: 1.25e-6, | |
| cache_read_input_token_cost: 1e-7, | |
| litellm_provider: "anthropic", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| tool_use_system_prompt_tokens: 264, | |
| supports_assistant_prefill: true, | |
| supports_prompt_caching: true, | |
| supports_response_schema: true, | |
| }, | |
| "vertex_ai/claude-3-5-haiku@20241022": { | |
| max_tokens: 8192, | |
| max_input_tokens: 200000, | |
| max_output_tokens: 8192, | |
| input_cost_per_token: 1e-6, | |
| output_cost_per_token: 5e-6, | |
| litellm_provider: "vertex_ai-anthropic_models", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_assistant_prefill: true, | |
| }, | |
| "openrouter/anthropic/claude-3-5-haiku": { | |
| max_tokens: 200000, | |
| input_cost_per_token: 1e-6, | |
| output_cost_per_token: 5e-6, | |
| litellm_provider: "openrouter", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| }, | |
| "openrouter/anthropic/claude-3-5-haiku-20241022": { | |
| max_tokens: 8192, | |
| max_input_tokens: 200000, | |
| max_output_tokens: 8192, | |
| input_cost_per_token: 1e-6, | |
| output_cost_per_token: 5e-6, | |
| litellm_provider: "openrouter", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| tool_use_system_prompt_tokens: 264, | |
| }, | |
| "anthropic.claude-3-5-haiku-20241022-v1:0": { | |
| max_tokens: 8192, | |
| max_input_tokens: 200000, | |
| max_output_tokens: 8192, | |
| input_cost_per_token: 1e-6, | |
| output_cost_per_token: 5e-6, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| supports_assistant_prefill: true, | |
| supports_function_calling: true, | |
| supports_prompt_caching: true, | |
| }, | |
| "us.anthropic.claude-3-5-haiku-20241022-v1:0": { | |
| max_tokens: 8192, | |
| max_input_tokens: 200000, | |
| max_output_tokens: 8192, | |
| input_cost_per_token: 1e-6, | |
| output_cost_per_token: 5e-6, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| supports_assistant_prefill: true, | |
| supports_function_calling: true, | |
| }, | |
| "eu.anthropic.claude-3-5-haiku-20241022-v1:0": { | |
| max_tokens: 8192, | |
| max_input_tokens: 200000, | |
| max_output_tokens: 8192, | |
| input_cost_per_token: 1e-6, | |
| output_cost_per_token: 5e-6, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| }, | |
| "stability.sd3-large-v1:0": { | |
| max_tokens: 77, | |
| max_input_tokens: 77, | |
| output_cost_per_image: 0.08, | |
| litellm_provider: "bedrock", | |
| mode: "image_generation", | |
| }, | |
| "gpt-4o-2024-11-20": { | |
| max_tokens: 16384, | |
| max_input_tokens: 128000, | |
| max_output_tokens: 16384, | |
| input_cost_per_token: 2.5e-6, | |
| output_cost_per_token: 1e-5, | |
| input_cost_per_token_batches: 1.25e-6, | |
| output_cost_per_token_batches: 5e-6, | |
| cache_read_input_token_cost: 1.25e-6, | |
| litellm_provider: "openai", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_parallel_function_calling: true, | |
| supports_response_schema: true, | |
| supports_vision: true, | |
| supports_prompt_caching: true, | |
| supports_system_messages: true, | |
| }, | |
| "ft:gpt-4o-2024-11-20": { | |
| max_tokens: 16384, | |
| max_input_tokens: 128000, | |
| max_output_tokens: 16384, | |
| input_cost_per_token: 3.75e-6, | |
| cache_creation_input_token_cost: 1.875e-6, | |
| output_cost_per_token: 1.5e-5, | |
| litellm_provider: "openai", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_parallel_function_calling: true, | |
| supports_response_schema: true, | |
| supports_vision: true, | |
| supports_prompt_caching: true, | |
| supports_system_messages: true, | |
| }, | |
| "azure/gpt-4o-2024-11-20": { | |
| max_tokens: 16384, | |
| max_input_tokens: 128000, | |
| max_output_tokens: 16384, | |
| input_cost_per_token: 2.75e-6, | |
| output_cost_per_token: 1.1e-5, | |
| litellm_provider: "azure", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_parallel_function_calling: true, | |
| supports_response_schema: true, | |
| supports_vision: true, | |
| }, | |
| "azure/global-standard/gpt-4o-2024-11-20": { | |
| max_tokens: 16384, | |
| max_input_tokens: 128000, | |
| max_output_tokens: 16384, | |
| input_cost_per_token: 2.5e-6, | |
| output_cost_per_token: 1e-5, | |
| litellm_provider: "azure", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_parallel_function_calling: true, | |
| supports_response_schema: true, | |
| supports_vision: true, | |
| }, | |
| "groq/llama-3.2-1b-preview": { | |
| max_tokens: 8192, | |
| max_input_tokens: 8192, | |
| max_output_tokens: 8192, | |
| input_cost_per_token: 4e-8, | |
| output_cost_per_token: 4e-8, | |
| litellm_provider: "groq", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_response_schema: true, | |
| }, | |
| "groq/llama-3.2-3b-preview": { | |
| max_tokens: 8192, | |
| max_input_tokens: 8192, | |
| max_output_tokens: 8192, | |
| input_cost_per_token: 6e-8, | |
| output_cost_per_token: 6e-8, | |
| litellm_provider: "groq", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_response_schema: true, | |
| }, | |
| "groq/llama-3.2-11b-text-preview": { | |
| max_tokens: 8192, | |
| max_input_tokens: 8192, | |
| max_output_tokens: 8192, | |
| input_cost_per_token: 1.8e-7, | |
| output_cost_per_token: 1.8e-7, | |
| litellm_provider: "groq", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_response_schema: true, | |
| }, | |
| "groq/llama-3.2-11b-vision-preview": { | |
| max_tokens: 8192, | |
| max_input_tokens: 8192, | |
| max_output_tokens: 8192, | |
| input_cost_per_token: 1.8e-7, | |
| output_cost_per_token: 1.8e-7, | |
| litellm_provider: "groq", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_response_schema: true, | |
| supports_vision: true, | |
| }, | |
| "groq/llama-3.2-90b-text-preview": { | |
| max_tokens: 8192, | |
| max_input_tokens: 8192, | |
| max_output_tokens: 8192, | |
| input_cost_per_token: 9e-7, | |
| output_cost_per_token: 9e-7, | |
| litellm_provider: "groq", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_response_schema: true, | |
| }, | |
| "groq/llama-3.2-90b-vision-preview": { | |
| max_tokens: 8192, | |
| max_input_tokens: 8192, | |
| max_output_tokens: 8192, | |
| input_cost_per_token: 9e-7, | |
| output_cost_per_token: 9e-7, | |
| litellm_provider: "groq", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_response_schema: true, | |
| supports_vision: true, | |
| }, | |
| "vertex_ai/claude-3-sonnet": { | |
| max_tokens: 4096, | |
| max_input_tokens: 200000, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 3e-6, | |
| output_cost_per_token: 1.5e-5, | |
| litellm_provider: "vertex_ai-anthropic_models", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_vision: true, | |
| supports_assistant_prefill: true, | |
| }, | |
| "vertex_ai/claude-3-5-sonnet": { | |
| max_tokens: 8192, | |
| max_input_tokens: 200000, | |
| max_output_tokens: 8192, | |
| input_cost_per_token: 3e-6, | |
| output_cost_per_token: 1.5e-5, | |
| litellm_provider: "vertex_ai-anthropic_models", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_vision: true, | |
| supports_assistant_prefill: true, | |
| }, | |
| "vertex_ai/claude-3-5-sonnet-v2": { | |
| max_tokens: 8192, | |
| max_input_tokens: 200000, | |
| max_output_tokens: 8192, | |
| input_cost_per_token: 3e-6, | |
| output_cost_per_token: 1.5e-5, | |
| litellm_provider: "vertex_ai-anthropic_models", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_vision: true, | |
| supports_assistant_prefill: true, | |
| }, | |
| "vertex_ai/claude-3-haiku": { | |
| max_tokens: 4096, | |
| max_input_tokens: 200000, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 2.5e-7, | |
| output_cost_per_token: 1.25e-6, | |
| litellm_provider: "vertex_ai-anthropic_models", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_vision: true, | |
| supports_assistant_prefill: true, | |
| }, | |
| "vertex_ai/claude-3-5-haiku": { | |
| max_tokens: 8192, | |
| max_input_tokens: 200000, | |
| max_output_tokens: 8192, | |
| input_cost_per_token: 1e-6, | |
| output_cost_per_token: 5e-6, | |
| litellm_provider: "vertex_ai-anthropic_models", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_assistant_prefill: true, | |
| }, | |
| "vertex_ai/claude-3-opus": { | |
| max_tokens: 4096, | |
| max_input_tokens: 200000, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 1.5e-5, | |
| output_cost_per_token: 7.5e-5, | |
| litellm_provider: "vertex_ai-anthropic_models", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_vision: true, | |
| supports_assistant_prefill: true, | |
| }, | |
| "gemini/gemini-exp-1114": { | |
| max_tokens: 8192, | |
| max_input_tokens: 1048576, | |
| max_output_tokens: 8192, | |
| max_images_per_prompt: 3000, | |
| max_videos_per_prompt: 10, | |
| max_video_length: 1, | |
| max_audio_length_hours: 8.4, | |
| max_audio_per_prompt: 1, | |
| max_pdf_size_mb: 30, | |
| input_cost_per_token: 0, | |
| input_cost_per_token_above_128k_tokens: 0, | |
| output_cost_per_token: 0, | |
| output_cost_per_token_above_128k_tokens: 0, | |
| litellm_provider: "gemini", | |
| mode: "chat", | |
| supports_system_messages: true, | |
| supports_function_calling: true, | |
| supports_vision: true, | |
| supports_response_schema: true, | |
| tpm: 4000000, | |
| rpm: 1000, | |
| source: "https://ai.google.dev/pricing", | |
| metadata: { | |
| notes: | |
| "Rate limits not documented for gemini-exp-1114. Assuming same as gemini-1.5-pro.", | |
| }, | |
| }, | |
| "openrouter/qwen/qwen-2.5-coder-32b-instruct": { | |
| max_tokens: 33792, | |
| max_input_tokens: 33792, | |
| max_output_tokens: 33792, | |
| input_cost_per_token: 1.8e-7, | |
| output_cost_per_token: 1.8e-7, | |
| litellm_provider: "openrouter", | |
| mode: "chat", | |
| }, | |
| "us.meta.llama3-1-8b-instruct-v1:0": { | |
| max_tokens: 128000, | |
| max_input_tokens: 128000, | |
| max_output_tokens: 2048, | |
| input_cost_per_token: 2.2e-7, | |
| output_cost_per_token: 2.2e-7, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_tool_choice: false, | |
| }, | |
| "us.meta.llama3-1-70b-instruct-v1:0": { | |
| max_tokens: 128000, | |
| max_input_tokens: 128000, | |
| max_output_tokens: 2048, | |
| input_cost_per_token: 9.9e-7, | |
| output_cost_per_token: 9.9e-7, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_tool_choice: false, | |
| }, | |
| "us.meta.llama3-1-405b-instruct-v1:0": { | |
| max_tokens: 128000, | |
| max_input_tokens: 128000, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 5.32e-6, | |
| output_cost_per_token: 1.6e-5, | |
| litellm_provider: "bedrock", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_tool_choice: false, | |
| }, | |
| "stability.stable-image-ultra-v1:0": { | |
| max_tokens: 77, | |
| max_input_tokens: 77, | |
| output_cost_per_image: 0.14, | |
| litellm_provider: "bedrock", | |
| mode: "image_generation", | |
| }, | |
| "fireworks_ai/accounts/fireworks/models/qwen2p5-coder-32b-instruct": { | |
| max_tokens: 4096, | |
| max_input_tokens: 4096, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 9e-7, | |
| output_cost_per_token: 9e-7, | |
| litellm_provider: "fireworks_ai", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_response_schema: true, | |
| source: "https://fireworks.ai/pricing", | |
| }, | |
| "omni-moderation-latest": { | |
| max_tokens: 32768, | |
| max_input_tokens: 32768, | |
| max_output_tokens: 0, | |
| input_cost_per_token: 0.0, | |
| output_cost_per_token: 0.0, | |
| litellm_provider: "openai", | |
| mode: "moderation", | |
| }, | |
| "omni-moderation-latest-intents": { | |
| max_tokens: 32768, | |
| max_input_tokens: 32768, | |
| max_output_tokens: 0, | |
| input_cost_per_token: 0.0, | |
| output_cost_per_token: 0.0, | |
| litellm_provider: "openai", | |
| mode: "moderation", | |
| }, | |
| "omni-moderation-2024-09-26": { | |
| max_tokens: 32768, | |
| max_input_tokens: 32768, | |
| max_output_tokens: 0, | |
| input_cost_per_token: 0.0, | |
| output_cost_per_token: 0.0, | |
| litellm_provider: "openai", | |
| mode: "moderation", | |
| }, | |
| "gpt-4o-audio-preview-2024-12-17": { | |
| max_tokens: 16384, | |
| max_input_tokens: 128000, | |
| max_output_tokens: 16384, | |
| input_cost_per_token: 2.5e-6, | |
| input_cost_per_audio_token: 4e-5, | |
| output_cost_per_token: 1e-5, | |
| output_cost_per_audio_token: 8e-5, | |
| litellm_provider: "openai", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_parallel_function_calling: true, | |
| supports_audio_input: true, | |
| supports_audio_output: true, | |
| supports_system_messages: true, | |
| }, | |
| "gpt-4o-mini-audio-preview-2024-12-17": { | |
| max_tokens: 16384, | |
| max_input_tokens: 128000, | |
| max_output_tokens: 16384, | |
| input_cost_per_token: 1.5e-7, | |
| input_cost_per_audio_token: 1e-5, | |
| output_cost_per_token: 6e-7, | |
| output_cost_per_audio_token: 2e-5, | |
| litellm_provider: "openai", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_parallel_function_calling: true, | |
| supports_audio_input: true, | |
| supports_audio_output: true, | |
| supports_system_messages: true, | |
| }, | |
| o1: { | |
| max_tokens: 100000, | |
| max_input_tokens: 200000, | |
| max_output_tokens: 100000, | |
| input_cost_per_token: 1.5e-5, | |
| output_cost_per_token: 6e-5, | |
| cache_read_input_token_cost: 7.5e-6, | |
| litellm_provider: "openai", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_parallel_function_calling: true, | |
| supports_vision: true, | |
| supports_prompt_caching: true, | |
| supports_system_messages: true, | |
| supports_response_schema: true, | |
| }, | |
| "o1-2024-12-17": { | |
| max_tokens: 100000, | |
| max_input_tokens: 200000, | |
| max_output_tokens: 100000, | |
| input_cost_per_token: 1.5e-5, | |
| output_cost_per_token: 6e-5, | |
| cache_read_input_token_cost: 7.5e-6, | |
| litellm_provider: "openai", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_parallel_function_calling: true, | |
| supports_vision: true, | |
| supports_prompt_caching: true, | |
| supports_system_messages: true, | |
| supports_response_schema: true, | |
| }, | |
| "gpt-4o-realtime-preview-2024-10-01": { | |
| max_tokens: 4096, | |
| max_input_tokens: 128000, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 5e-6, | |
| input_cost_per_audio_token: 0.0001, | |
| cache_read_input_token_cost: 2.5e-6, | |
| cache_creation_input_audio_token_cost: 2e-5, | |
| output_cost_per_token: 2e-5, | |
| output_cost_per_audio_token: 0.0002, | |
| litellm_provider: "openai", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_parallel_function_calling: true, | |
| supports_audio_input: true, | |
| supports_audio_output: true, | |
| supports_system_messages: true, | |
| }, | |
| "gpt-4o-realtime-preview": { | |
| max_tokens: 4096, | |
| max_input_tokens: 128000, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 5e-6, | |
| input_cost_per_audio_token: 4e-5, | |
| cache_read_input_token_cost: 2.5e-6, | |
| output_cost_per_token: 2e-5, | |
| output_cost_per_audio_token: 8e-5, | |
| litellm_provider: "openai", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_parallel_function_calling: true, | |
| supports_audio_input: true, | |
| supports_audio_output: true, | |
| supports_system_messages: true, | |
| }, | |
| "gpt-4o-realtime-preview-2024-12-17": { | |
| max_tokens: 4096, | |
| max_input_tokens: 128000, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 5e-6, | |
| input_cost_per_audio_token: 4e-5, | |
| cache_read_input_token_cost: 2.5e-6, | |
| output_cost_per_token: 2e-5, | |
| output_cost_per_audio_token: 8e-5, | |
| litellm_provider: "openai", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_parallel_function_calling: true, | |
| supports_audio_input: true, | |
| supports_audio_output: true, | |
| supports_system_messages: true, | |
| }, | |
| "gpt-4o-mini-realtime-preview": { | |
| max_tokens: 4096, | |
| max_input_tokens: 128000, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 6e-7, | |
| input_cost_per_audio_token: 1e-5, | |
| cache_read_input_token_cost: 3e-7, | |
| cache_creation_input_audio_token_cost: 3e-7, | |
| output_cost_per_token: 2.4e-6, | |
| output_cost_per_audio_token: 2e-5, | |
| litellm_provider: "openai", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_parallel_function_calling: true, | |
| supports_audio_input: true, | |
| supports_audio_output: true, | |
| supports_system_messages: true, | |
| }, | |
| "gpt-4o-mini-realtime-preview-2024-12-17": { | |
| max_tokens: 4096, | |
| max_input_tokens: 128000, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 6e-7, | |
| input_cost_per_audio_token: 1e-5, | |
| cache_read_input_token_cost: 3e-7, | |
| cache_creation_input_audio_token_cost: 3e-7, | |
| output_cost_per_token: 2.4e-6, | |
| output_cost_per_audio_token: 2e-5, | |
| litellm_provider: "openai", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_parallel_function_calling: true, | |
| supports_audio_input: true, | |
| supports_audio_output: true, | |
| supports_system_messages: true, | |
| }, | |
| "azure/o1": { | |
| max_tokens: 100000, | |
| max_input_tokens: 200000, | |
| max_output_tokens: 100000, | |
| input_cost_per_token: 1.5e-5, | |
| output_cost_per_token: 6e-5, | |
| cache_read_input_token_cost: 7.5e-6, | |
| litellm_provider: "azure", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_parallel_function_calling: true, | |
| supports_vision: true, | |
| supports_prompt_caching: true, | |
| }, | |
| "azure_ai/Llama-3.3-70B-Instruct": { | |
| max_tokens: 2048, | |
| max_input_tokens: 128000, | |
| max_output_tokens: 2048, | |
| input_cost_per_token: 7.1e-7, | |
| output_cost_per_token: 7.1e-7, | |
| litellm_provider: "azure_ai", | |
| supports_function_calling: true, | |
| mode: "chat", | |
| source: | |
| "https://azuremarketplace.microsoft.com/en/marketplace/apps/metagenai.llama-3-3-70b-instruct-offer?tab=Overview", | |
| }, | |
| "mistral/mistral-large-2411": { | |
| max_tokens: 128000, | |
| max_input_tokens: 128000, | |
| max_output_tokens: 128000, | |
| input_cost_per_token: 2e-6, | |
| output_cost_per_token: 6e-6, | |
| litellm_provider: "mistral", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_assistant_prefill: true, | |
| }, | |
| "mistral/pixtral-large-latest": { | |
| max_tokens: 128000, | |
| max_input_tokens: 128000, | |
| max_output_tokens: 128000, | |
| input_cost_per_token: 2e-6, | |
| output_cost_per_token: 6e-6, | |
| litellm_provider: "mistral", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_assistant_prefill: true, | |
| supports_vision: true, | |
| }, | |
| "mistral/pixtral-large-2411": { | |
| max_tokens: 128000, | |
| max_input_tokens: 128000, | |
| max_output_tokens: 128000, | |
| input_cost_per_token: 2e-6, | |
| output_cost_per_token: 6e-6, | |
| litellm_provider: "mistral", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_assistant_prefill: true, | |
| supports_vision: true, | |
| }, | |
| "deepseek/deepseek-chat": { | |
| max_tokens: 4096, | |
| max_input_tokens: 128000, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 1.4e-7, | |
| input_cost_per_token_cache_hit: 1.4e-8, | |
| cache_read_input_token_cost: 1.4e-8, | |
| cache_creation_input_token_cost: 0.0, | |
| output_cost_per_token: 2.8e-7, | |
| litellm_provider: "deepseek", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_assistant_prefill: true, | |
| supports_tool_choice: true, | |
| supports_prompt_caching: true, | |
| }, | |
| "deepseek/deepseek-coder": { | |
| max_tokens: 4096, | |
| max_input_tokens: 128000, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 1.4e-7, | |
| input_cost_per_token_cache_hit: 1.4e-8, | |
| output_cost_per_token: 2.8e-7, | |
| litellm_provider: "deepseek", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_assistant_prefill: true, | |
| supports_tool_choice: true, | |
| supports_prompt_caching: true, | |
| }, | |
| "groq/llama-3.3-70b-versatile": { | |
| max_tokens: 8192, | |
| max_input_tokens: 128000, | |
| max_output_tokens: 8192, | |
| input_cost_per_token: 5.9e-7, | |
| output_cost_per_token: 7.9e-7, | |
| litellm_provider: "groq", | |
| mode: "chat", | |
| }, | |
| "groq/llama-3.3-70b-specdec": { | |
| max_tokens: 8192, | |
| max_input_tokens: 8192, | |
| max_output_tokens: 8192, | |
| input_cost_per_token: 5.9e-7, | |
| output_cost_per_token: 9.9e-7, | |
| litellm_provider: "groq", | |
| mode: "chat", | |
| }, | |
| "friendliai/meta-llama-3.1-8b-instruct": { | |
| max_tokens: 8192, | |
| max_input_tokens: 8192, | |
| max_output_tokens: 8192, | |
| input_cost_per_token: 1e-7, | |
| output_cost_per_token: 1e-7, | |
| litellm_provider: "friendliai", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_parallel_function_calling: true, | |
| supports_system_messages: true, | |
| supports_response_schema: true, | |
| }, | |
| "friendliai/meta-llama-3.1-70b-instruct": { | |
| max_tokens: 8192, | |
| max_input_tokens: 8192, | |
| max_output_tokens: 8192, | |
| input_cost_per_token: 6e-7, | |
| output_cost_per_token: 6e-7, | |
| litellm_provider: "friendliai", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_parallel_function_calling: true, | |
| supports_system_messages: true, | |
| supports_response_schema: true, | |
| }, | |
| "gemini-2.0-flash-exp": { | |
| max_tokens: 8192, | |
| max_input_tokens: 1048576, | |
| max_output_tokens: 8192, | |
| max_images_per_prompt: 3000, | |
| max_videos_per_prompt: 10, | |
| max_video_length: 1, | |
| max_audio_length_hours: 8.4, | |
| max_audio_per_prompt: 1, | |
| max_pdf_size_mb: 30, | |
| input_cost_per_image: 0, | |
| input_cost_per_video_per_second: 0, | |
| input_cost_per_audio_per_second: 0, | |
| input_cost_per_token: 0, | |
| input_cost_per_character: 0, | |
| input_cost_per_token_above_128k_tokens: 0, | |
| input_cost_per_character_above_128k_tokens: 0, | |
| input_cost_per_image_above_128k_tokens: 0, | |
| input_cost_per_video_per_second_above_128k_tokens: 0, | |
| input_cost_per_audio_per_second_above_128k_tokens: 0, | |
| output_cost_per_token: 0, | |
| output_cost_per_character: 0, | |
| output_cost_per_token_above_128k_tokens: 0, | |
| output_cost_per_character_above_128k_tokens: 0, | |
| litellm_provider: "vertex_ai-language-models", | |
| mode: "chat", | |
| supports_system_messages: true, | |
| supports_function_calling: true, | |
| supports_vision: true, | |
| supports_response_schema: true, | |
| supports_audio_output: true, | |
| source: | |
| "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash", | |
| }, | |
| "gemini/gemini-2.0-flash-exp": { | |
| max_tokens: 8192, | |
| max_input_tokens: 1048576, | |
| max_output_tokens: 8192, | |
| max_images_per_prompt: 3000, | |
| max_videos_per_prompt: 10, | |
| max_video_length: 1, | |
| max_audio_length_hours: 8.4, | |
| max_audio_per_prompt: 1, | |
| max_pdf_size_mb: 30, | |
| input_cost_per_image: 0, | |
| input_cost_per_video_per_second: 0, | |
| input_cost_per_audio_per_second: 0, | |
| input_cost_per_token: 0, | |
| input_cost_per_character: 0, | |
| input_cost_per_token_above_128k_tokens: 0, | |
| input_cost_per_character_above_128k_tokens: 0, | |
| input_cost_per_image_above_128k_tokens: 0, | |
| input_cost_per_video_per_second_above_128k_tokens: 0, | |
| input_cost_per_audio_per_second_above_128k_tokens: 0, | |
| output_cost_per_token: 0, | |
| output_cost_per_character: 0, | |
| output_cost_per_token_above_128k_tokens: 0, | |
| output_cost_per_character_above_128k_tokens: 0, | |
| litellm_provider: "gemini", | |
| mode: "chat", | |
| supports_system_messages: true, | |
| supports_function_calling: true, | |
| supports_vision: true, | |
| supports_response_schema: true, | |
| supports_audio_output: true, | |
| tpm: 4000000, | |
| rpm: 10, | |
| source: | |
| "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash", | |
| }, | |
| "vertex_ai/mistral-large@2411-001": { | |
| max_tokens: 8191, | |
| max_input_tokens: 128000, | |
| max_output_tokens: 8191, | |
| input_cost_per_token: 2e-6, | |
| output_cost_per_token: 6e-6, | |
| litellm_provider: "vertex_ai-mistral_models", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| }, | |
| "vertex_ai/mistral-large-2411": { | |
| max_tokens: 8191, | |
| max_input_tokens: 128000, | |
| max_output_tokens: 8191, | |
| input_cost_per_token: 2e-6, | |
| output_cost_per_token: 6e-6, | |
| litellm_provider: "vertex_ai-mistral_models", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| }, | |
| "text-embedding-005": { | |
| max_tokens: 2048, | |
| max_input_tokens: 2048, | |
| output_vector_size: 768, | |
| input_cost_per_character: 2.5e-8, | |
| input_cost_per_token: 1e-7, | |
| output_cost_per_token: 0, | |
| litellm_provider: "vertex_ai-embedding-models", | |
| mode: "embedding", | |
| source: | |
| "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models", | |
| }, | |
| "gemini/gemini-1.5-flash-8b": { | |
| max_tokens: 8192, | |
| max_input_tokens: 1048576, | |
| max_output_tokens: 8192, | |
| max_images_per_prompt: 3000, | |
| max_videos_per_prompt: 10, | |
| max_video_length: 1, | |
| max_audio_length_hours: 8.4, | |
| max_audio_per_prompt: 1, | |
| max_pdf_size_mb: 30, | |
| input_cost_per_token: 0, | |
| input_cost_per_token_above_128k_tokens: 0, | |
| output_cost_per_token: 0, | |
| output_cost_per_token_above_128k_tokens: 0, | |
| litellm_provider: "gemini", | |
| mode: "chat", | |
| supports_system_messages: true, | |
| supports_function_calling: true, | |
| supports_vision: true, | |
| supports_response_schema: true, | |
| supports_prompt_caching: true, | |
| tpm: 4000000, | |
| rpm: 4000, | |
| source: "https://ai.google.dev/pricing", | |
| }, | |
| "gemini/gemini-exp-1206": { | |
| max_tokens: 8192, | |
| max_input_tokens: 2097152, | |
| max_output_tokens: 8192, | |
| max_images_per_prompt: 3000, | |
| max_videos_per_prompt: 10, | |
| max_video_length: 1, | |
| max_audio_length_hours: 8.4, | |
| max_audio_per_prompt: 1, | |
| max_pdf_size_mb: 30, | |
| input_cost_per_token: 0, | |
| input_cost_per_token_above_128k_tokens: 0, | |
| output_cost_per_token: 0, | |
| output_cost_per_token_above_128k_tokens: 0, | |
| litellm_provider: "gemini", | |
| mode: "chat", | |
| supports_system_messages: true, | |
| supports_function_calling: true, | |
| supports_vision: true, | |
| supports_response_schema: true, | |
| tpm: 4000000, | |
| rpm: 1000, | |
| source: "https://ai.google.dev/pricing", | |
| metadata: { | |
| notes: | |
| "Rate limits not documented for gemini-exp-1206. Assuming same as gemini-1.5-pro.", | |
| }, | |
| }, | |
| "command-r7b-12-2024": { | |
| max_tokens: 4096, | |
| max_input_tokens: 128000, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 1.5e-7, | |
| output_cost_per_token: 3.75e-8, | |
| litellm_provider: "cohere_chat", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| source: "https://docs.cohere.com/v2/docs/command-r7b", | |
| }, | |
| "rerank-v3.5": { | |
| max_tokens: 4096, | |
| max_input_tokens: 4096, | |
| max_output_tokens: 4096, | |
| max_query_tokens: 2048, | |
| input_cost_per_token: 0.0, | |
| input_cost_per_query: 0.002, | |
| output_cost_per_token: 0.0, | |
| litellm_provider: "cohere", | |
| mode: "rerank", | |
| }, | |
| "openrouter/deepseek/deepseek-chat": { | |
| max_tokens: 8192, | |
| max_input_tokens: 66000, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 1.4e-7, | |
| output_cost_per_token: 2.8e-7, | |
| litellm_provider: "openrouter", | |
| supports_prompt_caching: true, | |
| mode: "chat", | |
| }, | |
| "openrouter/openai/o1": { | |
| max_tokens: 100000, | |
| max_input_tokens: 200000, | |
| max_output_tokens: 100000, | |
| input_cost_per_token: 1.5e-5, | |
| output_cost_per_token: 6e-5, | |
| cache_read_input_token_cost: 7.5e-6, | |
| litellm_provider: "openrouter", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_parallel_function_calling: true, | |
| supports_vision: true, | |
| supports_prompt_caching: true, | |
| supports_system_messages: true, | |
| supports_response_schema: true, | |
| }, | |
| "amazon.nova-micro-v1:0": { | |
| max_tokens: 4096, | |
| max_input_tokens: 300000, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 3.5e-8, | |
| output_cost_per_token: 1.4e-7, | |
| litellm_provider: "bedrock_converse", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_prompt_caching: true, | |
| }, | |
| "amazon.nova-lite-v1:0": { | |
| max_tokens: 4096, | |
| max_input_tokens: 128000, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 6e-8, | |
| output_cost_per_token: 2.4e-7, | |
| litellm_provider: "bedrock_converse", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_vision: true, | |
| supports_pdf_input: true, | |
| supports_prompt_caching: true, | |
| }, | |
| "amazon.nova-pro-v1:0": { | |
| max_tokens: 4096, | |
| max_input_tokens: 300000, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 8e-7, | |
| output_cost_per_token: 3.2e-6, | |
| litellm_provider: "bedrock_converse", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_vision: true, | |
| supports_pdf_input: true, | |
| supports_prompt_caching: true, | |
| }, | |
| "meta.llama3-3-70b-instruct-v1:0": { | |
| max_tokens: 4096, | |
| max_input_tokens: 128000, | |
| max_output_tokens: 4096, | |
| input_cost_per_token: 7.2e-7, | |
| output_cost_per_token: 7.2e-7, | |
| litellm_provider: "bedrock_converse", | |
| mode: "chat", | |
| }, | |
| "together_ai/meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo": { | |
| input_cost_per_token: 1.8e-7, | |
| output_cost_per_token: 1.8e-7, | |
| litellm_provider: "together_ai", | |
| supports_function_calling: true, | |
| supports_parallel_function_calling: true, | |
| supports_response_schema: true, | |
| mode: "chat", | |
| }, | |
| "together_ai/meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo": { | |
| input_cost_per_token: 8.8e-7, | |
| output_cost_per_token: 8.8e-7, | |
| litellm_provider: "together_ai", | |
| supports_function_calling: true, | |
| supports_parallel_function_calling: true, | |
| supports_response_schema: true, | |
| mode: "chat", | |
| }, | |
| "together_ai/meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo": { | |
| input_cost_per_token: 3.5e-6, | |
| output_cost_per_token: 3.5e-6, | |
| litellm_provider: "together_ai", | |
| supports_function_calling: true, | |
| supports_parallel_function_calling: true, | |
| mode: "chat", | |
| }, | |
| "deepinfra/meta-llama/Meta-Llama-3.1-405B-Instruct": { | |
| max_tokens: 32768, | |
| max_input_tokens: 32768, | |
| max_output_tokens: 32768, | |
| input_cost_per_token: 9e-7, | |
| output_cost_per_token: 9e-7, | |
| litellm_provider: "deepinfra", | |
| mode: "chat", | |
| supports_function_calling: true, | |
| supports_parallel_function_calling: true, | |
| }, | |
| "fireworks_ai/accounts/fireworks/models/deepseek-v3": { | |
| max_tokens: 8192, | |
| max_input_tokens: 128000, | |
| max_output_tokens: 8192, | |
| input_cost_per_token: 9e-7, | |
| output_cost_per_token: 9e-7, | |
| litellm_provider: "fireworks_ai", | |
| mode: "chat", | |
| supports_response_schema: true, | |
| source: "https://fireworks.ai/pricing", | |
| }, | |
| "voyage/voyage-3-large": { | |
| max_tokens: 32000, | |
| max_input_tokens: 32000, | |
| input_cost_per_token: 1.8e-7, | |
| output_cost_per_token: 0.0, | |
| litellm_provider: "voyage", | |
| mode: "embedding", | |
| }, | |
| "voyage/voyage-3": { | |
| max_tokens: 32000, | |
| max_input_tokens: 32000, | |
| input_cost_per_token: 6e-8, | |
| output_cost_per_token: 0.0, | |
| litellm_provider: "voyage", | |
| mode: "embedding", | |
| }, | |
| "voyage/voyage-3-lite": { | |
| max_tokens: 32000, | |
| max_input_tokens: 32000, | |
| input_cost_per_token: 2e-8, | |
| output_cost_per_token: 0.0, | |
| litellm_provider: "voyage", | |
| mode: "embedding", | |
| }, | |
| "voyage/voyage-code-3": { | |
| max_tokens: 32000, | |
| max_input_tokens: 32000, | |
| input_cost_per_token: 1.8e-7, | |
| output_cost_per_token: 0.0, | |
| litellm_provider: "voyage", | |
| mode: "embedding", | |
| }, | |
| "voyage/voyage-multimodal-3": { | |
| max_tokens: 32000, | |
| max_input_tokens: 32000, | |
| input_cost_per_token: 1.2e-7, | |
| output_cost_per_token: 0.0, | |
| litellm_provider: "voyage", | |
| mode: "embedding", | |
| }, | |
| "voyage/rerank-2": { | |
| max_tokens: 16000, | |
| max_input_tokens: 16000, | |
| max_output_tokens: 16000, | |
| max_query_tokens: 16000, | |
| input_cost_per_token: 5e-8, | |
| input_cost_per_query: 5e-8, | |
| output_cost_per_token: 0.0, | |
| litellm_provider: "voyage", | |
| mode: "rerank", | |
| }, | |
| "voyage/rerank-2-lite": { | |
| max_tokens: 8000, | |
| max_input_tokens: 8000, | |
| max_output_tokens: 8000, | |
| max_query_tokens: 8000, | |
| input_cost_per_token: 2e-8, | |
| input_cost_per_query: 2e-8, | |
| output_cost_per_token: 0.0, | |
| litellm_provider: "voyage", | |
| mode: "rerank", | |
| }, | |
| "databricks/meta-llama-3.3-70b-instruct": { | |
| max_tokens: 128000, | |
| max_input_tokens: 128000, | |
| max_output_tokens: 128000, | |
| input_cost_per_token: 1.00002e-6, | |
| input_dbu_cost_per_token: 1.4286e-5, | |
| output_cost_per_token: 2.99999e-6, | |
| output_dbu_cost_per_token: 4.2857e-5, | |
| litellm_provider: "databricks", | |
| mode: "chat", | |
| source: | |
| "https://www.databricks.com/product/pricing/foundation-model-serving", | |
| metadata: { | |
| notes: | |
| "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation.", | |
| }, | |
| }, | |
| "sambanova/Meta-Llama-3.1-8B-Instruct": { | |
| max_tokens: 16000, | |
| max_input_tokens: 16000, | |
| max_output_tokens: 16000, | |
| input_cost_per_token: 1e-7, | |
| output_cost_per_token: 2e-7, | |
| litellm_provider: "sambanova", | |
| supports_function_calling: true, | |
| mode: "chat", | |
| }, | |
| "sambanova/Meta-Llama-3.1-70B-Instruct": { | |
| max_tokens: 128000, | |
| max_input_tokens: 128000, | |
| max_output_tokens: 128000, | |
| input_cost_per_token: 6e-7, | |
| output_cost_per_token: 1.2e-6, | |
| litellm_provider: "sambanova", | |
| supports_function_calling: true, | |
| mode: "chat", | |
| }, | |
| "sambanova/Meta-Llama-3.1-405B-Instruct": { | |
| max_tokens: 16000, | |
| max_input_tokens: 16000, | |
| max_output_tokens: 16000, | |
| input_cost_per_token: 5e-6, | |
| output_cost_per_token: 1e-5, | |
| litellm_provider: "sambanova", | |
| supports_function_calling: true, | |
| mode: "chat", | |
| }, | |
| "sambanova/Meta-Llama-3.2-1B-Instruct": { | |
| max_tokens: 16000, | |
| max_input_tokens: 16000, | |
| max_output_tokens: 16000, | |
| input_cost_per_token: 4e-7, | |
| output_cost_per_token: 8e-7, | |
| litellm_provider: "sambanova", | |
| supports_function_calling: true, | |
| mode: "chat", | |
| }, | |
| "sambanova/Meta-Llama-3.2-3B-Instruct": { | |
| max_tokens: 4000, | |
| max_input_tokens: 4000, | |
| max_output_tokens: 4000, | |
| input_cost_per_token: 8e-7, | |
| output_cost_per_token: 1.6e-6, | |
| litellm_provider: "sambanova", | |
| supports_function_calling: true, | |
| mode: "chat", | |
| }, | |
| "sambanova/Qwen2.5-Coder-32B-Instruct": { | |
| max_tokens: 8000, | |
| max_input_tokens: 8000, | |
| max_output_tokens: 8000, | |
| input_cost_per_token: 1.5e-6, | |
| output_cost_per_token: 3e-6, | |
| litellm_provider: "sambanova", | |
| supports_function_calling: true, | |
| mode: "chat", | |
| }, | |
| "sambanova/Qwen2.5-72B-Instruct": { | |
| max_tokens: 8000, | |
| max_input_tokens: 8000, | |
| max_output_tokens: 8000, | |
| input_cost_per_token: 2e-6, | |
| output_cost_per_token: 4e-6, | |
| litellm_provider: "sambanova", | |
| supports_function_calling: true, | |
| mode: "chat", | |
| }, | |
| }; | |