| use serde::{Deserialize, Serialize}; |
| use std::collections::HashMap; |
| use once_cell::sync::Lazy; |
| use crate::proxy::token_manager::ProxyToken; |
|
|
| #[derive(Debug, Clone, Serialize, Deserialize)] |
| pub struct ModelSpec { |
| pub max_output_tokens: Option<u64>, |
| pub thinking_budget: Option<u64>, |
| pub is_thinking: Option<bool>, |
| } |
|
|
| #[derive(Debug, Clone, Serialize, Deserialize)] |
| struct SpecsConfig { |
| models: HashMap<String, ModelSpec>, |
| aliases: HashMap<String, String>, |
| } |
|
|
| static SPECS: Lazy<SpecsConfig> = Lazy::new(|| { |
| let json_str = include_str!("../../resources/model_specs.json"); |
| serde_json::from_str(json_str).expect("Failed to parse model_specs.json") |
| }); |
|
|
| |
| pub fn resolve_alias(model_id: &str) -> String { |
| SPECS.aliases.get(model_id).cloned().unwrap_or_else(|| model_id.to_string()) |
| } |
|
|
| |
| pub fn get_max_output_tokens(model_id: &str, token: Option<&ProxyToken>) -> u64 { |
| let std_id = resolve_alias(model_id); |
| |
| |
| if let Some(t) = token { |
| if let Some(&limit) = t.model_limits.get(&std_id) { |
| return limit; |
| } |
| |
| if let Some(&limit) = t.model_limits.get(model_id) { |
| return limit; |
| } |
| } |
| |
| |
| if let Some(spec) = SPECS.models.get(&std_id) { |
| if let Some(limit) = spec.max_output_tokens { |
| return limit; |
| } |
| } |
|
|
| |
| 65535 |
| } |
|
|
| |
| pub fn get_thinking_budget(model_id: &str, _token: Option<&ProxyToken>) -> u64 { |
| let std_id = resolve_alias(model_id); |
| |
| |
| |
| |
| |
| |
| if let Some(spec) = SPECS.models.get(&std_id) { |
| if let Some(budget) = spec.thinking_budget { |
| return budget; |
| } |
| } |
|
|
| |
| 24576 |
| } |
|
|
| |
| #[allow(dead_code)] |
| pub fn is_thinking_model(model_id: &str) -> bool { |
| let std_id = resolve_alias(model_id); |
| if let Some(spec) = SPECS.models.get(&std_id) { |
| return spec.is_thinking.unwrap_or(false); |
| } |
| model_id.contains("-thinking") || model_id.contains("thinking") |
| } |
|
|