// Claude helper functions:
// JSON Schema cleanup, signature handling, and related utilities.
// (The unused `Value` import has been removed.)
// Removed: the unused `uppercase_schema_types` function, which converted the type
// names in a JSON Schema to upper case as Gemini requires,
// e.g. "string" -> "STRING", "integer" -> "INTEGER".
/// Returns the context window size, in tokens, to assume for `model`.
///
/// The lookup is a case-sensitive substring match on the model name, checked in
/// order: a name containing `"pro"` gets 2_097_152 tokens, a name containing
/// `"flash"` gets 1_048_576 tokens, and anything else falls back to 1_048_576.
pub fn get_context_limit_for_model(model: &str) -> u32 {
    // Limit used when no marker matches.
    const DEFAULT_LIMIT: u32 = 1_048_576;
    // Ordered (marker, limit) pairs; the first marker found in the name wins.
    const TIERS: [(&str, u32); 2] = [("pro", 2_097_152), ("flash", 1_048_576)];

    TIERS
        .iter()
        .find(|&&(marker, _)| model.contains(marker))
        .map_or(DEFAULT_LIMIT, |&(_, limit)| limit)
}
pub fn to_claude_usage(usage_metadata: &super::models::UsageMetadata, scaling_enabled: bool, context_limit: u32) -> super::models::Usage {
let prompt_tokens = usage_metadata.prompt_token_count.unwrap_or(0);
let cached_tokens = usage_metadata.cached_content_token_count.unwrap_or(0);
// 【改进的智能阈值回归算法】
// 目标:既利用 Gemini 大窗口,又能在高用量时让 Claude Code 正确触发 compact 提示
//
// 分阶段策略:
// - 0-50%: 激进压缩,享受大上下文
// - 50-70%: 开始加速回升
// - 70-85%: 快速回升到显示 70%+
// - 85%+: 接近 1:1 显示,确保触发 Claude Code 的 compact 提示
let total_raw = prompt_tokens;
// [FIX] Restore low token threshold - don't scale if under 30k tokens
const SCALING_THRESHOLD: u32 = 30_000;
let scaled_total = if scaling_enabled && total_raw > SCALING_THRESHOLD {
const TARGET_MAX: f64 = 195_000.0; // 接近 Claude 的 200k 限制
let ratio = total_raw as f64 / context_limit as f64;
if ratio <= 0.5 {
// 阶段1 (0-50%): 激进压缩,享受大上下文
// 真实 50% → 显示 ~30%
let display_ratio = ratio * 0.6;
(display_ratio * TARGET_MAX) as u32
} else if ratio <= 0.7 {
// 阶段2 (50-70%): 开始加速回升
// 线性从 30% 回升到 50%
let progress = (ratio - 0.5) / 0.2;
let display_ratio = 0.3 + progress * 0.2;
(display_ratio * TARGET_MAX) as u32
} else if ratio <= 0.85 {
// 阶段3 (70-85%): 快速回升到显示 70%
// 这个阶段让用户开始注意到上下文在增长
let progress = (ratio - 0.7) / 0.15;
let display_ratio = 0.5 + progress * 0.2;
(display_ratio * TARGET_MAX) as u32
} else {
// 阶段4 (85%+): 接近 1:1 显示,触发 Claude Code 的 compact 提示
// 85% 真实 → 70% 显示
// 100% 真实 → 97% 显示
let progress = (ratio - 0.85) / 0.15;
let display_ratio = 0.7 + progress * 0.27;
(display_ratio.min(0.97) * TARGET_MAX) as u32
}
} else {
total_raw
};
// 【调试日志】方便手动验证
if scaling_enabled && total_raw > 30_000 {
let ratio = total_raw as f64 / context_limit as f64;
let display_ratio = scaled_total as f64 / 195_000.0;
tracing::debug!(
"[Claude-Scaling] Raw: {} ({:.1}%), Display: {} ({:.1}%), Compression: {:.1}x",
total_raw, ratio * 100.0, scaled_total, display_ratio * 100.0,
total_raw as f64 / scaled_total as f64
);
}
// 按比例分配缩放后的总量到 input 和 cache_read
let (reported_input, reported_cache) = if total_raw > 0 {
let cache_ratio = (cached_tokens as f64) / (total_raw as f64);
let sc_cache = (scaled_total as f64 * cache_ratio) as u32;
(scaled_total.saturating_sub(sc_cache), Some(sc_cache))
} else {
(scaled_total, None)
};
super::models::Usage {
input_tokens: reported_input,
output_tokens: usage_metadata.candidates_token_count.unwrap_or(0),
cache_read_input_tokens: reported_cache,
cache_creation_input_tokens: Some(0),
server_tool_use: None,
}
}
// Removed: the unused `extract_thought_signature` function, which extracted the
// thoughtSignature.
#[cfg(test)]
mod tests {
    use super::super::models::UsageMetadata;
    use super::*;

    /// Context limit used by the scaling tests; 1M keeps the percentages easy to read.
    const LIMIT: u32 = 1_000_000;

    /// Builds metadata with the given prompt count, 10 candidate tokens and no cache.
    fn with_prompt(prompt: u32) -> UsageMetadata {
        UsageMetadata {
            prompt_token_count: Some(prompt),
            candidates_token_count: Some(10),
            // Not read by `to_claude_usage`.
            total_token_count: None,
            cached_content_token_count: None,
        }
    }

    /// Checks the four anchor points of the scaling curve against a 1M limit.
    #[test]
    fn test_to_claude_usage() {
        // 50% load: 50% * 0.6 = 30% of 195k = 58,500.
        let res_50 = to_claude_usage(&with_prompt(500_000), true, LIMIT);
        assert!(res_50.input_tokens > 55_000 && res_50.input_tokens < 62_000);
        assert_eq!(res_50.output_tokens, 10);

        // 70% load: 50% of 195k = 97,500.
        let res_70 = to_claude_usage(&with_prompt(700_000), true, LIMIT);
        assert!(res_70.input_tokens > 90_000 && res_70.input_tokens < 105_000);

        // 85% load: 70% of 195k = 136,500.
        let res_85 = to_claude_usage(&with_prompt(850_000), true, LIMIT);
        assert!(res_85.input_tokens > 130_000 && res_85.input_tokens < 145_000);

        // 100% load: 97% of 195k = 189,150.
        let res_100 = to_claude_usage(&with_prompt(1_000_000), true, LIMIT);
        assert!(res_100.input_tokens > 185_000 && res_100.input_tokens <= 190_000);
    }

    /// Prompts at or below the 30k threshold must be reported exactly, not merely "small".
    #[test]
    fn small_prompts_are_reported_unscaled() {
        let usage = UsageMetadata {
            candidates_token_count: Some(50),
            ..with_prompt(100)
        };
        let reported = to_claude_usage(&usage, true, LIMIT);
        assert_eq!(reported.input_tokens, 100);
        assert_eq!(reported.output_tokens, 50);

        // The threshold itself is still unscaled: scaling needs a strictly larger count.
        let at_threshold = to_claude_usage(&with_prompt(30_000), true, LIMIT);
        assert_eq!(at_threshold.input_tokens, 30_000);
    }

    /// With scaling disabled the raw prompt count is passed through at any size.
    #[test]
    fn disabled_scaling_reports_raw_count() {
        let reported = to_claude_usage(&with_prompt(500_000), false, LIMIT);
        assert_eq!(reported.input_tokens, 500_000);
    }

    /// The reported total is split between input and cache reads like the raw counts.
    #[test]
    fn cached_tokens_take_a_proportional_share() {
        // Unscaled: a quarter of the prompt is cached.
        let quarter_cached = UsageMetadata {
            cached_content_token_count: Some(250),
            ..with_prompt(1_000)
        };
        let reported = to_claude_usage(&quarter_cached, true, LIMIT);
        assert_eq!(reported.input_tokens, 750);
        assert_eq!(reported.cache_read_input_tokens, Some(250));

        // Scaled: half of the prompt is cached, so both halves add up to the scaled total.
        let total = to_claude_usage(&with_prompt(500_000), true, LIMIT).input_tokens;
        let half_cached = UsageMetadata {
            cached_content_token_count: Some(250_000),
            ..with_prompt(500_000)
        };
        let reported = to_claude_usage(&half_cached, true, LIMIT);
        let cache = reported
            .cache_read_input_tokens
            .expect("a non-zero prompt count always yields a cache figure");
        assert_eq!(reported.input_tokens + cache, total);
        assert!(reported.input_tokens.abs_diff(cache) <= 1);
    }

    /// A missing or zero prompt count yields zero input and no cache figure.
    #[test]
    fn empty_prompt_has_no_cache_figure() {
        let missing = UsageMetadata {
            prompt_token_count: None,
            ..with_prompt(0)
        };
        let reported = to_claude_usage(&missing, true, LIMIT);
        assert_eq!(reported.input_tokens, 0);
        assert!(reported.cache_read_input_tokens.is_none());
    }
}
|