#include "request_parsing.h"

#include <algorithm>
#include <cstdint>
#include <limits>
#include <optional>
#include <sstream>
#include <string>

/// Rough token count for `text`: one token per four bytes, rounded up.
///
/// @param text  Text to measure (byte length is used, not code points).
/// @return 0 for an empty string, otherwise at least 1; saturates at INT_MAX
///         instead of wrapping when the quotient does not fit in an int.
static int estimate_text_tokens_rough(const std::string &text) {
    if (text.empty()) return 0;
    // Divide first and add the rounding term afterwards so that the
    // intermediate value can never wrap (size() + 3 could, in principle).
    const std::size_t tokens = text.size() / 4 + (text.size() % 4 != 0 ? 1 : 0);
    constexpr auto kMaxTokens = static_cast<std::size_t>(std::numeric_limits<int>::max());
    return static_cast<int>(std::min(tokens, kMaxTokens));
}

/// Collapses a message `content` value into plain text.
///
/// - A JSON string is returned unchanged.
/// - A JSON array contributes its string items and the string-valued "text"
///   member of its object items, joined with '\n'. Empty pieces and items of
///   any other shape are skipped.
/// - Any other JSON type yields an empty string.
static std::string flatten_json_content(const json &content) {
    if (content.is_string()) return content.get<std::string>();
    if (!content.is_array()) return "";

    std::string joined;
    for (const auto &item : content) {
        std::string piece;
        if (item.is_string()) {
            piece = item.get<std::string>();
        } else if (item.is_object() && item.contains("text") && item["text"].is_string()) {
            piece = item["text"].get<std::string>();
        }
        if (piece.empty()) continue;
        // Pieces are never empty, so a non-empty buffer means "not the first".
        if (!joined.empty()) joined += '\n';
        joined += piece;
    }
    return joined;
}

/// Reads a JSON integer as a 64-bit signed value without wrapping.
///
/// The caller must already have checked `value.is_number_integer()`.
/// Unsigned values above INT64_MAX saturate to INT64_MAX so that an enormous
/// number can never be mistaken for a small or negative one.
static std::int64_t read_json_integer_saturated(const json &value) {
    constexpr std::int64_t kMax = std::numeric_limits<std::int64_t>::max();
    if (value.is_number_unsigned()) {
        const auto raw = value.get<std::uint64_t>();
        return raw > static_cast<std::uint64_t>(kMax) ? kMax : static_cast<std::int64_t>(raw);
    }
    return value.get<std::int64_t>();
}

/// Estimates the token budget of a chat request and enforces the per-request limit.
///
/// @param payload  Request body; must be a JSON object with a "messages" array.
///                 An optional integer "max_tokens" overrides the default
///                 completion budget; non-positive values select the default.
/// @param limits   Supplies `default_max_tokens` and `max_tokens_per_request`.
/// @param error    Cleared on success; set to a human-readable reason on failure.
/// @return The estimate on success, or `std::nullopt` when the payload is
///         malformed or the estimated total exceeds `max_tokens_per_request`.
std::optional<TokenEstimate> estimate_chat_tokens(
    const json &payload,
    const LimitsConfig &limits,
    std::string &error) {
    if (!payload.is_object()) {
        error = "Expected JSON object";
        return std::nullopt;
    }
    if (!payload.contains("messages") || !payload["messages"].is_array()) {
        error = "Expected 'messages' array";
        return std::nullopt;
    }

    // All limit comparisons are done in 64 bits so that neither a huge
    // client-supplied value nor the final addition can wrap past the check.
    const auto request_limit = static_cast<std::int64_t>(limits.max_tokens_per_request);

    TokenEstimate estimate;
    estimate.requested_max_tokens = limits.default_max_tokens;

    if (payload.contains("max_tokens")) {
        const json &max_tokens = payload["max_tokens"];
        if (!max_tokens.is_number_integer()) {
            error = "Expected integer 'max_tokens'";
            return std::nullopt;
        }

        const std::int64_t requested = read_json_integer_saturated(max_tokens);
        if (requested > 0) {
            // A request that already exceeds the limit on its own can never
            // pass the total check below; reject it before narrowing so the
            // value cannot be truncated into something that looks acceptable.
            if (requested > request_limit) {
                error = "Estimated request tokens exceed configured limit";
                return std::nullopt;
            }
            estimate.requested_max_tokens =
                static_cast<decltype(estimate.requested_max_tokens)>(requested);
        }
        // -1 (and other non-positive values) is a sentinel meaning "unlimited":
        // the default assigned above stays in effect.
    }

    for (const auto &message : payload["messages"]) {
        if (!message.is_object()) continue;  // non-object entries contribute nothing

        if (message.contains("role") && message["role"].is_string()) {
            estimate.prompt_tokens += estimate_text_tokens_rough(message["role"].get<std::string>());
        }
        if (message.contains("content")) {
            estimate.prompt_tokens += estimate_text_tokens_rough(flatten_json_content(message["content"]));
        }
        estimate.prompt_tokens += 4;  // fixed per-message overhead
    }

    const std::int64_t total = static_cast<std::int64_t>(estimate.prompt_tokens) +
                               static_cast<std::int64_t>(estimate.requested_max_tokens);
    if (total > request_limit) {
        error = "Estimated request tokens exceed configured limit";
        return std::nullopt;
    }
    // total <= request_limit here, so narrowing back to the field type is safe.
    estimate.estimated_total_tokens =
        static_cast<decltype(estimate.estimated_total_tokens)>(total);

    error.clear();
    return estimate;
}

/// Reports whether the request asks for a streamed response.
///
/// @return true only when `payload` is an object whose "stream" member is the
///         boolean `true`; false for every other shape or value.
bool request_stream_enabled(const json &payload) {
    if (!payload.is_object()) return false;
    const auto it = payload.find("stream");
    return it != payload.end() && it->is_boolean() && it->get<bool>();
}