#include "request_parsing.h"
#include <algorithm>
#include <sstream>
// Crude token-count heuristic: roughly one token per four bytes of text.
// Non-empty input is floored at one token; empty input counts as zero.
static int estimate_text_tokens_rough(const std::string &text) {
    if (text.empty()) {
        return 0;
    }
    const int approx = static_cast<int>((text.size() + 3) / 4);
    return approx < 1 ? 1 : approx;
}
// Collapse a chat message's "content" value into plain text.
// A bare string is returned unchanged. An array is joined with '\n',
// taking string elements directly and the "text" field of objects that
// carry one; empty or unrecognized elements are skipped. Any other
// JSON type flattens to an empty string.
static std::string flatten_json_content(const json &content) {
    if (content.is_string()) {
        return content.get<std::string>();
    }
    if (!content.is_array()) {
        return "";
    }
    std::string flattened;
    for (const auto &element : content) {
        std::string piece;
        if (element.is_string()) {
            piece = element.get<std::string>();
        } else if (element.is_object() && element.contains("text") && element["text"].is_string()) {
            piece = element["text"].get<std::string>();
        }
        if (piece.empty()) {
            continue;
        }
        // Only non-empty pieces are appended, so an empty accumulator
        // means no separator is needed yet.
        if (!flattened.empty()) {
            flattened += '\n';
        }
        flattened += piece;
    }
    return flattened;
}
std::optional<TokenEstimate> estimate_chat_tokens(
const json &payload,
const LimitsConfig &limits,
std::string &error) {
if (!payload.is_object()) {
error = "Expected JSON object";
return std::nullopt;
}
if (!payload.contains("messages") || !payload["messages"].is_array()) {
error = "Expected 'messages' array";
return std::nullopt;
}
TokenEstimate estimate;
estimate.requested_max_tokens = limits.default_max_tokens;
if (payload.contains("max_tokens")) {
if (!payload["max_tokens"].is_number_integer()) {
error = "Expected integer 'max_tokens'";
return std::nullopt;
}
estimate.requested_max_tokens = payload["max_tokens"].get<int>();
}
if (estimate.requested_max_tokens <= 0) {
// -1 (and other non-positive values) is a sentinel meaning "unlimited" — use the default
estimate.requested_max_tokens = limits.default_max_tokens;
}
for (const auto &message : payload["messages"]) {
if (!message.is_object()) continue;
if (message.contains("role") && message["role"].is_string()) {
estimate.prompt_tokens += estimate_text_tokens_rough(message["role"].get<std::string>());
}
if (message.contains("content")) {
estimate.prompt_tokens += estimate_text_tokens_rough(flatten_json_content(message["content"]));
}
estimate.prompt_tokens += 4;
}
estimate.estimated_total_tokens = estimate.prompt_tokens + estimate.requested_max_tokens;
if (estimate.estimated_total_tokens > limits.max_tokens_per_request) {
error = "Estimated request tokens exceed configured limit";
return std::nullopt;
}
error.clear();
return estimate;
}
bool request_stream_enabled(const json &payload) {
if (!payload.is_object() || !payload.contains("stream")) return false;
return payload["stream"].is_boolean() && payload["stream"].get<bool>();
}
|