// AGI / cpp / request_parsing.cpp
// Author: Dmitry Beresnev
// Refactor the C++ LLM manager into modular components, move Python modules
// under python/, and keep the current control-plane behavior intact. The C++
// server now has clearer separation for config, model lifecycle, runtime
// services, request parsing, HTTP helpers, and server routing, while Docker
// build/runtime paths were updated to compile multiple C++ files and load
// Python code from the new package folder.
// Commit: 332826f
#include "request_parsing.h"
#include <algorithm>
#include <sstream>
// Cheap token-count heuristic: roughly one token per four bytes of UTF-8
// text (rounded up), with a floor of one token for any non-empty string.
// An empty string costs zero tokens.
static int estimate_text_tokens_rough(const std::string &text) {
    const std::size_t byte_count = text.size();
    if (byte_count == 0) {
        return 0;
    }
    const int approx = static_cast<int>((byte_count + 3) / 4);
    return approx < 1 ? 1 : approx;
}
// Collapses an OpenAI-style message "content" field into plain text.
// Accepts either a bare string or an array of parts, where each part is
// a string or an object carrying a string "text" field. Non-empty parts
// are joined with newlines; any other shape yields an empty string.
static std::string flatten_json_content(const json &content) {
    if (content.is_string()) {
        return content.get<std::string>();
    }
    if (!content.is_array()) {
        return "";
    }
    std::string flattened;
    for (const auto &piece : content) {
        std::string text;
        if (piece.is_string()) {
            text = piece.get<std::string>();
        } else if (piece.is_object() && piece.contains("text") && piece["text"].is_string()) {
            text = piece["text"].get<std::string>();
        }
        // Skip parts that contributed nothing (wrong shape or empty string).
        if (text.empty()) {
            continue;
        }
        if (!flattened.empty()) {
            flattened += '\n';
        }
        flattened += text;
    }
    return flattened;
}
std::optional<TokenEstimate> estimate_chat_tokens(
const json &payload,
const LimitsConfig &limits,
std::string &error) {
if (!payload.is_object()) {
error = "Expected JSON object";
return std::nullopt;
}
if (!payload.contains("messages") || !payload["messages"].is_array()) {
error = "Expected 'messages' array";
return std::nullopt;
}
TokenEstimate estimate;
estimate.requested_max_tokens = limits.default_max_tokens;
if (payload.contains("max_tokens")) {
if (!payload["max_tokens"].is_number_integer()) {
error = "Expected integer 'max_tokens'";
return std::nullopt;
}
estimate.requested_max_tokens = payload["max_tokens"].get<int>();
}
if (estimate.requested_max_tokens <= 0) {
error = "'max_tokens' must be > 0";
return std::nullopt;
}
for (const auto &message : payload["messages"]) {
if (!message.is_object()) continue;
if (message.contains("role") && message["role"].is_string()) {
estimate.prompt_tokens += estimate_text_tokens_rough(message["role"].get<std::string>());
}
if (message.contains("content")) {
estimate.prompt_tokens += estimate_text_tokens_rough(flatten_json_content(message["content"]));
}
estimate.prompt_tokens += 4;
}
estimate.estimated_total_tokens = estimate.prompt_tokens + estimate.requested_max_tokens;
if (estimate.estimated_total_tokens > limits.max_tokens_per_request) {
error = "Estimated request tokens exceed configured limit";
return std::nullopt;
}
error.clear();
return estimate;
}
bool request_stream_enabled(const json &payload) {
if (!payload.is_object() || !payload.contains("stream")) return false;
return payload["stream"].is_boolean() && payload["stream"].get<bool>();
}