Spaces:
Building
Building
Dmitry Beresnev
Refactors the C++ LLM manager into modular components, moves the Python modules under python/, and keeps the current control-plane behavior intact. The C++ server now has clearer separation for config, model lifecycle, runtime services, request parsing, HTTP helpers, and server routing, while the Docker build/runtime paths were updated to compile multiple C++ files and load Python code from the new package folder.
// Very rough token-count heuristic: roughly four characters per token,
// rounded up, with a floor of one token for any non-empty text.
// Empty text estimates to zero tokens.
static int estimate_text_tokens_rough(const std::string &text) {
    const std::size_t len = text.size();
    if (len == 0) {
        return 0;
    }
    // Ceiling division by 4; always >= 1 for non-empty input.
    const std::size_t approx = len / 4 + (len % 4 != 0 ? 1 : 0);
    return approx < 1 ? 1 : static_cast<int>(approx);
}
| static std::string flatten_json_content(const json &content) { | |
| if (content.is_string()) { | |
| return content.get<std::string>(); | |
| } | |
| if (content.is_array()) { | |
| std::ostringstream oss; | |
| bool first = true; | |
| for (const auto &item : content) { | |
| std::string part; | |
| if (item.is_string()) { | |
| part = item.get<std::string>(); | |
| } else if (item.is_object() && item.contains("text") && item["text"].is_string()) { | |
| part = item["text"].get<std::string>(); | |
| } | |
| if (part.empty()) continue; | |
| if (!first) oss << '\n'; | |
| oss << part; | |
| first = false; | |
| } | |
| return oss.str(); | |
| } | |
| return ""; | |
| } | |
| std::optional<TokenEstimate> estimate_chat_tokens( | |
| const json &payload, | |
| const LimitsConfig &limits, | |
| std::string &error) { | |
| if (!payload.is_object()) { | |
| error = "Expected JSON object"; | |
| return std::nullopt; | |
| } | |
| if (!payload.contains("messages") || !payload["messages"].is_array()) { | |
| error = "Expected 'messages' array"; | |
| return std::nullopt; | |
| } | |
| TokenEstimate estimate; | |
| estimate.requested_max_tokens = limits.default_max_tokens; | |
| if (payload.contains("max_tokens")) { | |
| if (!payload["max_tokens"].is_number_integer()) { | |
| error = "Expected integer 'max_tokens'"; | |
| return std::nullopt; | |
| } | |
| estimate.requested_max_tokens = payload["max_tokens"].get<int>(); | |
| } | |
| if (estimate.requested_max_tokens <= 0) { | |
| error = "'max_tokens' must be > 0"; | |
| return std::nullopt; | |
| } | |
| for (const auto &message : payload["messages"]) { | |
| if (!message.is_object()) continue; | |
| if (message.contains("role") && message["role"].is_string()) { | |
| estimate.prompt_tokens += estimate_text_tokens_rough(message["role"].get<std::string>()); | |
| } | |
| if (message.contains("content")) { | |
| estimate.prompt_tokens += estimate_text_tokens_rough(flatten_json_content(message["content"])); | |
| } | |
| estimate.prompt_tokens += 4; | |
| } | |
| estimate.estimated_total_tokens = estimate.prompt_tokens + estimate.requested_max_tokens; | |
| if (estimate.estimated_total_tokens > limits.max_tokens_per_request) { | |
| error = "Estimated request tokens exceed configured limit"; | |
| return std::nullopt; | |
| } | |
| error.clear(); | |
| return estimate; | |
| } | |
| bool request_stream_enabled(const json &payload) { | |
| if (!payload.is_object() || !payload.contains("stream")) return false; | |
| return payload["stream"].is_boolean() && payload["stream"].get<bool>(); | |
| } | |