Spaces:

ResearchEngineering
/

AGI

Building

AGI / cpp /config.cpp

Dmitry Beresnev

Refactor the C++ LLM manager into modular components, move Python modules under python/, and keep the current control-plane behavior intact. The C++ server now has clearer separation for config, model lifecycle, runtime services, request parsing, HTTP helpers, and server routing, while the Docker build/runtime paths were updated to compile multiple C++ files and load Python code from the new package folder.

332826f about 19 hours ago

raw

history blame contribute delete

9.6 kB

	#include "config.h"

	#include "http_helpers.h"

	#include <algorithm>
	#include <cstdlib>
	#include <filesystem>
	#include <fstream>
	#include <string>
	#include <system_error>
	#include <unordered_map>
	#include <vector>

	/// Looks up environment variable `name`.
	///
	/// @param name      Name of the environment variable to read.
	/// @param fallback  Value returned when the variable is unset or empty.
	/// @return The variable's value, or `fallback` if it is unset or set to "".
	std::string get_env_or(const char *name, const std::string &fallback) {
	    const char *raw = std::getenv(name);
	    // An empty value is treated the same as "not set".
	    if (raw == nullptr || raw[0] == '\0') {
	        return fallback;
	    }
	    return std::string(raw);
	}

	/// Reads environment variable `name` as a base-10 `int`.
	///
	/// @param name      Name of the environment variable to read.
	/// @param fallback  Value returned when no usable integer is available.
	/// @return The parsed value, or `fallback` when the variable is unset, empty,
	///         not a number, out of range for `int`, or followed by anything
	///         other than whitespace (so a typo such as "8080x" is rejected
	///         instead of silently becoming 8080).
	int get_env_int_or(const char *name, int fallback) {
	    const char *raw = std::getenv(name);
	    if (raw == nullptr || *raw == '\0') return fallback;

	    const std::string text(raw);
	    try {
	        std::size_t consumed = 0;
	        const int parsed = std::stoi(text, &consumed);
	        // std::stoi stops at the first character it cannot use; only accept
	        // the value when nothing but whitespace is left over.
	        if (text.find_first_not_of(" \t\r\n", consumed) != std::string::npos) {
	            return fallback;
	        }
	        return parsed;
	    } catch (...) {
	        // std::stoi throws when there are no digits or the value overflows int.
	        return fallback;
	    }
	}

	/// Returns a copy of `value` without leading or trailing spaces, tabs,
	/// carriage returns or newlines. Input consisting only of those characters
	/// (or empty input) yields an empty string; interior whitespace is kept.
	std::string trim_copy(const std::string &value) {
	    const auto is_blank = [](char c) {
	        return c == ' ' || c == '\t' || c == '\r' || c == '\n';
	    };

	    std::string::size_type begin = 0;
	    std::string::size_type end = value.size();
	    while (begin < end && is_blank(value[begin])) {
	        ++begin;
	    }
	    while (end > begin && is_blank(value[end - 1])) {
	        --end;
	    }
	    return value.substr(begin, end - begin);
	}

	/// Removes one pair of matching surrounding quotes from `value`.
	///
	/// Both "..." and '...' are recognised. Input that is shorter than two
	/// characters, unquoted, or quoted with mismatched characters is returned
	/// unchanged. Escape sequences inside the quotes are not interpreted.
	std::string strip_quotes(const std::string &value) {
	    if (value.size() < 2) return value;

	    const char opening = value.front();
	    const char closing = value.back();
	    const bool is_quote = (opening == '"' || opening == '\'');
	    if (is_quote && opening == closing) {
	        return value.substr(1, value.size() - 2);
	    }
	    return value;
	}

	/// Interprets `value` as a boolean.
	///
	/// Surrounding whitespace is ignored and matching is ASCII case-insensitive.
	/// Recognised spellings are true/1/yes/on and false/0/no/off, so values that
	/// commonly appear in environment variables ("1", "True") are honoured
	/// instead of silently falling back.
	///
	/// @param value     Text to interpret.
	/// @param fallback  Result for any unrecognised (or empty) text.
	bool parse_bool_or(const std::string &value, bool fallback) {
	    std::string normalized = trim_copy(value);
	    // Lower-case by hand: ASCII only, independent of the process locale.
	    for (char &c : normalized) {
	        if (c >= 'A' && c <= 'Z') c = static_cast<char>(c - 'A' + 'a');
	    }

	    if (normalized == "true" || normalized == "1" || normalized == "yes" || normalized == "on") {
	        return true;
	    }
	    if (normalized == "false" || normalized == "0" || normalized == "no" || normalized == "off") {
	        return false;
	    }
	    return fallback;
	}

	/// Maps a textual role name to a `Role`.
	///
	/// Surrounding whitespace is ignored. Only the all-lowercase and
	/// all-uppercase spellings ("admin"/"ADMIN", "user"/"USER") are recognised;
	/// any other text, including mixed case, yields `fallback`.
	Role parse_role_or(const std::string &value, Role fallback) {
	    const std::string normalized = trim_copy(value);
	    if (normalized == "admin" || normalized == "ADMIN") return Role::ADMIN;
	    if (normalized == "user" || normalized == "USER") return Role::USER;
	    return fallback;
	}

	/// Returns `line` without its trailing `# comment`, if it has one.
	///
	/// A '#' inside a single- or double-quoted value belongs to the value
	/// (e.g. `secret = "abc#123"`) and does not start a comment. Escape
	/// sequences are not interpreted, which matches strip_quotes().
	static std::string strip_toml_comment(const std::string &line) {
	    char open_quote = '\0';  // quote character we are currently inside, or '\0'
	    for (std::string::size_type i = 0; i < line.size(); ++i) {
	        const char c = line[i];
	        if (open_quote != '\0') {
	            if (c == open_quote) open_quote = '\0';
	        } else if (c == '"' || c == '\'') {
	            open_quote = c;
	        } else if (c == '#') {
	            return line.substr(0, i);
	        }
	    }
	    return line;
	}

	/// Parses the flat `key = value` subset of TOML found in the file at `path`.
	///
	/// The result maps section name -> key -> value. Keys that appear before any
	/// `[section]` header are stored under the empty section name. Values are
	/// kept as strings with one pair of surrounding quotes removed; nothing else
	/// (arrays, escapes, multi-line strings) is interpreted. Lines without '='
	/// are skipped. A header written as `[[name]]` is stored under the section
	/// name "[name]", because only the outermost bracket pair is removed.
	///
	/// @return The parsed sections, or an empty map when the file cannot be opened.
	static std::unordered_map<std::string, std::unordered_map<std::string, std::string>>
	parse_simple_toml(const std::string &path) {
	    std::unordered_map<std::string, std::unordered_map<std::string, std::string>> out;
	    std::ifstream input(path);
	    if (!input.is_open()) return out;

	    std::string current_section;
	    std::string line;
	    while (std::getline(input, line)) {
	        line = trim_copy(strip_toml_comment(line));
	        if (line.empty()) continue;

	        if (line.front() == '[' && line.back() == ']') {
	            current_section = trim_copy(line.substr(1, line.size() - 2));
	            continue;
	        }

	        const auto eq = line.find('=');
	        if (eq == std::string::npos) continue;

	        const std::string key = trim_copy(line.substr(0, eq));
	        out[current_section][key] = strip_quotes(trim_copy(line.substr(eq + 1)));
	    }
	    return out;
	}

	/// Collects every `[[api_keys]]` table from the file at `path`.
	///
	/// Inside such a table the keys `key_id`, `secret`, `role` and `enabled` are
	/// read; other keys are ignored. `role` and `enabled` keep the value a
	/// default-constructed ApiKeyRecord has when they are absent or cannot be
	/// parsed. A record is kept only if both `key_id` and `secret` are non-empty.
	/// Any other `[...]` header ends the current record.
	///
	/// @return The accepted records in file order, or an empty vector when the
	///         file cannot be opened.
	static std::vector<ApiKeyRecord> parse_api_keys_toml(const std::string &path) {
	    std::vector<ApiKeyRecord> keys;
	    std::ifstream input(path);
	    if (!input.is_open()) return keys;

	    std::string line;
	    bool in_api_key = false;
	    ApiKeyRecord current;
	    bool has_any_field = false;

	    // Stores the record being built (if complete) and starts a fresh one.
	    auto flush_current = [&]() {
	        if (has_any_field && !current.key_id.empty() && !current.secret.empty()) {
	            keys.push_back(current);
	        }
	        current = ApiKeyRecord{};
	        has_any_field = false;
	    };

	    while (std::getline(input, line)) {
	        line = trim_copy(strip_toml_comment(line));
	        if (line.empty()) continue;

	        if (line == "[[api_keys]]") {
	            flush_current();
	            in_api_key = true;
	            continue;
	        }

	        if (!in_api_key) continue;
	        if (line.front() == '[' && line.back() == ']') {
	            // A different table starts here; the current record is finished.
	            flush_current();
	            in_api_key = false;
	            continue;
	        }

	        const auto eq = line.find('=');
	        if (eq == std::string::npos) continue;

	        const std::string key = trim_copy(line.substr(0, eq));
	        const std::string value = strip_quotes(trim_copy(line.substr(eq + 1)));
	        has_any_field = true;

	        if (key == "key_id") current.key_id = value;
	        else if (key == "secret") current.secret = value;
	        else if (key == "role") current.role = parse_role_or(value, current.role);
	        else if (key == "enabled") current.enabled = parse_bool_or(value, current.enabled);
	    }

	    // The last table has no following header to trigger a flush.
	    flush_current();
	    return keys;
	}

	/// Looks up `data[section][key]` in a parsed TOML map.
	///
	/// @return The stored string, or `fallback` when the section or key is
	///         missing or the stored value is empty.
	static std::string get_toml_string_or(
	    const std::unordered_map<std::string, std::unordered_map<std::string, std::string>> &data,
	    const std::string &section,
	    const std::string &key,
	    const std::string &fallback) {
	    const auto section_it = data.find(section);
	    if (section_it == data.end()) return fallback;

	    const auto entry = section_it->second.find(key);
	    if (entry == section_it->second.end() || entry->second.empty()) return fallback;
	    return entry->second;
	}

	/// Looks up `data[section][key]` in a parsed TOML map and reads it as a
	/// base-10 `int`.
	///
	/// @return The parsed value, or `fallback` when the section or key is
	///         missing, the value is empty, not a number, out of range for
	///         `int`, or followed by anything other than whitespace (so "80x" or
	///         "1.5" is rejected instead of being truncated to 80 or 1).
	static int get_toml_int_or(
	    const std::unordered_map<std::string, std::unordered_map<std::string, std::string>> &data,
	    const std::string &section,
	    const std::string &key,
	    int fallback) {
	    const auto section_it = data.find(section);
	    if (section_it == data.end()) return fallback;

	    const auto entry = section_it->second.find(key);
	    if (entry == section_it->second.end() || entry->second.empty()) return fallback;

	    const std::string &text = entry->second;
	    try {
	        std::size_t consumed = 0;
	        const int parsed = std::stoi(text, &consumed);
	        // std::stoi stops at the first character it cannot use; only accept
	        // the value when nothing but whitespace is left over.
	        if (text.find_first_not_of(" \t\r\n", consumed) != std::string::npos) {
	            return fallback;
	        }
	        return parsed;
	    } catch (...) {
	        // std::stoi throws when there are no digits or the value overflows int.
	        return fallback;
	    }
	}

	/// Builds the manager configuration from three layers.
	///
	/// For every setting the first available source wins:
	///   1. the environment variable named below (ignored when unset or empty),
	///   2. the matching `[section] key` in the TOML file,
	///   3. the value a default-constructed ManagerConfig already holds.
	///
	/// The TOML file path comes from MANAGER_CONFIG and defaults to
	/// "config.toml". API keys are read only from that file's `[[api_keys]]`
	/// tables. A missing or inaccessible file is not an error: the function logs
	/// that fact and continues with environment values and defaults.
	ManagerConfig load_manager_config() {
	    ManagerConfig cfg;

	    const std::string config_path = get_env_or("MANAGER_CONFIG", "config.toml");

	    // Non-throwing probe: a filesystem error (e.g. an unreadable parent
	    // directory) is treated as "no file" instead of aborting startup.
	    std::error_code probe_error;
	    const bool have_config_file = std::filesystem::exists(config_path, probe_error);

	    std::unordered_map<std::string, std::unordered_map<std::string, std::string>> toml;
	    if (have_config_file) {
	        toml = parse_simple_toml(config_path);
	        log_line("config: loaded " + config_path);
	    } else {
	        log_line("config: using environment/defaults (file not found: " + config_path + ")");
	    }

	    // Environment variable first, then the TOML entry, then the current value.
	    const auto pick_string = [&toml](const char *env_name, const std::string &section,
	                                     const std::string &key, const std::string &current) {
	        return get_env_or(env_name, get_toml_string_or(toml, section, key, current));
	    };
	    const auto pick_int = [&toml](const char *env_name, const std::string &section,
	                                  const std::string &key, int current) {
	        return get_env_int_or(env_name, get_toml_int_or(toml, section, key, current));
	    };

	    cfg.server.host = pick_string("MANAGER_HOST", "server", "host", cfg.server.host);
	    cfg.server.port = pick_int("MANAGER_PORT", "server", "port", cfg.server.port);

	    cfg.worker.default_model = pick_string("DEFAULT_MODEL", "worker", "default_model", cfg.worker.default_model);
	    cfg.worker.llama_server_bin = pick_string("LLAMA_SERVER_BIN", "worker", "llama_server_bin", cfg.worker.llama_server_bin);
	    cfg.worker.host = pick_string("WORKER_HOST", "worker", "host", cfg.worker.host);
	    cfg.worker.bind_host = pick_string("WORKER_BIND_HOST", "worker", "bind_host", cfg.worker.bind_host);
	    cfg.worker.base_port = pick_int("WORKER_BASE_PORT", "worker", "base_port", cfg.worker.base_port);
	    cfg.worker.switch_timeout_sec = pick_int("SWITCH_TIMEOUT_SEC", "worker", "switch_timeout_sec", cfg.worker.switch_timeout_sec);

	    cfg.llama.n_ctx = pick_int("MODEL_N_CTX", "llama", "n_ctx", cfg.llama.n_ctx);
	    cfg.llama.threads = pick_int("MODEL_THREADS", "llama", "threads", cfg.llama.threads);
	    cfg.llama.ngl = pick_int("MODEL_NGL", "llama", "ngl", cfg.llama.ngl);
	    cfg.llama.batch = pick_int("MODEL_BATCH", "llama", "batch", cfg.llama.batch);
	    cfg.llama.ubatch = pick_int("MODEL_UBATCH", "llama", "ubatch", cfg.llama.ubatch);

	    cfg.auth.header = pick_string("AUTH_HEADER", "auth", "header", cfg.auth.header);
	    cfg.auth.scheme = pick_string("AUTH_SCHEME", "auth", "scheme", cfg.auth.scheme);

	    cfg.limits.default_max_tokens = pick_int("DEFAULT_MAX_TOKENS", "limits", "default_max_tokens", cfg.limits.default_max_tokens);
	    cfg.limits.max_tokens_per_request = pick_int("MAX_TOKENS_PER_REQUEST", "limits", "max_tokens_per_request", cfg.limits.max_tokens_per_request);
	    cfg.limits.request_timeout_sec = pick_int("REQUEST_TIMEOUT_SEC", "limits", "request_timeout_sec", cfg.limits.request_timeout_sec);

	    // The queue size is clamped to at least 1 before the conversion to size_t.
	    const int requested_queue_size =
	        pick_int("QUEUE_MAX_SIZE", "queue", "max_size", static_cast<int>(cfg.queue.max_size));
	    cfg.queue.max_size = static_cast<size_t>(std::max(1, requested_queue_size));
	    cfg.queue.max_tokens = pick_int("QUEUE_MAX_TOKENS", "queue", "max_tokens", cfg.queue.max_tokens);
	    cfg.queue.admin_quota = pick_int("QUEUE_ADMIN_QUOTA", "queue", "admin_quota", cfg.queue.admin_quota);
	    cfg.queue.retry_after_sec = pick_int("QUEUE_RETRY_AFTER_SEC", "queue", "retry_after_sec", cfg.queue.retry_after_sec);

	    cfg.rate_limit.requests_per_minute = pick_int("REQUESTS_PER_MINUTE", "rate_limit", "requests_per_minute", cfg.rate_limit.requests_per_minute);
	    cfg.rate_limit.estimated_tokens_per_minute = pick_int("ESTIMATED_TOKENS_PER_MINUTE", "rate_limit", "estimated_tokens_per_minute", cfg.rate_limit.estimated_tokens_per_minute);

	    cfg.scheduler.max_concurrent = pick_int("SCHEDULER_MAX_CONCURRENT", "scheduler", "max_concurrent", cfg.scheduler.max_concurrent);

	    // The boolean travels through the string layer; unparseable text keeps the default.
	    cfg.streaming.enabled = parse_bool_or(
	        pick_string("STREAMING_ENABLED", "streaming", "enabled", cfg.streaming.enabled ? "true" : "false"),
	        cfg.streaming.enabled);

	    if (have_config_file) {
	        cfg.api_keys = parse_api_keys_toml(config_path);
	    }

	    return cfg;
	}