AGI / cpp /config.cpp
Dmitry Beresnev
Refactors the C++ LLM manager into modular components, moves Python modules under python/, and keeps the current control-plane behavior intact. The C++ server now has clearer separation for config, model lifecycle, runtime services, request parsing, HTTP helpers, and server routing, while Docker build/runtime paths were updated to compile multiple C++ files and load Python code from the new package folder.
332826f
#include "config.h"
#include "http_helpers.h"
#include <algorithm>
#include <cctype>
#include <cstdlib>
#include <filesystem>
#include <fstream>
#include <unordered_map>
// Looks up the environment variable `name` and returns its value.
// Returns `fallback` when the variable is unset or set to an empty string.
std::string get_env_or(const char *name, const std::string &fallback) {
  const char *raw = std::getenv(name);
  if (raw == nullptr || raw[0] == '\0') {
    return fallback;
  }
  return std::string(raw);
}
// Reads the environment variable `name` as a base-10 `int`.
//
// Returns `fallback` when the variable is unset, empty, out of range for
// `int`, or not entirely an integer. Whitespace around the number is
// tolerated, but any other trailing text is rejected: `std::stoi` on its own
// would accept the numeric prefix, turning values such as "4k" or "1e6" into
// 4 and 1 instead of reporting them as invalid.
int get_env_int_or(const char *name, int fallback) {
  const char *raw = std::getenv(name);
  if (!raw || !*raw) return fallback;
  const std::string text(raw);
  try {
    std::size_t consumed = 0;
    const int parsed = std::stoi(text, &consumed);
    // Anything other than whitespace after the digits means the value was
    // not a plain integer.
    if (text.find_first_not_of(" \t\r\n", consumed) != std::string::npos) return fallback;
    return parsed;
  } catch (...) {
    // std::stoi throws for non-numeric or out-of-range input; both cases
    // fall back to the default.
    return fallback;
  }
}
// Returns a copy of `value` without leading and trailing spaces, tabs,
// carriage returns and newlines. Other characters are left untouched, and an
// input made only of those four characters yields an empty string.
std::string trim_copy(const std::string &value) {
  const auto is_blank = [](char c) {
    return c == ' ' || c == '\t' || c == '\r' || c == '\n';
  };
  auto head = value.begin();
  auto tail = value.end();
  while (head != tail && is_blank(*head)) {
    ++head;
  }
  while (tail != head && is_blank(*(tail - 1))) {
    --tail;
  }
  return std::string(head, tail);
}
// Removes one pair of matching surrounding quotes from `value`.
// The value is returned unchanged unless it is at least two characters long
// and both its first and last characters are the same quote character
// (either `"` or `'`). No escape sequences are interpreted.
std::string strip_quotes(const std::string &value) {
  if (value.size() < 2) {
    return value;
  }
  const char opener = value.front();
  const bool is_quote = opener == '"' || opener == '\'';
  if (is_quote && value.back() == opener) {
    return value.substr(1, value.size() - 2);
  }
  return value;
}
// Interprets `value` as a boolean.
//
// Surrounding whitespace is ignored and matching is case-insensitive.
// "true", "1", "yes" and "on" map to true; "false", "0", "no" and "off" map
// to false. Anything else returns `fallback`.
//
// Accepting the common spellings matters because an unrecognised value keeps
// the fallback silently: a setting written as `False` or `0` would otherwise
// stay at its previous value instead of being turned off.
bool parse_bool_or(const std::string &value, bool fallback) {
  std::string normalized = trim_copy(value);
  // Go through unsigned char: std::tolower is undefined for negative values.
  std::transform(normalized.begin(), normalized.end(), normalized.begin(),
                 [](unsigned char c) { return static_cast<char>(std::tolower(c)); });
  if (normalized == "true" || normalized == "1" || normalized == "yes" || normalized == "on") {
    return true;
  }
  if (normalized == "false" || normalized == "0" || normalized == "no" || normalized == "off") {
    return false;
  }
  return fallback;
}
// Maps a role name to a `Role` value.
// After trimming surrounding whitespace, "admin"/"ADMIN" yields Role::ADMIN
// and "user"/"USER" yields Role::USER; any other text (including mixed-case
// spellings) yields `fallback`.
Role parse_role_or(const std::string &value, Role fallback) {
  const std::string token = trim_copy(value);
  const bool is_admin = (token == "admin") || (token == "ADMIN");
  if (is_admin) {
    return Role::ADMIN;
  }
  const bool is_user = (token == "user") || (token == "USER");
  if (is_user) {
    return Role::USER;
  }
  return fallback;
}
// Removes a trailing `#` comment from one line of TOML text.
//
// A `#` only starts a comment when it sits outside a quoted string, so a line
// such as `secret = "abc#123"` keeps its full value. Inside a double-quoted
// string a backslash escapes the next character (so `\"` does not close the
// string); single-quoted strings have no escapes. If a quote is never closed,
// the rest of the line counts as string content and nothing is removed.
static std::string strip_toml_comment(const std::string &line) {
  char open_quote = '\0';  // '\0' while outside any quoted string
  for (std::string::size_type i = 0; i < line.size(); ++i) {
    const char c = line[i];
    if (open_quote != '\0') {
      if (open_quote == '"' && c == '\\') {
        ++i;  // skip the escaped character
      } else if (c == open_quote) {
        open_quote = '\0';
      }
    } else if (c == '"' || c == '\'') {
      open_quote = c;
    } else if (c == '#') {
      return line.substr(0, i);
    }
  }
  return line;
}

// Reads the file at `path` as a flat subset of TOML and returns its
// `key = value` pairs grouped by section name.
//
// - Comments and blank lines are skipped.
// - A line of the form `[name]` switches the current section. Pairs that
//   appear before any header are stored under the empty section name.
//   A header written as `[[name]]` also matches and is recorded under the
//   section name "[name]".
// - Each remaining line is split at its first `=`; key and value are trimmed
//   and one pair of surrounding quotes is removed from the value. Lines
//   without `=` are ignored. A repeated key overwrites the earlier value.
//
// Returns an empty map when the file cannot be opened.
static std::unordered_map<std::string, std::unordered_map<std::string, std::string>>
parse_simple_toml(const std::string &path) {
  std::unordered_map<std::string, std::unordered_map<std::string, std::string>> out;
  std::ifstream input(path);
  if (!input.is_open()) return out;
  std::string current_section;
  std::string line;
  while (std::getline(input, line)) {
    line = trim_copy(strip_toml_comment(line));
    if (line.empty()) continue;
    if (line.front() == '[' && line.back() == ']') {
      current_section = trim_copy(line.substr(1, line.size() - 2));
      continue;
    }
    const auto eq = line.find('=');
    if (eq == std::string::npos) continue;
    std::string key = trim_copy(line.substr(0, eq));
    std::string value = strip_quotes(trim_copy(line.substr(eq + 1)));
    out[current_section][key] = value;
  }
  return out;
}

// Collects every `[[api_keys]]` entry from the TOML file at `path`.
//
// Each `[[api_keys]]` header starts a new record; the `key_id`, `secret`,
// `role` and `enabled` pairs that follow fill it in, and other keys are
// ignored. `role` and `enabled` keep the record's current value when their
// text is not recognised. A record ends at the next `[[api_keys]]` header, at
// any other `[...]` header, or at end of file, and is kept only if both
// `key_id` and `secret` are non-empty. Lines outside an `[[api_keys]]` entry
// are skipped.
//
// Returns an empty vector when the file cannot be opened.
static std::vector<ApiKeyRecord> parse_api_keys_toml(const std::string &path) {
  std::vector<ApiKeyRecord> keys;
  std::ifstream input(path);
  if (!input.is_open()) return keys;
  std::string line;
  bool in_api_key = false;
  ApiKeyRecord current;
  bool has_any_field = false;
  // Stores the record being built (if complete) and resets the builder state.
  auto flush_current = [&]() {
    if (has_any_field && !current.key_id.empty() && !current.secret.empty()) {
      keys.push_back(current);
    }
    current = ApiKeyRecord{};
    has_any_field = false;
  };
  while (std::getline(input, line)) {
    line = trim_copy(strip_toml_comment(line));
    if (line.empty()) continue;
    if (line == "[[api_keys]]") {
      flush_current();
      in_api_key = true;
      continue;
    }
    if (!in_api_key) continue;
    if (line.front() == '[' && line.back() == ']') {
      // Any other table header closes the current api_keys entry.
      flush_current();
      in_api_key = false;
      continue;
    }
    const auto eq = line.find('=');
    if (eq == std::string::npos) continue;
    std::string key = trim_copy(line.substr(0, eq));
    std::string value = strip_quotes(trim_copy(line.substr(eq + 1)));
    has_any_field = true;
    if (key == "key_id") current.key_id = value;
    else if (key == "secret") current.secret = value;
    else if (key == "role") current.role = parse_role_or(value, current.role);
    else if (key == "enabled") current.enabled = parse_bool_or(value, current.enabled);
  }
  flush_current();  // the last entry has no following header to close it
  return keys;
}
// Returns the string stored at `data[section][key]`.
// Yields `fallback` when the section is absent, the key is absent, or the
// stored value is empty.
static std::string get_toml_string_or(
    const std::unordered_map<std::string, std::unordered_map<std::string, std::string>> &data,
    const std::string &section,
    const std::string &key,
    const std::string &fallback) {
  const auto section_it = data.find(section);
  if (section_it != data.end()) {
    const auto &entries = section_it->second;
    const auto entry_it = entries.find(key);
    if (entry_it != entries.end() && !entry_it->second.empty()) {
      return entry_it->second;
    }
  }
  return fallback;
}
// Returns the value stored at `data[section][key]` parsed as a base-10 `int`.
//
// Yields `fallback` when the section or key is absent, the stored text is
// empty, the number does not fit in `int`, or the text is not entirely an
// integer. Whitespace around the number is tolerated, but any other trailing
// text is rejected: `std::stoi` on its own would accept the numeric prefix,
// turning values such as "4k" or "1.5" into 4 and 1.
static int get_toml_int_or(
    const std::unordered_map<std::string, std::unordered_map<std::string, std::string>> &data,
    const std::string &section,
    const std::string &key,
    int fallback) {
  const auto it = data.find(section);
  if (it == data.end()) return fallback;
  const auto kv = it->second.find(key);
  if (kv == it->second.end() || kv->second.empty()) return fallback;
  const std::string &text = kv->second;
  try {
    std::size_t consumed = 0;
    const int parsed = std::stoi(text, &consumed);
    // Anything other than whitespace after the digits means the value was
    // not a plain integer.
    if (text.find_first_not_of(" \t\r\n", consumed) != std::string::npos) return fallback;
    return parsed;
  } catch (...) {
    // std::stoi throws for non-numeric or out-of-range input; both cases
    // fall back to the default.
    return fallback;
  }
}
// Builds the manager configuration from three layered sources.
//
// The config file path comes from the MANAGER_CONFIG environment variable
// ("config.toml" when unset). For every setting the lookup order is:
//   1. the setting's environment variable,
//   2. the value in the TOML file,
//   3. the value a default-constructed ManagerConfig already holds.
// When the file does not exist, only steps 1 and 3 apply. queue.max_size is
// additionally clamped to at least 1. API keys are read only from the file's
// [[api_keys]] entries.
ManagerConfig load_manager_config() {
  ManagerConfig cfg;
  const std::string config_path = get_env_or("MANAGER_CONFIG", "config.toml");

  std::unordered_map<std::string, std::unordered_map<std::string, std::string>> toml;
  if (!std::filesystem::exists(config_path)) {
    log_line("config: using environment/defaults (file not found: " + config_path + ")");
  } else {
    toml = parse_simple_toml(config_path);
    log_line("config: loaded " + config_path);
  }

  // Resolve one string setting: environment variable, then TOML, then `builtin`.
  const auto text_setting = [&toml](const char *env_name, const char *section, const char *key,
                                    const std::string &builtin) {
    return get_env_or(env_name, get_toml_string_or(toml, section, key, builtin));
  };
  // Resolve one integer setting with the same precedence.
  const auto int_setting = [&toml](const char *env_name, const char *section, const char *key,
                                   int builtin) {
    return get_env_int_or(env_name, get_toml_int_or(toml, section, key, builtin));
  };

  // [server]
  cfg.server.host = text_setting("MANAGER_HOST", "server", "host", cfg.server.host);
  cfg.server.port = int_setting("MANAGER_PORT", "server", "port", cfg.server.port);

  // [worker]
  cfg.worker.default_model =
      text_setting("DEFAULT_MODEL", "worker", "default_model", cfg.worker.default_model);
  cfg.worker.llama_server_bin =
      text_setting("LLAMA_SERVER_BIN", "worker", "llama_server_bin", cfg.worker.llama_server_bin);
  cfg.worker.host = text_setting("WORKER_HOST", "worker", "host", cfg.worker.host);
  cfg.worker.bind_host = text_setting("WORKER_BIND_HOST", "worker", "bind_host", cfg.worker.bind_host);
  cfg.worker.base_port = int_setting("WORKER_BASE_PORT", "worker", "base_port", cfg.worker.base_port);
  cfg.worker.switch_timeout_sec =
      int_setting("SWITCH_TIMEOUT_SEC", "worker", "switch_timeout_sec", cfg.worker.switch_timeout_sec);

  // [llama]
  cfg.llama.n_ctx = int_setting("MODEL_N_CTX", "llama", "n_ctx", cfg.llama.n_ctx);
  cfg.llama.threads = int_setting("MODEL_THREADS", "llama", "threads", cfg.llama.threads);
  cfg.llama.ngl = int_setting("MODEL_NGL", "llama", "ngl", cfg.llama.ngl);
  cfg.llama.batch = int_setting("MODEL_BATCH", "llama", "batch", cfg.llama.batch);
  cfg.llama.ubatch = int_setting("MODEL_UBATCH", "llama", "ubatch", cfg.llama.ubatch);

  // [auth]
  cfg.auth.header = text_setting("AUTH_HEADER", "auth", "header", cfg.auth.header);
  cfg.auth.scheme = text_setting("AUTH_SCHEME", "auth", "scheme", cfg.auth.scheme);

  // [limits]
  cfg.limits.default_max_tokens =
      int_setting("DEFAULT_MAX_TOKENS", "limits", "default_max_tokens", cfg.limits.default_max_tokens);
  cfg.limits.max_tokens_per_request = int_setting(
      "MAX_TOKENS_PER_REQUEST", "limits", "max_tokens_per_request", cfg.limits.max_tokens_per_request);
  cfg.limits.request_timeout_sec =
      int_setting("REQUEST_TIMEOUT_SEC", "limits", "request_timeout_sec", cfg.limits.request_timeout_sec);

  // [queue] -- max_size is never allowed to drop below 1.
  const int requested_queue_size =
      int_setting("QUEUE_MAX_SIZE", "queue", "max_size", static_cast<int>(cfg.queue.max_size));
  cfg.queue.max_size = static_cast<size_t>(std::max(1, requested_queue_size));
  cfg.queue.max_tokens = int_setting("QUEUE_MAX_TOKENS", "queue", "max_tokens", cfg.queue.max_tokens);
  cfg.queue.admin_quota = int_setting("QUEUE_ADMIN_QUOTA", "queue", "admin_quota", cfg.queue.admin_quota);
  cfg.queue.retry_after_sec =
      int_setting("QUEUE_RETRY_AFTER_SEC", "queue", "retry_after_sec", cfg.queue.retry_after_sec);

  // [rate_limit]
  cfg.rate_limit.requests_per_minute = int_setting(
      "REQUESTS_PER_MINUTE", "rate_limit", "requests_per_minute", cfg.rate_limit.requests_per_minute);
  cfg.rate_limit.estimated_tokens_per_minute =
      int_setting("ESTIMATED_TOKENS_PER_MINUTE", "rate_limit", "estimated_tokens_per_minute",
                  cfg.rate_limit.estimated_tokens_per_minute);

  // [scheduler]
  cfg.scheduler.max_concurrent =
      int_setting("SCHEDULER_MAX_CONCURRENT", "scheduler", "max_concurrent", cfg.scheduler.max_concurrent);

  // [streaming] -- resolved as text first, then handed to parse_bool_or with
  // the built-in default as its fallback.
  const std::string streaming_text =
      text_setting("STREAMING_ENABLED", "streaming", "enabled", cfg.streaming.enabled ? "true" : "false");
  cfg.streaming.enabled = parse_bool_or(streaming_text, cfg.streaming.enabled);

  // API keys exist only in the config file; there is no environment override.
  if (!config_path.empty() && std::filesystem::exists(config_path)) {
    cfg.api_keys = parse_api_keys_toml(config_path);
  }
  return cfg;
}