Spaces:
Building
Building
Dmitry Beresnev
Refactors the C++ LLM manager into modular components, moves Python modules under python/, and keeps the current control-plane behavior intact. The C++ server now has clearer separation for config, model lifecycle, runtime services, request parsing, HTTP helpers, and server routing, while Docker build/runtime paths were updated to compile multiple C++ files and load Python code from the new package folder.
// Returns the value of environment variable `name`.
// Falls back to `fallback` when the variable is unset or set to an empty string.
std::string get_env_or(const char *name, const std::string &fallback) {
    const char *raw = std::getenv(name);
    if (raw == nullptr || raw[0] == '\0') {
        return fallback;
    }
    return std::string(raw);
}
// Reads environment variable `name` and converts it with std::stoi.
// Returns `fallback` when the variable is unset, empty, or std::stoi throws
// (non-numeric text or a value outside the range of int).
int get_env_int_or(const char *name, int fallback) {
    const char *raw = std::getenv(name);
    const bool missing = (raw == nullptr) || (*raw == '\0');
    if (missing) {
        return fallback;
    }

    int parsed = fallback;
    try {
        parsed = std::stoi(std::string(raw));
    } catch (...) {
        // Conversion failed: `parsed` still holds the fallback.
    }
    return parsed;
}
// Returns a copy of `value` with leading and trailing spaces, tabs, carriage
// returns and newlines removed. Other characters (including '\v' and '\f')
// are left untouched. An all-blank input yields an empty string.
std::string trim_copy(const std::string &value) {
    const auto is_blank = [](char c) {
        return c == ' ' || c == '\t' || c == '\r' || c == '\n';
    };

    std::size_t begin = 0;
    std::size_t end = value.size();
    while (begin < end && is_blank(value[begin])) {
        ++begin;
    }
    while (end > begin && is_blank(value[end - 1])) {
        --end;
    }
    return value.substr(begin, end - begin);
}
// Removes one pair of matching surrounding quotes ("..." or '...') from `value`.
// Strings shorter than two characters, unquoted strings, and strings whose
// first and last quote characters differ are returned unchanged.
std::string strip_quotes(const std::string &value) {
    if (value.size() < 2) {
        return value;
    }

    const char opener = value.front();
    const bool opens_with_quote = (opener == '"') || (opener == '\'');
    if (!opens_with_quote || value.back() != opener) {
        return value;
    }
    return std::string(value.begin() + 1, value.end() - 1);
}
| bool parse_bool_or(const std::string &value, bool fallback) { | |
| const std::string normalized = trim_copy(value); | |
| if (normalized == "true") return true; | |
| if (normalized == "false") return false; | |
| return fallback; | |
| } | |
| Role parse_role_or(const std::string &value, Role fallback) { | |
| const std::string normalized = trim_copy(value); | |
| if (normalized == "admin" || normalized == "ADMIN") return Role::ADMIN; | |
| if (normalized == "user" || normalized == "USER") return Role::USER; | |
| return fallback; | |
| } | |
| static std::unordered_map<std::string, std::unordered_map<std::string, std::string>> | |
| parse_simple_toml(const std::string &path) { | |
| std::unordered_map<std::string, std::unordered_map<std::string, std::string>> out; | |
| std::ifstream input(path); | |
| if (!input.is_open()) return out; | |
| std::string current_section; | |
| std::string line; | |
| while (std::getline(input, line)) { | |
| auto hash = line.find('#'); | |
| if (hash != std::string::npos) line = line.substr(0, hash); | |
| line = trim_copy(line); | |
| if (line.empty()) continue; | |
| if (line.front() == '[' && line.back() == ']') { | |
| current_section = trim_copy(line.substr(1, line.size() - 2)); | |
| continue; | |
| } | |
| const auto eq = line.find('='); | |
| if (eq == std::string::npos) continue; | |
| std::string key = trim_copy(line.substr(0, eq)); | |
| std::string value = strip_quotes(trim_copy(line.substr(eq + 1))); | |
| out[current_section][key] = value; | |
| } | |
| return out; | |
| } | |
| static std::vector<ApiKeyRecord> parse_api_keys_toml(const std::string &path) { | |
| std::vector<ApiKeyRecord> keys; | |
| std::ifstream input(path); | |
| if (!input.is_open()) return keys; | |
| std::string line; | |
| bool in_api_key = false; | |
| ApiKeyRecord current; | |
| bool has_any_field = false; | |
| auto flush_current = [&]() { | |
| if (has_any_field && !current.key_id.empty() && !current.secret.empty()) { | |
| keys.push_back(current); | |
| } | |
| current = ApiKeyRecord{}; | |
| has_any_field = false; | |
| }; | |
| while (std::getline(input, line)) { | |
| auto hash = line.find('#'); | |
| if (hash != std::string::npos) line = line.substr(0, hash); | |
| line = trim_copy(line); | |
| if (line.empty()) continue; | |
| if (line == "[[api_keys]]") { | |
| flush_current(); | |
| in_api_key = true; | |
| continue; | |
| } | |
| if (!in_api_key) continue; | |
| if (line.front() == '[' && line.back() == ']') { | |
| flush_current(); | |
| in_api_key = false; | |
| continue; | |
| } | |
| const auto eq = line.find('='); | |
| if (eq == std::string::npos) continue; | |
| std::string key = trim_copy(line.substr(0, eq)); | |
| std::string value = strip_quotes(trim_copy(line.substr(eq + 1))); | |
| has_any_field = true; | |
| if (key == "key_id") current.key_id = value; | |
| else if (key == "secret") current.secret = value; | |
| else if (key == "role") current.role = parse_role_or(value, current.role); | |
| else if (key == "enabled") current.enabled = parse_bool_or(value, current.enabled); | |
| } | |
| flush_current(); | |
| return keys; | |
| } | |
// Looks up `key` inside `section` of parsed TOML `data`.
// Returns `fallback` when the section or key is missing, or when the stored
// value is an empty string; otherwise returns the stored value.
static std::string get_toml_string_or(
    const std::unordered_map<std::string, std::unordered_map<std::string, std::string>> &data,
    const std::string &section,
    const std::string &key,
    const std::string &fallback) {
    const auto it = data.find(section);
    if (it == data.end()) return fallback;
    const auto kv = it->second.find(key);
    if (kv == it->second.end() || kv->second.empty()) return fallback;
    return kv->second;
}
// Looks up `key` inside `section` of parsed TOML `data` and converts the
// stored text with std::stoi.
// Returns `fallback` when the section or key is missing, the stored value is
// empty, or std::stoi throws (non-numeric text or out-of-range value).
static int get_toml_int_or(
    const std::unordered_map<std::string, std::unordered_map<std::string, std::string>> &data,
    const std::string &section,
    const std::string &key,
    int fallback) {
    const auto it = data.find(section);
    if (it == data.end()) return fallback;
    const auto kv = it->second.find(key);
    if (kv == it->second.end() || kv->second.empty()) return fallback;
    try {
        return std::stoi(kv->second);
    } catch (...) {
        return fallback;
    }
}
| ManagerConfig load_manager_config() { | |
| ManagerConfig cfg; | |
| const std::string config_path = get_env_or("MANAGER_CONFIG", "config.toml"); | |
| std::unordered_map<std::string, std::unordered_map<std::string, std::string>> toml; | |
| if (std::filesystem::exists(config_path)) { | |
| toml = parse_simple_toml(config_path); | |
| log_line("config: loaded " + config_path); | |
| } else { | |
| log_line("config: using environment/defaults (file not found: " + config_path + ")"); | |
| } | |
| cfg.server.host = get_env_or("MANAGER_HOST", get_toml_string_or(toml, "server", "host", cfg.server.host)); | |
| cfg.server.port = get_env_int_or("MANAGER_PORT", get_toml_int_or(toml, "server", "port", cfg.server.port)); | |
| cfg.worker.default_model = get_env_or("DEFAULT_MODEL", get_toml_string_or(toml, "worker", "default_model", cfg.worker.default_model)); | |
| cfg.worker.llama_server_bin = get_env_or("LLAMA_SERVER_BIN", get_toml_string_or(toml, "worker", "llama_server_bin", cfg.worker.llama_server_bin)); | |
| cfg.worker.host = get_env_or("WORKER_HOST", get_toml_string_or(toml, "worker", "host", cfg.worker.host)); | |
| cfg.worker.bind_host = get_env_or("WORKER_BIND_HOST", get_toml_string_or(toml, "worker", "bind_host", cfg.worker.bind_host)); | |
| cfg.worker.base_port = get_env_int_or("WORKER_BASE_PORT", get_toml_int_or(toml, "worker", "base_port", cfg.worker.base_port)); | |
| cfg.worker.switch_timeout_sec = get_env_int_or("SWITCH_TIMEOUT_SEC", get_toml_int_or(toml, "worker", "switch_timeout_sec", cfg.worker.switch_timeout_sec)); | |
| cfg.llama.n_ctx = get_env_int_or("MODEL_N_CTX", get_toml_int_or(toml, "llama", "n_ctx", cfg.llama.n_ctx)); | |
| cfg.llama.threads = get_env_int_or("MODEL_THREADS", get_toml_int_or(toml, "llama", "threads", cfg.llama.threads)); | |
| cfg.llama.ngl = get_env_int_or("MODEL_NGL", get_toml_int_or(toml, "llama", "ngl", cfg.llama.ngl)); | |
| cfg.llama.batch = get_env_int_or("MODEL_BATCH", get_toml_int_or(toml, "llama", "batch", cfg.llama.batch)); | |
| cfg.llama.ubatch = get_env_int_or("MODEL_UBATCH", get_toml_int_or(toml, "llama", "ubatch", cfg.llama.ubatch)); | |
| cfg.auth.header = get_env_or("AUTH_HEADER", get_toml_string_or(toml, "auth", "header", cfg.auth.header)); | |
| cfg.auth.scheme = get_env_or("AUTH_SCHEME", get_toml_string_or(toml, "auth", "scheme", cfg.auth.scheme)); | |
| cfg.limits.default_max_tokens = get_env_int_or("DEFAULT_MAX_TOKENS", get_toml_int_or(toml, "limits", "default_max_tokens", cfg.limits.default_max_tokens)); | |
| cfg.limits.max_tokens_per_request = get_env_int_or("MAX_TOKENS_PER_REQUEST", get_toml_int_or(toml, "limits", "max_tokens_per_request", cfg.limits.max_tokens_per_request)); | |
| cfg.limits.request_timeout_sec = get_env_int_or("REQUEST_TIMEOUT_SEC", get_toml_int_or(toml, "limits", "request_timeout_sec", cfg.limits.request_timeout_sec)); | |
| cfg.queue.max_size = static_cast<size_t>(std::max( | |
| 1, | |
| get_env_int_or("QUEUE_MAX_SIZE", get_toml_int_or(toml, "queue", "max_size", static_cast<int>(cfg.queue.max_size))))); | |
| cfg.queue.max_tokens = get_env_int_or("QUEUE_MAX_TOKENS", get_toml_int_or(toml, "queue", "max_tokens", cfg.queue.max_tokens)); | |
| cfg.queue.admin_quota = get_env_int_or("QUEUE_ADMIN_QUOTA", get_toml_int_or(toml, "queue", "admin_quota", cfg.queue.admin_quota)); | |
| cfg.queue.retry_after_sec = get_env_int_or("QUEUE_RETRY_AFTER_SEC", get_toml_int_or(toml, "queue", "retry_after_sec", cfg.queue.retry_after_sec)); | |
| cfg.rate_limit.requests_per_minute = get_env_int_or("REQUESTS_PER_MINUTE", get_toml_int_or(toml, "rate_limit", "requests_per_minute", cfg.rate_limit.requests_per_minute)); | |
| cfg.rate_limit.estimated_tokens_per_minute = get_env_int_or("ESTIMATED_TOKENS_PER_MINUTE", get_toml_int_or(toml, "rate_limit", "estimated_tokens_per_minute", cfg.rate_limit.estimated_tokens_per_minute)); | |
| cfg.scheduler.max_concurrent = get_env_int_or("SCHEDULER_MAX_CONCURRENT", get_toml_int_or(toml, "scheduler", "max_concurrent", cfg.scheduler.max_concurrent)); | |
| cfg.streaming.enabled = parse_bool_or( | |
| get_env_or("STREAMING_ENABLED", get_toml_string_or(toml, "streaming", "enabled", cfg.streaming.enabled ? "true" : "false")), | |
| cfg.streaming.enabled); | |
| if (!config_path.empty() && std::filesystem::exists(config_path)) { | |
| cfg.api_keys = parse_api_keys_toml(config_path); | |
| } | |
| return cfg; | |
| } | |