#pragma once #include #include #include #include #include "llm_manager_types.h" class ModelManager { public: explicit ModelManager(const ManagerConfig &config); bool initialize_default(std::string &error); bool switch_model(const std::string &model, std::string &error); bool restart_active(std::string &error); std::optional active_worker(); json models_view(); private: std::mutex mu_; std::optional active_; bool switch_in_progress_ = false; std::string default_model_; std::string llama_server_bin_; std::string worker_host_; std::string worker_bind_host_; int base_port_; int switch_timeout_sec_; int n_ctx_; int n_threads_; int n_gpu_layers_; int n_batch_; int n_ubatch_; int next_port_; int allocate_port(); void finish_switch(bool ok); pid_t spawn_worker(const std::string &model, int port); bool wait_until_ready(pid_t pid, int port, int timeout_sec); std::pair http_get(int port, const std::string &target); }; bool is_alive(pid_t pid); void shutdown_worker(pid_t pid, int wait_seconds = 15);