Spaces:
Building
Building
Dmitry Beresnev
Refactors the C++ LLM manager into modular components, moves Python modules under python/, and keeps the current control-plane behavior intact. The C++ server now has clearer separation for config, model lifecycle, runtime services, request parsing, HTTP helpers, and server routing, while the Docker build/runtime paths were updated to compile multiple C++ files and load Python code from the new package folder.
332826f | namespace asio = boost::asio; | |
| namespace beast = boost::beast; | |
| namespace http = beast::http; | |
// Formats the current wall-clock time as an ISO-8601 UTC timestamp,
// e.g. "2024-01-02T03:04:05Z".
std::string now_utc_iso() {
    const std::time_t now = std::time(nullptr);
    std::tm utc{};
    gmtime_r(&now, &utc);  // thread-safe UTC conversion (POSIX)
    // Fixed-width format: buffer sized to hold exactly one timestamp + NUL.
    char buf[sizeof "1970-01-01T00:00:00Z"];
    std::strftime(buf, sizeof buf, "%Y-%m-%dT%H:%M:%SZ", &utc);
    return buf;
}
| void log_line(const std::string &line) { | |
| std::cout << "[" << now_utc_iso() << "] " << line << std::endl; | |
| } | |
// Caps `body` at `max_len` characters, appending a truncation marker when
// the body was actually cut. Bodies at or under the limit pass through
// unchanged.
std::string truncate_body(const std::string &body, size_t max_len) {
    return body.size() <= max_len
               ? body
               : body.substr(0, max_len) + "...[truncated]";
}
// Parses a path of the form "/requests/{id}/cancel" and returns {id}.
// Any other shape — wrong prefix, wrong suffix, or an empty id — yields
// std::nullopt.
std::optional<std::string> extract_cancel_request_id(const std::string &path) {
    constexpr char kPrefix[] = "/requests/";
    constexpr char kSuffix[] = "/cancel";
    constexpr size_t kPrefixLen = sizeof kPrefix - 1;
    constexpr size_t kSuffixLen = sizeof kSuffix - 1;
    // Too short to hold a non-empty id between prefix and suffix.
    if (path.size() <= kPrefixLen + kSuffixLen) return std::nullopt;
    if (path.compare(0, kPrefixLen, kPrefix) != 0) return std::nullopt;
    if (path.compare(path.size() - kSuffixLen, kSuffixLen, kSuffix) != 0) {
        return std::nullopt;
    }
    std::string request_id =
        path.substr(kPrefixLen, path.size() - kPrefixLen - kSuffixLen);
    if (request_id.empty()) return std::nullopt;  // defensive; length guard already excludes this
    return request_id;
}
| std::pair<int, std::string> forward_chat(const WorkerInfo &worker, const std::string &body) { | |
| asio::io_context ioc; | |
| asio::ip::tcp::resolver resolver(ioc); | |
| beast::tcp_stream stream(ioc); | |
| auto const results = resolver.resolve("127.0.0.1", std::to_string(worker.port)); | |
| stream.connect(results); | |
| http::request<http::string_body> req{http::verb::post, "/v1/chat/completions", 11}; | |
| req.set(http::field::host, "127.0.0.1"); | |
| req.set(http::field::content_type, "application/json"); | |
| req.set(http::field::user_agent, "llm-manager"); | |
| req.body() = body; | |
| req.prepare_payload(); | |
| http::write(stream, req); | |
| beast::flat_buffer buffer; | |
| http::response<http::string_body> res; | |
| http::read(stream, buffer, res); | |
| beast::error_code ec; | |
| stream.socket().shutdown(asio::ip::tcp::socket::shutdown_both, ec); | |
| return {res.result_int(), res.body()}; | |
| } | |
| ProxiedGetResult forward_get_to_worker(const WorkerInfo &worker, const std::string &target) { | |
| asio::io_context ioc; | |
| asio::ip::tcp::resolver resolver(ioc); | |
| beast::tcp_stream stream(ioc); | |
| auto const results = resolver.resolve("127.0.0.1", std::to_string(worker.port)); | |
| stream.connect(results); | |
| http::request<http::string_body> req{http::verb::get, target, 11}; | |
| req.set(http::field::host, "127.0.0.1"); | |
| req.set(http::field::user_agent, "llm-manager"); | |
| req.set(http::field::accept_encoding, "gzip, identity"); | |
| http::write(stream, req); | |
| beast::flat_buffer buffer; | |
| http::response<http::string_body> res; | |
| http::read(stream, buffer, res); | |
| beast::error_code ec; | |
| stream.socket().shutdown(asio::ip::tcp::socket::shutdown_both, ec); | |
| ProxiedGetResult out; | |
| out.status = res.result_int(); | |
| out.body = res.body(); | |
| if (res.base().find(http::field::content_type) != res.base().end()) { | |
| out.content_type = res.base()[http::field::content_type].to_string(); | |
| } | |
| if (res.base().find(http::field::content_encoding) != res.base().end()) { | |
| out.content_encoding = res.base()[http::field::content_encoding].to_string(); | |
| } | |
| return out; | |
| } | |