Spaces:
Sleeping
Sleeping
Dmitry Beresnev committed on
Commit ·
a4ee76d
1
Parent(s): fc0860f
fix routing in llm manager
Browse files- cpp/llm_manager.cpp +65 -0
cpp/llm_manager.cpp
CHANGED
|
@@ -302,6 +302,43 @@ static std::pair<int, std::string> forward_chat(const WorkerInfo &worker, const
|
|
| 302 |
return {res.result_int(), res.body()};
|
| 303 |
}
|
| 304 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 305 |
template <typename Body, typename Allocator>
|
| 306 |
http::response<http::string_body> handle_request(
|
| 307 |
ModelManager &manager,
|
|
@@ -388,6 +425,34 @@ http::response<http::string_body> handle_request(
|
|
| 388 |
return res;
|
| 389 |
}
|
| 390 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 391 |
return json_response(http::status::not_found, {{"error", "Not found"}});
|
| 392 |
} catch (const std::exception &e) {
|
| 393 |
return json_response(http::status::internal_server_error, {{"error", e.what()}});
|
|
|
|
| 302 |
return {res.result_int(), res.body()};
|
| 303 |
}
|
| 304 |
|
| 305 |
+
struct ProxiedGetResult {
|
| 306 |
+
int status = 500;
|
| 307 |
+
std::string body;
|
| 308 |
+
std::string content_type = "text/plain; charset=utf-8";
|
| 309 |
+
std::string content_encoding;
|
| 310 |
+
};
|
| 311 |
+
|
| 312 |
+
static ProxiedGetResult forward_get_to_worker(const WorkerInfo &worker,
|
| 313 |
+
const std::string &target) {
|
| 314 |
+
asio::io_context ioc;
|
| 315 |
+
asio::ip::tcp::resolver resolver(ioc);
|
| 316 |
+
beast::tcp_stream stream(ioc);
|
| 317 |
+
auto const results = resolver.resolve("127.0.0.1", std::to_string(worker.port));
|
| 318 |
+
stream.connect(results);
|
| 319 |
+
|
| 320 |
+
http::request<http::string_body> req{http::verb::get, target, 11};
|
| 321 |
+
req.set(http::field::host, "127.0.0.1");
|
| 322 |
+
req.set(http::field::user_agent, "llm-manager");
|
| 323 |
+
http::write(stream, req);
|
| 324 |
+
|
| 325 |
+
beast::flat_buffer buffer;
|
| 326 |
+
http::response<http::string_body> res;
|
| 327 |
+
http::read(stream, buffer, res);
|
| 328 |
+
beast::error_code ec;
|
| 329 |
+
stream.socket().shutdown(asio::ip::tcp::socket::shutdown_both, ec);
|
| 330 |
+
ProxiedGetResult out;
|
| 331 |
+
out.status = res.result_int();
|
| 332 |
+
out.body = res.body();
|
| 333 |
+
if (res.base().find(http::field::content_type) != res.base().end()) {
|
| 334 |
+
out.content_type = res.base()[http::field::content_type].to_string();
|
| 335 |
+
}
|
| 336 |
+
if (res.base().find(http::field::content_encoding) != res.base().end()) {
|
| 337 |
+
out.content_encoding = res.base()[http::field::content_encoding].to_string();
|
| 338 |
+
}
|
| 339 |
+
return out;
|
| 340 |
+
}
|
| 341 |
+
|
| 342 |
template <typename Body, typename Allocator>
|
| 343 |
http::response<http::string_body> handle_request(
|
| 344 |
ModelManager &manager,
|
|
|
|
| 425 |
return res;
|
| 426 |
}
|
| 427 |
|
| 428 |
+
// Proxy GET requests not handled by manager endpoints to active llama-server.
|
| 429 |
+
// This enables llama.cpp UI/static routes (including "/").
|
| 430 |
+
if (req.method() == http::verb::get) {
|
| 431 |
+
auto worker = manager.active_worker();
|
| 432 |
+
if (!worker) {
|
| 433 |
+
return json_response(http::status::service_unavailable, {{"error", "No active model"}});
|
| 434 |
+
}
|
| 435 |
+
auto upstream = forward_get_to_worker(*worker, target);
|
| 436 |
+
http::response<http::string_body> res{
|
| 437 |
+
static_cast<http::status>(upstream.status), req.version()};
|
| 438 |
+
res.set(http::field::content_type, upstream.content_type);
|
| 439 |
+
if (!upstream.content_encoding.empty()) {
|
| 440 |
+
res.set(http::field::content_encoding, upstream.content_encoding);
|
| 441 |
+
}
|
| 442 |
+
res.set(http::field::server, "llm-manager");
|
| 443 |
+
res.keep_alive(req.keep_alive());
|
| 444 |
+
res.body() = upstream.body;
|
| 445 |
+
res.prepare_payload();
|
| 446 |
+
auto elapsed_ms = std::chrono::duration_cast<std::chrono::milliseconds>(
|
| 447 |
+
std::chrono::steady_clock::now() - start)
|
| 448 |
+
.count();
|
| 449 |
+
log_line("request_id=" + std::to_string(req_id) +
|
| 450 |
+
" proxied_get model=" + worker->model +
|
| 451 |
+
" upstream_status=" + std::to_string(upstream.status) +
|
| 452 |
+
" elapsed_ms=" + std::to_string(elapsed_ms));
|
| 453 |
+
return res;
|
| 454 |
+
}
|
| 455 |
+
|
| 456 |
return json_response(http::status::not_found, {{"error", "Not found"}});
|
| 457 |
} catch (const std::exception &e) {
|
| 458 |
return json_response(http::status::internal_server_error, {{"error", e.what()}});
|