Dmitry Beresnev committed on
Commit
a4ee76d
·
1 Parent(s): fc0860f

fix routing in llm manager

Browse files
Files changed (1) hide show
  1. cpp/llm_manager.cpp +65 -0
cpp/llm_manager.cpp CHANGED
@@ -302,6 +302,43 @@ static std::pair<int, std::string> forward_chat(const WorkerInfo &worker, const
302
  return {res.result_int(), res.body()};
303
  }
304
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
305
  template <typename Body, typename Allocator>
306
  http::response<http::string_body> handle_request(
307
  ModelManager &manager,
@@ -388,6 +425,34 @@ http::response<http::string_body> handle_request(
388
  return res;
389
  }
390
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
391
  return json_response(http::status::not_found, {{"error", "Not found"}});
392
  } catch (const std::exception &e) {
393
  return json_response(http::status::internal_server_error, {{"error", e.what()}});
 
302
  return {res.result_int(), res.body()};
303
  }
304
 
305
// Outcome of proxying a GET request to a llama-server worker: the upstream
// HTTP status and body, plus the response headers we pass back to the client.
struct ProxiedGetResult {
    int status{500};  // upstream HTTP status; 500 until a response is parsed
    std::string body;
    // Content type reported by the worker; plain text is the fallback when
    // the upstream response carries no Content-Type header.
    std::string content_type{"text/plain; charset=utf-8"};
    std::string content_encoding;  // empty unless the worker compressed the body
};
311
+
312
+ static ProxiedGetResult forward_get_to_worker(const WorkerInfo &worker,
313
+ const std::string &target) {
314
+ asio::io_context ioc;
315
+ asio::ip::tcp::resolver resolver(ioc);
316
+ beast::tcp_stream stream(ioc);
317
+ auto const results = resolver.resolve("127.0.0.1", std::to_string(worker.port));
318
+ stream.connect(results);
319
+
320
+ http::request<http::string_body> req{http::verb::get, target, 11};
321
+ req.set(http::field::host, "127.0.0.1");
322
+ req.set(http::field::user_agent, "llm-manager");
323
+ http::write(stream, req);
324
+
325
+ beast::flat_buffer buffer;
326
+ http::response<http::string_body> res;
327
+ http::read(stream, buffer, res);
328
+ beast::error_code ec;
329
+ stream.socket().shutdown(asio::ip::tcp::socket::shutdown_both, ec);
330
+ ProxiedGetResult out;
331
+ out.status = res.result_int();
332
+ out.body = res.body();
333
+ if (res.base().find(http::field::content_type) != res.base().end()) {
334
+ out.content_type = res.base()[http::field::content_type].to_string();
335
+ }
336
+ if (res.base().find(http::field::content_encoding) != res.base().end()) {
337
+ out.content_encoding = res.base()[http::field::content_encoding].to_string();
338
+ }
339
+ return out;
340
+ }
341
+
342
  template <typename Body, typename Allocator>
343
  http::response<http::string_body> handle_request(
344
  ModelManager &manager,
 
425
  return res;
426
  }
427
 
428
+ // Proxy GET requests not handled by manager endpoints to active llama-server.
429
+ // This enables llama.cpp UI/static routes (including "/").
430
+ if (req.method() == http::verb::get) {
431
+ auto worker = manager.active_worker();
432
+ if (!worker) {
433
+ return json_response(http::status::service_unavailable, {{"error", "No active model"}});
434
+ }
435
+ auto upstream = forward_get_to_worker(*worker, target);
436
+ http::response<http::string_body> res{
437
+ static_cast<http::status>(upstream.status), req.version()};
438
+ res.set(http::field::content_type, upstream.content_type);
439
+ if (!upstream.content_encoding.empty()) {
440
+ res.set(http::field::content_encoding, upstream.content_encoding);
441
+ }
442
+ res.set(http::field::server, "llm-manager");
443
+ res.keep_alive(req.keep_alive());
444
+ res.body() = upstream.body;
445
+ res.prepare_payload();
446
+ auto elapsed_ms = std::chrono::duration_cast<std::chrono::milliseconds>(
447
+ std::chrono::steady_clock::now() - start)
448
+ .count();
449
+ log_line("request_id=" + std::to_string(req_id) +
450
+ " proxied_get model=" + worker->model +
451
+ " upstream_status=" + std::to_string(upstream.status) +
452
+ " elapsed_ms=" + std::to_string(elapsed_ms));
453
+ return res;
454
+ }
455
+
456
  return json_response(http::status::not_found, {{"error", "Not found"}});
457
  } catch (const std::exception &e) {
458
  return json_response(http::status::internal_server_error, {{"error", e.what()}});