Spaces:

ResearchEngineering
/

AGI

Sleeping

App Files Files Community

Dmitry Beresnev committed 28 days ago

Commit

a4ee76d

1 Parent(s): fc0860f

fix routing in llm manager

Browse files

Files changed (1) hide show

cpp/llm_manager.cpp +65 -0

cpp/llm_manager.cpp CHANGED Viewed

@@ -302,6 +302,43 @@ static std::pair<int, std::string> forward_chat(const WorkerInfo &worker, const
     return {res.result_int(), res.body()};
 }
 template <typename Body, typename Allocator>
 http::response<http::string_body> handle_request(
     ModelManager &manager,
@@ -388,6 +425,34 @@ http::response<http::string_body> handle_request(
             return res;
         }
         return json_response(http::status::not_found, {{"error", "Not found"}});
     } catch (const std::exception &e) {
         return json_response(http::status::internal_server_error, {{"error", e.what()}});

     return {res.result_int(), res.body()};
 }
+// Outcome of a GET request proxied to a worker: the status, payload, and the
+// two headers that are carried over to the manager's own response.
+struct ProxiedGetResult {
+  // HTTP status code; stays 500 until a value is assigned.
+  int status{500};
+  // Response payload.
+  std::string body{};
+  // Content-Type value; plain UTF-8 text unless replaced.
+  std::string content_type{"text/plain; charset=utf-8"};
+  // Content-Encoding value; left empty when none is recorded.
+  std::string content_encoding{};
+};
+// Sends a blocking HTTP/1.1 GET for `target` to the worker listening on
+// 127.0.0.1:<worker.port> and returns its status code, body, and the
+// Content-Type / Content-Encoding headers.
+//
+// When the upstream response omits Content-Type the ProxiedGetResult default
+// is kept; when it omits Content-Encoding that field stays empty.
+//
+// The resolve/connect/write/read calls use the overloads without an
+// error_code argument, so a failure in any of them surfaces as an exception
+// to the caller.
+static ProxiedGetResult forward_get_to_worker(const WorkerInfo &worker,
+                                              const std::string &target) {
+  asio::io_context ioc;
+  asio::ip::tcp::resolver resolver(ioc);
+  beast::tcp_stream stream(ioc);
+  auto const results = resolver.resolve("127.0.0.1", std::to_string(worker.port));
+  stream.connect(results);
+
+  http::request<http::string_body> req{http::verb::get, target, 11};
+  req.set(http::field::host, "127.0.0.1");
+  req.set(http::field::user_agent, "llm-manager");
+  http::write(stream, req);
+
+  beast::flat_buffer buffer;
+  http::response<http::string_body> res;
+  http::read(stream, buffer, res);
+
+  // Best-effort shutdown: the response is already fully read, so an error
+  // here (e.g. the peer closed first) is deliberately ignored.
+  beast::error_code ec;
+  stream.socket().shutdown(asio::ip::tcp::socket::shutdown_both, ec);
+
+  ProxiedGetResult out;
+  out.status = res.result_int();
+
+  // Look each header up once and copy it via data()/size(); this avoids
+  // string_view::to_string(), which is not available on every string_view
+  // type Beast may be configured with.
+  auto const content_type = res.base().find(http::field::content_type);
+  if (content_type != res.base().end()) {
+    auto const value = content_type->value();
+    out.content_type.assign(value.data(), value.size());
+  }
+  auto const content_encoding = res.base().find(http::field::content_encoding);
+  if (content_encoding != res.base().end()) {
+    auto const value = content_encoding->value();
+    out.content_encoding.assign(value.data(), value.size());
+  }
+
+  // `res` is not used after this point, so hand its body over without a copy.
+  out.body = std::move(res.body());
+  return out;
+}
 template <typename Body, typename Allocator>
 http::response<http::string_body> handle_request(
     ModelManager &manager,
             return res;
         }
+        // Proxy GET requests not handled by manager endpoints to active llama-server.
+        // This enables llama.cpp UI/static routes (including "/").
+        if (req.method() == http::verb::get) {
+            auto worker = manager.active_worker();
+            if (!worker) {
+                return json_response(http::status::service_unavailable, {{"error", "No active model"}});
+            }
+      auto upstream = forward_get_to_worker(*worker, target);
+      http::response<http::string_body> res{
+          static_cast<http::status>(upstream.status), req.version()};
+      res.set(http::field::content_type, upstream.content_type);
+      if (!upstream.content_encoding.empty()) {
+        res.set(http::field::content_encoding, upstream.content_encoding);
+      }
+      res.set(http::field::server, "llm-manager");
+      res.keep_alive(req.keep_alive());
+      res.body() = upstream.body;
+      res.prepare_payload();
+      auto elapsed_ms = std::chrono::duration_cast<std::chrono::milliseconds>(
+                            std::chrono::steady_clock::now() - start)
+                            .count();
+      log_line("request_id=" + std::to_string(req_id) +
+               " proxied_get model=" + worker->model +
+               " upstream_status=" + std::to_string(upstream.status) +
+               " elapsed_ms=" + std::to_string(elapsed_ms));
+      return res;
+    }
         return json_response(http::status::not_found, {{"error", "Not found"}});
     } catch (const std::exception &e) {
         return json_response(http::status::internal_server_error, {{"error", e.what()}});