Dmitry Beresnev committed on
Commit
6379bd0
·
1 Parent(s): 58d70b1

Fix web UI chat by adding buffered SSE fallback

Browse files
Files changed (1) hide show
  1. cpp/server.cpp +85 -6
cpp/server.cpp CHANGED
@@ -9,6 +9,7 @@
9
 
10
  #include <algorithm>
11
  #include <atomic>
 
12
  #include <utility>
13
 
14
  namespace beast = boost::beast;
@@ -16,6 +17,79 @@ namespace http = beast::http;
16
 
17
  static std::atomic<uint64_t> g_req_id{1};
18
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  http::response<http::string_body> handle_request(
20
  ModelManager &manager,
21
  const ManagerConfig &config,
@@ -190,11 +264,11 @@ http::response<http::string_body> handle_request(
190
  if (payload.is_discarded()) {
191
  return json_response(http::status::bad_request, {{"error", "Invalid JSON"}});
192
  }
193
- if (request_stream_enabled(payload)) {
194
- if (!config.streaming.enabled) {
195
- return json_response(http::status::not_implemented, {{"error", "Streaming is disabled"}});
196
- }
197
- return json_response(http::status::not_implemented, {{"error", "Streaming relay is not implemented yet"}});
198
  }
199
 
200
  std::string token_error;
@@ -216,7 +290,8 @@ http::response<http::string_body> handle_request(
216
  rate_limit_decision.retry_after_sec);
217
  }
218
 
219
- auto ctx = registry.create(request_id, *authenticated, *estimate, req.body());
 
220
  if (!scheduler.try_enqueue(ctx)) {
221
  ctx->cancelled.store(true);
222
  registry.complete(ctx, RequestState::CANCELLED, {503, R"({"error":"Queue full"})"});
@@ -260,6 +335,10 @@ http::response<http::string_body> handle_request(
260
 
261
  http::response<http::string_body> res{
262
  static_cast<http::status>(result.status), req.version()};
 
 
 
 
263
  res.set(http::field::content_type, result.content_type);
264
  res.set(http::field::server, "llm-manager");
265
  res.set("X-Request-Id", request_id);
 
9
 
10
  #include <algorithm>
11
  #include <atomic>
12
+ #include <sstream>
13
  #include <utility>
14
 
15
  namespace beast = boost::beast;
 
17
 
18
  static std::atomic<uint64_t> g_req_id{1};
19
 
20
+ static std::string build_sse_event(const json &payload) {
21
+ return "data: " + payload.dump() + "\n\n";
22
+ }
23
+
24
+ static std::string build_buffered_stream_response(const std::string &completion_body) {
25
+ json completion = json::parse(completion_body, nullptr, false);
26
+ if (completion.is_discarded() || !completion.is_object()) {
27
+ return "data: [DONE]\n\n";
28
+ }
29
+
30
+ const std::string id = completion.value("id", "chatcmpl-buffered");
31
+ const std::string model = completion.value("model", "");
32
+ const auto created = completion.value("created", 0);
33
+
34
+ std::string assistant_content;
35
+ if (completion.contains("choices") && completion["choices"].is_array() && !completion["choices"].empty()) {
36
+ const auto &choice = completion["choices"][0];
37
+ if (choice.is_object() && choice.contains("message") && choice["message"].is_object()) {
38
+ const auto &message = choice["message"];
39
+ if (message.contains("content") && message["content"].is_string()) {
40
+ assistant_content = message["content"].get<std::string>();
41
+ }
42
+ }
43
+ }
44
+
45
+ std::ostringstream oss;
46
+ oss << build_sse_event({
47
+ {"id", id},
48
+ {"object", "chat.completion.chunk"},
49
+ {"created", created},
50
+ {"model", model},
51
+ {"choices", json::array({
52
+ {
53
+ {"index", 0},
54
+ {"delta", {{"role", "assistant"}}},
55
+ {"finish_reason", nullptr}
56
+ }
57
+ })}
58
+ });
59
+
60
+ if (!assistant_content.empty()) {
61
+ oss << build_sse_event({
62
+ {"id", id},
63
+ {"object", "chat.completion.chunk"},
64
+ {"created", created},
65
+ {"model", model},
66
+ {"choices", json::array({
67
+ {
68
+ {"index", 0},
69
+ {"delta", {{"content", assistant_content}}},
70
+ {"finish_reason", nullptr}
71
+ }
72
+ })}
73
+ });
74
+ }
75
+
76
+ oss << build_sse_event({
77
+ {"id", id},
78
+ {"object", "chat.completion.chunk"},
79
+ {"created", created},
80
+ {"model", model},
81
+ {"choices", json::array({
82
+ {
83
+ {"index", 0},
84
+ {"delta", json::object()},
85
+ {"finish_reason", "stop"}
86
+ }
87
+ })}
88
+ });
89
+ oss << "data: [DONE]\n\n";
90
+ return oss.str();
91
+ }
92
+
93
  http::response<http::string_body> handle_request(
94
  ModelManager &manager,
95
  const ManagerConfig &config,
 
264
  if (payload.is_discarded()) {
265
  return json_response(http::status::bad_request, {{"error", "Invalid JSON"}});
266
  }
267
+ const bool stream_requested = request_stream_enabled(payload);
268
+ if (stream_requested) {
269
+ payload["stream"] = false;
270
+ log_line("request_id=" + request_id +
271
+ " stream_requested=true mode=buffered_sse_fallback");
272
  }
273
 
274
  std::string token_error;
 
290
  rate_limit_decision.retry_after_sec);
291
  }
292
 
293
+ const std::string upstream_request_body = payload.dump();
294
+ auto ctx = registry.create(request_id, *authenticated, *estimate, upstream_request_body);
295
  if (!scheduler.try_enqueue(ctx)) {
296
  ctx->cancelled.store(true);
297
  registry.complete(ctx, RequestState::CANCELLED, {503, R"({"error":"Queue full"})"});
 
335
 
336
  http::response<http::string_body> res{
337
  static_cast<http::status>(result.status), req.version()};
338
+ if (stream_requested && result.status >= 200 && result.status < 300) {
339
+ result.body = build_buffered_stream_response(result.body);
340
+ result.content_type = "text/event-stream; charset=utf-8";
341
+ }
342
  res.set(http::field::content_type, result.content_type);
343
  res.set(http::field::server, "llm-manager");
344
  res.set("X-Request-Id", request_id);