Spaces:
Running
Running
Dmitry Beresnev committed on
Commit ·
677456b
1
Parent(s): 6379bd0
Fix 400 for llama.cpp web UI completion requests
Browse files- cpp/server.cpp +101 -11
cpp/server.cpp
CHANGED
|
@@ -21,6 +21,95 @@ static std::string build_sse_event(const json &payload) {
|
|
| 21 |
return "data: " + payload.dump() + "\n\n";
|
| 22 |
}
|
| 23 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
static std::string build_buffered_stream_response(const std::string &completion_body) {
|
| 25 |
json completion = json::parse(completion_body, nullptr, false);
|
| 26 |
if (completion.is_discarded() || !completion.is_object()) {
|
|
@@ -31,16 +120,7 @@ static std::string build_buffered_stream_response(const std::string &completion_
|
|
| 31 |
const std::string model = completion.value("model", "");
|
| 32 |
const auto created = completion.value("created", 0);
|
| 33 |
|
| 34 |
-
std::string assistant_content;
|
| 35 |
-
if (completion.contains("choices") && completion["choices"].is_array() && !completion["choices"].empty()) {
|
| 36 |
-
const auto &choice = completion["choices"][0];
|
| 37 |
-
if (choice.is_object() && choice.contains("message") && choice["message"].is_object()) {
|
| 38 |
-
const auto &message = choice["message"];
|
| 39 |
-
if (message.contains("content") && message["content"].is_string()) {
|
| 40 |
-
assistant_content = message["content"].get<std::string>();
|
| 41 |
-
}
|
| 42 |
-
}
|
| 43 |
-
}
|
| 44 |
|
| 45 |
std::ostringstream oss;
|
| 46 |
oss << build_sse_event({
|
|
@@ -258,18 +338,25 @@ http::response<http::string_body> handle_request(
|
|
| 258 |
}
|
| 259 |
}
|
| 260 |
|
| 261 |
-
if (path == "/v1/chat/completions" && req.method() == http::verb::post) {
|
| 262 |
if (auto auth_res = ensure_authenticated(Role::USER)) return *auth_res;
|
| 263 |
json payload = json::parse(req.body(), nullptr, false);
|
| 264 |
if (payload.is_discarded()) {
|
| 265 |
return json_response(http::status::bad_request, {{"error", "Invalid JSON"}});
|
| 266 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
| 267 |
const bool stream_requested = request_stream_enabled(payload);
|
| 268 |
if (stream_requested) {
|
| 269 |
payload["stream"] = false;
|
| 270 |
log_line("request_id=" + request_id +
|
| 271 |
" stream_requested=true mode=buffered_sse_fallback");
|
| 272 |
}
|
|
|
|
|
|
|
|
|
|
| 273 |
|
| 274 |
std::string token_error;
|
| 275 |
auto estimate = estimate_chat_tokens(payload, config.limits, token_error);
|
|
@@ -338,6 +425,9 @@ http::response<http::string_body> handle_request(
|
|
| 338 |
if (stream_requested && result.status >= 200 && result.status < 300) {
|
| 339 |
result.body = build_buffered_stream_response(result.body);
|
| 340 |
result.content_type = "text/event-stream; charset=utf-8";
|
|
|
|
|
|
|
|
|
|
| 341 |
}
|
| 342 |
res.set(http::field::content_type, result.content_type);
|
| 343 |
res.set(http::field::server, "llm-manager");
|
|
|
|
| 21 |
return "data: " + payload.dump() + "\n\n";
|
| 22 |
}
|
| 23 |
|
| 24 |
+
static std::string extract_chat_text(const json &completion) {
|
| 25 |
+
if (!completion.is_object()) return "";
|
| 26 |
+
if (!completion.contains("choices") || !completion["choices"].is_array() || completion["choices"].empty()) {
|
| 27 |
+
return "";
|
| 28 |
+
}
|
| 29 |
+
const auto &choice = completion["choices"][0];
|
| 30 |
+
if (!choice.is_object()) return "";
|
| 31 |
+
if (choice.contains("message") && choice["message"].is_object()) {
|
| 32 |
+
const auto &message = choice["message"];
|
| 33 |
+
if (message.contains("content") && message["content"].is_string()) {
|
| 34 |
+
return message["content"].get<std::string>();
|
| 35 |
+
}
|
| 36 |
+
}
|
| 37 |
+
if (choice.contains("text") && choice["text"].is_string()) {
|
| 38 |
+
return choice["text"].get<std::string>();
|
| 39 |
+
}
|
| 40 |
+
return "";
|
| 41 |
+
}
|
| 42 |
+
|
| 43 |
+
// Translates a llama.cpp /completion request body into the /v1/chat/completions
// schema used by the rest of the pipeline. The prompt (string, or array of
// strings joined with newlines) becomes one user message; n_predict (native)
// takes precedence over max_tokens (OpenAI-style), with non-positive values
// normalized to limits.default_max_tokens. Sampling knobs pass through as-is.
static json completion_payload_to_chat_payload(const json &payload, const LimitsConfig &limits) {
    json chat = json::object();
    chat["messages"] = json::array();

    const auto push_user_message = [&chat](const std::string &content) {
        chat["messages"].push_back({
            {"role", "user"},
            {"content", content}
        });
    };

    if (payload.contains("prompt")) {
        const json &prompt = payload["prompt"];
        if (prompt.is_string()) {
            push_user_message(prompt.get<std::string>());
        } else if (prompt.is_array()) {
            // Join string elements with "\n"; non-string entries are skipped.
            std::string merged;
            bool need_separator = false;
            for (const auto &part : prompt) {
                if (!part.is_string()) continue;
                if (need_separator) merged += "\n";
                merged += part.get<std::string>();
                need_separator = true;
            }
            push_user_message(merged);
        }
    }

    int max_tokens = limits.default_max_tokens;
    if (payload.contains("n_predict") && payload["n_predict"].is_number_integer()) {
        max_tokens = payload["n_predict"].get<int>();
    } else if (payload.contains("max_tokens") && payload["max_tokens"].is_number_integer()) {
        max_tokens = payload["max_tokens"].get<int>();
    }
    // Guard against 0 / negative (llama.cpp uses -1 for "unbounded").
    chat["max_tokens"] = max_tokens > 0 ? max_tokens : limits.default_max_tokens;

    // Downstream always expects at least one message, even with no usable prompt.
    if (chat["messages"].empty()) {
        push_user_message("");
    }

    // Forward sampling / control parameters untouched when the client sent them.
    for (const char *key : {"temperature", "top_p", "top_k", "stop", "stream"}) {
        if (payload.contains(key)) chat[key] = payload[key];
    }
    return chat;
}
|
| 94 |
+
|
| 95 |
+
static std::string build_completion_compat_response(const std::string &completion_body) {
|
| 96 |
+
json completion = json::parse(completion_body, nullptr, false);
|
| 97 |
+
if (completion.is_discarded() || !completion.is_object()) {
|
| 98 |
+
return completion_body;
|
| 99 |
+
}
|
| 100 |
+
|
| 101 |
+
json out = {
|
| 102 |
+
{"content", extract_chat_text(completion)}
|
| 103 |
+
};
|
| 104 |
+
|
| 105 |
+
if (completion.contains("stop")) out["stop"] = completion["stop"];
|
| 106 |
+
if (completion.contains("stopped_eos")) out["stopped_eos"] = completion["stopped_eos"];
|
| 107 |
+
if (completion.contains("stopped_limit")) out["stopped_limit"] = completion["stopped_limit"];
|
| 108 |
+
if (completion.contains("tokens_predicted")) out["tokens_predicted"] = completion["tokens_predicted"];
|
| 109 |
+
if (completion.contains("tokens_evaluated")) out["tokens_evaluated"] = completion["tokens_evaluated"];
|
| 110 |
+
return out.dump();
|
| 111 |
+
}
|
| 112 |
+
|
| 113 |
static std::string build_buffered_stream_response(const std::string &completion_body) {
|
| 114 |
json completion = json::parse(completion_body, nullptr, false);
|
| 115 |
if (completion.is_discarded() || !completion.is_object()) {
|
|
|
|
| 120 |
const std::string model = completion.value("model", "");
|
| 121 |
const auto created = completion.value("created", 0);
|
| 122 |
|
| 123 |
+
const std::string assistant_content = extract_chat_text(completion);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 124 |
|
| 125 |
std::ostringstream oss;
|
| 126 |
oss << build_sse_event({
|
|
|
|
| 338 |
}
|
| 339 |
}
|
| 340 |
|
| 341 |
+
if ((path == "/v1/chat/completions" || path == "/completion") && req.method() == http::verb::post) {
|
| 342 |
if (auto auth_res = ensure_authenticated(Role::USER)) return *auth_res;
|
| 343 |
json payload = json::parse(req.body(), nullptr, false);
|
| 344 |
if (payload.is_discarded()) {
|
| 345 |
return json_response(http::status::bad_request, {{"error", "Invalid JSON"}});
|
| 346 |
}
|
| 347 |
+
const bool completion_compat_mode = path == "/completion";
|
| 348 |
+
if (completion_compat_mode) {
|
| 349 |
+
payload = completion_payload_to_chat_payload(payload, config.limits);
|
| 350 |
+
}
|
| 351 |
const bool stream_requested = request_stream_enabled(payload);
|
| 352 |
if (stream_requested) {
|
| 353 |
payload["stream"] = false;
|
| 354 |
log_line("request_id=" + request_id +
|
| 355 |
" stream_requested=true mode=buffered_sse_fallback");
|
| 356 |
}
|
| 357 |
+
if (completion_compat_mode) {
|
| 358 |
+
log_line("request_id=" + request_id + " completion_compat_mode=true");
|
| 359 |
+
}
|
| 360 |
|
| 361 |
std::string token_error;
|
| 362 |
auto estimate = estimate_chat_tokens(payload, config.limits, token_error);
|
|
|
|
| 425 |
if (stream_requested && result.status >= 200 && result.status < 300) {
|
| 426 |
result.body = build_buffered_stream_response(result.body);
|
| 427 |
result.content_type = "text/event-stream; charset=utf-8";
|
| 428 |
+
} else if (completion_compat_mode && result.status >= 200 && result.status < 300) {
|
| 429 |
+
result.body = build_completion_compat_response(result.body);
|
| 430 |
+
result.content_type = "application/json";
|
| 431 |
}
|
| 432 |
res.set(http::field::content_type, result.content_type);
|
| 433 |
res.set(http::field::server, "llm-manager");
|