Dmitry Beresnev committed on
Commit
677456b
·
1 Parent(s): 6379bd0

Fix 400 for llama.cpp web UI completion requests

Browse files
Files changed (1) hide show
  1. cpp/server.cpp +101 -11
cpp/server.cpp CHANGED
@@ -21,6 +21,95 @@ static std::string build_sse_event(const json &payload) {
21
  return "data: " + payload.dump() + "\n\n";
22
  }
23
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  static std::string build_buffered_stream_response(const std::string &completion_body) {
25
  json completion = json::parse(completion_body, nullptr, false);
26
  if (completion.is_discarded() || !completion.is_object()) {
@@ -31,16 +120,7 @@ static std::string build_buffered_stream_response(const std::string &completion_
31
  const std::string model = completion.value("model", "");
32
  const auto created = completion.value("created", 0);
33
 
34
- std::string assistant_content;
35
- if (completion.contains("choices") && completion["choices"].is_array() && !completion["choices"].empty()) {
36
- const auto &choice = completion["choices"][0];
37
- if (choice.is_object() && choice.contains("message") && choice["message"].is_object()) {
38
- const auto &message = choice["message"];
39
- if (message.contains("content") && message["content"].is_string()) {
40
- assistant_content = message["content"].get<std::string>();
41
- }
42
- }
43
- }
44
 
45
  std::ostringstream oss;
46
  oss << build_sse_event({
@@ -258,18 +338,25 @@ http::response<http::string_body> handle_request(
258
  }
259
  }
260
 
261
- if (path == "/v1/chat/completions" && req.method() == http::verb::post) {
262
  if (auto auth_res = ensure_authenticated(Role::USER)) return *auth_res;
263
  json payload = json::parse(req.body(), nullptr, false);
264
  if (payload.is_discarded()) {
265
  return json_response(http::status::bad_request, {{"error", "Invalid JSON"}});
266
  }
 
 
 
 
267
  const bool stream_requested = request_stream_enabled(payload);
268
  if (stream_requested) {
269
  payload["stream"] = false;
270
  log_line("request_id=" + request_id +
271
  " stream_requested=true mode=buffered_sse_fallback");
272
  }
 
 
 
273
 
274
  std::string token_error;
275
  auto estimate = estimate_chat_tokens(payload, config.limits, token_error);
@@ -338,6 +425,9 @@ http::response<http::string_body> handle_request(
338
  if (stream_requested && result.status >= 200 && result.status < 300) {
339
  result.body = build_buffered_stream_response(result.body);
340
  result.content_type = "text/event-stream; charset=utf-8";
 
 
 
341
  }
342
  res.set(http::field::content_type, result.content_type);
343
  res.set(http::field::server, "llm-manager");
 
21
  return "data: " + payload.dump() + "\n\n";
22
  }
23
 
24
+ static std::string extract_chat_text(const json &completion) {
25
+ if (!completion.is_object()) return "";
26
+ if (!completion.contains("choices") || !completion["choices"].is_array() || completion["choices"].empty()) {
27
+ return "";
28
+ }
29
+ const auto &choice = completion["choices"][0];
30
+ if (!choice.is_object()) return "";
31
+ if (choice.contains("message") && choice["message"].is_object()) {
32
+ const auto &message = choice["message"];
33
+ if (message.contains("content") && message["content"].is_string()) {
34
+ return message["content"].get<std::string>();
35
+ }
36
+ }
37
+ if (choice.contains("text") && choice["text"].is_string()) {
38
+ return choice["text"].get<std::string>();
39
+ }
40
+ return "";
41
+ }
42
+
43
+ static json completion_payload_to_chat_payload(const json &payload, const LimitsConfig &limits) {
44
+ json chat_payload = json::object();
45
+ chat_payload["messages"] = json::array();
46
+
47
+ if (payload.contains("prompt")) {
48
+ if (payload["prompt"].is_string()) {
49
+ chat_payload["messages"].push_back({
50
+ {"role", "user"},
51
+ {"content", payload["prompt"].get<std::string>()}
52
+ });
53
+ } else if (payload["prompt"].is_array()) {
54
+ std::string joined_prompt;
55
+ bool first = true;
56
+ for (const auto &item : payload["prompt"]) {
57
+ if (!item.is_string()) continue;
58
+ if (!first) joined_prompt += "\n";
59
+ joined_prompt += item.get<std::string>();
60
+ first = false;
61
+ }
62
+ chat_payload["messages"].push_back({
63
+ {"role", "user"},
64
+ {"content", joined_prompt}
65
+ });
66
+ }
67
+ }
68
+
69
+ int normalized_max_tokens = limits.default_max_tokens;
70
+ if (payload.contains("n_predict") && payload["n_predict"].is_number_integer()) {
71
+ normalized_max_tokens = payload["n_predict"].get<int>();
72
+ } else if (payload.contains("max_tokens") && payload["max_tokens"].is_number_integer()) {
73
+ normalized_max_tokens = payload["max_tokens"].get<int>();
74
+ }
75
+ if (normalized_max_tokens <= 0) {
76
+ normalized_max_tokens = limits.default_max_tokens;
77
+ }
78
+ chat_payload["max_tokens"] = normalized_max_tokens;
79
+
80
+ if (chat_payload["messages"].empty()) {
81
+ chat_payload["messages"].push_back({
82
+ {"role", "user"},
83
+ {"content", ""}
84
+ });
85
+ }
86
+
87
+ if (payload.contains("temperature")) chat_payload["temperature"] = payload["temperature"];
88
+ if (payload.contains("top_p")) chat_payload["top_p"] = payload["top_p"];
89
+ if (payload.contains("top_k")) chat_payload["top_k"] = payload["top_k"];
90
+ if (payload.contains("stop")) chat_payload["stop"] = payload["stop"];
91
+ if (payload.contains("stream")) chat_payload["stream"] = payload["stream"];
92
+ return chat_payload;
93
+ }
94
+
95
+ static std::string build_completion_compat_response(const std::string &completion_body) {
96
+ json completion = json::parse(completion_body, nullptr, false);
97
+ if (completion.is_discarded() || !completion.is_object()) {
98
+ return completion_body;
99
+ }
100
+
101
+ json out = {
102
+ {"content", extract_chat_text(completion)}
103
+ };
104
+
105
+ if (completion.contains("stop")) out["stop"] = completion["stop"];
106
+ if (completion.contains("stopped_eos")) out["stopped_eos"] = completion["stopped_eos"];
107
+ if (completion.contains("stopped_limit")) out["stopped_limit"] = completion["stopped_limit"];
108
+ if (completion.contains("tokens_predicted")) out["tokens_predicted"] = completion["tokens_predicted"];
109
+ if (completion.contains("tokens_evaluated")) out["tokens_evaluated"] = completion["tokens_evaluated"];
110
+ return out.dump();
111
+ }
112
+
113
  static std::string build_buffered_stream_response(const std::string &completion_body) {
114
  json completion = json::parse(completion_body, nullptr, false);
115
  if (completion.is_discarded() || !completion.is_object()) {
 
120
  const std::string model = completion.value("model", "");
121
  const auto created = completion.value("created", 0);
122
 
123
+ const std::string assistant_content = extract_chat_text(completion);
 
 
 
 
 
 
 
 
 
124
 
125
  std::ostringstream oss;
126
  oss << build_sse_event({
 
338
  }
339
  }
340
 
341
+ if ((path == "/v1/chat/completions" || path == "/completion") && req.method() == http::verb::post) {
342
  if (auto auth_res = ensure_authenticated(Role::USER)) return *auth_res;
343
  json payload = json::parse(req.body(), nullptr, false);
344
  if (payload.is_discarded()) {
345
  return json_response(http::status::bad_request, {{"error", "Invalid JSON"}});
346
  }
347
+ const bool completion_compat_mode = path == "/completion";
348
+ if (completion_compat_mode) {
349
+ payload = completion_payload_to_chat_payload(payload, config.limits);
350
+ }
351
  const bool stream_requested = request_stream_enabled(payload);
352
  if (stream_requested) {
353
  payload["stream"] = false;
354
  log_line("request_id=" + request_id +
355
  " stream_requested=true mode=buffered_sse_fallback");
356
  }
357
+ if (completion_compat_mode) {
358
+ log_line("request_id=" + request_id + " completion_compat_mode=true");
359
+ }
360
 
361
  std::string token_error;
362
  auto estimate = estimate_chat_tokens(payload, config.limits, token_error);
 
425
  if (stream_requested && result.status >= 200 && result.status < 300) {
426
  result.body = build_buffered_stream_response(result.body);
427
  result.content_type = "text/event-stream; charset=utf-8";
428
+ } else if (completion_compat_mode && result.status >= 200 && result.status < 300) {
429
+ result.body = build_completion_compat_response(result.body);
430
+ result.content_type = "application/json";
431
  }
432
  res.set(http::field::content_type, result.content_type);
433
  res.set(http::field::server, "llm-manager");