Dmitry Beresnev committed on
Commit
8ef326a
·
1 Parent(s): a97386f

add new endpoint to cancel all processing prompts

Browse files
Files changed (1) hide show
  1. cpp/llm_manager.cpp +65 -0
cpp/llm_manager.cpp CHANGED
@@ -145,6 +145,54 @@ public:
145
  return true;
146
  }
147
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
148
  std::optional<WorkerInfo> active_worker() {
149
  std::lock_guard<std::mutex> lock(_mu);
150
  if (_active && is_alive(_active->pid)) return _active;
@@ -404,6 +452,23 @@ http::response<http::string_body> handle_request(
404
  return json_response(http::status::ok, state);
405
  }
406
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
407
  if (path == "/v1/chat/completions" && req.method() == http::verb::post) {
408
  auto worker = manager.active_worker();
409
  if (!worker) {
 
145
  return true;
146
  }
147
 
148
+ bool restart_active(std::string &error) {
149
+ std::optional<WorkerInfo> old_worker;
150
+ std::string model;
151
+ {
152
+ std::lock_guard<std::mutex> lock(_mu);
153
+ if (_switch_in_progress) {
154
+ error = "Switch already in progress";
155
+ return false;
156
+ }
157
+ if (!_active || !is_alive(_active->pid)) {
158
+ error = "No active model";
159
+ return false;
160
+ }
161
+ _switch_in_progress = true;
162
+ old_worker = _active;
163
+ model = _active->model;
164
+ }
165
+
166
+ shutdown_worker(old_worker->pid);
167
+
168
+ int port = allocate_port();
169
+ pid_t pid = spawn_worker(model, port);
170
+ if (pid <= 0) {
171
+ std::lock_guard<std::mutex> lock(_mu);
172
+ _active = std::nullopt;
173
+ _switch_in_progress = false;
174
+ error = "Failed to start worker process";
175
+ return false;
176
+ }
177
+
178
+ if (!wait_until_ready(pid, port, _switch_timeout_sec)) {
179
+ shutdown_worker(pid);
180
+ std::lock_guard<std::mutex> lock(_mu);
181
+ _active = std::nullopt;
182
+ _switch_in_progress = false;
183
+ error = "New model did not become ready in time";
184
+ return false;
185
+ }
186
+
187
+ WorkerInfo new_worker{model, port, pid, now_utc_iso()};
188
+ {
189
+ std::lock_guard<std::mutex> lock(_mu);
190
+ _active = new_worker;
191
+ _switch_in_progress = false;
192
+ }
193
+ return true;
194
+ }
195
+
196
  std::optional<WorkerInfo> active_worker() {
197
  std::lock_guard<std::mutex> lock(_mu);
198
  if (_active && is_alive(_active->pid)) return _active;
 
452
  return json_response(http::status::ok, state);
453
  }
454
 
455
+ if (path == "/stop" && req.method() == http::verb::post) {
456
+ std::string err;
457
+ bool ok = manager.restart_active(err);
458
+ if (!ok) {
459
+ http::status status = http::status::internal_server_error;
460
+ if (err == "Switch already in progress") {
461
+ status = http::status::conflict;
462
+ } else if (err == "No active model") {
463
+ status = http::status::service_unavailable;
464
+ }
465
+ return json_response(status, {{"status", "error"}, {"error", err}});
466
+ }
467
+ auto state = manager.models_view();
468
+ state["message"] = "Stopped in-flight prompts and restarted model";
469
+ return json_response(http::status::ok, state);
470
+ }
471
+
472
  if (path == "/v1/chat/completions" && req.method() == http::verb::post) {
473
  auto worker = manager.active_worker();
474
  if (!worker) {