Spaces:
Running
Running
Dmitry Beresnev committed on
Commit ·
8ef326a
1
Parent(s): a97386f
add new endpoint to cancel all processing prompts
Browse files- cpp/llm_manager.cpp +65 -0
cpp/llm_manager.cpp
CHANGED
|
@@ -145,6 +145,54 @@ public:
|
|
| 145 |
return true;
|
| 146 |
}
|
| 147 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 148 |
std::optional<WorkerInfo> active_worker() {
|
| 149 |
std::lock_guard<std::mutex> lock(_mu);
|
| 150 |
if (_active && is_alive(_active->pid)) return _active;
|
|
@@ -404,6 +452,23 @@ http::response<http::string_body> handle_request(
|
|
| 404 |
return json_response(http::status::ok, state);
|
| 405 |
}
|
| 406 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 407 |
if (path == "/v1/chat/completions" && req.method() == http::verb::post) {
|
| 408 |
auto worker = manager.active_worker();
|
| 409 |
if (!worker) {
|
|
|
|
| 145 |
return true;
|
| 146 |
}
|
| 147 |
|
| 148 |
+
bool restart_active(std::string &error) {
|
| 149 |
+
std::optional<WorkerInfo> old_worker;
|
| 150 |
+
std::string model;
|
| 151 |
+
{
|
| 152 |
+
std::lock_guard<std::mutex> lock(_mu);
|
| 153 |
+
if (_switch_in_progress) {
|
| 154 |
+
error = "Switch already in progress";
|
| 155 |
+
return false;
|
| 156 |
+
}
|
| 157 |
+
if (!_active || !is_alive(_active->pid)) {
|
| 158 |
+
error = "No active model";
|
| 159 |
+
return false;
|
| 160 |
+
}
|
| 161 |
+
_switch_in_progress = true;
|
| 162 |
+
old_worker = _active;
|
| 163 |
+
model = _active->model;
|
| 164 |
+
}
|
| 165 |
+
|
| 166 |
+
shutdown_worker(old_worker->pid);
|
| 167 |
+
|
| 168 |
+
int port = allocate_port();
|
| 169 |
+
pid_t pid = spawn_worker(model, port);
|
| 170 |
+
if (pid <= 0) {
|
| 171 |
+
std::lock_guard<std::mutex> lock(_mu);
|
| 172 |
+
_active = std::nullopt;
|
| 173 |
+
_switch_in_progress = false;
|
| 174 |
+
error = "Failed to start worker process";
|
| 175 |
+
return false;
|
| 176 |
+
}
|
| 177 |
+
|
| 178 |
+
if (!wait_until_ready(pid, port, _switch_timeout_sec)) {
|
| 179 |
+
shutdown_worker(pid);
|
| 180 |
+
std::lock_guard<std::mutex> lock(_mu);
|
| 181 |
+
_active = std::nullopt;
|
| 182 |
+
_switch_in_progress = false;
|
| 183 |
+
error = "New model did not become ready in time";
|
| 184 |
+
return false;
|
| 185 |
+
}
|
| 186 |
+
|
| 187 |
+
WorkerInfo new_worker{model, port, pid, now_utc_iso()};
|
| 188 |
+
{
|
| 189 |
+
std::lock_guard<std::mutex> lock(_mu);
|
| 190 |
+
_active = new_worker;
|
| 191 |
+
_switch_in_progress = false;
|
| 192 |
+
}
|
| 193 |
+
return true;
|
| 194 |
+
}
|
| 195 |
+
|
| 196 |
std::optional<WorkerInfo> active_worker() {
|
| 197 |
std::lock_guard<std::mutex> lock(_mu);
|
| 198 |
if (_active && is_alive(_active->pid)) return _active;
|
|
|
|
| 452 |
return json_response(http::status::ok, state);
|
| 453 |
}
|
| 454 |
|
| 455 |
+
if (path == "/stop" && req.method() == http::verb::post) {
|
| 456 |
+
std::string err;
|
| 457 |
+
bool ok = manager.restart_active(err);
|
| 458 |
+
if (!ok) {
|
| 459 |
+
http::status status = http::status::internal_server_error;
|
| 460 |
+
if (err == "Switch already in progress") {
|
| 461 |
+
status = http::status::conflict;
|
| 462 |
+
} else if (err == "No active model") {
|
| 463 |
+
status = http::status::service_unavailable;
|
| 464 |
+
}
|
| 465 |
+
return json_response(status, {{"status", "error"}, {"error", err}});
|
| 466 |
+
}
|
| 467 |
+
auto state = manager.models_view();
|
| 468 |
+
state["message"] = "Stopped in-flight prompts and restarted model";
|
| 469 |
+
return json_response(http::status::ok, state);
|
| 470 |
+
}
|
| 471 |
+
|
| 472 |
if (path == "/v1/chat/completions" && req.method() == http::verb::post) {
|
| 473 |
auto worker = manager.active_worker();
|
| 474 |
if (!worker) {
|