Spaces:

ResearchEngineering
/

AGI

Sleeping

App Files Community

Dmitry Beresnev committed on Mar 14

Commit

8ef326a

1 Parent(s): a97386f

add new endpoint to cancel all processing prompts

Browse files

Files changed (1) hide show

cpp/llm_manager.cpp +65 -0

cpp/llm_manager.cpp CHANGED Viewed

@@ -145,6 +145,54 @@ public:
         return true;
     }
     std::optional<WorkerInfo> active_worker() {
         std::lock_guard<std::mutex> lock(_mu);
         if (_active && is_alive(_active->pid)) return _active;
@@ -404,6 +452,23 @@ http::response<http::string_body> handle_request(
             return json_response(http::status::ok, state);
         }
         if (path == "/v1/chat/completions" && req.method() == http::verb::post) {
             auto worker = manager.active_worker();
             if (!worker) {

         return true;
     }
+    // Restarts the currently active worker, keeping the same model.
+    //
+    // Shuts down the worker recorded in `_active`, spawns a fresh worker for
+    // the same model on a port obtained from allocate_port(), and waits up to
+    // `_switch_timeout_sec` for it to become ready before publishing it as the
+    // new `_active`.
+    //
+    // @param error  Set to a human-readable reason when false is returned.
+    //               The "/stop" handler compares these strings verbatim to
+    //               pick an HTTP status, so they must stay exactly as written.
+    // @return true when the replacement worker is ready and installed;
+    //         false when a switch is already running, no live worker exists,
+    //         the spawn fails, or the new worker is not ready in time.
+    bool restart_active(std::string &error) {
+        std::optional<WorkerInfo> previous;
+        std::string model_name;
+        {
+            // Validate and claim the switch flag in one critical section so
+            // that two overlapping restarts cannot both proceed.
+            std::lock_guard<std::mutex> guard(_mu);
+            if (_switch_in_progress) {
+                error = "Switch already in progress";
+                return false;
+            }
+            const bool has_live_worker = _active && is_alive(_active->pid);
+            if (!has_live_worker) {
+                error = "No active model";
+                return false;
+            }
+            _switch_in_progress = true;
+            previous = _active;
+            model_name = _active->model;
+        }
+
+        // Common failure path: drop the active worker, release the switch
+        // flag and report the reason, all under the mutex.
+        auto abort_restart = [this, &error](const char *reason) {
+            std::lock_guard<std::mutex> guard(_mu);
+            _active = std::nullopt;
+            _switch_in_progress = false;
+            error = reason;
+            return false;
+        };
+
+        // The slow steps (shutdown, spawn, readiness wait) run without the
+        // mutex held; `_switch_in_progress` keeps other switches out meanwhile.
+        shutdown_worker(previous->pid);
+        int new_port = allocate_port();
+        pid_t new_pid = spawn_worker(model_name, new_port);
+        if (new_pid <= 0) {
+            return abort_restart("Failed to start worker process");
+        }
+        if (!wait_until_ready(new_pid, new_port, _switch_timeout_sec)) {
+            // Do not leave a half-started worker behind.
+            shutdown_worker(new_pid);
+            return abort_restart("New model did not become ready in time");
+        }
+
+        WorkerInfo replacement{model_name, new_port, new_pid, now_utc_iso()};
+        std::lock_guard<std::mutex> guard(_mu);
+        _active = replacement;
+        _switch_in_progress = false;
+        return true;
+    }
     std::optional<WorkerInfo> active_worker() {
         std::lock_guard<std::mutex> lock(_mu);
         if (_active && is_alive(_active->pid)) return _active;
             return json_response(http::status::ok, state);
         }
+        if (path == "/stop" && req.method() == http::verb::post) {
+            std::string err;
+            bool ok = manager.restart_active(err);
+            if (!ok) {
+                http::status status = http::status::internal_server_error;
+                if (err == "Switch already in progress") {
+                    status = http::status::conflict;
+                } else if (err == "No active model") {
+                    status = http::status::service_unavailable;
+                }
+                return json_response(status, {{"status", "error"}, {"error", err}});
+            }
+            auto state = manager.models_view();
+            state["message"] = "Stopped in-flight prompts and restarted model";
+            return json_response(http::status::ok, state);
+        }
         if (path == "/v1/chat/completions" && req.method() == http::verb::post) {
             auto worker = manager.active_worker();
             if (!worker) {