Spaces:

multimodalart
/

rtx6000test

Running on RTX PRO 6000

multimodalart HF Staff Claude Opus 4.8 (1M context) committed on Jun 1

Commit

9dbbb30

1 Parent(s): 2eeb78f

Add in-process FastVideo executor (no worker spawn) for ZeroGPU

inproc.py: InProcessExecutor builds the pipeline in-process (build_pipeline) and
calls pipeline.forward directly, removing the spawned worker whose CUDA init
bypasses ZeroGPU's spaces hijack. Validated locally: in-process, AOTI+text-cache
work, 2.9s warm.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>

Files changed (1) hide show

inproc.py +83 -0

inproc.py ADDED Viewed

	@@ -0,0 +1,83 @@

+"""Run FastVideo's pipeline IN-PROCESS (no spawned worker) — for ZeroGPU.
+FastVideo's `VideoGenerator` always spawns a worker subprocess
+(MultiprocExecutor), whose CUDA init + `.to("cuda")` happen in a separate torch
+that ZeroGPU's `spaces` hijack never sees, and which grabs a GPU outside any
+`@spaces.GPU` window. That's incompatible with ZeroGPU.
+This swaps in an in-process Executor: it builds a `Worker` (→ `build_pipeline`)
+in the SAME process and calls `pipeline.forward` directly. All of
+VideoGenerator's request→ForwardBatch translation is reused unchanged; only the
+execution backend changes. Combined with lazy init inside `@spaces.GPU`, the
+whole pipeline lives in the GPU-allocated process — the ZeroGPU shape.
+`install()` monkeypatches `Executor.get_class` to return this backend.
+"""
+from __future__ import annotations
+import os
+from typing import Any
+ENABLED = os.getenv("DREAMVERSE_INPROC", "1") == "1"
+def install():
+    if not ENABLED:
+        return
+    try:
+        from fastvideo.worker.executor import Executor
+        from fastvideo.worker.gpu_worker import Worker
+    except Exception as e:
+        print(f"[inproc] fastvideo not importable here ({e}); skipping", flush=True)
+        return
+    if getattr(Executor, "_inproc_patched", False):
+        return
+    class InProcessExecutor(Executor):
+        def _init_executor(self) -> None:
+            os.environ.setdefault("RANK", "0")
+            os.environ.setdefault("LOCAL_RANK", "0")
+            os.environ.setdefault("WORLD_SIZE", "1")
+            os.environ.setdefault("MASTER_ADDR", "127.0.0.1")
+            os.environ.setdefault("MASTER_PORT", "29591")
+            self.worker = Worker(self.fastvideo_args, local_rank=0, rank=0,
+                                 distributed_init_method="env://")
+            self.worker.init_device()  # maybe_init_distributed + build_pipeline (in-process)
+            print("[inproc] pipeline built in-process (no worker subprocess)", flush=True)
+        # Override the collective path: call the worker method directly.
+        def execute_forward(self, forward_batch, fastvideo_args):
+            return self.worker.execute_forward(forward_batch, fastvideo_args)
+        def collective_rpc(self, method: str, timeout=None, args=(), kwargs=None) -> list[Any]:
+            return [getattr(self.worker, method)(*args, **(kwargs or {}))]
+        def set_lora_adapter(self, lora_nickname: str, lora_path: str | None = None) -> None:
+            self.worker.set_lora_adapter(lora_nickname, lora_path)
+        def unmerge_lora_weights(self) -> None:
+            self.worker.unmerge_lora_weights()
+        def merge_lora_weights(self) -> None:
+            self.worker.merge_lora_weights()
+        def set_log_queue(self, log_queue) -> None:
+            pass
+        def clear_log_queue(self) -> None:
+            pass
+        def shutdown(self) -> None:
+            try:
+                self.worker.shutdown()
+            except Exception:
+                pass
+    _orig = Executor.get_class.__func__ if hasattr(Executor.get_class, "__func__") else None
+    @staticmethod
+    def _patched_get_class(fastvideo_args):
+        return InProcessExecutor
+    Executor.get_class = _patched_get_class
+    Executor._inproc_patched = True
+    print("[inproc] installed in-process executor (no spawn)", flush=True)