AlbeRota committed on
Commit
2c3f571
·
1 Parent(s): 571eb53

ZeroGPU loading fixed

Browse files
Files changed (1) hide show
  1. app.py +50 -27
app.py CHANGED
@@ -108,62 +108,81 @@ _cached_device = None
108
 
109
 
110
  def _get_model(device: str):
111
- """Return the pretrained model, loading it once and reusing."""
112
  global _cached_ura_model, _cached_device
113
  assets = _get_assets()
114
- if _cached_ura_model is not None and _cached_device == device:
115
- return _cached_ura_model
116
  from unreflectanything import model
117
 
118
- _cached_ura_model = model(
119
- pretrained=True,
120
- weights_path=assets.weights_path,
121
- # weights_path="/home/arota/UnReflectAnything/weights/full_model_weights.pt",
122
- config_path=assets.config_path,
123
- device=device,
124
- verbose=False,
125
- skip_path_resolution=True,
126
- )
127
- _cached_device = device
 
 
 
 
 
 
 
 
 
128
  return _cached_ura_model
129
 
130
-
131
  def build_ui():
132
  _get_assets()
133
- device = "cuda" if torch.cuda.is_available() else "cpu"
134
- # Start loading the model in the background so it is ready (or nearly ready) by first use.
135
- print(f"Initializing model on {device}...")
136
- _get_model(device)
137
 
 
138
  @spaces.GPU if spaces else lambda x: x
139
  def run_inference(image: np.ndarray | None) -> np.ndarray | None:
140
- """Run reflection removal using the cached model. Returns RGB numpy [H,W,3] in 0–255 or None."""
141
  if image is None:
142
  return None
 
143
  from torchvision.transforms import functional as TF
 
144
 
 
 
145
  ura_model = _get_model(device)
 
146
  target_side = ura_model.image_size
147
- # image: [H, W, 3] uint8 0–255
148
  h, w = image.shape[:2]
149
- tensor = TF.to_tensor(image).unsqueeze(0) # [1, 3, H, W], [0, 1]
 
 
150
  tensor = TF.resize(tensor, [target_side, target_side], antialias=True)
151
- tensor = tensor.to(ura_model.device, dtype=torch.float32)
152
- mask = tensor.mean(1, keepdim=True) > 0.9 # [1, 1, S, S]
153
- import time
 
154
 
155
  with torch.no_grad():
156
  start_time = time.time()
 
157
  diffuse = ura_model(images=tensor, inpaint_mask_override=mask)
158
  end_time = time.time()
159
- diffuse = diffuse.cpu()
160
  inference_time_ms = (end_time - start_time) * 1000
161
- gr.Success(f"Inference time: {inference_time_ms:.1f} ms")
 
 
 
162
  diffuse = TF.resize(diffuse, [h, w], antialias=True)
163
  out = diffuse[0].numpy().transpose(1, 2, 0)
164
  out = (np.clip(out, 0.0, 1.0) * 255).astype(np.uint8)
165
  return out
166
 
 
 
167
  def run_inference_slider(
168
  image: np.ndarray | None,
169
  ) -> tuple[np.ndarray | None, np.ndarray | None] | None:
@@ -277,5 +296,9 @@ def _launch_with_allowed_paths(*args, **kwargs):
277
  demo.launch = _launch_with_allowed_paths
278
 
279
 
 
280
  if __name__ == "__main__":
281
- demo.launch()
 
 
 
 
108
 
109
 
110
  def _get_model(device: str):
111
+ """Return the pretrained model, loading it once and moving to the requested device."""
112
  global _cached_ura_model, _cached_device
113
  assets = _get_assets()
114
+
 
115
  from unreflectanything import model
116
 
117
+ # If the model isn't loaded yet, initialize it
118
+ if _cached_ura_model is None:
119
+ print(f"Loading model initially on {device}...")
120
+ _cached_ura_model = model(
121
+ pretrained=True,
122
+ weights_path=assets.weights_path,
123
+ config_path=assets.config_path,
124
+ device=device,
125
+ verbose=False,
126
+ skip_path_resolution=True,
127
+ )
128
+ _cached_device = device
129
+
130
+ # If the model is loaded but on the wrong device, move it
131
+ if _cached_device != device:
132
+ print(f"Moving model from {_cached_device} to {device}...")
133
+ _cached_ura_model.to(device)
134
+ _cached_device = device
135
+
136
  return _cached_ura_model
137
 
 
138
  def build_ui():
139
  _get_assets()
140
+ # PREVENT: _get_model("cuda") here. It will crash ZeroGPU during startup.
141
+ print("UI building... Model will initialize on first inference.")
 
 
142
 
143
+ # Note: Use the decorator directly on the function that does the heavy lifting
144
  @spaces.GPU if spaces else lambda x: x
145
  def run_inference(image: np.ndarray | None) -> np.ndarray | None:
146
+ """Run reflection removal using the cached model on GPU."""
147
  if image is None:
148
  return None
149
+
150
  from torchvision.transforms import functional as TF
151
+ import time
152
 
153
+ # Now it is safe to request 'cuda' because we are inside the @spaces.GPU wrapper
154
+ device = "cuda" if (torch.cuda.is_available() and spaces) else "cpu"
155
  ura_model = _get_model(device)
156
+
157
  target_side = ura_model.image_size
 
158
  h, w = image.shape[:2]
159
+
160
+ # Pre-processing
161
+ tensor = TF.to_tensor(image).unsqueeze(0) # [1, 3, H, W]
162
  tensor = TF.resize(tensor, [target_side, target_side], antialias=True)
163
+ tensor = tensor.to(device, dtype=torch.float32)
164
+
165
+ # Create mask based on highlights
166
+ mask = tensor.mean(1, keepdim=True) > 0.9
167
 
168
  with torch.no_grad():
169
  start_time = time.time()
170
+ # The model is already on 'device' thanks to _get_model
171
  diffuse = ura_model(images=tensor, inpaint_mask_override=mask)
172
  end_time = time.time()
173
+
174
  inference_time_ms = (end_time - start_time) * 1000
175
+ gr.Info(f"Inference complete in {inference_time_ms:.1f} ms") # Use gr.Info for better UX
176
+
177
+ # Post-processing
178
+ diffuse = diffuse.cpu()
179
  diffuse = TF.resize(diffuse, [h, w], antialias=True)
180
  out = diffuse[0].numpy().transpose(1, 2, 0)
181
  out = (np.clip(out, 0.0, 1.0) * 255).astype(np.uint8)
182
  return out
183
 
184
+ # ... keep your run_inference_slider and UI layout code the same ...
185
+
186
  def run_inference_slider(
187
  image: np.ndarray | None,
188
  ) -> tuple[np.ndarray | None, np.ndarray | None] | None:
 
296
  demo.launch = _launch_with_allowed_paths
297
 
298
 
299
+ # Replace your existing launch logic at the very bottom of the file with this:
300
  if __name__ == "__main__":
301
+ demo.launch(ssr_mode=False, server_name="0.0.0.0", server_port=7860)
302
+ else:
303
+ # This handles cases where Hugging Face imports the file
304
+ demo.launch(ssr_mode=False, server_name="0.0.0.0", server_port=7860)