AlbeRota commited on
Commit
b604e51
·
1 Parent(s): 4ed98e6

Fix cache weights

Browse files
.cache/configs/pretrained_config.yaml ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ### BASELINE: CONVERGES AFTER LONG
2
+
3
+ parameters:
4
+
5
+ ### MODEL ARCHITECTURE
6
+ MODEL:
7
+ value:
8
+ MODEL_CLASS: "UnReflect_Model_TokenInpainter" # Main model class name (must match class in models.py)
9
+ MODEL_MODULE: "models" # Module name to import model classes from (default: "models")
10
+ RGB_ENCODER:
11
+ ENCODER: "facebook/dinov3-vitl16-pretrain-lvd1689m" # DINOv3 encoder model name (HuggingFace format)
12
+ IMAGE_SIZE: 448 # Input image size (height and width in pixels)
13
+ RETURN_SELECTED_LAYERS: [3, 6, 9, 12] # Transformer layer indices to extract features from (0-indexed)
14
+ RGB_ENCODER_LR: 0.0 # Learning rate for RGB encoder (0.0 = frozen, must be explicitly set)
15
+ DECODERS:
16
+ diffuse:
17
+ USE_FILM: False # Enable FiLM (Feature-wise Linear Modulation) conditioning in decoder
18
+ FEATURE_DIM: 1024 # Feature dimension for decoder (should match encoder output)
19
+ REASSEMBLE_OUT_CHANNELS: [768,1024,1536,2048] # Output channels for each decoder stage (DPT-style reassembly)
20
+ REASSEMBLE_FACTORS: [4.0, 2.0, 1.0, 0.5] # Spatial upsampling factors for each stage
21
+ READOUT_TYPE: "ignore" # Readout type for DPT decoder ("ignore", "project", etc.)
22
+ FROM_PRETRAINED: "weights/rgb_decoder.pth" # Path to pretrained decoder weights (optional)
23
+ USE_BN: False # Use batch normalization in decoder
24
+ DROPOUT: 0.1 # Dropout rate in decoder layers
25
+ OUTPUT_IMAGE_SIZE: [448,448] # Output image resolution [height, width]
26
+ OUTPUT_CHANNELS: 3 # Number of output channels (3 for RGB diffuse image)
27
+ DECODER_LR: 1.0e-5 # Custom learning rate for decoder (0.0 = frozen, 1.0 = same as base LR)
28
+ NUM_FUSION_BLOCKS_TRAINABLE: 1 # Number of fusion blocks to train (0-4, null = train all if DECODER_LR != 0)
29
+ TRAIN_RGB_HEAD: True # Whether to train RGB head (true/false, null = train if DECODER_LR != 0)
30
+ highlight:
31
+ USE_FILM: False # Enable FiLM conditioning in highlight decoder
32
+ FEATURE_DIM: 1024 # Feature dimension for highlight decoder
33
+ REASSEMBLE_OUT_CHANNELS: [96,192,384,768] # Output channels for each decoder stage
34
+ REASSEMBLE_FACTORS: [4.0, 2.0, 1.0, 0.5] # Spatial upsampling factors for each stage
35
+ READOUT_TYPE: "ignore" # Readout type for DPT decoder
36
+ USE_BN: False # Use batch normalization in decoder
37
+ DROPOUT: 0.1 # Dropout rate in decoder layers
38
+ OUTPUT_IMAGE_SIZE: [448,448] # Output image resolution [height, width]
39
+ OUTPUT_CHANNELS: 1 # Number of output channels (1 for highlight mask)
40
+ DECODER_LR: 5.0e-4 # Custom learning rate for decoder (0.0 = frozen, 1.0 = same as base LR)
41
+ NUM_FUSION_BLOCKS_TRAINABLE: null # Number of fusion blocks to train (0-4, null = train all if DECODER_LR != 0)
42
+ TOKEN_INPAINTER:
43
+ TOKEN_INPAINTER_CLASS: "TokenInpainter_Prior" # Token inpainter class name
44
+ TOKEN_INPAINTER_MODULE: "token_inpainters" # Module name to import token inpainter from
45
+ FROM_PRETRAINED: "weights/token_inpainter.pth" # Path to pretrained token inpainter weights
46
+ TOKEN_INPAINTER_LR: 1.0e-5 # Learning rate for token inpainter (can differ from base LR)
47
+ DEPTH: 6 # Number of transformer blocks
48
+ HEADS: 16 # Number of attention heads
49
+ DROP: 0 # Dropout rate
50
+ USE_POSITIONAL_ENCODING: True # Enable 2D sinusoidal positional encodings
51
+ USE_FINAL_NORM: True # Enable final LayerNorm before output projection
52
+ USE_LOCAL_PRIOR: True # Blend local mean prior for masked seeds
53
+ LOCAL_PRIOR_WEIGHT: 0.5 # Weight for local prior blending (1.0 = only mask_token, 0.0 = only local mean)
54
+ LOCAL_PRIOR_KERNEL: 5 # Kernel size for local prior blending (> 1)
55
+ SEED_NOISE_STD: 0.02 # Standard deviation of noise added to masked seeds during training
56
+ INPAINT_MASK_DILATION:
57
+ value: 1 # Dilation kernel size (pixels) for inpaint mask - Must be odd
58
+ USE_TORCH_COMPILE: # Enable PyTorch 2.0 torch.compile for faster training (experimental)
59
+ value: False
.gradio/certificate.pem ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ -----BEGIN CERTIFICATE-----
2
+ MIIFazCCA1OgAwIBAgIRAIIQz7DSQONZRGPgu2OCiwAwDQYJKoZIhvcNAQELBQAw
3
+ TzELMAkGA1UEBhMCVVMxKTAnBgNVBAoTIEludGVybmV0IFNlY3VyaXR5IFJlc2Vh
4
+ cmNoIEdyb3VwMRUwEwYDVQQDEwxJU1JHIFJvb3QgWDEwHhcNMTUwNjA0MTEwNDM4
5
+ WhcNMzUwNjA0MTEwNDM4WjBPMQswCQYDVQQGEwJVUzEpMCcGA1UEChMgSW50ZXJu
6
+ ZXQgU2VjdXJpdHkgUmVzZWFyY2ggR3JvdXAxFTATBgNVBAMTDElTUkcgUm9vdCBY
7
+ MTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAK3oJHP0FDfzm54rVygc
8
+ h77ct984kIxuPOZXoHj3dcKi/vVqbvYATyjb3miGbESTtrFj/RQSa78f0uoxmyF+
9
+ 0TM8ukj13Xnfs7j/EvEhmkvBioZxaUpmZmyPfjxwv60pIgbz5MDmgK7iS4+3mX6U
10
+ A5/TR5d8mUgjU+g4rk8Kb4Mu0UlXjIB0ttov0DiNewNwIRt18jA8+o+u3dpjq+sW
11
+ T8KOEUt+zwvo/7V3LvSye0rgTBIlDHCNAymg4VMk7BPZ7hm/ELNKjD+Jo2FR3qyH
12
+ B5T0Y3HsLuJvW5iB4YlcNHlsdu87kGJ55tukmi8mxdAQ4Q7e2RCOFvu396j3x+UC
13
+ B5iPNgiV5+I3lg02dZ77DnKxHZu8A/lJBdiB3QW0KtZB6awBdpUKD9jf1b0SHzUv
14
+ KBds0pjBqAlkd25HN7rOrFleaJ1/ctaJxQZBKT5ZPt0m9STJEadao0xAH0ahmbWn
15
+ OlFuhjuefXKnEgV4We0+UXgVCwOPjdAvBbI+e0ocS3MFEvzG6uBQE3xDk3SzynTn
16
+ jh8BCNAw1FtxNrQHusEwMFxIt4I7mKZ9YIqioymCzLq9gwQbooMDQaHWBfEbwrbw
17
+ qHyGO0aoSCqI3Haadr8faqU9GY/rOPNk3sgrDQoo//fb4hVC1CLQJ13hef4Y53CI
18
+ rU7m2Ys6xt0nUW7/vGT1M0NPAgMBAAGjQjBAMA4GA1UdDwEB/wQEAwIBBjAPBgNV
19
+ HRMBAf8EBTADAQH/MB0GA1UdDgQWBBR5tFnme7bl5AFzgAiIyBpY9umbbjANBgkq
20
+ hkiG9w0BAQsFAAOCAgEAVR9YqbyyqFDQDLHYGmkgJykIrGF1XIpu+ILlaS/V9lZL
21
+ ubhzEFnTIZd+50xx+7LSYK05qAvqFyFWhfFQDlnrzuBZ6brJFe+GnY+EgPbk6ZGQ
22
+ 3BebYhtF8GaV0nxvwuo77x/Py9auJ/GpsMiu/X1+mvoiBOv/2X/qkSsisRcOj/KK
23
+ NFtY2PwByVS5uCbMiogziUwthDyC3+6WVwW6LLv3xLfHTjuCvjHIInNzktHCgKQ5
24
+ ORAzI4JMPJ+GslWYHb4phowim57iaztXOoJwTdwJx4nLCgdNbOhdjsnvzqvHu7Ur
25
+ TkXWStAmzOVyyghqpZXjFaH3pO3JLF+l+/+sKAIuvtd7u+Nxe5AW0wdeRlN8NwdC
26
+ jNPElpzVmbUq4JUagEiuTDkHzsxHpFKVK7q4+63SM1N95R1NbdWhscdCb+ZAJzVc
27
+ oyi3B43njTOQ5yOf+1CceWxG1bQVs5ZufpsMljq4Ui0/1lvh+wjChP4kqKOJ2qxq
28
+ 4RgqsahDYVvTH9w7jXbyLeiNdd8XM2w9U/t7y0Ff/9yi0GE44Za4rF2LN9d11TPA
29
+ mRGunUHBcnWEvgJBQl9nJEiU0Zsnvgc/ubhPgXRR4Xq37Z0j4r7g1SgEEzwxA57d
30
+ emyPxgcYxn/eR44/KJ4EBs+lVDR3veyJm+kXQ99b21/+jh5Xos1AnX5iItreGCc=
31
+ -----END CERTIFICATE-----
app.py CHANGED
@@ -3,7 +3,7 @@
3
  from __future__ import annotations
4
 
5
  import sys
6
- import tempfile
7
  from pathlib import Path
8
 
9
  # Allow importing unreflectanything when run from gradio_space (e.g. HF Space with root dir)
@@ -11,112 +11,203 @@ _REPO_ROOT = Path(__file__).resolve().parent.parent
11
  if _REPO_ROOT not in sys.path:
12
  sys.path.insert(0, str(_REPO_ROOT))
13
 
 
 
 
14
  import gradio as gr
15
  import numpy as np
16
  import torch
17
 
 
18
 
19
  def _ensure_weights():
20
  """Download weights to cache if not present."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
  from unreflectanything import download
22
- from unreflectanything._shared import DEFAULT_WEIGHTS_FILENAME, get_cache_dir
23
 
24
- weights_dir = get_cache_dir("weights")
25
- if not (weights_dir / DEFAULT_WEIGHTS_FILENAME).exists():
26
- download("weights")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
 
28
 
29
- def run_inference(
30
- image: np.ndarray | None,
31
- brightness_threshold: float,
32
- ) -> np.ndarray | None:
33
- """Run reflection removal on a single image. Returns RGB numpy [H,W,3] in 0–255 or None."""
34
- if image is None:
35
- return None
36
- from unreflectanything import inference
37
 
38
  device = "cuda" if torch.cuda.is_available() else "cpu"
39
- with tempfile.TemporaryDirectory() as tmpdir:
40
- inp_path = Path(tmpdir) / "input.png"
41
- out_path = Path(tmpdir) / "output.png"
42
- # Gradio passes RGB numpy (H, W, 3) in 0–255
43
- from PIL import Image
44
-
45
- Image.fromarray(image.astype(np.uint8)).save(inp_path)
46
- try:
47
- result = inference(
48
- input=str(inp_path),
49
- output=None,
50
- device=device,
51
- batch_size=1,
52
- brightness_threshold=brightness_threshold,
53
- resize_output=True,
54
- verbose=False,
55
- )
56
- except FileNotFoundError as e:
57
- if "Weights not found" in str(e) or "Run 'unreflect download" in str(e):
58
- _ensure_weights()
59
- result = inference(
60
- input=str(inp_path),
61
- output=None,
62
- device=device,
63
- batch_size=1,
64
- brightness_threshold=brightness_threshold,
65
- resize_output=True,
66
- verbose=False,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
  )
68
- else:
69
- raise
70
- # result: [1, 3, H, W], float 0–1
71
- out = result[0].cpu().numpy().transpose(1, 2, 0)
72
- out = (np.clip(out, 0.0, 1.0) * 255).astype(np.uint8)
73
- return out
74
-
75
-
76
- def build_ui():
77
- _ensure_weights()
78
-
79
- with gr.Blocks(
80
- title="UnReflectAnything",
81
- theme=gr.themes.Soft(primary_hue="green", secondary_hue="purple"),
82
- ) as demo:
83
- gr.Markdown(
84
- """
85
- # UnReflectAnything
86
- Remove **specular reflections** from a single image. Upload an image and adjust the highlight threshold if needed.
87
- """
88
- )
89
  with gr.Row():
90
  inp = gr.Image(
91
- label="Input image",
92
  type="numpy",
93
- height=360,
 
 
94
  )
95
- out = gr.Image(
96
- label="Reflection‑removed (diffuse)",
97
  type="numpy",
98
- height=360,
 
99
  )
100
- brightness = gr.Slider(
101
- minimum=0.0,
102
- maximum=1.0,
103
- value=0.8,
104
- step=0.05,
105
- label="Brightness threshold (highlight detection)",
106
- )
107
- run_btn = gr.Button("Remove reflections", variant="primary")
108
  run_btn.click(
109
- fn=run_inference,
110
- inputs=[inp, brightness],
111
- outputs=out,
112
- )
113
- gr.Markdown(
114
- "Weights are cached after first run. On CPU inference may be slow."
115
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
116
  return demo
117
 
118
 
119
  demo = build_ui()
120
 
 
 
 
 
 
 
 
 
 
 
 
 
121
  if __name__ == "__main__":
122
- demo.launch()
 
 
 
 
 
3
  from __future__ import annotations
4
 
5
  import sys
6
+ import threading
7
  from pathlib import Path
8
 
9
  # Allow importing unreflectanything when run from gradio_space (e.g. HF Space with root dir)
 
11
  if _REPO_ROOT not in sys.path:
12
  sys.path.insert(0, str(_REPO_ROOT))
13
 
14
+ # Logo path: put your PNG in gradio_space/logo.png (next to app.py)
15
+ _GRADIO_DIR = Path(__file__).resolve().parent
16
+
17
  import gradio as gr
18
  import numpy as np
19
  import torch
20
 
21
+ from huggingface_hub import hf_hub_download
22
 
23
  def _ensure_weights():
24
  """Download weights to cache if not present."""
25
+ weights_path = hf_hub_download(
26
+ repo_id="AlbeRota/UnReflectAnything",
27
+ filename="weights/full_model_weights.pt"
28
+ )
29
+ config_path = hf_hub_download(
30
+ repo_id="AlbeRota/UnReflectAnything",
31
+ filename="configs/pretrained_config.yaml"
32
+ )
33
+ return weights_path, config_path
34
+
35
+ def _ensure_sample_images() -> Path | None:
36
+ """Ensure sample images are downloaded to the standard cache dir and return it.
37
+
38
+ Uses the same cache layout as the rest of the library:
39
+ get_cache_dir("images") / <files>.
40
+ """
41
  from unreflectanything import download
42
+ from unreflectanything._shared import get_cache_dir
43
 
44
+ images_dir = get_cache_dir("images")
45
+ if not images_dir.is_dir():
46
+ try:
47
+ download("images")
48
+ except Exception:
49
+ return None
50
+ return images_dir
51
+
52
+
53
+ def _get_sample_images():
54
+ """Return list of sample image paths from the images cache directory."""
55
+ from unreflectanything._shared import DEFAULT_IMAGE_EXTENSIONS
56
+
57
+ images_dir = _ensure_sample_images()
58
+ if images_dir is None or not images_dir.is_dir():
59
+ return []
60
+ paths = []
61
+ for p in sorted(images_dir.iterdir()):
62
+ if p.is_file() and p.suffix.lower() in DEFAULT_IMAGE_EXTENSIONS:
63
+ paths.append(str(p))
64
+ return paths
65
+
66
+
67
+ # Single model instance; loaded in background at app start or on first inference.
68
+ _cached_ura_model = None
69
+ _cached_device = None
70
+ _model_load_lock = threading.Lock()
71
+
72
+
73
+ def _get_model(device: str):
74
+ """Return the pretrained model, loading it once and reusing. Ensures weights exist (downloads if missing)."""
75
+ global _cached_ura_model, _cached_device
76
+ weights_path, config_path = _ensure_weights()
77
+ with _model_load_lock:
78
+ if _cached_ura_model is not None and _cached_device == device:
79
+ return _cached_ura_model
80
+ from unreflectanything import model
81
+
82
+ _cached_ura_model = model(
83
+ pretrained=True,
84
+ # weights_path=os.path.join(os.path.dirname(__file__), ".cache", "weights", "full_model_weights.pt"),
85
+ # config_path=os.path.join(os.path.dirname(__file__), ".cache", "configs", "pretrained_config.yaml"),
86
+ weights_path=weights_path,
87
+ config_path=config_path,
88
+ device=device,
89
+ verbose=False,
90
+ )
91
+ _cached_device = device
92
+ return _cached_ura_model
93
 
94
 
95
+ def build_ui():
96
+ _ensure_sample_images()
 
 
 
 
 
 
97
 
98
  device = "cuda" if torch.cuda.is_available() else "cpu"
99
+ # Start loading the model in the background so it is ready (or nearly ready) by first use.
100
+ threading.Thread(target=_get_model, args=(device,), daemon=True).start()
101
+
102
+ def run_inference(image: np.ndarray | None) -> np.ndarray | None:
103
+ """Run reflection removal using the cached model. Returns RGB numpy [H,W,3] in 0–255 or None."""
104
+ if image is None:
105
+ return None
106
+ from torchvision.transforms import functional as TF
107
+
108
+ ura_model = _get_model(device)
109
+ target_side = ura_model.image_size
110
+ # image: [H, W, 3] uint8 0–255
111
+ h, w = image.shape[:2]
112
+ tensor = TF.to_tensor(image).unsqueeze(0) # [1, 3, H, W], [0, 1]
113
+ tensor = TF.resize(tensor, [target_side, target_side], antialias=True)
114
+ tensor = tensor.to(ura_model.device, dtype=torch.float32)
115
+ mask = tensor.mean(1, keepdim=True) > 0.9 # [1, 1, S, S]
116
+ with torch.no_grad():
117
+ diffuse = ura_model(images=tensor, inpaint_mask_override=mask)
118
+ diffuse = diffuse.cpu()
119
+ diffuse = TF.resize(diffuse, [h, w], antialias=True)
120
+ out = diffuse[0].numpy().transpose(1, 2, 0)
121
+ out = (np.clip(out, 0.0, 1.0) * 255).astype(np.uint8)
122
+ return out
123
+
124
+ def run_inference_slider(
125
+ image: np.ndarray | None,
126
+ ) -> tuple[np.ndarray | None, np.ndarray | None] | None:
127
+ """Run inference and return (input, output) for ImageSlider."""
128
+ out = run_inference(image)
129
+ if out is None:
130
+ return None
131
+ return (image, out)
132
+
133
+ with gr.Blocks(title="UnReflectAnything") as demo:
134
+ with gr.Row():
135
+ with gr.Column(scale=0, min_width=100):
136
+ # if LOGO_PATH.is_file():
137
+ # gr.Image(
138
+ # value=str(LOGO_PATH),
139
+ # show_label=False,
140
+ # interactive=False,
141
+ # height=100,
142
+ # container=False,
143
+ # buttons=[],
144
+ # )
145
+ with gr.Column(scale=1):
146
+ gr.Markdown(
147
+ """
148
+ # UnReflectAnything
149
+ UnReflectAnything inputs any RGB image and **removes specular highlights**,
150
+ returning clean diffuse-only outputs. We trained UnReflectAnything by synthesizing
151
+ specularities and supervising in DINOv3 feature space.
152
+ UnReflectAnything works on both natural indoor and **surgical/endoscopic** domain data.
153
+ Visit the [Project Page](https://alberto-rota.github.io/UnReflectAnything/)!
154
+ """
155
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
156
  with gr.Row():
157
  inp = gr.Image(
 
158
  type="numpy",
159
+ label="Image input",
160
+ height=600,
161
+ width=600,
162
  )
163
+ out_slider = gr.ImageSlider(
164
+ label="Input",
165
  type="numpy",
166
+ height=600,
167
+ show_label=True,
168
  )
169
+ run_btn = gr.Button("Run UnReflectAnything", variant="primary")
 
 
 
 
 
 
 
170
  run_btn.click(
171
+ fn=run_inference_slider,
172
+ inputs=[inp],
173
+ outputs=out_slider,
 
 
 
174
  )
175
+ sample_paths = _get_sample_images()
176
+ if sample_paths:
177
+ gr.Examples(
178
+ examples=[[p] for p in sample_paths],
179
+ inputs=inp,
180
+ label="Pre-loaded examples",
181
+ examples_per_page=20,
182
+ )
183
+ gr.HTML("""<hr>""")
184
+ gr.Markdown("""
185
+ [Project Page](https://alberto-rota.github.io/UnReflectAnything/) ⋅
186
+ [GitHub](https://github.com/alberto-rota/UnReflectAnything) ⋅
187
+ [Model Card](https://huggingface.co/AlbeRota/UnReflectAnything) ⋅
188
+ [Paper](https://arxiv.org/abs/2512.09583) ⋅
189
+ [Contact](mailto:alberto1.rota@polimi.it) ⋅
190
+ """)
191
  return demo
192
 
193
 
194
  demo = build_ui()
195
 
196
+
197
+ def _launch_allowed_paths():
198
+ """Paths Gradio is allowed to serve (e.g. for gr.Examples from cache)."""
199
+ from unreflectanything._shared import get_cache_dir
200
+
201
+ paths = [str(_GRADIO_DIR)]
202
+ images_cache = get_cache_dir("images")
203
+ if images_cache.is_dir():
204
+ paths.append(str(images_cache))
205
+ return paths
206
+
207
+
208
  if __name__ == "__main__":
209
+ demo.launch(
210
+ share=True,
211
+ allowed_paths=_launch_allowed_paths(),
212
+ theme=gr.themes.Soft(primary_hue="orange", secondary_hue="blue"),
213
+ )
tmp/engine_initializers.log ADDED
File without changes
tmp/main.log ADDED
File without changes
tmp/models.log ADDED
File without changes
tmp/optimization.log ADDED
File without changes
tmp/rgbp.log ADDED
File without changes
tmp/run_resume.log ADDED
File without changes