Commit
·
3edaa28
1
Parent(s):
b5a0978
Fix weak deblurring: add tile-based inference for large images
NAFNetLocal's channel attention uses local pooling calibrated for 256x256.
For larger images, the attention kernel covers only a fraction of each
feature map, making the residual near-zero (output = input).
Fix: process large images in overlapping 256x256 tiles so each tile gets
proper global channel attention matching the training behavior. Small
images (<= 256px) still use a single forward pass.
app.py
CHANGED
|
@@ -107,6 +107,69 @@ def _unsharp_mask(img: np.ndarray, amount: float) -> np.ndarray:
|
|
| 107 |
return np.clip(sharpened, 0, 255).astype(np.uint8)
|
| 108 |
|
| 109 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 110 |
def deblur(image: np.ndarray, strength: float, sharpen: float):
|
| 111 |
if image is None:
|
| 112 |
raise gr.Error("Please upload an image.")
|
|
@@ -130,21 +193,12 @@ def deblur(image: np.ndarray, strength: float, sharpen: float):
|
|
| 130 |
inp = _img2tensor_rgb(img_input)
|
| 131 |
|
| 132 |
try:
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
model.grids()
|
| 136 |
-
|
| 137 |
-
model.test()
|
| 138 |
-
|
| 139 |
-
if model.opt["val"].get("grids", False):
|
| 140 |
-
model.grids_inverse()
|
| 141 |
-
|
| 142 |
-
visuals = model.get_current_visuals()
|
| 143 |
-
sr_img = tensor2img([visuals["result"]], rgb2bgr=False)
|
| 144 |
except RuntimeError as exc:
|
| 145 |
if "out of memory" in str(exc).lower():
|
| 146 |
raise gr.Error(
|
| 147 |
-
"Out of
|
| 148 |
) from exc
|
| 149 |
raise gr.Error(f"Inference failed: {exc}") from exc
|
| 150 |
|
|
|
|
| 107 |
return np.clip(sharpened, 0, 255).astype(np.uint8)
|
| 108 |
|
| 109 |
|
| 110 |
# ---------------------------------------------------------------------------
# Tile-based inference — critical for proper deblurring on large images.
#
# NAFNetLocal replaces AdaptiveAvgPool2d(1) with a fixed-kernel AvgPool2d
# calibrated for the 256×256 training resolution. For images larger than
# ~256 px the kernel becomes *local* instead of *global*, which cripples the
# channel attention and makes the residual almost zero (output ≈ input).
#
# By processing in 256×256 tiles with overlap we guarantee that every tile
# goes through the network with global channel attention — matching the
# training behaviour and producing strong deblurring.
# ---------------------------------------------------------------------------
# Side length (px) of each square tile; matches the 256×256 training
# resolution referenced in the banner above.
TILE_SIZE = 256
# Overlap (px) between adjacent tiles; overlapping regions are averaged
# by _run_inference to hide tile seams.
TILE_OVERLAP = 48
+
|
| 125 |
+
|
| 126 |
+
def _tile_positions(length: int, tile: int, overlap: int) -> list:
|
| 127 |
+
"""Return start positions for overlapping tiles along one axis."""
|
| 128 |
+
if length <= tile:
|
| 129 |
+
return [0]
|
| 130 |
+
stride = tile - overlap
|
| 131 |
+
positions = list(range(0, length - tile + 1, stride))
|
| 132 |
+
# make sure the last tile reaches the edge
|
| 133 |
+
if positions[-1] + tile < length:
|
| 134 |
+
positions.append(length - tile)
|
| 135 |
+
return sorted(set(positions))
|
| 136 |
+
|
| 137 |
+
|
| 138 |
+
def _run_inference(model, lq: torch.Tensor,
                   tile_size: int = TILE_SIZE,
                   tile_overlap: int = TILE_OVERLAP) -> torch.Tensor:
    """Run deblur inference, tiling large inputs automatically.

    Args:
        model: basicsr-style model wrapper exposing ``feed_data``,
            ``test`` and ``get_current_visuals`` (as used below).
        lq: Low-quality input tensor of shape (1, C, H, W).
        tile_size: Side length of each square tile.
        tile_overlap: Overlap in pixels between adjacent tiles.

    Returns:
        Restored tensor of shape (1, C, H, W). Overlapping tile outputs
        are averaged, which hides seams between tiles.
    """
    _, c, h, w = lq.shape

    # Small image -> single forward pass (channel attention already global).
    if h <= tile_size and w <= tile_size:
        model.feed_data(data={"lq": lq})
        model.test()
        return model.get_current_visuals()["result"]

    # Large image -> overlapping tiles, averaged where they overlap.
    rows = _tile_positions(h, tile_size, tile_overlap)
    cols = _tile_positions(w, tile_size, tile_overlap)

    # Allocate the accumulators lazily so their dtype/device match whatever
    # the model actually returns. The previous version hard-coded default
    # (CPU, float32) tensors, which would raise on the `+=` below if the
    # model ever returned a CUDA or non-float32 result.
    out_acc = None
    count = None

    for y in rows:
        for x in cols:
            y_end = min(y + tile_size, h)
            x_end = min(x + tile_size, w)
            tile = lq[:, :, y:y_end, x:x_end]

            model.feed_data(data={"lq": tile})
            model.test()
            tile_out = model.get_current_visuals()["result"]

            if out_acc is None:
                out_acc = torch.zeros(1, c, h, w,
                                      dtype=tile_out.dtype,
                                      device=tile_out.device)
                count = torch.zeros(1, 1, h, w,
                                    dtype=tile_out.dtype,
                                    device=tile_out.device)

            out_acc[:, :, y:y_end, x:x_end] += tile_out
            count[:, :, y:y_end, x:x_end] += 1.0

    # Every pixel is covered by at least one tile, so clamp is a pure
    # safety net against division by zero.
    return out_acc / count.clamp(min=1.0)
|
| 171 |
+
|
| 172 |
+
|
| 173 |
def deblur(image: np.ndarray, strength: float, sharpen: float):
|
| 174 |
if image is None:
|
| 175 |
raise gr.Error("Please upload an image.")
|
|
|
|
| 193 |
inp = _img2tensor_rgb(img_input)
|
| 194 |
|
| 195 |
try:
|
| 196 |
+
result = _run_inference(model, inp.unsqueeze(dim=0))
|
| 197 |
+
sr_img = tensor2img([result], rgb2bgr=False)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 198 |
except RuntimeError as exc:
|
| 199 |
if "out of memory" in str(exc).lower():
|
| 200 |
raise gr.Error(
|
| 201 |
+
"Out of memory. Try uploading a smaller image."
|
| 202 |
) from exc
|
| 203 |
raise gr.Error(f"Inference failed: {exc}") from exc
|
| 204 |
|