Genfocus-Demo

Sleeping

App Files Files Community

Ray committed on Dec 17, 2025

Commit

26cfe11

1 Parent(s): d189df7

feat: add pipeline with LFS images

Browse files

Files changed (5) hide show

.gitattributes +1 -0
app.py +86 -47
example/{get-out.jpg → 0.jpg} +2 -2
example/{wweii_nurse.jpg → group_1.png} +2 -2
example/kid.png +3 -0

.gitattributes CHANGED Viewed

@@ -34,3 +34,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
 *.jpg filter=lfs diff=lfs merge=lfs -text

 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
 *.jpg filter=lfs diff=lfs merge=lfs -text
+*.png filter=lfs diff=lfs merge=lfs -text

app.py CHANGED Viewed

@@ -5,7 +5,6 @@ import torch
 import numpy as np
 from PIL import Image, ImageDraw
-# Hugging Face Spaces 特有的 GPU 裝飾器
 import spaces
 from huggingface_hub import hf_hub_download
@@ -68,21 +67,41 @@ class DepthProLoader:
             raise e
 # ==========================================
-# 3. Helper Functions
 # ==========================================
-def center_crop_512(img: Image.Image) -> Image.Image:
     w, h = img.size
     target = 512
-    if min(w, h) < target:
-        scale = target / min(w, h)
-        new_w, new_h = int(w * scale), int(h * scale)
-        img = img.resize((new_w, new_h), Image.LANCZOS)
-        w, h = new_w, new_h
-    left = (w - target) // 2
-    top = (h - target) // 2
-    right = left + target
-    bottom = top + target
-    return img.crop((left, top, right, bottom))
 def switch_lora_on_gpu(pipe, target_mode):
     print(f"🔄 Switching LoRA to [{target_mode}]...")
@@ -96,19 +115,36 @@ def switch_lora_on_gpu(pipe, target_mode):
         pipe.set_adapters(["bokeh"])
 def preprocess_input_image(raw_img, do_resize):
     if raw_img is None: return None, None
     print(f"🔄 Preprocessing Input... Resize={do_resize}")
     img_to_process = raw_img
     if do_resize:
         w, h = img_to_process.size
-        scale = 512 / min(w, h)
-        new_w, new_h = int(w * scale), int(h * scale)
-        img_to_process = img_to_process.resize((new_w, new_h), Image.LANCZOS)
-    final_input = center_crop_512(img_to_process)
-    # 這裡只回傳兩個值 (不再回傳 latents state)
     return final_input, final_input
 def draw_red_dot_on_preview(clean_img, evt: gr.SelectData):
     if clean_img is None: return None, None
     img_copy = clean_img.copy()
@@ -120,17 +156,21 @@ def draw_red_dot_on_preview(clean_img, evt: gr.SelectData):
     draw.line((x, y-r, x, y+r), fill="red", width=2)
     return img_copy, evt.index
-# ==========================================
-# 4. Main Pipeline
-# ==========================================
 @spaces.GPU(duration=120)
-def run_genfocus_pipeline(clean_input_512, click_coords, K_value):
-    # 移除 cached_latents 參數
     global pipe_flux, depth_model, depth_transform
     device = "cuda"
-    # --- 1. Load Flux ---
     if pipe_flux is None:
         print("🚀 Loading FLUX to GPU (First Run)...")
         from Genfocus.pipeline.flux import FluxPipeline
@@ -163,10 +203,6 @@ def run_genfocus_pipeline(clean_input_512, click_coords, K_value):
             print("⚠️ GPU Context changed, reloading Depth Pro...")
             depth_model, depth_transform = depth_loader.load(device=device)
-    # --- 3. Execution ---
-    if clean_input_512 is None:
-        raise gr.Error("Please complete Step 1 (Upload Image) first.")
     from Genfocus.pipeline.flux import Condition, generate, seed_everything
     print("⚡ Running Inference...")
@@ -174,13 +210,14 @@ def run_genfocus_pipeline(clean_input_512, click_coords, K_value):
     # STAGE 1: DEBLUR
     switch_lora_on_gpu(pipe_flux, "deblur")
-    condition_0_img = Image.new("RGB", (512, 512), (0, 0, 0))
     cond0 = Condition(condition_0_img, "deblurring", [0, 32], 1.0)
-    cond1 = Condition(clean_input_512, "deblurring", [0, 0], 1.0)
     seed_everything(42)
     deblurred_img = generate(
-        pipe_flux, height=512, width=512,
         prompt="a sharp photo with everything in focus",
         conditions=[cond0, cond1]
     ).images[0]
@@ -190,7 +227,8 @@ def run_genfocus_pipeline(clean_input_512, click_coords, K_value):
     # STAGE 2: BOKEH
     if click_coords is None:
-        click_coords = [256, 256]
     # Depth Estimation
     img_t = depth_transform(deblurred_img).to(device)
@@ -200,12 +238,14 @@ def run_genfocus_pipeline(clean_input_512, click_coords, K_value):
     depth_map = pred["depth"].cpu().numpy().squeeze()
     safe_depth = np.where(depth_map > 0.0, depth_map, np.finfo(np.float32).max)
     disp_orig = 1.0 / safe_depth
-    disp = cv2.resize(disp_orig, (512, 512), interpolation=cv2.INTER_LINEAR)
     # Defocus Map
     tx, ty = click_coords
-    tx = min(max(int(tx), 0), 511)
-    ty = min(max(int(ty), 0), 511)
     disp_focus = float(disp[ty, tx])
     dmf = disp - np.float32(disp_focus)
@@ -214,11 +254,12 @@ def run_genfocus_pipeline(clean_input_512, click_coords, K_value):
     defocus_t = torch.from_numpy(defocus_abs).unsqueeze(0).float()
     cond_map = (defocus_t / MAX_COC).clamp(0, 1).repeat(3,1,1).unsqueeze(0)
-    # Generate New Latents (Always fresh)
     seed_everything(42)
     gen = torch.Generator(device=pipe_flux.device).manual_seed(1234)
     current_latents, _ = pipe_flux.prepare_latents(
-        batch_size=1, num_channels_latents=16, height=512, width=512,
         dtype=pipe_flux.dtype, device=pipe_flux.device, generator=gen, latents=None
     )
@@ -232,7 +273,7 @@ def run_genfocus_pipeline(clean_input_512, click_coords, K_value):
     with torch.no_grad():
         res = generate(
-            pipe_flux, height=512, width=512,
             prompt="an excellent photo with a large aperture",
             conditions=[cond_img, cond_dmf],
             guidance_scale=1.0, kv_cache=False, generator=gen,
@@ -242,9 +283,7 @@ def run_genfocus_pipeline(clean_input_512, click_coords, K_value):
     return generated_bokeh
-# ==========================================
-# 5. UI Setup
-# ==========================================
 css = """
 #col-container { margin: 0 auto; max-width: 1400px; }
 """
@@ -260,7 +299,6 @@ if os.path.exists(example_dir):
 with gr.Blocks(css=css) as demo:
     clean_processed_state = gr.State(value=None)
     click_coords_state = gr.State(value=None)
-    # 移除了 latents_state
     with gr.Column(elem_id="col-container"):
         gr.Markdown("# 📷 Genfocus Pipeline: Interactive Refocusing (HF Demo)")
@@ -269,7 +307,8 @@ with gr.Blocks(css=css) as demo:
             with gr.Column(scale=1):
                 gr.Markdown("### Step 1: Upload & Preprocess")
                 input_raw = gr.Image(label="Raw Input Image", type="pil")
-                resize_chk = gr.Checkbox(label="Resize min edge to 512", value=False)
                 if valid_examples:
                     gr.Examples(examples=valid_examples, inputs=input_raw, label="Examples")
@@ -289,7 +328,7 @@ with gr.Blocks(css=css) as demo:
             trigger(
                 fn=preprocess_input_image,
                 inputs=[input_raw, resize_chk],
-                outputs=[focus_preview_img, clean_processed_state] # 移除 latents_state
             )
         focus_preview_img.select(
@@ -304,8 +343,8 @@ with gr.Blocks(css=css) as demo:
         run_btn.click(
             fn=run_genfocus_pipeline,
-            inputs=[clean_processed_state, click_coords_state, k_slider], # 移除 latents_state
-            outputs=[output_img] # 移除 latents_state
         )
 if __name__ == "__main__":

 import numpy as np
 from PIL import Image, ImageDraw
 import spaces
 from huggingface_hub import hf_hub_download
             raise e
 # ==========================================
+# 3. Helper Functions (Modified)
 # ==========================================
def resize_and_crop_to_16(img: Image.Image) -> Image.Image:
    """Fit an image inside 512 px and trim it to multiples of 16.

    Steps:
      1. Resize so that the longer side is exactly 512 px, keeping the
         aspect ratio (the shorter side is floored to a whole pixel).
      2. Center-crop both dimensions down to the nearest multiple of 16.

    Args:
        img: Source PIL image.

    Returns:
        A new PIL image whose width and height are both multiples of 16
        and whose longer side is 512.

    Raises:
        ValueError: If the image has a zero-sized dimension.
    """
    target = 512
    multiple = 16

    w, h = img.size
    if w <= 0 or h <= 0:
        raise ValueError(f"Cannot resize an empty image of size {w}x{h}")

    # Use exact integer arithmetic. The float form `int(w * (target / w))`
    # can land just below 512 (e.g. w = 49 gives 511.99999999999994) and
    # truncate to 511, which the crop below would then shrink to 496.
    # The short side is clamped to one full 16-px tile so that extremely
    # elongated inputs never resize or crop down to an empty image.
    if w >= h:
        new_w = target
        new_h = max(multiple, (h * target) // w)
    else:
        new_h = target
        new_w = max(multiple, (w * target) // h)
    img = img.resize((new_w, new_h), Image.LANCZOS)

    # Round each dimension down to a multiple of 16.
    final_w = (new_w // multiple) * multiple
    final_h = (new_h // multiple) * multiple

    # Center the crop window inside the resized image.
    left = (new_w - final_w) // 2
    top = (new_h - final_h) // 2
    return img.crop((left, top, left + final_w, top + final_h))
 def switch_lora_on_gpu(pipe, target_mode):
     print(f"🔄 Switching LoRA to [{target_mode}]...")
         pipe.set_adapters(["bokeh"])
 def preprocess_input_image(raw_img, do_resize):
+    """
+    修改後的預處理：
+    如果勾選 do_resize (或預設行為)，則執行長邊512+裁切16倍數。
+    """
     if raw_img is None: return None, None
     print(f"🔄 Preprocessing Input... Resize={do_resize}")
     img_to_process = raw_img
     if do_resize:
+        final_input = resize_and_crop_to_16(img_to_process)
+    else:
         w, h = img_to_process.size
+        new_w = (w // 16) * 16
+        new_h = (h // 16) * 16
+        if new_w != w or new_h != h:
+            final_input = center_crop_helper(img_to_process, new_w, new_h)
+        else:
+            final_input = img_to_process
     return final_input, final_input
def center_crop_helper(img, target_w, target_h):
    """Return a ``target_w`` x ``target_h`` crop centered inside ``img``.

    Args:
        img: Image object exposing ``.size`` and ``.crop(box)``.
        target_w: Width of the crop window in pixels.
        target_h: Height of the crop window in pixels.

    Returns:
        Whatever ``img.crop`` returns for the centered box.
    """
    src_w, src_h = img.size
    # Leftover pixels are split evenly; any odd remainder goes to the
    # right/bottom edge because of the floor division.
    x0 = (src_w - target_w) // 2
    y0 = (src_h - target_h) // 2
    box = (x0, y0, x0 + target_w, y0 + target_h)
    return img.crop(box)
 def draw_red_dot_on_preview(clean_img, evt: gr.SelectData):
     if clean_img is None: return None, None
     img_copy = clean_img.copy()
     draw.line((x, y-r, x, y+r), fill="red", width=2)
     return img_copy, evt.index
 @spaces.GPU(duration=120)
+def run_genfocus_pipeline(clean_input, click_coords, K_value):
     global pipe_flux, depth_model, depth_transform
     device = "cuda"
+    if clean_input is None:
+        raise gr.Error("Please complete Step 1 (Upload Image) first.")
+    W_dyn, H_dyn = clean_input.size
+    print(f"📏 Processing Image Size: {W_dyn}x{H_dyn}")
     if pipe_flux is None:
         print("🚀 Loading FLUX to GPU (First Run)...")
         from Genfocus.pipeline.flux import FluxPipeline
             print("⚠️ GPU Context changed, reloading Depth Pro...")
             depth_model, depth_transform = depth_loader.load(device=device)
     from Genfocus.pipeline.flux import Condition, generate, seed_everything
     print("⚡ Running Inference...")
     # STAGE 1: DEBLUR
     switch_lora_on_gpu(pipe_flux, "deblur")
+    condition_0_img = Image.new("RGB", (W_dyn, H_dyn), (0, 0, 0))
     cond0 = Condition(condition_0_img, "deblurring", [0, 32], 1.0)
+    cond1 = Condition(clean_input, "deblurring", [0, 0], 1.0)
     seed_everything(42)
     deblurred_img = generate(
+        pipe_flux, height=H_dyn, width=W_dyn,
         prompt="a sharp photo with everything in focus",
         conditions=[cond0, cond1]
     ).images[0]
     # STAGE 2: BOKEH
     if click_coords is None:
+        # Default to center if no click
+        click_coords = [W_dyn // 2, H_dyn // 2]
     # Depth Estimation
     img_t = depth_transform(deblurred_img).to(device)
     depth_map = pred["depth"].cpu().numpy().squeeze()
     safe_depth = np.where(depth_map > 0.0, depth_map, np.finfo(np.float32).max)
     disp_orig = 1.0 / safe_depth
+    # Resize disp to match current image dimensions
+    disp = cv2.resize(disp_orig, (W_dyn, H_dyn), interpolation=cv2.INTER_LINEAR)
     # Defocus Map
     tx, ty = click_coords
+    # Clamp coordinates to new dimensions
+    tx = min(max(int(tx), 0), W_dyn - 1)
+    ty = min(max(int(ty), 0), H_dyn - 1)
     disp_focus = float(disp[ty, tx])
     dmf = disp - np.float32(disp_focus)
     defocus_t = torch.from_numpy(defocus_abs).unsqueeze(0).float()
     cond_map = (defocus_t / MAX_COC).clamp(0, 1).repeat(3,1,1).unsqueeze(0)
+    # Generate New Latents
     seed_everything(42)
     gen = torch.Generator(device=pipe_flux.device).manual_seed(1234)
+    # Prepare latents with dynamic H, W
     current_latents, _ = pipe_flux.prepare_latents(
+        batch_size=1, num_channels_latents=16, height=H_dyn, width=W_dyn,
         dtype=pipe_flux.dtype, device=pipe_flux.device, generator=gen, latents=None
     )
     with torch.no_grad():
         res = generate(
+            pipe_flux, height=H_dyn, width=W_dyn,
             prompt="an excellent photo with a large aperture",
             conditions=[cond_img, cond_dmf],
             guidance_scale=1.0, kv_cache=False, generator=gen,
     return generated_bokeh
 css = """
 #col-container { margin: 0 auto; max-width: 1400px; }
 """
 with gr.Blocks(css=css) as demo:
     clean_processed_state = gr.State(value=None)
     click_coords_state = gr.State(value=None)
     with gr.Column(elem_id="col-container"):
         gr.Markdown("# 📷 Genfocus Pipeline: Interactive Refocusing (HF Demo)")
             with gr.Column(scale=1):
                 gr.Markdown("### Step 1: Upload & Preprocess")
                 input_raw = gr.Image(label="Raw Input Image", type="pil")
+                resize_chk = gr.Checkbox(label="Resize longer edge to 512 (crops to 16x)", value=True)
                 if valid_examples:
                     gr.Examples(examples=valid_examples, inputs=input_raw, label="Examples")
             trigger(
                 fn=preprocess_input_image,
                 inputs=[input_raw, resize_chk],
+                outputs=[focus_preview_img, clean_processed_state]
             )
         focus_preview_img.select(
         run_btn.click(
             fn=run_genfocus_pipeline,
+            inputs=[clean_processed_state, click_coords_state, k_slider],
+            outputs=[output_img]
         )
 if __name__ == "__main__":

example/{get-out.jpg → 0.jpg} RENAMED Viewed

File without changes

example/{wweii_nurse.jpg → group_1.png} RENAMED Viewed

File without changes

example/kid.png ADDED Viewed

Git LFS Details

SHA256: 07d19b465ef526bee07495087bf187c4995027239a19c73d470dfca003a2f5e3
Pointer size: 132 Bytes
Size of remote file: 1.91 MB