VanKee committed on
Commit
212b9f6
·
1 Parent(s): 3f3d1c4

upload gradio

Browse files
Files changed (4) hide show
  1. .gitignore +1 -0
  2. app.py +390 -0
  3. cs5330_hw4.py +633 -0
  4. requirements.txt +10 -0
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ .DS_Store
app.py ADDED
@@ -0,0 +1,390 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ CS5330-HW4: Parallax Effect Gradio App
4
+ Converted from Colab notebook.
5
+ (V4: Final fix for halo/border artifact. Uses correct mask.)
6
+ """
7
+
8
+ import torch
9
+ import numpy as np
10
+ import matplotlib.pyplot as plt
11
+ from PIL import Image
12
+ from transformers import DPTImageProcessor, DPTForDepthEstimation
13
+ import cv2
14
+ import imageio.v2 as imageio
15
+ import gradio as gr
16
+ import time # To create unique filenames
17
+
18
+ # ==================================================================
19
+ # Global Transformer Setup
20
+ # ==================================================================
21
+ print("Loading Intel DPT depth estimation model...")
22
+ processor = DPTImageProcessor.from_pretrained("Intel/dpt-large")
23
+ model = DPTForDepthEstimation.from_pretrained("Intel/dpt-large")
24
+ model.eval()
25
+
26
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
27
+ model = model.to(device)
28
+ print(f"Model loaded on {device}. Gradio app is ready.")
29
+
30
+ # ==================================================================
31
+ # Helper Function 1: Get Depth Map
32
+ # ==================================================================
33
def get_depth_map(pil_image, processor, model, device):
    """Estimate a dense depth map for an image with a DPT-style model.

    Args:
        pil_image: PIL-style image. Only ``.size`` (width, height) is read
            directly; the pixels are consumed by ``processor``.
        processor: Callable (e.g. ``DPTImageProcessor``) invoked as
            ``processor(images=pil_image, return_tensors="pt")`` and returning
            a dict of tensors.
        model: Depth model returning an object with a ``predicted_depth``
            tensor of shape ``(batch, H', W')``.
        device: ``torch.device`` the model lives on; inputs are moved there.

    Returns:
        2-D ``numpy.ndarray`` of shape ``(H, W)`` matching ``pil_image``,
        min-max normalized to [0, 1]. If the raw prediction is perfectly
        flat, an all-zeros map is returned instead of NaNs.
    """
    print("... (1/5) Extracting depth map")
    inputs = processor(images=pil_image, return_tensors="pt")
    inputs = {k: v.to(device) for k, v in inputs.items()}

    # Inference only — no autograd graph needed.
    with torch.no_grad():
        outputs = model(**inputs)
        predicted_depth = outputs.predicted_depth

    # Upsample the model's low-resolution prediction back to the image size.
    # PIL's .size is (width, height); interpolate expects (height, width).
    prediction = torch.nn.functional.interpolate(
        predicted_depth.unsqueeze(1),
        size=pil_image.size[::-1],
        mode="bicubic",
        align_corners=False,
    )

    depth_map = prediction.squeeze().cpu().numpy()

    # Min-max normalize to [0, 1]. The original code divided by
    # (max - min) unconditionally, which produces NaNs for a constant
    # depth prediction; guard that degenerate case explicitly.
    depth_range = depth_map.max() - depth_map.min()
    if depth_range > 0:
        depth_map = (depth_map - depth_map.min()) / depth_range
    else:
        depth_map = np.zeros_like(depth_map)
    return depth_map
52
+
53
+ # ==================================================================
54
+ # Helper Function 2: Layer Separation
55
+ # ==================================================================
56
+ # This function returns mask_clean (hard) and mask_soft (soft/full-size)
57
def separate_foreground_background(image, depth_map, *,
                                   assume_bgr_input=True,
                                   near_is_foreground=True,
                                   foreground_depth_is_high=True):
    """Split an image into foreground/background layers using a depth map.

    A binary foreground mask is derived from the depth map via Otsu
    thresholding, cleaned with morphology, restricted to the largest
    connected component, then feathered into a soft alpha used to
    multiply the image into two layers.

    Args:
        image: HxWx3 (or grayscale / PIL) input image.
        depth_map: 2-D (or HxWxC; channel 0 is used) depth map.
        assume_bgr_input: If True and the image is 3-channel, convert
            BGR -> RGB (OpenCV convention). Pass False for RGB input.
        near_is_foreground: Whether near objects count as foreground.
        foreground_depth_is_high: Whether the foreground has the *higher*
            depth value in `depth_map`.

    Returns:
        (foreground, background, mask_clean, mask_soft) where the first
        two are uint8 HxWx3 layers, mask_clean is a hard 0/255 uint8 mask
        and mask_soft is a float32 [0,1] feathered mask.
    """
    print("... (2/5) Separating layers")

    # Coerce PIL / list inputs to numpy arrays.
    if not isinstance(image, np.ndarray):
        image = np.array(image)
    if not isinstance(depth_map, np.ndarray):
        depth_map = np.array(depth_map)

    # Only convert when explicitly told the input is BGR, to avoid
    # accidental double channel swaps.
    if assume_bgr_input and image.ndim == 3 and image.shape[2] == 3:
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    if image.ndim == 2:
        image = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
    if depth_map.ndim == 3:
        depth_map = depth_map[:, :, 0]

    # Depth -> 8-bit, lightly smoothed before thresholding.
    depth_norm = cv2.normalize(depth_map, None, 0, 255, cv2.NORM_MINMAX).astype(np.uint8)
    depth_smooth = cv2.GaussianBlur(depth_norm, (5, 5), 0)

    # Pick the threshold polarity so that "foreground" always ends up as
    # the white (255) side of the binary mask. The two flags cancel out:
    # same parity -> THRESH_BINARY, opposite parity -> inverted.
    if near_is_foreground and foreground_depth_is_high:
        thresh_flag = cv2.THRESH_BINARY
    elif near_is_foreground and not foreground_depth_is_high:
        thresh_flag = cv2.THRESH_BINARY_INV
    elif (not near_is_foreground) and foreground_depth_is_high:
        thresh_flag = cv2.THRESH_BINARY_INV
    else:
        thresh_flag = cv2.THRESH_BINARY

    # Otsu picks the threshold automatically; the 0 here is ignored.
    _, binary_mask = cv2.threshold(depth_smooth, 0, 255, thresh_flag + cv2.THRESH_OTSU)

    # Open removes speckle noise; close fills small holes in the subject.
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5))
    mask_clean = cv2.morphologyEx(binary_mask, cv2.MORPH_OPEN, kernel, iterations=1)
    mask_clean = cv2.morphologyEx(mask_clean, cv2.MORPH_CLOSE, kernel, iterations=2)

    # Keep only the largest connected component (assumed to be the subject).
    num_labels, labels, stats, _ = cv2.connectedComponentsWithStats(mask_clean, 8)
    if num_labels > 1:
        largest_label = 1 + np.argmax(stats[1:, cv2.CC_STAT_AREA])
        mask_clean = (labels == largest_label).astype(np.uint8) * 255

    # mask_soft is the full-size feathered mask, which is key to fixing
    # the halo/border artifact downstream.
    mask_soft = cv2.GaussianBlur(mask_clean, (9, 9), 5).astype(np.float32) / 255.0

    # Alpha-multiply the image into the two complementary layers.
    img_f = image.astype(np.float32) / 255.0
    mask_3 = np.dstack([mask_soft]*3)

    foreground = np.clip(img_f * mask_3, 0, 1)
    background = np.clip(img_f * (1.0 - mask_3), 0, 1)

    foreground = (foreground * 255.0).astype(np.uint8)
    background = (background * 255.0).astype(np.uint8)

    return foreground, background, mask_clean, mask_soft
112
+
113
+ # ==================================================================
114
+ # Helper Function 3: Background Reconstruction
115
+ # ==================================================================
116
+ # This function returns final_bg (inpainted background) and alpha_no_halo (eroded mask)
117
+ # Note: We no longer use alpha_no_halo for the animation, but the function is fine.
118
def reconstruct_background(background, mask_hard, original_image_np):
    """Inpaint the foreground "hole" out of the background layer.

    Args:
        background: uint8 HxWx3 background layer (foreground zeroed out).
        mask_hard: uint8 0/255 hard foreground mask (the hole to fill).
        original_image_np: Unused here; kept so the call signature stays
            stable for existing callers.

    Returns:
        (final_bg, alpha_no_halo): the inpainted background, and an
        eroded + distance-feathered float32 HxWx1 alpha. NOTE: the caller
        deliberately ignores alpha_no_halo for the animation (the soft
        mask is used instead).
    """
    print("... (3/5) Reconstructing background")

    # Dilate the hole slightly so inpainting also covers edge bleed.
    kernel = np.ones((7,7), np.uint8)
    mask_dilated = cv2.dilate(mask_hard, kernel, iterations=1)
    bg_inpainted = cv2.inpaint(background, mask_dilated, inpaintRadius=6, flags=cv2.INPAINT_TELEA)

    # Edge-preserving smoothing of the inpainted texture, applied only
    # inside the (dilated) hole; everywhere else keep the original pixels.
    bg_smooth = cv2.bilateralFilter(bg_inpainted, d=9, sigmaColor=75, sigmaSpace=75)
    final_bg = np.where(mask_dilated[..., None] == 255, bg_smooth, background)

    # Erode ~2px then feather via distance transform to build a halo-free
    # alpha. (Distance is 0 outside the eroded mask, so no explicit
    # zeroing is needed; the notebook version zeroes it explicitly.)
    k3 = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3,3))
    mask_erode = cv2.erode(mask_hard, k3, iterations=1)
    dist = cv2.distanceTransform(mask_erode, cv2.DIST_L2, 5)
    alpha_no_halo = dist / 6.0  # feather width ~6px; tune 8-14 for softer edges
    alpha_no_halo = np.clip(alpha_no_halo, 0, 1).astype(np.float32)
    alpha_no_halo = alpha_no_halo[..., None]  # HxWx1

    return final_bg, alpha_no_halo
136
+
137
+ # ==================================================================
138
+ # Helper Function 4: Animation
139
+ # ==================================================================
140
+ # This is the animation function (from V2 logic), which is correct (uses normalization to prevent gaps).
141
def create_multi_layer_animation(
    image_original,
    background_clean,
    alpha_mask,  # KEY: the full-size soft mask (mask_soft) is passed here
    depth_map,
    n_frames=60,
    parallax_strength=12,
    blur_strength=1.0,
    direction='right',
    zoom_center=1.10,
    zoom_peak=1.05
):
    """Render parallax frames by compositing three depth layers.

    The image is split into foreground (alpha_mask), mid-ground and
    far-ground (background split at the median depth of non-foreground
    pixels). Each layer translates by a different amount per frame and
    the background layers are blurred, producing a 2.5D parallax effect.
    Masks are re-normalized per frame so the three layers always sum to
    full coverage (no gaps / black borders after warping).

    Args:
        image_original: uint8 HxWx3 source image.
        background_clean: uint8 HxWx3 inpainted background.
        alpha_mask: soft foreground mask; 2-D or HxWx1, [0,1] or [0,255].
        depth_map: 2-D depth map (foreground = high values assumed —
            consistent with get_depth_map's output here).
        n_frames: number of frames in one full sine cycle.
        parallax_strength: peak foreground shift in pixels.
        blur_strength: multiplier on the mid/far Gaussian blur kernels.
        direction: 'right' | 'left' | 'up' | 'down' (default 'right').
        zoom_center, zoom_peak: scale at the cycle's rest and extremes.

    Returns:
        list of uint8 HxWx3 frames (length n_frames).
    """
    print(f"... (4/5) Generating {n_frames} animation frames")
    print(f" Params: Parallax={parallax_strength}px, Blur={blur_strength}x, Dir={direction}")

    h, w = image_original.shape[:2]

    # --- 1. Prepare motion and blur settings ---
    direction_map = {'right': (1, 0), 'left': (-1, 0), 'up': (0, -1), 'down': (0, 1)}
    dx, dy = direction_map.get(direction, (1, 0))

    # Layer shifts: foreground moves fully, mid at 1/2, far at 1/6.
    fg_shift = parallax_strength
    mid_shift = parallax_strength * 0.5
    far_shift = parallax_strength * (2 / 12)

    # Gaussian kernel sizes must be odd; bump even results up by one.
    base_mid_k = 9
    base_far_k = 35
    mid_k_raw = int(base_mid_k * blur_strength)
    far_k_raw = int(base_far_k * blur_strength)
    mid_k = (mid_k_raw + 1) if (mid_k_raw > 0 and mid_k_raw % 2 == 0) else max(1, mid_k_raw)
    far_k = (far_k_raw + 1) if (far_k_raw > 0 and far_k_raw % 2 == 0) else max(1, far_k_raw)
    mid_blur_ksize = (mid_k, mid_k)
    far_blur_ksize = (far_k, far_k)
    print(f" ...Using blur kernels: Mid={mid_blur_ksize}, Far={far_blur_ksize}")

    # --- 2. Prepare base masks (FG vs BG) ---
    # Normalize the alpha mask to float [0,1], HxWx1.
    if alpha_mask.max() > 1:
        alpha_mask = alpha_mask.astype(np.float32) / 255.0
    if alpha_mask.ndim == 2:
        alpha_mask = alpha_mask[..., None]

    fg_mask_3ch = np.repeat(alpha_mask, 3, axis=2)  # full-size foreground
    bg_mask_3ch = 1.0 - fg_mask_3ch                 # full-size background "hole"

    # --- 3. Create mid-ground / far-ground masks ---
    if depth_map.ndim == 3:
        depth_map = cv2.cvtColor(depth_map, cv2.COLOR_BGR2GRAY)

    # Split the background at the median depth of non-foreground pixels.
    bg_depth_values = depth_map[alpha_mask[..., 0] < 0.5]
    if len(bg_depth_values) > 0:
        bg_split_threshold = np.percentile(bg_depth_values, 50)
    else:
        bg_split_threshold = 0.5  # fallback when the mask covers everything

    raw_mid_mask = (depth_map > bg_split_threshold).astype(np.float32)
    raw_mid_mask_smooth = cv2.GaussianBlur(raw_mid_mask, (21, 21), 0)
    if raw_mid_mask_smooth.ndim == 2:
        raw_mid_mask_smooth = raw_mid_mask_smooth[..., None]

    raw_mid_mask_smooth_3ch = np.repeat(raw_mid_mask_smooth, 3, axis=2)

    # --- 4. Generate the final 3 mutually exclusive masks ---
    # fg + mid + far == 1 everywhere, so the composite has no gaps/overlaps.
    mid_mask_3ch = raw_mid_mask_smooth_3ch * bg_mask_3ch
    far_mask_3ch = (1.0 - raw_mid_mask_smooth_3ch) * bg_mask_3ch

    frames = []

    # --- 5. Loop to generate each frame ---
    for i in range(n_frames):
        # Sine easing gives a seamless back-and-forth loop.
        phase = (i / n_frames) * 2 * np.pi
        ease = np.sin(phase)

        # Zoom oscillates between zoom_center (rest) and zoom_peak (extremes).
        zoom_range = zoom_center - zoom_peak
        scale = zoom_center - (zoom_range * abs(ease))
        center = (w / 2, h / 2)
        M_scale = cv2.getRotationMatrix2D(center, 0, scale)

        # Pure-translation affine matrices, one per layer.
        M_fg_trans = np.float32([[1, 0, dx*ease*fg_shift], [0, 1, dy*ease*fg_shift]])
        M_mid_trans = np.float32([[1, 0, dx*ease*mid_shift], [0, 1, dy*ease*mid_shift]])
        M_far_trans = np.float32([[1, 0, dx*ease*far_shift], [0, 1, dy*ease*far_shift]])

        # --- Layer transforms: translate, then zoom, then (bg only) blur ---
        fg_warped = cv2.warpAffine(image_original, M_fg_trans, (w,h), borderMode=cv2.BORDER_REFLECT_101)
        fg_final = cv2.warpAffine(fg_warped, M_scale, (w,h), borderMode=cv2.BORDER_REFLECT_101).astype(np.float32)

        mid_warped = cv2.warpAffine(background_clean, M_mid_trans, (w,h), borderMode=cv2.BORDER_REPLICATE)
        mid_warped_scaled = cv2.warpAffine(mid_warped, M_scale, (w,h), borderMode=cv2.BORDER_REPLICATE)
        mid_final = cv2.GaussianBlur(mid_warped_scaled, mid_blur_ksize, 0).astype(np.float32)

        far_warped = cv2.warpAffine(background_clean, M_far_trans, (w,h), borderMode=cv2.BORDER_REPLICATE)
        far_warped_scaled = cv2.warpAffine(far_warped, M_scale, (w,h), borderMode=cv2.BORDER_REPLICATE)
        far_final = cv2.GaussianBlur(far_warped_scaled, far_blur_ksize, 0).astype(np.float32)

        # --- Mask transforms: each mask follows its own layer exactly ---
        fg_mask_warped = cv2.warpAffine(fg_mask_3ch, M_fg_trans, (w,h))
        fg_mask_warped = cv2.warpAffine(fg_mask_warped, M_scale, (w,h))

        mid_mask_warped = cv2.warpAffine(mid_mask_3ch, M_mid_trans, (w,h))
        mid_mask_warped = cv2.warpAffine(mid_mask_warped, M_scale, (w,h))

        far_mask_warped = cv2.warpAffine(far_mask_3ch, M_far_trans, (w,h))
        far_mask_warped = cv2.warpAffine(far_mask_warped, M_scale, (w,h))

        # --- Final composite ---
        # Re-normalize masks so they still sum to 1 after independent
        # warping (prevents black borders / tiny gaps); 1e-6 avoids /0.
        total_mask = fg_mask_warped + mid_mask_warped + far_mask_warped + 1e-6
        fg_mask_warped /= total_mask
        mid_mask_warped /= total_mask
        far_mask_warped /= total_mask

        # Weighted blend of the three layers.
        composite = (fg_final * fg_mask_warped) + \
                    (mid_final * mid_mask_warped) + \
                    (far_final * far_mask_warped)

        frame = np.clip(composite, 0, 255).astype(np.uint8)
        frames.append(frame)

    print(f"... (4/5) Frame generation complete.")
    return frames
264
+
265
+ # ==================================================================
266
+ # MAIN GRADIO FUNCTION (Ties everything together)
267
+ # ==================================================================
268
+
269
def generate_parallax_effect(input_image_np, parallax_strength, blur_strength, animation_direction):
    """Gradio entry point: full pipeline from uploaded image to GIF path.

    Steps: resize -> depth estimation -> FG/BG separation -> background
    inpainting -> multi-layer parallax animation -> GIF written to the
    server's working directory.

    Args:
        input_image_np: uint8 numpy image from the gr.Image input.
        parallax_strength: peak foreground shift in pixels (slider).
        blur_strength: background-blur multiplier (slider).
        animation_direction: 'right' | 'left' | 'up' | 'down' (dropdown).

    Returns:
        Path of the saved GIF (relative filename), displayed by gr.Image.
    """
    print("\n--- Processing new image ---")

    # --- 0. Image Preparation ---
    image_pil = Image.fromarray(input_image_np).convert('RGB')

    # Downscale so the longest side is <= 640px (keeps depth inference fast).
    max_size = 640
    if max(image_pil.size) > max_size:
        ratio = max_size / max(image_pil.size)
        new_size = tuple(int(dim * ratio) for dim in image_pil.size)
        image_pil = image_pil.resize(new_size, Image.LANCZOS)

    image_resized_np = np.array(image_pil)
    print(f"Image resized to: {image_pil.size}")

    # --- 1. Get Depth Map ---
    depth_map_0_1 = get_depth_map(image_pil, processor, model, device)

    # --- 2. Layer Separation ---
    # Input is RGB (PIL), so no BGR conversion. mask_soft is the full-size
    # feathered mask used later for compositing.
    foreground, background, mask_hard, mask_soft = separate_foreground_background(
        image_pil,
        depth_map_0_1,
        assume_bgr_input=False,
        near_is_foreground=True,
        foreground_depth_is_high=True
    )

    # --- 3. Background Reconstruction ---
    # alpha_no_halo is returned but intentionally unused: the soft mask
    # gave better edges for the animation (see V4 notes in the header).
    final_bg, alpha_no_halo = reconstruct_background(background, mask_hard, image_resized_np)

    # --- 4. Animation ---
    # KEY FIX (V4): pass mask_soft (full-size soft mask) as the alpha.
    multi_layer_frames = create_multi_layer_animation(
        image_original=image_resized_np,
        background_clean=final_bg,
        alpha_mask=mask_soft,
        depth_map=depth_map_0_1,
        n_frames=60,
        parallax_strength=parallax_strength,
        blur_strength=blur_strength,
        direction=animation_direction
    )

    # --- 5. Save GIF and Return Path ---
    print("... (5/5) Saving final GIF")
    # Unix timestamp keeps concurrent requests from clobbering each other
    # (only to 1-second resolution — two requests in the same second collide).
    timestamp = int(time.time())
    output_filename = f'parallax_final_{timestamp}.gif'

    # Written to the SERVER'S disk; Gradio serves it back to the browser.
    # NOTE(review): imageio's GIF writer has interpreted `duration` as
    # seconds historically but as milliseconds in newer plugin versions —
    # confirm against the pinned imageio version in requirements.txt.
    imageio.mimsave(output_filename, multi_layer_frames, duration=0.04, loop=0)
    print(f"--- Processing complete! Saved to {output_filename} ---")

    # Only the GIF filepath is returned (single output component).
    return output_filename
328
+
329
+ # ==================================================================
330
+ # Gradio Interface (Modified)
331
+ # ==================================================================
332
+
333
print("Creating Gradio interface...")

# --- 1. Define Input Components ---
# type="numpy" delivers the upload as a uint8 ndarray to the handler.
input_image = gr.Image(label="1. Upload Your Image", type="numpy")

param_parallax = gr.Slider(
    minimum=0,
    maximum=30,
    value=12,
    step=1,
    label="2. Parallax Strength (px)",
    info="Foreground motion in pixels. Higher = stronger 3D effect."
)

param_blur = gr.Slider(
    minimum=0.0,
    maximum=2.0,
    value=1.0,
    step=0.1,
    label="3. Aperture / Blur Strength",
    info="Controls background blur (bokeh). 0 = no blur, 1 = default, 2 = max blur."
)

param_direction = gr.Dropdown(
    choices=['right', 'left', 'up', 'down'],
    value='right',
    label="4. Animation Direction"
)

# --- 2. Define Output Components ---
# A single gr.Image displays the generated GIF; the component's built-in
# download button (top-right corner) covers the download requirement.
output_gif = gr.Image(label="Generated Parallax GIF")


# --- 4. Create Interface ---
iface = gr.Interface(
    fn=generate_parallax_effect,
    inputs=[input_image, param_parallax, param_blur, param_direction],
    outputs=output_gif,
    title="📸 3D Parallax Photo Animator (CS5330-HW4)",
    description="""
    Upload a photo (ideally with a clear foreground and background) to generate a 3D parallax and depth-of-field animation.

    1. Upload an image.
    2. Adjust the 3 parameters below.
    3. Click "Submit".

    Processing may take 30-60 seconds. You can find the download button in the top-right corner of the generated GIF.
    """,
)

if __name__ == "__main__":
    # share=False: local serving only (Spaces handles public exposure).
    iface.launch(share=False)
cs5330_hw4.py ADDED
@@ -0,0 +1,633 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """CS5330-HW4.ipynb
3
+
4
+ Automatically generated by Colab.
5
+
6
+ Original file is located at
7
+ https://colab.research.google.com/drive/1s2_5NEaW54KPPeoQy2TlhZxD_HHtF9fg
8
+
9
+ # Part 0. Transformer Setup
10
+ """
11
+
12
+ import torch
13
+ import numpy as np
14
+ import matplotlib.pyplot as plt
15
+ from PIL import Image
16
+ from transformers import DPTImageProcessor, DPTForDepthEstimation
17
+ import requests
18
+
19
+ """## Initialize Depth Model"""
20
+
21
# ============================================
# Initialize the Depth Model
# ============================================
print("Loading Intel DPT depth estimation model...")
processor = DPTImageProcessor.from_pretrained("Intel/dpt-large")
model = DPTForDepthEstimation.from_pretrained("Intel/dpt-large")
model.eval()  # Set to evaluation mode (disables dropout/batch-norm updates)

# Use GPU if available for faster processing
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)
print(f"Model loaded on {device}")

"""## Image Preparation"""

# ============================================
# Load and Prepare Your Image
# ============================================
# Option 1: Load from URL
image_url = "https://images.pexels.com/photos/1681010/pexels-photo-1681010.jpeg"
image = Image.open(requests.get(image_url, stream=True).raw).convert('RGB')

# Option 2: Upload from local (in Colab)
# from google.colab import files
# uploaded = files.upload()
# image = Image.open(list(uploaded.keys())[0]).convert('RGB')

# Resize so the longest side is <= 640px for faster depth inference
# (optional but recommended).
max_size = 640
if max(image.size) > max_size:
    ratio = max_size / max(image.size)
    new_size = tuple(int(dim * ratio) for dim in image.size)
    image = image.resize(new_size, Image.LANCZOS)

print(f"Image size: {image.size}")

"""## Depth Map Extraction"""

# ============================================
# Extract Depth Map
# ============================================
# Prepare image for the model
inputs = processor(images=image, return_tensors="pt")
inputs = {k: v.to(device) for k, v in inputs.items()}

# Run depth estimation (inference only, no gradients)
with torch.no_grad():
    outputs = model(**inputs)
    predicted_depth = outputs.predicted_depth

# Interpolate to original size and normalize
# (PIL .size is (width, height); interpolate wants (height, width))
prediction = torch.nn.functional.interpolate(
    predicted_depth.unsqueeze(1),
    size=image.size[::-1],  # (height, width)
    mode="bicubic",
    align_corners=False,
)

# Convert to numpy and normalize to 0-1 range
# (divides by zero if the prediction is perfectly flat — fine for photos)
depth_map = prediction.squeeze().cpu().numpy()
depth_map = (depth_map - depth_map.min()) / (depth_map.max() - depth_map.min())

"""## Image Visualization"""

# ============================================
# Visualize Results
# ============================================
fig, axes = plt.subplots(1, 2, figsize=(12, 5))

# Original image
axes[0].imshow(image)
axes[0].set_title('Original Image')
axes[0].axis('off')

# Depth map
im = axes[1].imshow(depth_map, cmap='plasma')
axes[1].set_title('Depth Map (Yellow=Close, Purple=Far)')
axes[1].axis('off')
plt.colorbar(im, ax=axes[1], fraction=0.046)

plt.tight_layout()
plt.show()

print(f"Depth map shape: {depth_map.shape}")
print(f"Depth range: [{depth_map.min():.3f}, {depth_map.max():.3f}]")
print("Ready for processing!")
# depth_map is now a normalized numpy array where:
# - Values close to 1.0 = near to camera (yellow in visualization)
# - Values close to 0.0 = far from camera (purple in visualization)
# Use this depth_map for all subsequent processing!
111
+
112
+ """# Part 1. Depth-Guided Layer Separation"""
113
+
114
+ import cv2
115
+ import numpy as np
116
+ from matplotlib import pyplot as plt
117
+
118
+ """At this point ,we should have [image] and [depth_map] available"""
119
+
120
+ import numpy as np
121
+ import cv2
122
+
123
def separate_foreground_background(image, depth_map, *,
                                   assume_bgr_input=True,
                                   near_is_foreground=True,
                                   foreground_depth_is_high=True):
    """Split an image into foreground/background layers using a depth map.

    Notebook version of the same helper that ships in app.py; keep the
    two in sync if either is edited.

    Params
    ------
    assume_bgr_input: Whether the input image is in OpenCV's typical BGR format (True converts to RGB; set to False for RGB input)
    near_is_foreground: Whether near objects are the foreground (True is common for "face/subject is closer")
    foreground_depth_is_high: Whether the foreground's "depth value" is higher (In your map: foreground is brighter -> higher value -> True)

    Returns
    -------
    (foreground, background, mask_clean, mask_soft): uint8 FG/BG layers,
    a hard 0/255 uint8 mask, and a float32 [0,1] feathered mask.
    """

    # ---- 1) Unify formats ----
    if not isinstance(image, np.ndarray):
        image = np.array(image)
    if not isinstance(depth_map, np.ndarray):
        depth_map = np.array(depth_map)

    # Only convert to RGB if explicitly BGR (avoids unnecessary BGR<->RGB round-trips causing color shifts)
    if assume_bgr_input and image.ndim == 3 and image.shape[2] == 3:
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    if image.ndim == 2:  # Convert grayscale to 3 channels as well
        image = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
    if depth_map.ndim == 3:
        depth_map = depth_map[:, :, 0]

    # ---- 2) Depth -> Binary Mask (Foreground=1) ----
    depth_norm = cv2.normalize(depth_map, None, 0, 255, cv2.NORM_MINMAX).astype(np.uint8)
    depth_smooth = cv2.GaussianBlur(depth_norm, (5, 5), 0)

    # Key correction:
    # In your example: near=foreground AND foreground depth value is higher (brighter),
    # Therefore, THRESH_BINARY should be used (takes high values as 1), otherwise FG/BG will be inverted.
    # Net effect: same parity of the two flags -> BINARY, opposite -> INV.
    if near_is_foreground and foreground_depth_is_high:
        thresh_flag = cv2.THRESH_BINARY
    elif near_is_foreground and not foreground_depth_is_high:
        thresh_flag = cv2.THRESH_BINARY_INV
    elif (not near_is_foreground) and foreground_depth_is_high:
        thresh_flag = cv2.THRESH_BINARY_INV
    else:  # not near_is_foreground and not foreground_depth_is_high
        thresh_flag = cv2.THRESH_BINARY

    # Otsu chooses the threshold; the passed 0 is ignored.
    _, binary_mask = cv2.threshold(depth_smooth, 0, 255, thresh_flag + cv2.THRESH_OTSU)

    # Cleanup and find largest connected component
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5))
    mask_clean = cv2.morphologyEx(binary_mask, cv2.MORPH_OPEN, kernel, iterations=1)
    mask_clean = cv2.morphologyEx(mask_clean, cv2.MORPH_CLOSE, kernel, iterations=2)

    num_labels, labels, stats, _ = cv2.connectedComponentsWithStats(mask_clean, 8)
    if num_labels > 1:
        largest_label = 1 + np.argmax(stats[1:, cv2.CC_STAT_AREA])
        mask_clean = (labels == largest_label).astype(np.uint8) * 255

    # Soften edges
    mask_soft = cv2.GaussianBlur(mask_clean, (9, 9), 5).astype(np.float32) / 255.0

    # ---- 3) Composite (Foreground=img*mask, Background=img*(1-mask)) ----
    img_f = image.astype(np.float32) / 255.0
    mask_3 = np.dstack([mask_soft]*3)

    foreground = np.clip(img_f * mask_3, 0, 1)
    background = np.clip(img_f * (1.0 - mask_3), 0, 1)

    foreground = (foreground * 255.0).astype(np.uint8)
    background = (background * 255.0).astype(np.uint8)

    return foreground, background, mask_clean, mask_soft
192
+
193
+ from PIL import Image as PILImage
194
+
195
def visualize_results(image, depth_map, foreground, background, mask, mask_soft):
    """Show a 2x3 grid: original, depth map, hard mask, soft mask, FG, BG."""
    # (data, title, colormap) for each panel, in row-major display order.
    panels = [
        (image, 'Original Image', None),
        (depth_map, 'Depth Map', 'plasma'),
        (mask, 'Binary Mask (Cleaned)', 'gray'),
        (mask_soft, 'Soft Mask (Blurred)', 'gray'),
        (foreground, 'Foreground', None),
        (background, 'Background', None),
    ]
    fig, axes = plt.subplots(2, 3, figsize=(15, 10))
    for ax, (data, title, cmap) in zip(axes.flat, panels):
        if cmap is None:
            ax.imshow(data)
        else:
            ax.imshow(data, cmap=cmap)
        ax.set_title(title)
        ax.axis('off')
    plt.tight_layout()
    plt.show()
218
+
219
# Run the separation on the notebook's globals. Input is RGB (loaded via
# PIL), so assume_bgr_input=False; DPT depth is high-for-near, so both
# polarity flags are True.
foreground, background, mask_hard, mask_soft = separate_foreground_background(image, depth_map, assume_bgr_input=False,
                                                                              near_is_foreground=True,
                                                                              foreground_depth_is_high=True)

# separate_foreground_background does not mutate its argument, so `image`
# may still be a PIL Image here; coerce for matplotlib.
if not isinstance(image, np.ndarray):
    image = np.array(image)

visualize_results(image, depth_map, foreground, background, mask_hard, mask_soft)
227
+
228
+ """# Part 3: Intelligent Background Reconstruction"""
229
+
230
# Dilate the foreground hole so inpainting also covers edge bleed.
kernel = np.ones((7,7), np.uint8)
mask_dilated = cv2.dilate(mask_hard, kernel, iterations=1)

# 1️⃣ Inpaint (Telea fast-marching fill of the dilated hole)
bg_inpainted = cv2.inpaint(background, mask_dilated, inpaintRadius=6, flags=cv2.INPAINT_TELEA)

# 2️⃣ Smooth (edge-preserving bilateral filter over the inpainted result)
bg_smooth = cv2.bilateralFilter(bg_inpainted, d=9, sigmaColor=75, sigmaSpace=75)

# 3️⃣ Optional: Only replace in the mask region
final_bg = np.where(mask_dilated[..., None] == 255, bg_smooth, background)

# -- New: Use the hard mask to create an eroded+feathered alpha with no black halo (recommended to pass this)
k3 = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3,3))  # Approximately erode 2px
mask_erode = cv2.erode(mask_hard, k3, iterations=1)
dist = cv2.distanceTransform(mask_erode, cv2.DIST_L2, 5)
alpha_no_halo = dist / 6  # feather≈12, adjustable 8~14
alpha_no_halo = np.clip(alpha_no_halo, 0, 1).astype(np.float32)
alpha_no_halo[mask_erode == 0] = 0.0
alpha_no_halo = alpha_no_halo[..., None]  # HxWx1
original_rgb = image

# 4️⃣ Display
plt.figure(figsize=(10,5))
plt.subplot(1,2,1); plt.title("Original Background with Hole"); plt.imshow(background); plt.axis('off')
plt.subplot(1,2,2); plt.title("Clean Background (Inpainted)"); plt.imshow(final_bg); plt.axis('off')
plt.show()


import matplotlib.pyplot as plt
import numpy as np

# Check if the alpha mask is correct (5-panel diagnostic figure)
plt.figure(figsize=(20, 5))

plt.subplot(151)
plt.imshow(image)
plt.title('Original Image')
plt.axis('off')

plt.subplot(152)
plt.imshow(alpha_no_halo.squeeze(), cmap='gray')
plt.title('Alpha Mask\n(Should cover full person)')
plt.axis('off')

plt.subplot(153)
plt.imshow(final_bg)
plt.title('Clean Background')
plt.axis('off')

# Test composite: alpha-blend original over the clean background to see
# whether the alpha erodes into the subject.
fg_float = image.astype(np.float32) / 255.0
bg_float = final_bg.astype(np.float32) / 255.0
test_composite = fg_float * alpha_no_halo + bg_float * (1.0 - alpha_no_halo)
test_composite = (np.clip(test_composite, 0, 1) * 255).astype(np.uint8)

plt.subplot(154)
plt.imshow(test_composite)
plt.title('Test Composite\n(Does BG eat face?)')
plt.axis('off')

# Highlight areas where alpha < 0.5 (potential lost person areas)
alpha_highlight = image.copy()
low_alpha_mask = alpha_no_halo.squeeze() < 0.5
alpha_highlight[low_alpha_mask] = [255, 0, 0]  # Mark with red

plt.subplot(155)
plt.imshow(alpha_highlight)
plt.title('Red = Alpha < 0.5\n(Lost person areas)')
plt.axis('off')

plt.tight_layout()
plt.show()

# Print diagnostic info
print("Alpha Mask Statistics:")
print(f" Min: {alpha_no_halo.min():.3f}")
print(f" Max: {alpha_no_halo.max():.3f}")
print(f" Shape: {alpha_no_halo.shape}")
print(f" Pixels with alpha > 0.9: {(alpha_no_halo > 0.9).sum()}")
print(f" Pixels with alpha < 0.5: {(alpha_no_halo < 0.5).sum()}")
311
+
312
+ """# Part 4: Depth-Aware Motion Synthesis"""
313
+
314
+ import numpy as np
315
+ import cv2
316
+
317
def create_motion_frames(
    image_original,
    background_clean,
    alpha_mask,
    n_frames=30,
    fg_shift=12,
    bg_shift=4,
    direction='right',
    scale_effect=0.015
):
    """
    Part 4: Generate the frames of a two-layer parallax animation.

    The foreground (selected by ``alpha_mask``) and the clean background
    plate are translated by different per-frame amounts and re-composited.
    Only motion and compositing happen here; depth-of-field blur is added
    later (Part 5).

    Parameters
    ----------
    image_original : np.ndarray
        H x W x 3 source frame containing the foreground subject.
    background_clean : np.ndarray
        H x W x 3 inpainted background plate (subject removed).
    alpha_mask : np.ndarray
        Foreground matte; 2-D, H x W x 1 or H x W x 3, either 0-255 or 0-1.
    n_frames : int
        Number of frames in one full back-and-forth cycle.
    fg_shift, bg_shift : float
        Peak displacement in pixels for foreground / background layers.
    direction : str
        One of 'right', 'left', 'up', 'down' (unknown values fall back
        to rightward motion).
    scale_effect : float
        Extra zoom applied at the motion extremes (0 disables zoom).

    Returns
    -------
    list[np.ndarray]
        ``n_frames`` uint8 composited frames.
    """
    height, width = image_original.shape[:2]

    # Unit direction vector; .get() keeps unknown names from crashing.
    offsets = {
        'right': (1, 0),
        'left': (-1, 0),
        'up': (0, -1),
        'down': (0, 1)
    }
    dx, dy = offsets.get(direction, (1, 0))

    # Normalise the matte into a 3-channel float mask in [0, 1].
    matte = alpha_mask
    if matte.ndim == 2:
        matte = matte[..., None]
    if matte.shape[2] == 1:
        matte = np.repeat(matte, 3, axis=2)
    matte = matte.astype(np.float32)
    if matte.max() > 1:
        matte /= 255.0
    matte = np.clip(matte, 0.0, 1.0)

    frames = []
    print(f"Generating {n_frames} motion frames (Part 4)...")

    for idx in range(n_frames):
        # Sinusoidal easing (0 -> 1 -> 0 -> -1 -> 0) gives smooth
        # back-and-forth motion that loops cleanly as a GIF.
        ease = np.sin((idx / n_frames) * 2 * np.pi)

        # Zoom grows with |ease|: 1.0 at rest, 1.0 + scale_effect at peaks.
        scale = 1.0 + abs(ease) * scale_effect

        # Per-layer displacement for this frame.
        fg_dx, fg_dy = dx * ease * fg_shift, dy * ease * fg_shift
        bg_dx, bg_dy = dx * ease * bg_shift, dy * ease * bg_shift

        # Affine matrices: pure translation per layer plus a shared zoom
        # about the image centre.
        shift_fg = np.float32([[1, 0, fg_dx], [0, 1, fg_dy]])
        shift_bg = np.float32([[1, 0, bg_dx], [0, 1, bg_dy]])
        zoom = cv2.getRotationMatrix2D((width / 2, height / 2), 0, scale)

        # Foreground layer: reflect at borders to avoid black edges.
        fg_layer = cv2.warpAffine(
            image_original, shift_fg, (width, height),
            flags=cv2.INTER_LINEAR, borderMode=cv2.BORDER_REFLECT_101
        )
        fg_layer = cv2.warpAffine(
            fg_layer, zoom, (width, height),
            flags=cv2.INTER_LINEAR, borderMode=cv2.BORDER_REFLECT_101
        )

        # Background layer: replicate edge pixels instead of reflecting.
        bg_layer = cv2.warpAffine(
            background_clean, shift_bg, (width, height),
            flags=cv2.INTER_LINEAR, borderMode=cv2.BORDER_REPLICATE
        )
        bg_layer = cv2.warpAffine(
            bg_layer, zoom, (width, height),
            flags=cv2.INTER_LINEAR, borderMode=cv2.BORDER_REPLICATE
        )

        # The matte must follow the exact same transform as the foreground,
        # with zero padding so shifted-in border areas read as background.
        warped_matte = cv2.warpAffine(
            matte, shift_fg, (width, height),
            flags=cv2.INTER_LINEAR, borderMode=cv2.BORDER_CONSTANT, borderValue=0
        )
        warped_matte = cv2.warpAffine(
            warped_matte, zoom, (width, height),
            flags=cv2.INTER_LINEAR, borderMode=cv2.BORDER_CONSTANT, borderValue=0
        )
        warped_matte = np.clip(warped_matte, 0, 1)

        # Alpha-blend the two layers and convert back to uint8.
        blended = (fg_layer.astype(np.float32) * warped_matte
                   + bg_layer.astype(np.float32) * (1.0 - warped_matte))
        frames.append(np.clip(blended, 0, 255).astype(np.uint8))

    print("Part 4: Motion frames generated.")
    return frames
428
+
429
# Generate the Part 4 (motion-only) preview using globals produced earlier
# in the notebook: `image` (original RGB frame), `final_bg` (inpainted
# clean background) and `alpha_no_halo` (halo-free foreground matte).
# scale_effect=0.00 disables the zoom so only the parallax shift is visible.
motion_frames = create_motion_frames(
    image_original=image,
    background_clean=final_bg,
    alpha_mask=alpha_no_halo,
    n_frames=30,
    fg_shift=12,
    bg_shift=4,
    direction='right',
    scale_effect=0.00
)

# Save a "no blur" GIF to preview the motion.
import imageio.v2 as imageio
output_filename = 'parallax_part4_only.gif'
# duration=0.04 -> ~25 fps; loop=0 -> loop forever; palettesize=192 trims
# the GIF palette to keep the file small.
imageio.mimsave(output_filename, motion_frames, duration=0.04, loop=0, optimize=True, palettesize=192)
print("Part 4 preview GIF saved!")
447
+
448
+ """# Part 5: Depth-of-Field & Bokeh Effects"""
449
+
450
+ import numpy as np
451
+ import cv2
452
+ import imageio.v2 as imageio
453
+
454
def create_multi_layer_animation(
    image_original,
    background_clean,
    alpha_mask, # This is your 'alpha_no_halo' (H x W or H x W x 1; 0-255 or 0-1)
    depth_map, # This is your 'depth_map' (needs to be single-channel, 0-1 float)
    n_frames=60,

    # --- 1. Multi-layer motion settings ---
    fg_shift=12, # Foreground (person) moves 12px
    mid_shift=6, # Mid-ground (near background) moves 6px
    far_shift=2, # Far-ground (far background) moves 2px

    # --- 2. Dynamic zoom settings ---
    zoom_center=1.10,
    zoom_peak=1.05,

    # --- 3. Dynamic Depth-of-Field settings ---
    mid_blur_ksize=(9, 9), # Mid-ground blur (f/5.6)
    far_blur_ksize=(35, 35), # Far-ground blur (f/1.4)

    direction='right'
):
    """
    Generate a 3-layer parallax animation with depth-of-field and zoom.

    The scene is split into foreground (from ``alpha_mask``), mid-ground
    and far-ground (background split at the median of its depth values).
    Each layer is translated by its own shift, shares a per-frame zoom,
    and the two background layers receive different Gaussian blurs to
    simulate depth of field. Masks are warped with their layers and
    re-normalised before compositing to avoid seams.

    Returns a list of ``n_frames`` uint8 frames (H x W x 3).
    """
    print("--- Start generating advanced multi-layer animation ---")

    h, w = image_original.shape[:2]

    # --- 1. Prepare motion ---
    direction_map = {'right': (1, 0), 'left': (-1, 0), 'up': (0, -1), 'down': (0, 1)}
    dx, dy = direction_map.get(direction, (1, 0))

    # --- 2. Prepare base masks (Foreground vs. Background) ---
    # Ensure alpha_mask is a 0-1 float
    if alpha_mask.max() > 1:
        alpha_mask = alpha_mask.astype(np.float32) / 255.0
    if alpha_mask.ndim == 2:
        alpha_mask = alpha_mask[..., None]

    # Create foreground mask (fg_mask) and background mask (bg_mask)
    fg_mask_3ch = np.repeat(alpha_mask, 3, axis=2)
    bg_mask_3ch = 1.0 - fg_mask_3ch

    # --- 3. Create mid/far-ground masks (outside loop) ---
    print("...Analyzing depth map and creating layers...")

    # Ensure depth_map is single-channel
    # NOTE(review): assumes a 3-channel depth map is BGR-ordered — confirm
    # against the producer if depth ever arrives as RGB.
    if depth_map.ndim == 3:
        depth_map = cv2.cvtColor(depth_map, cv2.COLOR_BGR2GRAY)

    # Find depth values in the background region (where alpha < 0.5)
    bg_depth_values = depth_map[alpha_mask[..., 0] < 0.5]

    # Find the 50th percentile (median) of background depth as the split point
    if len(bg_depth_values) > 0:
        bg_split_threshold = np.percentile(bg_depth_values, 50)
    else:
        bg_split_threshold = 0.5 # If no background, just use a default value

    print(f" Background depth split point: {bg_split_threshold:.4f}")

    # Create a raw binary mask (1 = mid-ground, 0 = far-ground)
    # Depth value > threshold = closer = mid-ground
    raw_mid_mask = (depth_map > bg_split_threshold).astype(np.float32)

    # Blur this mask for a smooth transition between mid and far grounds
    # Note: this mask currently includes the person/foreground area
    raw_mid_mask_smooth = cv2.GaussianBlur(raw_mid_mask, (21, 21), 0)
    if raw_mid_mask_smooth.ndim == 2:
        raw_mid_mask_smooth = raw_mid_mask_smooth[..., None]

    # Expand to 3 channels
    raw_mid_mask_smooth_3ch = np.repeat(raw_mid_mask_smooth, 3, axis=2)

    # --- 4. Generate the final 3 mutually exclusive masks ---
    # Mid-ground mask = (smooth mid mask) * (background mask)
    # This "cuts out" the person, leaving only the mid-ground part of the background
    mid_mask_3ch = raw_mid_mask_smooth_3ch * bg_mask_3ch

    # Far-ground mask = (1.0 - smooth mid mask) * (background mask)
    # This "cuts out" the person, leaving only the far-ground part of the background
    far_mask_3ch = (1.0 - raw_mid_mask_smooth_3ch) * bg_mask_3ch

    # fg_mask_3ch (foreground) + mid_mask_3ch (mid-ground) + far_mask_3ch (far-ground)
    # These three masks now sum to 1.0 (the whole image) and are mutually exclusive.

    print("...Layers created. Starting frame generation...")

    frames = []

    # --- 5. Loop to generate each frame ---
    for i in range(n_frames):

        # --- 5a. Calculate motion and scale ---
        # Full sine cycle over n_frames -> seamless looping GIF.
        phase = (i / n_frames) * 2 * np.pi
        ease = np.sin(phase)

        # Dynamic zoom: scale oscillates between zoom_center (at rest)
        # and zoom_peak (at motion extremes).
        zoom_range = zoom_center - zoom_peak
        scale = zoom_center - (zoom_range * abs(ease))
        center = (w / 2, h / 2)
        M_scale = cv2.getRotationMatrix2D(center, 0, scale)

        # --- 5b. Create 3 different transformation matrices ---
        M_fg_trans = np.float32([[1, 0, dx*ease*fg_shift], [0, 1, dy*ease*fg_shift]])
        M_mid_trans = np.float32([[1, 0, dx*ease*mid_shift], [0, 1, dy*ease*mid_shift]])
        M_far_trans = np.float32([[1, 0, dx*ease*far_shift], [0, 1, dy*ease*far_shift]])

        # --- 5c. Transform and blur layers ---

        # --- Foreground (sharp) ---
        fg_warped = cv2.warpAffine(image_original, M_fg_trans, (w,h), borderMode=cv2.BORDER_REFLECT_101)
        fg_final = cv2.warpAffine(fg_warped, M_scale, (w,h), borderMode=cv2.BORDER_REFLECT_101).astype(np.float32)

        # --- Mid-ground (slight blur) ---
        mid_warped = cv2.warpAffine(background_clean, M_mid_trans, (w,h), borderMode=cv2.BORDER_REPLICATE)
        mid_warped_scaled = cv2.warpAffine(mid_warped, M_scale, (w,h), borderMode=cv2.BORDER_REPLICATE)
        mid_final = cv2.GaussianBlur(mid_warped_scaled, mid_blur_ksize, 0).astype(np.float32)

        # --- Far-ground (heavy blur) ---
        far_warped = cv2.warpAffine(background_clean, M_far_trans, (w,h), borderMode=cv2.BORDER_REPLICATE)
        far_warped_scaled = cv2.warpAffine(far_warped, M_scale, (w,h), borderMode=cv2.BORDER_REPLICATE)
        far_final = cv2.GaussianBlur(far_warped_scaled, far_blur_ksize, 0).astype(np.float32)

        # --- 5d. Transform the 3 masks ---
        # Masks must be transformed along with their corresponding layers!
        # (default BORDER_CONSTANT=0 is intentional: warped-in borders get
        # zero weight and are filled by the renormalisation in 5e)
        fg_mask_warped = cv2.warpAffine(fg_mask_3ch, M_fg_trans, (w,h))
        fg_mask_warped = cv2.warpAffine(fg_mask_warped, M_scale, (w,h))

        mid_mask_warped = cv2.warpAffine(mid_mask_3ch, M_mid_trans, (w,h))
        mid_mask_warped = cv2.warpAffine(mid_mask_warped, M_scale, (w,h))

        far_mask_warped = cv2.warpAffine(far_mask_3ch, M_far_trans, (w,h))
        far_mask_warped = cv2.warpAffine(far_mask_warped, M_scale, (w,h))

        # --- 5e. Final composite ---
        # Re-normalize the masks to prevent black borders or tiny gaps after warp
        total_mask = fg_mask_warped + mid_mask_warped + far_mask_warped + 1e-6 # Avoid division by zero
        fg_mask_warped /= total_mask
        mid_mask_warped /= total_mask
        far_mask_warped /= total_mask

        # Add the three layers, weighted by their masks
        composite = (fg_final * fg_mask_warped) + \
                    (mid_final * mid_mask_warped) + \
                    (far_final * far_mask_warped)

        frame = np.clip(composite, 0, 255).astype(np.uint8)
        frames.append(frame)

        if (i + 1) % 10 == 0:
            print(f" ...Frame {i+1}/{n_frames} complete")

    print(f"Advanced animation generation complete. Created {n_frames} frames.")
    return frames
610
+
611
# Normalise the depth map into the 0-1 float range the animator expects;
# maps stored as 0-255 are rescaled, already-normalised maps pass through.
divisor = 255.0 if depth_map.max() > 1 else 1.0
depth_map_0_1 = depth_map.astype(np.float32) / divisor


# Call the new multi-layer function
multi_layer_frames = create_multi_layer_animation(
    image_original=image,
    background_clean=final_bg,
    alpha_mask=alpha_no_halo, # Your foreground mask
    depth_map=depth_map_0_1, # Your 0-1 depth map
    n_frames=60,
    fg_shift=12,
    mid_shift=6,
    far_shift=2,
    zoom_center=1.10,
    zoom_peak=1.05
)

# Save the final GIF (~25 fps, infinite loop)
imageio.mimsave('parallax_final_multi_layer.gif', multi_layer_frames, duration=0.04, loop=0)
print("Final multi-layer parallax animation saved!")
requirements.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ transformers
2
+ torch
3
+ torchvision
4
+ pillow
5
+ matplotlib
6
+ requests
7
+ opencv-python
8
+ imageio
9
+ tqdm
10
+ gradio