Spaces:
Sleeping
Sleeping
upload gradio
Browse files- .gitignore +1 -0
- app.py +390 -0
- cs5330_hw4.py +633 -0
- requirements.txt +10 -0
.gitignore
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
.DS_Store
|
app.py
ADDED
|
@@ -0,0 +1,390 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# -*- coding: utf-8 -*-
"""
CS5330-HW4: Parallax Effect Gradio App
Converted from Colab notebook.
(V4: Final fix for halo/border artifact. Uses correct mask.)
"""

import torch
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
from transformers import DPTImageProcessor, DPTForDepthEstimation
import cv2
import imageio.v2 as imageio
import gradio as gr
import time  # To create unique filenames

# ==================================================================
# Global Transformer Setup
# ==================================================================
# NOTE: the DPT model is loaded once at import time so every Gradio
# request reuses the same weights instead of reloading per call.
print("Loading Intel DPT depth estimation model...")
processor = DPTImageProcessor.from_pretrained("Intel/dpt-large")
model = DPTForDepthEstimation.from_pretrained("Intel/dpt-large")
model.eval()  # inference only — disables dropout / batch-norm updates

# Prefer GPU when available; all tensors are moved to this device later.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)
print(f"Model loaded on {device}. Gradio app is ready.")
+
def get_depth_map(pil_image, processor, model, device):
    """Run DPT depth estimation and return a depth map normalized to [0, 1].

    Higher values correspond to higher raw model output (for DPT this means
    closer to the camera). Output is a float numpy array shaped (H, W).
    """
    print("... (1/5) Extracting depth map")

    # Preprocess and move every tensor onto the inference device.
    batch = processor(images=pil_image, return_tensors="pt")
    batch = {name: tensor.to(device) for name, tensor in batch.items()}

    with torch.no_grad():
        raw_depth = model(**batch).predicted_depth

    # Upsample the model output back to the original image resolution.
    resized = torch.nn.functional.interpolate(
        raw_depth.unsqueeze(1),
        size=pil_image.size[::-1],  # PIL size is (W, H); interpolate wants (H, W)
        mode="bicubic",
        align_corners=False,
    )

    depth = resized.squeeze().cpu().numpy()
    # Min-max normalize so downstream thresholding sees a fixed 0-1 range.
    return (depth - depth.min()) / (depth.max() - depth.min())
|
| 52 |
+
|
| 53 |
+
# ==================================================================
|
| 54 |
+
# Helper Function 2: Layer Separation
|
| 55 |
+
# ==================================================================
|
| 56 |
+
# This function returns mask_clean (hard) and mask_soft (soft/full-size)
|
| 57 |
+
def separate_foreground_background(image, depth_map, *,
                                   assume_bgr_input=True,
                                   near_is_foreground=True,
                                   foreground_depth_is_high=True):
    """Split an image into foreground/background layers from its depth map.

    Params
    ------
    assume_bgr_input: treat a 3-channel input as OpenCV BGR and convert to RGB.
    near_is_foreground: near objects count as the foreground subject.
    foreground_depth_is_high: the foreground has the *higher* depth value.

    Returns
    -------
    (foreground, background, mask_clean, mask_soft): two uint8 RGB layers,
    a hard 0/255 uint8 mask, and a float32 soft mask in [0, 1].
    """
    print("... (2/5) Separating layers")

    # ---- 1) Normalize input formats to numpy ----
    if not isinstance(image, np.ndarray):
        image = np.array(image)
    if not isinstance(depth_map, np.ndarray):
        depth_map = np.array(depth_map)

    # Only convert when the caller says the input really is BGR.
    if assume_bgr_input and image.ndim == 3 and image.shape[2] == 3:
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    if image.ndim == 2:
        image = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
    if depth_map.ndim == 3:
        depth_map = depth_map[:, :, 0]

    # ---- 2) Depth -> binary mask (foreground = 255) ----
    depth_u8 = cv2.normalize(depth_map, None, 0, 255, cv2.NORM_MINMAX).astype(np.uint8)
    depth_blurred = cv2.GaussianBlur(depth_u8, (5, 5), 0)

    # The four flag combinations collapse to an equality test: keep HIGH
    # depth values as foreground exactly when near_is_foreground matches
    # foreground_depth_is_high; otherwise invert the threshold.
    if near_is_foreground == foreground_depth_is_high:
        thresh_flag = cv2.THRESH_BINARY
    else:
        thresh_flag = cv2.THRESH_BINARY_INV

    _, raw_mask = cv2.threshold(depth_blurred, 0, 255, thresh_flag + cv2.THRESH_OTSU)

    # Morphological cleanup: open removes speckles, close fills small holes.
    ellipse = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5))
    hard_mask = cv2.morphologyEx(raw_mask, cv2.MORPH_OPEN, ellipse, iterations=1)
    hard_mask = cv2.morphologyEx(hard_mask, cv2.MORPH_CLOSE, ellipse, iterations=2)

    # Keep only the largest connected component as the subject.
    num_labels, labels, stats, _ = cv2.connectedComponentsWithStats(hard_mask, 8)
    if num_labels > 1:
        biggest = 1 + np.argmax(stats[1:, cv2.CC_STAT_AREA])
        hard_mask = (labels == biggest).astype(np.uint8) * 255

    # Feathered full-size mask — the key to avoiding the halo artifact.
    soft_mask = cv2.GaussianBlur(hard_mask, (9, 9), 5).astype(np.float32) / 255.0

    # ---- 3) Composite: fg = img * mask, bg = img * (1 - mask) ----
    rgb_f = image.astype(np.float32) / 255.0
    soft_3ch = np.dstack([soft_mask] * 3)

    fg_img = (np.clip(rgb_f * soft_3ch, 0, 1) * 255.0).astype(np.uint8)
    bg_img = (np.clip(rgb_f * (1.0 - soft_3ch), 0, 1) * 255.0).astype(np.uint8)

    return fg_img, bg_img, hard_mask, soft_mask
|
| 112 |
+
|
| 113 |
+
# ==================================================================
|
| 114 |
+
# Helper Function 3: Background Reconstruction
|
| 115 |
+
# ==================================================================
|
| 116 |
+
# This function returns final_bg (inpainted background) and alpha_no_halo (eroded mask)
|
| 117 |
+
# Note: We no longer use alpha_no_halo for the animation, but the function is fine.
|
| 118 |
+
def reconstruct_background(background, mask_hard, original_image_np):
    """Inpaint the foreground hole in `background` and build a halo-free alpha.

    Returns
    -------
    final_bg: uint8 image with the (dilated) foreground region filled via
        TELEA inpainting and bilateral smoothing; untouched elsewhere.
    alpha_no_halo: float32 HxWx1 alpha ramp from an eroded mask's distance
        transform. Not used by the V4 animation path, kept for compatibility.
    """
    print("... (3/5) Reconstructing background")

    # Dilate the hard mask so inpainting also covers the edge fringe.
    dilate_kernel = np.ones((7, 7), np.uint8)
    hole = cv2.dilate(mask_hard, dilate_kernel, iterations=1)
    filled = cv2.inpaint(background, hole, inpaintRadius=6, flags=cv2.INPAINT_TELEA)

    # Smooth the inpainted texture, but keep original pixels outside the hole.
    smoothed = cv2.bilateralFilter(filled, d=9, sigmaColor=75, sigmaSpace=75)
    final_bg = np.where(hole[..., None] == 255, smoothed, background)

    # Distance-transform alpha: ramps 0 -> 1 over ~6 px inside the eroded mask.
    small_ellipse = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
    shrunk = cv2.erode(mask_hard, small_ellipse, iterations=1)
    ramp = cv2.distanceTransform(shrunk, cv2.DIST_L2, 5) / 6.0
    alpha_no_halo = np.clip(ramp, 0, 1).astype(np.float32)[..., None]  # HxWx1

    return final_bg, alpha_no_halo
|
| 136 |
+
|
| 137 |
+
# ==================================================================
|
| 138 |
+
# Helper Function 4: Animation
|
| 139 |
+
# ==================================================================
|
| 140 |
+
# This is the animation function (from V2 logic), which is correct (uses normalization to prevent gaps).
|
| 141 |
+
def create_multi_layer_animation(
    image_original,
    background_clean,
    alpha_mask,  # KEY: We will pass the full-size mask_soft here
    depth_map,
    n_frames=60,
    parallax_strength=12,
    blur_strength=1.0,
    direction='right',
    zoom_center=1.10,
    zoom_peak=1.05
):
    """Render a 3-layer (foreground / mid / far) parallax animation.

    The foreground comes from `alpha_mask`; the background is split into
    mid/far layers at the median depth of the background region. Each layer
    is translated a different amount per frame, and the warped masks are
    re-normalized so the three layers always sum to full coverage (this is
    the V2 fix that prevents black gaps/borders).

    Returns a list of `n_frames` uint8 RGB frames.
    """
    print(f"... (4/5) Generating {n_frames} animation frames")
    print(f" Params: Parallax={parallax_strength}px, Blur={blur_strength}x, Dir={direction}")

    h, w = image_original.shape[:2]

    # --- 1. Prepare motion and blur settings ---
    direction_map = {'right': (1, 0), 'left': (-1, 0), 'up': (0, -1), 'down': (0, 1)}
    dx, dy = direction_map.get(direction, (1, 0))

    # Layer shift amounts: fg moves fully, mid at 50%, far at ~17%.
    fg_shift = parallax_strength
    mid_shift = parallax_strength * 0.5
    far_shift = parallax_strength * (2 / 12)

    # Gaussian kernel sizes scale with blur_strength and must stay odd >= 1.
    base_mid_k = 9
    base_far_k = 35
    mid_k_raw = int(base_mid_k * blur_strength)
    far_k_raw = int(base_far_k * blur_strength)
    mid_k = (mid_k_raw + 1) if (mid_k_raw > 0 and mid_k_raw % 2 == 0) else max(1, mid_k_raw)
    far_k = (far_k_raw + 1) if (far_k_raw > 0 and far_k_raw % 2 == 0) else max(1, far_k_raw)
    mid_blur_ksize = (mid_k, mid_k)
    far_blur_ksize = (far_k, far_k)
    print(f" ...Using blur kernels: Mid={mid_blur_ksize}, Far={far_blur_ksize}")

    # --- 2. Prepare base masks (FG vs BG) ---
    # alpha_mask is the full-size mask_soft; normalize to float [0,1], HxWx1.
    if alpha_mask.max() > 1:
        alpha_mask = alpha_mask.astype(np.float32) / 255.0
    if alpha_mask.ndim == 2:
        alpha_mask = alpha_mask[..., None]

    fg_mask_3ch = np.repeat(alpha_mask, 3, axis=2)  # full-size foreground
    bg_mask_3ch = 1.0 - fg_mask_3ch                 # full-size background "hole"

    # --- 3. Create mid-ground / far-ground masks ---
    if depth_map.ndim == 3:
        depth_map = cv2.cvtColor(depth_map, cv2.COLOR_BGR2GRAY)

    # Split the background region at its median depth value.
    bg_depth_values = depth_map[alpha_mask[..., 0] < 0.5]
    if len(bg_depth_values) > 0:
        bg_split_threshold = np.percentile(bg_depth_values, 50)
    else:
        bg_split_threshold = 0.5  # degenerate case: no background pixels

    raw_mid_mask = (depth_map > bg_split_threshold).astype(np.float32)
    raw_mid_mask_smooth = cv2.GaussianBlur(raw_mid_mask, (21, 21), 0)
    if raw_mid_mask_smooth.ndim == 2:
        raw_mid_mask_smooth = raw_mid_mask_smooth[..., None]

    raw_mid_mask_smooth_3ch = np.repeat(raw_mid_mask_smooth, 3, axis=2)

    # --- 4. Generate the final 3 mutually exclusive masks ---
    # These three layers cover the image with no gaps or overlaps.
    mid_mask_3ch = raw_mid_mask_smooth_3ch * bg_mask_3ch
    far_mask_3ch = (1.0 - raw_mid_mask_smooth_3ch) * bg_mask_3ch

    frames = []

    # --- 5. Loop to generate each frame ---
    for i in range(n_frames):
        # Sinusoidal ease: one full left-right (or up-down) cycle per loop.
        phase = (i / n_frames) * 2 * np.pi
        ease = np.sin(phase)

        # Zoom oscillates between zoom_center and zoom_peak with the motion.
        zoom_range = zoom_center - zoom_peak
        scale = zoom_center - (zoom_range * abs(ease))
        center = (w / 2, h / 2)
        M_scale = cv2.getRotationMatrix2D(center, 0, scale)

        # Per-layer translation matrices.
        M_fg_trans = np.float32([[1, 0, dx*ease*fg_shift], [0, 1, dy*ease*fg_shift]])
        M_mid_trans = np.float32([[1, 0, dx*ease*mid_shift], [0, 1, dy*ease*mid_shift]])
        M_far_trans = np.float32([[1, 0, dx*ease*far_shift], [0, 1, dy*ease*far_shift]])

        # --- Layer Transforms ---
        fg_warped = cv2.warpAffine(image_original, M_fg_trans, (w,h), borderMode=cv2.BORDER_REFLECT_101)
        fg_final = cv2.warpAffine(fg_warped, M_scale, (w,h), borderMode=cv2.BORDER_REFLECT_101).astype(np.float32)

        mid_warped = cv2.warpAffine(background_clean, M_mid_trans, (w,h), borderMode=cv2.BORDER_REPLICATE)
        mid_warped_scaled = cv2.warpAffine(mid_warped, M_scale, (w,h), borderMode=cv2.BORDER_REPLICATE)
        mid_final = cv2.GaussianBlur(mid_warped_scaled, mid_blur_ksize, 0).astype(np.float32)

        far_warped = cv2.warpAffine(background_clean, M_far_trans, (w,h), borderMode=cv2.BORDER_REPLICATE)
        far_warped_scaled = cv2.warpAffine(far_warped, M_scale, (w,h), borderMode=cv2.BORDER_REPLICATE)
        far_final = cv2.GaussianBlur(far_warped_scaled, far_blur_ksize, 0).astype(np.float32)

        # --- Mask Transforms (same warps as their layers) ---
        fg_mask_warped = cv2.warpAffine(fg_mask_3ch, M_fg_trans, (w,h))
        fg_mask_warped = cv2.warpAffine(fg_mask_warped, M_scale, (w,h))

        mid_mask_warped = cv2.warpAffine(mid_mask_3ch, M_mid_trans, (w,h))
        mid_mask_warped = cv2.warpAffine(mid_mask_warped, M_scale, (w,h))

        far_mask_warped = cv2.warpAffine(far_mask_3ch, M_far_trans, (w,h))
        far_mask_warped = cv2.warpAffine(far_mask_warped, M_scale, (w,h))

        # --- Final Composite (V2 normalization logic) ---
        # Re-normalize masks to prevent black borders or tiny gaps after warp.
        # The 1e-6 guards against division by zero where all masks vanish.
        total_mask = fg_mask_warped + mid_mask_warped + far_mask_warped + 1e-6
        fg_mask_warped /= total_mask
        mid_mask_warped /= total_mask
        far_mask_warped /= total_mask

        # Add the three layers, weighted by their masks.
        composite = (fg_final * fg_mask_warped) + \
                    (mid_final * mid_mask_warped) + \
                    (far_final * far_mask_warped)

        frame = np.clip(composite, 0, 255).astype(np.uint8)
        frames.append(frame)

    print(f"... (4/5) Frame generation complete.")
    return frames
|
| 264 |
+
|
| 265 |
+
# ==================================================================
|
| 266 |
+
# MAIN GRADIO FUNCTION (Ties everything together)
|
| 267 |
+
# ==================================================================
|
| 268 |
+
|
| 269 |
+
def generate_parallax_effect(input_image_np, parallax_strength, blur_strength, animation_direction):
    """Full Gradio pipeline: image -> depth -> layers -> inpaint -> GIF.

    Parameters mirror the Gradio inputs (numpy image, parallax px, blur
    multiplier, direction string). Returns the server-side path of the
    saved GIF; gr.Image displays it and offers the download button.
    """
    print("\n--- Processing new image ---")

    # --- 0. Image Preparation ---
    image_pil = Image.fromarray(input_image_np).convert('RGB')

    # Downscale so depth estimation + warping stay fast (longest side <= 640 px).
    max_size = 640
    if max(image_pil.size) > max_size:
        ratio = max_size / max(image_pil.size)
        new_size = tuple(int(dim * ratio) for dim in image_pil.size)
        image_pil = image_pil.resize(new_size, Image.LANCZOS)

    image_resized_np = np.array(image_pil)
    print(f"Image resized to: {image_pil.size}")

    # --- 1. Get Depth Map ---
    depth_map_0_1 = get_depth_map(image_pil, processor, model, device)

    # --- 2. Layer Separation ---
    # We get mask_soft (full-size mask) from this function.
    foreground, background, mask_hard, mask_soft = separate_foreground_background(
        image_pil,
        depth_map_0_1,
        assume_bgr_input=False,  # PIL gives RGB, not OpenCV BGR
        near_is_foreground=True,
        foreground_depth_is_high=True
    )

    # --- 3. Background Reconstruction ---
    # We get final_bg (inpainted background) from this.
    # We also get alpha_no_halo, but we won't use it for the animation.
    final_bg, alpha_no_halo = reconstruct_background(background, mask_hard, image_resized_np)

    # --- 4. Animation ---
    # *** THIS IS THE KEY FIX ***
    # We use the V2-logic animation function (V4) with `mask_soft` (the full-size mask).
    multi_layer_frames = create_multi_layer_animation(
        image_original=image_resized_np,
        background_clean=final_bg,
        alpha_mask=mask_soft,  # <-- KEY FIX: Pass the full-size soft mask
        depth_map=depth_map_0_1,
        n_frames=60,
        parallax_strength=parallax_strength,
        blur_strength=blur_strength,
        direction=animation_direction
    )

    # --- 5. Save GIF and Return Path ---
    print("... (5/5) Saving final GIF")
    timestamp = int(time.time())
    output_filename = f'parallax_final_{timestamp}.gif'

    # This saves the file to the SERVER'S disk.
    # It does NOT trigger a download in the user's browser.
    # duration=0.04 s/frame -> 25 fps; loop=0 -> repeat forever.
    imageio.mimsave(output_filename, multi_layer_frames, duration=0.04, loop=0)
    print(f"--- Processing complete! Saved to {output_filename} ---")

    # MODIFIED: Only return the GIF filepath
    return output_filename
|
| 328 |
+
|
| 329 |
+
# ==================================================================
|
| 330 |
+
# Gradio Interface (Modified)
|
| 331 |
+
# ==================================================================
|
| 332 |
+
|
| 333 |
+
print("Creating Gradio interface...")

# --- 1. Define Input Components ---
input_image = gr.Image(label="1. Upload Your Image", type="numpy")

param_parallax = gr.Slider(
    minimum=0,
    maximum=30,
    value=12,
    step=1,
    label="2. Parallax Strength (px)",
    info="Foreground motion in pixels. Higher = stronger 3D effect."
)

param_blur = gr.Slider(
    minimum=0.0,
    maximum=2.0,
    value=1.0,
    step=0.1,
    label="3. Aperture / Blur Strength",
    info="Controls background blur (bokeh). 0 = no blur, 1 = default, 2 = max blur."
)

param_direction = gr.Dropdown(
    choices=['right', 'left', 'up', 'down'],
    value='right',
    label="4. Animation Direction"
)

# --- 2. Define Output Components ---
# MODIFIED: Removed output_original
output_gif = gr.Image(label="Generated Parallax GIF")
# NOTE: The gr.Image component automatically provides a download button
#       in the top-right corner when displaying an image/GIF. This
#       fulfills the requirement for a "Gradio download button".


# --- 4. Create Interface ---
iface = gr.Interface(
    fn=generate_parallax_effect,
    inputs=[input_image, param_parallax, param_blur, param_direction],
    # MODIFIED: Only one output
    outputs=output_gif,
    title="📸 3D Parallax Photo Animator (CS5330-HW4)",
    description="""
    Upload a photo (ideally with a clear foreground and background) to generate a 3D parallax and depth-of-field animation.

    1. Upload an image.
    2. Adjust the 3 parameters below.
    3. Click "Submit".

    Processing may take 30-60 seconds. You can find the download button in the top-right corner of the generated GIF.
    """,
    # (Removed examples and cache_examples)
)

if __name__ == "__main__":
    # share=False keeps the server local; Spaces handles public hosting itself.
    iface.launch(share=False)
|
cs5330_hw4.py
ADDED
|
@@ -0,0 +1,633 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# -*- coding: utf-8 -*-
"""CS5330-HW4.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1s2_5NEaW54KPPeoQy2TlhZxD_HHtF9fg

# Part 0. Transformer Setup
"""

import torch
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
from transformers import DPTImageProcessor, DPTForDepthEstimation
import requests

"""## Initialize Depth Model"""

# ============================================
# Initialize the Depth Model
# ============================================
print("Loading Intel DPT depth estimation model...")
processor = DPTImageProcessor.from_pretrained("Intel/dpt-large")
model = DPTForDepthEstimation.from_pretrained("Intel/dpt-large")
model.eval()  # Set to evaluation mode

# Use GPU if available for faster processing
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)
print(f"Model loaded on {device}")

"""## Image Preparation"""

# ============================================
# Load and Prepare Your Image
# ============================================
# Option 1: Load from URL
image_url = "https://images.pexels.com/photos/1681010/pexels-photo-1681010.jpeg"
image = Image.open(requests.get(image_url, stream=True).raw).convert('RGB')

# Option 2: Upload from local (in Colab)
# from google.colab import files
# uploaded = files.upload()
# image = Image.open(list(uploaded.keys())[0]).convert('RGB')

# Resize for faster processing (optional but recommended):
# cap the longest side at 640 px, preserving aspect ratio.
max_size = 640
if max(image.size) > max_size:
    ratio = max_size / max(image.size)
    new_size = tuple(int(dim * ratio) for dim in image.size)
    image = image.resize(new_size, Image.LANCZOS)

print(f"Image size: {image.size}")

"""## Depth Map Extraction"""

# ============================================
# Extract Depth Map
# ============================================
# Prepare image for the model
inputs = processor(images=image, return_tensors="pt")
inputs = {k: v.to(device) for k, v in inputs.items()}

# Run depth estimation
with torch.no_grad():
    outputs = model(**inputs)
    predicted_depth = outputs.predicted_depth

# Interpolate to original size and normalize
prediction = torch.nn.functional.interpolate(
    predicted_depth.unsqueeze(1),
    size=image.size[::-1],  # (height, width)
    mode="bicubic",
    align_corners=False,
)

# Convert to numpy and normalize to 0-1 range
depth_map = prediction.squeeze().cpu().numpy()
depth_map = (depth_map - depth_map.min()) / (depth_map.max() - depth_map.min())

"""## Image Visualization"""

# ============================================
# Visualize Results
# ============================================
fig, axes = plt.subplots(1, 2, figsize=(12, 5))

# Original image
axes[0].imshow(image)
axes[0].set_title('Original Image')
axes[0].axis('off')

# Depth map
im = axes[1].imshow(depth_map, cmap='plasma')
axes[1].set_title('Depth Map (Yellow=Close, Purple=Far)')
axes[1].axis('off')
plt.colorbar(im, ax=axes[1], fraction=0.046)

plt.tight_layout()
plt.show()

print(f"Depth map shape: {depth_map.shape}")
print(f"Depth range: [{depth_map.min():.3f}, {depth_map.max():.3f}]")
print("Ready for processing!")
# depth_map is now a normalized numpy array where:
# - Values close to 1.0 = near to camera (yellow in visualization)
# - Values close to 0.0 = far from camera (purple in visualization)
# Use this depth_map for all subsequent processing!

"""# Part 1. Depth-Guided Layer Separation"""

import cv2
import numpy as np
from matplotlib import pyplot as plt

"""At this point ,we should have [image] and [depth_map] available"""

import numpy as np
import cv2
|
| 123 |
+
def separate_foreground_background(image, depth_map, *,
                                   assume_bgr_input=True,
                                   near_is_foreground=True,
                                   foreground_depth_is_high=True):
    """Depth-guided foreground/background split.

    Params
    ------
    assume_bgr_input: Whether the input image is in OpenCV's typical BGR
        format (True converts to RGB; set to False for RGB input).
    near_is_foreground: Whether near objects are the foreground.
    foreground_depth_is_high: Whether the foreground's depth value is higher
        (brighter) in `depth_map`.

    Returns (foreground, background, mask_clean, mask_soft).
    """
    # ---- 1) Unify formats ----
    if not isinstance(image, np.ndarray):
        image = np.array(image)
    if not isinstance(depth_map, np.ndarray):
        depth_map = np.array(depth_map)

    # Only convert to RGB if explicitly BGR (avoids unnecessary round-trips
    # that would shift colors).
    if assume_bgr_input and image.ndim == 3 and image.shape[2] == 3:
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    if image.ndim == 2:  # grayscale -> 3 channels
        image = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
    if depth_map.ndim == 3:
        depth_map = depth_map[:, :, 0]

    # ---- 2) Depth -> binary mask (foreground = 255) ----
    depth_u8 = cv2.normalize(depth_map, None, 0, 255, cv2.NORM_MINMAX).astype(np.uint8)
    depth_blurred = cv2.GaussianBlur(depth_u8, (5, 5), 0)

    # The four (near, high) combinations reduce to an equality test:
    # threshold keeps HIGH values as foreground exactly when
    # near_is_foreground matches foreground_depth_is_high.
    if near_is_foreground == foreground_depth_is_high:
        thresh_flag = cv2.THRESH_BINARY
    else:
        thresh_flag = cv2.THRESH_BINARY_INV

    _, raw_mask = cv2.threshold(depth_blurred, 0, 255, thresh_flag + cv2.THRESH_OTSU)

    # Cleanup: open removes speckles, close fills small holes.
    ellipse = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5))
    hard_mask = cv2.morphologyEx(raw_mask, cv2.MORPH_OPEN, ellipse, iterations=1)
    hard_mask = cv2.morphologyEx(hard_mask, cv2.MORPH_CLOSE, ellipse, iterations=2)

    # Keep only the largest connected component as the subject.
    num_labels, labels, stats, _ = cv2.connectedComponentsWithStats(hard_mask, 8)
    if num_labels > 1:
        biggest = 1 + np.argmax(stats[1:, cv2.CC_STAT_AREA])
        hard_mask = (labels == biggest).astype(np.uint8) * 255

    # Soften edges for compositing.
    soft_mask = cv2.GaussianBlur(hard_mask, (9, 9), 5).astype(np.float32) / 255.0

    # ---- 3) Composite (fg = img * mask, bg = img * (1 - mask)) ----
    rgb_f = image.astype(np.float32) / 255.0
    soft_3ch = np.dstack([soft_mask] * 3)

    fg_img = (np.clip(rgb_f * soft_3ch, 0, 1) * 255.0).astype(np.uint8)
    bg_img = (np.clip(rgb_f * (1.0 - soft_3ch), 0, 1) * 255.0).astype(np.uint8)

    return fg_img, bg_img, hard_mask, soft_mask
|
| 192 |
+
|
| 193 |
+
from PIL import Image as PILImage
|
| 194 |
+
|
| 195 |
+
def visualize_results(image, depth_map, foreground, background, mask, mask_soft):
    """Show a 2x3 summary grid of the separation pipeline.

    Panels: original image, depth map, cleaned binary mask,
    blurred soft mask, extracted foreground, extracted background.
    """
    # (data, title, cmap) spec for each panel, in row-major display order.
    panels = [
        (image, 'Original Image', None),
        (depth_map, 'Depth Map', 'plasma'),
        (mask, 'Binary Mask (Cleaned)', 'gray'),
        (mask_soft, 'Soft Mask (Blurred)', 'gray'),
        (foreground, 'Foreground', None),
        (background, 'Background', None),
    ]

    fig, axes = plt.subplots(2, 3, figsize=(15, 10))
    for ax, (data, title, cmap) in zip(axes.ravel(), panels):
        if cmap is None:
            ax.imshow(data)
        else:
            ax.imshow(data, cmap=cmap)
        ax.set_title(title)
        ax.axis('off')

    plt.tight_layout()
    plt.show()
|
| 218 |
+
|
| 219 |
+
# --- Part 2 driver: split the image into foreground/background layers ---
# near_is_foreground / foreground_depth_is_high select the Otsu threshold
# polarity inside separate_foreground_background (see its flag table above).
foreground, background, mask_hard, mask_soft = separate_foreground_background(image, depth_map, assume_bgr_input=False,
                                                                              near_is_foreground=True,
                                                                              foreground_depth_is_high=True)

# The plotting below indexes `image` as an array, so coerce PIL -> ndarray.
# NOTE(review): this conversion happens *after* the separation call above —
# presumably separate_foreground_background also accepts PIL input; confirm.
if not isinstance(image, np.ndarray):
    image = np.array(image)

visualize_results(image, depth_map, foreground, background, mask_hard, mask_soft)
|
| 227 |
+
|
| 228 |
+
"""# Part 3: Intelligent Background Reconstruction"""
|
| 229 |
+
|
| 230 |
+
kernel = np.ones((7,7), np.uint8)
|
| 231 |
+
mask_dilated = cv2.dilate(mask_hard, kernel, iterations=1)
|
| 232 |
+
|
| 233 |
+
# 1️⃣ Inpaint
|
| 234 |
+
bg_inpainted = cv2.inpaint(background, mask_dilated, inpaintRadius=6, flags=cv2.INPAINT_TELEA)
|
| 235 |
+
|
| 236 |
+
# 2️⃣ Smooth
|
| 237 |
+
bg_smooth = cv2.bilateralFilter(bg_inpainted, d=9, sigmaColor=75, sigmaSpace=75)
|
| 238 |
+
|
| 239 |
+
# 3️⃣ Optional: Only replace in the mask region
|
| 240 |
+
final_bg = np.where(mask_dilated[..., None] == 255, bg_smooth, background)
|
| 241 |
+
|
| 242 |
+
# -- New: Use the hard mask to create an eroded+feathered alpha with no black halo (recommended to pass this)
|
| 243 |
+
k3 = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3,3)) # Approximately erode 2px
|
| 244 |
+
mask_erode = cv2.erode(mask_hard, k3, iterations=1)
|
| 245 |
+
dist = cv2.distanceTransform(mask_erode, cv2.DIST_L2, 5)
|
| 246 |
+
alpha_no_halo = dist / 6 # feather≈12, adjustable 8~14
|
| 247 |
+
alpha_no_halo = np.clip(alpha_no_halo, 0, 1).astype(np.float32)
|
| 248 |
+
alpha_no_halo[mask_erode == 0] = 0.0
|
| 249 |
+
alpha_no_halo = alpha_no_halo[..., None] # HxWx1
|
| 250 |
+
original_rgb = image
|
| 251 |
+
|
| 252 |
+
# 4️⃣ Display
|
| 253 |
+
plt.figure(figsize=(10,5))
|
| 254 |
+
plt.subplot(1,2,1); plt.title("Original Background with Hole"); plt.imshow(background); plt.axis('off')
|
| 255 |
+
plt.subplot(1,2,2); plt.title("Clean Background (Inpainted)"); plt.imshow(final_bg); plt.axis('off')
|
| 256 |
+
plt.show()
|
| 257 |
+
|
| 258 |
+
|
| 259 |
+
import matplotlib.pyplot as plt
import numpy as np

# --- Alpha-mask sanity check: visualize the mask, the clean background and a
# --- test composite, then print basic statistics.
plt.figure(figsize=(20, 5))

plt.subplot(151)
plt.imshow(image)
plt.title('Original Image')
plt.axis('off')

plt.subplot(152)
plt.imshow(alpha_no_halo.squeeze(), cmap='gray')  # squeeze HxWx1 -> HxW for imshow
plt.title('Alpha Mask\n(Should cover full person)')
plt.axis('off')

plt.subplot(153)
plt.imshow(final_bg)
plt.title('Clean Background')
plt.axis('off')

# Test composite: standard alpha blend of the original over the clean background.
fg_float = image.astype(np.float32) / 255.0
bg_float = final_bg.astype(np.float32) / 255.0
test_composite = fg_float * alpha_no_halo + bg_float * (1.0 - alpha_no_halo)
test_composite = (np.clip(test_composite, 0, 1) * 255).astype(np.uint8)

plt.subplot(154)
plt.imshow(test_composite)
plt.title('Test Composite\n(Does BG eat face?)')
plt.axis('off')

# Highlight areas where alpha < 0.5 — regions of the subject that the
# composite would lose to the background.
alpha_highlight = image.copy()
low_alpha_mask = alpha_no_halo.squeeze() < 0.5
alpha_highlight[low_alpha_mask] = [255, 0, 0]  # mark with red

plt.subplot(155)
plt.imshow(alpha_highlight)
plt.title('Red = Alpha < 0.5\n(Lost person areas)')
plt.axis('off')

plt.tight_layout()
plt.show()

# Print diagnostic info
print("Alpha Mask Statistics:")
print(f" Min: {alpha_no_halo.min():.3f}")
print(f" Max: {alpha_no_halo.max():.3f}")
print(f" Shape: {alpha_no_halo.shape}")
print(f" Pixels with alpha > 0.9: {(alpha_no_halo > 0.9).sum()}")
print(f" Pixels with alpha < 0.5: {(alpha_no_halo < 0.5).sum()}")
|
| 311 |
+
|
| 312 |
+
"""# Part 4: Depth-Aware Motion Synthesis"""
|
| 313 |
+
|
| 314 |
+
import numpy as np
|
| 315 |
+
import cv2
|
| 316 |
+
|
| 317 |
+
def create_motion_frames(
    image_original,
    background_clean,
    alpha_mask,
    n_frames=30,
    fg_shift=12,
    bg_shift=4,
    direction='right',
    scale_effect=0.015
):
    """
    Part 4: generate parallax motion frames (motion + compositing only).

    The foreground layer (full original image, keyed by alpha_mask) and the
    clean background layer are translated by different amounts each frame,
    optionally zoomed, and alpha-blended. Depth-of-field blur is added later.

    Parameters
    ----------
    image_original : HxWx3 uint8 image (sharp foreground source)
    background_clean : HxWx3 uint8 inpainted background
    alpha_mask : HxW, HxWx1 or HxWx3 mask; uint8 0-255 or float 0-1
    n_frames : frames in one full sine cycle
    fg_shift, bg_shift : peak pixel displacement of each layer
    direction : 'right' | 'left' | 'up' | 'down'
    scale_effect : extra zoom at motion peaks (0 disables zoom)

    Returns
    -------
    list of HxWx3 uint8 frames
    """
    h, w = image_original.shape[:2]

    # Unit motion vector for the requested direction (default: right).
    unit_dx, unit_dy = {
        'right': (1, 0),
        'left': (-1, 0),
        'up': (0, -1),
        'down': (0, 1),
    }.get(direction, (1, 0))

    # Normalize the alpha mask to an HxWx3 float array clipped to [0, 1].
    if alpha_mask.ndim == 2:
        alpha_mask = alpha_mask[..., None]
    alpha_3ch = np.repeat(alpha_mask, 3, axis=2) if alpha_mask.shape[2] == 1 else alpha_mask
    if alpha_3ch.max() > 1:
        alpha_3ch = np.clip(alpha_3ch.astype(np.float32) / 255.0, 0, 1)
    else:
        alpha_3ch = np.clip(alpha_3ch.astype(np.float32), 0, 1)

    def _shift_then_scale(layer, m_shift, m_scale, border_mode, border_value=None):
        # Apply the per-layer translation, then the shared zoom, using the
        # same interpolation/border settings for both warps.
        extra = {'flags': cv2.INTER_LINEAR, 'borderMode': border_mode}
        if border_value is not None:
            extra['borderValue'] = border_value
        shifted = cv2.warpAffine(layer, m_shift, (w, h), **extra)
        return cv2.warpAffine(shifted, m_scale, (w, h), **extra)

    frames = []
    print(f"Generating {n_frames} motion frames (Part 4)...")

    for frame_idx in range(n_frames):
        # Sine easing sweeps 0 -> 1 -> 0 -> -1 -> 0 over the clip, which
        # makes the resulting GIF loop seamlessly.
        ease = np.sin((frame_idx / n_frames) * 2 * np.pi)

        # Shared zoom: 1.0 at rest, up to 1.0 + scale_effect at motion peaks.
        M_scale = cv2.getRotationMatrix2D((w / 2, h / 2), 0, 1.0 + abs(ease) * scale_effect)

        # Per-layer translation matrices (foreground moves farther -> parallax).
        M_fg_trans = np.float32([[1, 0, unit_dx * ease * fg_shift],
                                 [0, 1, unit_dy * ease * fg_shift]])
        M_bg_trans = np.float32([[1, 0, unit_dx * ease * bg_shift],
                                 [0, 1, unit_dy * ease * bg_shift]])

        # Foreground reflects at the border; background replicates its edge.
        fg_moved = _shift_then_scale(image_original, M_fg_trans, M_scale, cv2.BORDER_REFLECT_101)
        bg_moved = _shift_then_scale(background_clean, M_bg_trans, M_scale, cv2.BORDER_REPLICATE)

        # The alpha mask must follow the foreground's exact transform; pad
        # with zeros so revealed border pixels become fully transparent.
        alpha_moved = _shift_then_scale(alpha_3ch, M_fg_trans, M_scale, cv2.BORDER_CONSTANT, border_value=0)
        alpha_moved = np.clip(alpha_moved, 0, 1)

        # Alpha-blend the two layers: fg*a + bg*(1-a).
        blended = cv2.add(
            cv2.multiply(fg_moved.astype(np.float32), alpha_moved),
            cv2.multiply(bg_moved.astype(np.float32), 1.0 - alpha_moved),
        )
        frames.append(np.clip(blended, 0, 255).astype(np.uint8))

    print("Part 4: Motion frames generated.")
    return frames
|
| 428 |
+
|
| 429 |
+
# --- Part 4 driver: generate a motion-only preview ---
# scale_effect=0.0 disables the zoom so only the parallax translation shows.
motion_frames = create_motion_frames(
    image_original=image,
    background_clean=final_bg,
    alpha_mask=alpha_no_halo,
    n_frames=30,
    fg_shift=12,
    bg_shift=4,
    direction='right',
    scale_effect=0.00
)

# Save a "no blur" GIF to preview the motion
import imageio.v2 as imageio
output_filename = 'parallax_part4_only.gif'
# duration=0.04 -> 25 fps; loop=0 -> loop forever; palettesize caps the GIF
# color table to keep the file small.
imageio.mimsave(output_filename, motion_frames, duration=0.04, loop=0, optimize=True, palettesize=192)
print("Part 4 preview GIF saved!")
|
| 447 |
+
|
| 448 |
+
"""# Part 5: Depth-of-Field & Bokeh Effects"""
|
| 449 |
+
|
| 450 |
+
import numpy as np
|
| 451 |
+
import cv2
|
| 452 |
+
import imageio.v2 as imageio
|
| 453 |
+
|
| 454 |
+
def create_multi_layer_animation(
    image_original,
    background_clean,
    alpha_mask,  # This is your 'alpha_no_halo'
    depth_map,  # This is your 'depth_map' (needs to be single-channel, 0-1 float)
    n_frames=60,

    # --- 1. Multi-layer motion settings ---
    fg_shift=12,  # Foreground (person) moves 12px
    mid_shift=6,  # Mid-ground (near background) moves 6px
    far_shift=2,  # Far-ground (far background) moves 2px

    # --- 2. Dynamic zoom settings ---
    zoom_center=1.10,  # zoom when motion is at rest (ease == 0)
    zoom_peak=1.05,  # zoom at motion peaks (|ease| == 1)

    # --- 3. Dynamic Depth-of-Field settings ---
    mid_blur_ksize=(9, 9),  # Mid-ground blur (f/5.6)
    far_blur_ksize=(35, 35),  # Far-ground blur (f/1.4)

    direction='right'
):
    """
    Final version: Integrates multi-layer parallax (3 layers), dynamic DoF, and dynamic zoom.

    The background is split into mid/far layers at the median depth of the
    non-foreground pixels; each layer is translated by its own shift, zoomed,
    blurred (mid/far only), and blended via per-layer masks that are warped
    with the same transforms and re-normalized to sum to 1.

    Returns a list of HxWx3 uint8 frames.
    """
    print("--- Start generating advanced multi-layer animation ---")

    h, w = image_original.shape[:2]

    # --- 1. Prepare motion: unit direction vector (defaults to 'right') ---
    direction_map = {'right': (1, 0), 'left': (-1, 0), 'up': (0, -1), 'down': (0, 1)}
    dx, dy = direction_map.get(direction, (1, 0))

    # --- 2. Prepare base masks (Foreground vs. Background) ---
    # Ensure alpha_mask is a 0-1 float with a trailing channel axis.
    if alpha_mask.max() > 1:
        alpha_mask = alpha_mask.astype(np.float32) / 255.0
    if alpha_mask.ndim == 2:
        alpha_mask = alpha_mask[..., None]

    # Foreground mask and its complement (background mask), both HxWx3.
    fg_mask_3ch = np.repeat(alpha_mask, 3, axis=2)
    bg_mask_3ch = 1.0 - fg_mask_3ch

    # --- 3. Create mid/far-ground masks (computed once, outside the loop) ---
    print("...Analyzing depth map and creating layers...")

    # Ensure depth_map is single-channel.
    if depth_map.ndim == 3:
        depth_map = cv2.cvtColor(depth_map, cv2.COLOR_BGR2GRAY)

    # Depth values restricted to the background region (alpha < 0.5).
    bg_depth_values = depth_map[alpha_mask[..., 0] < 0.5]

    # Median background depth is used as the mid/far split point.
    if len(bg_depth_values) > 0:
        bg_split_threshold = np.percentile(bg_depth_values, 50)
    else:
        bg_split_threshold = 0.5  # no background pixels: fall back to a default

    print(f" Background depth split point: {bg_split_threshold:.4f}")

    # Binary split: depth above threshold = closer = mid-ground (1), else far (0).
    raw_mid_mask = (depth_map > bg_split_threshold).astype(np.float32)

    # Blur the split for a smooth mid/far transition. Note this mask still
    # includes the person/foreground area; it is multiplied out below.
    raw_mid_mask_smooth = cv2.GaussianBlur(raw_mid_mask, (21, 21), 0)
    if raw_mid_mask_smooth.ndim == 2:
        raw_mid_mask_smooth = raw_mid_mask_smooth[..., None]

    # Expand to 3 channels.
    raw_mid_mask_smooth_3ch = np.repeat(raw_mid_mask_smooth, 3, axis=2)

    # --- 4. Generate the final 3 mutually exclusive masks ---
    # Multiplying by bg_mask_3ch cuts the foreground out of both background
    # layers, so fg + mid + far masks sum to 1.0 everywhere.
    mid_mask_3ch = raw_mid_mask_smooth_3ch * bg_mask_3ch
    far_mask_3ch = (1.0 - raw_mid_mask_smooth_3ch) * bg_mask_3ch

    print("...Layers created. Starting frame generation...")

    frames = []

    # --- 5. Loop to generate each frame ---
    for i in range(n_frames):

        # 5a. Sine easing for a seamless looping motion.
        phase = (i / n_frames) * 2 * np.pi
        ease = np.sin(phase)

        # Dynamic zoom: zoom_center at rest, zoom_peak at motion extremes.
        zoom_range = zoom_center - zoom_peak
        scale = zoom_center - (zoom_range * abs(ease))
        center = (w / 2, h / 2)
        M_scale = cv2.getRotationMatrix2D(center, 0, scale)

        # 5b. Per-layer translation matrices (larger shift = closer layer).
        M_fg_trans = np.float32([[1, 0, dx*ease*fg_shift], [0, 1, dy*ease*fg_shift]])
        M_mid_trans = np.float32([[1, 0, dx*ease*mid_shift], [0, 1, dy*ease*mid_shift]])
        M_far_trans = np.float32([[1, 0, dx*ease*far_shift], [0, 1, dy*ease*far_shift]])

        # 5c. Transform (translate, then zoom) and blur each layer.

        # Foreground: kept sharp; reflected borders hide the translation gap.
        fg_warped = cv2.warpAffine(image_original, M_fg_trans, (w,h), borderMode=cv2.BORDER_REFLECT_101)
        fg_final = cv2.warpAffine(fg_warped, M_scale, (w,h), borderMode=cv2.BORDER_REFLECT_101).astype(np.float32)

        # Mid-ground: slight blur.
        mid_warped = cv2.warpAffine(background_clean, M_mid_trans, (w,h), borderMode=cv2.BORDER_REPLICATE)
        mid_warped_scaled = cv2.warpAffine(mid_warped, M_scale, (w,h), borderMode=cv2.BORDER_REPLICATE)
        mid_final = cv2.GaussianBlur(mid_warped_scaled, mid_blur_ksize, 0).astype(np.float32)

        # Far-ground: heavy blur.
        far_warped = cv2.warpAffine(background_clean, M_far_trans, (w,h), borderMode=cv2.BORDER_REPLICATE)
        far_warped_scaled = cv2.warpAffine(far_warped, M_scale, (w,h), borderMode=cv2.BORDER_REPLICATE)
        far_final = cv2.GaussianBlur(far_warped_scaled, far_blur_ksize, 0).astype(np.float32)

        # 5d. Warp each mask with the exact same transform as its layer.
        fg_mask_warped = cv2.warpAffine(fg_mask_3ch, M_fg_trans, (w,h))
        fg_mask_warped = cv2.warpAffine(fg_mask_warped, M_scale, (w,h))

        mid_mask_warped = cv2.warpAffine(mid_mask_3ch, M_mid_trans, (w,h))
        mid_mask_warped = cv2.warpAffine(mid_mask_warped, M_scale, (w,h))

        far_mask_warped = cv2.warpAffine(far_mask_3ch, M_far_trans, (w,h))
        far_mask_warped = cv2.warpAffine(far_mask_warped, M_scale, (w,h))

        # 5e. Re-normalize so the three masks sum to 1 after warping — prevents
        # black borders or tiny gaps where warped masks overlap or separate.
        total_mask = fg_mask_warped + mid_mask_warped + far_mask_warped + 1e-6  # avoid division by zero
        fg_mask_warped /= total_mask
        mid_mask_warped /= total_mask
        far_mask_warped /= total_mask

        # Weighted sum of the three layers.
        composite = (fg_final * fg_mask_warped) + \
                    (mid_final * mid_mask_warped) + \
                    (far_final * far_mask_warped)

        frame = np.clip(composite, 0, 255).astype(np.uint8)
        frames.append(frame)

        if (i + 1) % 10 == 0:
            print(f" ...Frame {i+1}/{n_frames} complete")

    print(f"Advanced animation generation complete. Created {n_frames} frames.")
    return frames
|
| 610 |
+
|
| 611 |
+
# Normalize the depth map to float in [0, 1] for the layer-splitting logic.
if depth_map.max() > 1:
    depth_map_0_1 = depth_map.astype(np.float32) / 255.0
else:
    depth_map_0_1 = depth_map.astype(np.float32)


# Call the new multi-layer function
multi_layer_frames = create_multi_layer_animation(
    image_original=image,
    background_clean=final_bg,
    alpha_mask=alpha_no_halo,  # Your foreground mask
    depth_map=depth_map_0_1,  # Your 0-1 depth map
    n_frames=60,
    fg_shift=12,
    mid_shift=6,
    far_shift=2,
    zoom_center=1.10,
    zoom_peak=1.05
)

# Save the final GIF (duration=0.04 -> 25 fps; loop=0 -> loop forever)
imageio.mimsave('parallax_final_multi_layer.gif', multi_layer_frames, duration=0.04, loop=0)
print("Final multi-layer parallax animation saved!")
|
requirements.txt
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
transformers
|
| 2 |
+
torch
|
| 3 |
+
torchvision
|
| 4 |
+
pillow
|
| 5 |
+
matplotlib
|
| 6 |
+
requests
|
| 7 |
+
opencv-python
|
| 8 |
+
imageio
|
| 9 |
+
tqdm
|
| 10 |
+
gradio
|