VanKee committed on
Commit
212b9f6
·
1 Parent(s): 3f3d1c4

upload gradio

Browse files
Files changed (4) hide show
  1. .gitignore +1 -0
  2. app.py +390 -0
  3. cs5330_hw4.py +633 -0
  4. requirements.txt +10 -0
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ .DS_Store
app.py ADDED
@@ -0,0 +1,390 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ CS5330-HW4: Parallax Effect Gradio App
4
+ Converted from Colab notebook.
5
+ (V4: Final fix for halo/border artifact. Uses correct mask.)
6
+ """
7
+
8
+ import torch
9
+ import numpy as np
10
+ import matplotlib.pyplot as plt
11
+ from PIL import Image
12
+ from transformers import DPTImageProcessor, DPTForDepthEstimation
13
+ import cv2
14
+ import imageio.v2 as imageio
15
+ import gradio as gr
16
+ import time # To create unique filenames
17
+
18
+ # ==================================================================
19
+ # Global Transformer Setup
20
+ # ==================================================================
21
+ print("Loading Intel DPT depth estimation model...")
22
+ processor = DPTImageProcessor.from_pretrained("Intel/dpt-large")
23
+ model = DPTForDepthEstimation.from_pretrained("Intel/dpt-large")
24
+ model.eval()
25
+
26
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
27
+ model = model.to(device)
28
+ print(f"Model loaded on {device}. Gradio app is ready.")
29
+
30
+ # ==================================================================
31
+ # Helper Function 1: Get Depth Map
32
+ # ==================================================================
33
def get_depth_map(pil_image, processor, model, device):
    """Estimate a dense depth map for an image with a DPT-style model.

    Args:
        pil_image: PIL-style image. Only ``.size`` (width, height) is read
            directly; the pixels are consumed by ``processor``.
        processor: Callable (e.g. ``DPTImageProcessor``) invoked as
            ``processor(images=pil_image, return_tensors="pt")`` and returning
            a dict of tensors.
        model: Depth model returning an object with a ``predicted_depth``
            tensor of shape ``(batch, H', W')``.
        device: ``torch.device`` the model lives on; inputs are moved there.

    Returns:
        2-D ``numpy.ndarray`` of shape ``(H, W)`` matching ``pil_image``,
        min-max normalized to [0, 1]. If the raw prediction is perfectly
        flat, an all-zeros map is returned instead of NaNs.
    """
    print("... (1/5) Extracting depth map")
    inputs = processor(images=pil_image, return_tensors="pt")
    inputs = {k: v.to(device) for k, v in inputs.items()}

    # Inference only — no autograd graph needed.
    with torch.no_grad():
        outputs = model(**inputs)
        predicted_depth = outputs.predicted_depth

    # Upsample the model's low-resolution prediction back to the image size.
    # PIL's .size is (width, height); interpolate expects (height, width).
    prediction = torch.nn.functional.interpolate(
        predicted_depth.unsqueeze(1),
        size=pil_image.size[::-1],
        mode="bicubic",
        align_corners=False,
    )

    depth_map = prediction.squeeze().cpu().numpy()

    # Min-max normalize to [0, 1]. The original code divided by
    # (max - min) unconditionally, which produces NaNs for a constant
    # depth prediction; guard that degenerate case explicitly.
    depth_range = depth_map.max() - depth_map.min()
    if depth_range > 0:
        depth_map = (depth_map - depth_map.min()) / depth_range
    else:
        depth_map = np.zeros_like(depth_map)
    return depth_map
52
+
53
+ # ==================================================================
54
+ # Helper Function 2: Layer Separation
55
+ # ==================================================================
56
+ # This function returns mask_clean (hard) and mask_soft (soft/full-size)
57
def separate_foreground_background(image, depth_map, *,
                                   assume_bgr_input=True,
                                   near_is_foreground=True,
                                   foreground_depth_is_high=True):
    """Split an image into foreground/background layers using a depth map.

    A binary foreground mask is derived from the depth map via Otsu
    thresholding, cleaned with morphology, restricted to the largest
    connected component, then feathered into a soft alpha used to
    multiply the image into two layers.

    Args:
        image: HxWx3 (or grayscale / PIL) input image.
        depth_map: 2-D (or HxWxC; channel 0 is used) depth map.
        assume_bgr_input: If True and the image is 3-channel, convert
            BGR -> RGB (OpenCV convention). Pass False for RGB input.
        near_is_foreground: Whether near objects count as foreground.
        foreground_depth_is_high: Whether the foreground has the *higher*
            depth value in `depth_map`.

    Returns:
        (foreground, background, mask_clean, mask_soft) where the first
        two are uint8 HxWx3 layers, mask_clean is a hard 0/255 uint8 mask
        and mask_soft is a float32 [0,1] feathered mask.
    """
    print("... (2/5) Separating layers")

    # Coerce PIL / list inputs to numpy arrays.
    if not isinstance(image, np.ndarray):
        image = np.array(image)
    if not isinstance(depth_map, np.ndarray):
        depth_map = np.array(depth_map)

    # Only convert when explicitly told the input is BGR, to avoid
    # accidental double channel swaps.
    if assume_bgr_input and image.ndim == 3 and image.shape[2] == 3:
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    if image.ndim == 2:
        image = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
    if depth_map.ndim == 3:
        depth_map = depth_map[:, :, 0]

    # Depth -> 8-bit, lightly smoothed before thresholding.
    depth_norm = cv2.normalize(depth_map, None, 0, 255, cv2.NORM_MINMAX).astype(np.uint8)
    depth_smooth = cv2.GaussianBlur(depth_norm, (5, 5), 0)

    # Pick the threshold polarity so that "foreground" always ends up as
    # the white (255) side of the binary mask. The two flags cancel out:
    # same parity -> THRESH_BINARY, opposite parity -> inverted.
    if near_is_foreground and foreground_depth_is_high:
        thresh_flag = cv2.THRESH_BINARY
    elif near_is_foreground and not foreground_depth_is_high:
        thresh_flag = cv2.THRESH_BINARY_INV
    elif (not near_is_foreground) and foreground_depth_is_high:
        thresh_flag = cv2.THRESH_BINARY_INV
    else:
        thresh_flag = cv2.THRESH_BINARY

    # Otsu picks the threshold automatically; the 0 here is ignored.
    _, binary_mask = cv2.threshold(depth_smooth, 0, 255, thresh_flag + cv2.THRESH_OTSU)

    # Open removes speckle noise; close fills small holes in the subject.
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5))
    mask_clean = cv2.morphologyEx(binary_mask, cv2.MORPH_OPEN, kernel, iterations=1)
    mask_clean = cv2.morphologyEx(mask_clean, cv2.MORPH_CLOSE, kernel, iterations=2)

    # Keep only the largest connected component (assumed to be the subject).
    num_labels, labels, stats, _ = cv2.connectedComponentsWithStats(mask_clean, 8)
    if num_labels > 1:
        largest_label = 1 + np.argmax(stats[1:, cv2.CC_STAT_AREA])
        mask_clean = (labels == largest_label).astype(np.uint8) * 255

    # mask_soft is the full-size feathered mask, which is key to fixing
    # the halo/border artifact downstream.
    mask_soft = cv2.GaussianBlur(mask_clean, (9, 9), 5).astype(np.float32) / 255.0

    # Alpha-multiply the image into the two complementary layers.
    img_f = image.astype(np.float32) / 255.0
    mask_3 = np.dstack([mask_soft]*3)

    foreground = np.clip(img_f * mask_3, 0, 1)
    background = np.clip(img_f * (1.0 - mask_3), 0, 1)

    foreground = (foreground * 255.0).astype(np.uint8)
    background = (background * 255.0).astype(np.uint8)

    return foreground, background, mask_clean, mask_soft
112
+
113
+ # ==================================================================
114
+ # Helper Function 3: Background Reconstruction
115
+ # ==================================================================
116
+ # This function returns final_bg (inpainted background) and alpha_no_halo (eroded mask)
117
+ # Note: We no longer use alpha_no_halo for the animation, but the function is fine.
118
def reconstruct_background(background, mask_hard, original_image_np):
    """Inpaint the foreground "hole" out of the background layer.

    Args:
        background: uint8 HxWx3 background layer (foreground zeroed out).
        mask_hard: uint8 0/255 hard foreground mask (the hole to fill).
        original_image_np: Unused here; kept so the call signature stays
            stable for existing callers.

    Returns:
        (final_bg, alpha_no_halo): the inpainted background, and an
        eroded + distance-feathered float32 HxWx1 alpha. NOTE: the caller
        deliberately ignores alpha_no_halo for the animation (the soft
        mask is used instead).
    """
    print("... (3/5) Reconstructing background")

    # Dilate the hole slightly so inpainting also covers edge bleed.
    kernel = np.ones((7,7), np.uint8)
    mask_dilated = cv2.dilate(mask_hard, kernel, iterations=1)
    bg_inpainted = cv2.inpaint(background, mask_dilated, inpaintRadius=6, flags=cv2.INPAINT_TELEA)

    # Edge-preserving smoothing of the inpainted texture, applied only
    # inside the (dilated) hole; everywhere else keep the original pixels.
    bg_smooth = cv2.bilateralFilter(bg_inpainted, d=9, sigmaColor=75, sigmaSpace=75)
    final_bg = np.where(mask_dilated[..., None] == 255, bg_smooth, background)

    # Erode ~2px then feather via distance transform to build a halo-free
    # alpha. (Distance is 0 outside the eroded mask, so no explicit
    # zeroing is needed; the notebook version zeroes it explicitly.)
    k3 = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3,3))
    mask_erode = cv2.erode(mask_hard, k3, iterations=1)
    dist = cv2.distanceTransform(mask_erode, cv2.DIST_L2, 5)
    alpha_no_halo = dist / 6.0  # feather width ~6px; tune 8-14 for softer edges
    alpha_no_halo = np.clip(alpha_no_halo, 0, 1).astype(np.float32)
    alpha_no_halo = alpha_no_halo[..., None]  # HxWx1

    return final_bg, alpha_no_halo
136
+
137
+ # ==================================================================
138
+ # Helper Function 4: Animation
139
+ # ==================================================================
140
+ # This is the animation function (from V2 logic), which is correct (uses normalization to prevent gaps).
141
def create_multi_layer_animation(
    image_original,
    background_clean,
    alpha_mask,  # KEY: the full-size soft mask (mask_soft) is passed here
    depth_map,
    n_frames=60,
    parallax_strength=12,
    blur_strength=1.0,
    direction='right',
    zoom_center=1.10,
    zoom_peak=1.05
):
    """Render parallax frames by compositing three depth layers.

    The image is split into foreground (alpha_mask), mid-ground and
    far-ground (background split at the median depth of non-foreground
    pixels). Each layer translates by a different amount per frame and
    the background layers are blurred, producing a 2.5D parallax effect.
    Masks are re-normalized per frame so the three layers always sum to
    full coverage (no gaps / black borders after warping).

    Args:
        image_original: uint8 HxWx3 source image.
        background_clean: uint8 HxWx3 inpainted background.
        alpha_mask: soft foreground mask; 2-D or HxWx1, [0,1] or [0,255].
        depth_map: 2-D depth map (foreground = high values assumed —
            consistent with get_depth_map's output here).
        n_frames: number of frames in one full sine cycle.
        parallax_strength: peak foreground shift in pixels.
        blur_strength: multiplier on the mid/far Gaussian blur kernels.
        direction: 'right' | 'left' | 'up' | 'down' (default 'right').
        zoom_center, zoom_peak: scale at the cycle's rest and extremes.

    Returns:
        list of uint8 HxWx3 frames (length n_frames).
    """
    print(f"... (4/5) Generating {n_frames} animation frames")
    print(f" Params: Parallax={parallax_strength}px, Blur={blur_strength}x, Dir={direction}")

    h, w = image_original.shape[:2]

    # --- 1. Prepare motion and blur settings ---
    direction_map = {'right': (1, 0), 'left': (-1, 0), 'up': (0, -1), 'down': (0, 1)}
    dx, dy = direction_map.get(direction, (1, 0))

    # Layer shifts: foreground moves fully, mid at 1/2, far at 1/6.
    fg_shift = parallax_strength
    mid_shift = parallax_strength * 0.5
    far_shift = parallax_strength * (2 / 12)

    # Gaussian kernel sizes must be odd; bump even results up by one.
    base_mid_k = 9
    base_far_k = 35
    mid_k_raw = int(base_mid_k * blur_strength)
    far_k_raw = int(base_far_k * blur_strength)
    mid_k = (mid_k_raw + 1) if (mid_k_raw > 0 and mid_k_raw % 2 == 0) else max(1, mid_k_raw)
    far_k = (far_k_raw + 1) if (far_k_raw > 0 and far_k_raw % 2 == 0) else max(1, far_k_raw)
    mid_blur_ksize = (mid_k, mid_k)
    far_blur_ksize = (far_k, far_k)
    print(f" ...Using blur kernels: Mid={mid_blur_ksize}, Far={far_blur_ksize}")

    # --- 2. Prepare base masks (FG vs BG) ---
    # Normalize the alpha mask to float [0,1], HxWx1.
    if alpha_mask.max() > 1:
        alpha_mask = alpha_mask.astype(np.float32) / 255.0
    if alpha_mask.ndim == 2:
        alpha_mask = alpha_mask[..., None]

    fg_mask_3ch = np.repeat(alpha_mask, 3, axis=2)  # full-size foreground
    bg_mask_3ch = 1.0 - fg_mask_3ch                 # full-size background "hole"

    # --- 3. Create mid-ground / far-ground masks ---
    if depth_map.ndim == 3:
        depth_map = cv2.cvtColor(depth_map, cv2.COLOR_BGR2GRAY)

    # Split the background at the median depth of non-foreground pixels.
    bg_depth_values = depth_map[alpha_mask[..., 0] < 0.5]
    if len(bg_depth_values) > 0:
        bg_split_threshold = np.percentile(bg_depth_values, 50)
    else:
        bg_split_threshold = 0.5  # fallback when the mask covers everything

    raw_mid_mask = (depth_map > bg_split_threshold).astype(np.float32)
    raw_mid_mask_smooth = cv2.GaussianBlur(raw_mid_mask, (21, 21), 0)
    if raw_mid_mask_smooth.ndim == 2:
        raw_mid_mask_smooth = raw_mid_mask_smooth[..., None]

    raw_mid_mask_smooth_3ch = np.repeat(raw_mid_mask_smooth, 3, axis=2)

    # --- 4. Generate the final 3 mutually exclusive masks ---
    # fg + mid + far == 1 everywhere, so the composite has no gaps/overlaps.
    mid_mask_3ch = raw_mid_mask_smooth_3ch * bg_mask_3ch
    far_mask_3ch = (1.0 - raw_mid_mask_smooth_3ch) * bg_mask_3ch

    frames = []

    # --- 5. Loop to generate each frame ---
    for i in range(n_frames):
        # Sine easing gives a seamless back-and-forth loop.
        phase = (i / n_frames) * 2 * np.pi
        ease = np.sin(phase)

        # Zoom oscillates between zoom_center (rest) and zoom_peak (extremes).
        zoom_range = zoom_center - zoom_peak
        scale = zoom_center - (zoom_range * abs(ease))
        center = (w / 2, h / 2)
        M_scale = cv2.getRotationMatrix2D(center, 0, scale)

        # Pure-translation affine matrices, one per layer.
        M_fg_trans = np.float32([[1, 0, dx*ease*fg_shift], [0, 1, dy*ease*fg_shift]])
        M_mid_trans = np.float32([[1, 0, dx*ease*mid_shift], [0, 1, dy*ease*mid_shift]])
        M_far_trans = np.float32([[1, 0, dx*ease*far_shift], [0, 1, dy*ease*far_shift]])

        # --- Layer transforms: translate, then zoom, then (bg only) blur ---
        fg_warped = cv2.warpAffine(image_original, M_fg_trans, (w,h), borderMode=cv2.BORDER_REFLECT_101)
        fg_final = cv2.warpAffine(fg_warped, M_scale, (w,h), borderMode=cv2.BORDER_REFLECT_101).astype(np.float32)

        mid_warped = cv2.warpAffine(background_clean, M_mid_trans, (w,h), borderMode=cv2.BORDER_REPLICATE)
        mid_warped_scaled = cv2.warpAffine(mid_warped, M_scale, (w,h), borderMode=cv2.BORDER_REPLICATE)
        mid_final = cv2.GaussianBlur(mid_warped_scaled, mid_blur_ksize, 0).astype(np.float32)

        far_warped = cv2.warpAffine(background_clean, M_far_trans, (w,h), borderMode=cv2.BORDER_REPLICATE)
        far_warped_scaled = cv2.warpAffine(far_warped, M_scale, (w,h), borderMode=cv2.BORDER_REPLICATE)
        far_final = cv2.GaussianBlur(far_warped_scaled, far_blur_ksize, 0).astype(np.float32)

        # --- Mask transforms: each mask follows its own layer exactly ---
        fg_mask_warped = cv2.warpAffine(fg_mask_3ch, M_fg_trans, (w,h))
        fg_mask_warped = cv2.warpAffine(fg_mask_warped, M_scale, (w,h))

        mid_mask_warped = cv2.warpAffine(mid_mask_3ch, M_mid_trans, (w,h))
        mid_mask_warped = cv2.warpAffine(mid_mask_warped, M_scale, (w,h))

        far_mask_warped = cv2.warpAffine(far_mask_3ch, M_far_trans, (w,h))
        far_mask_warped = cv2.warpAffine(far_mask_warped, M_scale, (w,h))

        # --- Final composite ---
        # Re-normalize masks so they still sum to 1 after independent
        # warping (prevents black borders / tiny gaps); 1e-6 avoids /0.
        total_mask = fg_mask_warped + mid_mask_warped + far_mask_warped + 1e-6
        fg_mask_warped /= total_mask
        mid_mask_warped /= total_mask
        far_mask_warped /= total_mask

        # Weighted blend of the three layers.
        composite = (fg_final * fg_mask_warped) + \
                    (mid_final * mid_mask_warped) + \
                    (far_final * far_mask_warped)

        frame = np.clip(composite, 0, 255).astype(np.uint8)
        frames.append(frame)

    print(f"... (4/5) Frame generation complete.")
    return frames
264
+
265
+ # ==================================================================
266
+ # MAIN GRADIO FUNCTION (Ties everything together)
267
+ # ==================================================================
268
+
269
def generate_parallax_effect(input_image_np, parallax_strength, blur_strength, animation_direction):
    """Gradio entry point: full pipeline from uploaded image to GIF path.

    Steps: resize -> depth estimation -> FG/BG separation -> background
    inpainting -> multi-layer parallax animation -> GIF written to the
    server's working directory.

    Args:
        input_image_np: uint8 numpy image from the gr.Image input.
        parallax_strength: peak foreground shift in pixels (slider).
        blur_strength: background-blur multiplier (slider).
        animation_direction: 'right' | 'left' | 'up' | 'down' (dropdown).

    Returns:
        Path of the saved GIF (relative filename), displayed by gr.Image.
    """
    print("\n--- Processing new image ---")

    # --- 0. Image Preparation ---
    image_pil = Image.fromarray(input_image_np).convert('RGB')

    # Downscale so the longest side is <= 640px (keeps depth inference fast).
    max_size = 640
    if max(image_pil.size) > max_size:
        ratio = max_size / max(image_pil.size)
        new_size = tuple(int(dim * ratio) for dim in image_pil.size)
        image_pil = image_pil.resize(new_size, Image.LANCZOS)

    image_resized_np = np.array(image_pil)
    print(f"Image resized to: {image_pil.size}")

    # --- 1. Get Depth Map ---
    depth_map_0_1 = get_depth_map(image_pil, processor, model, device)

    # --- 2. Layer Separation ---
    # Input is RGB (PIL), so no BGR conversion. mask_soft is the full-size
    # feathered mask used later for compositing.
    foreground, background, mask_hard, mask_soft = separate_foreground_background(
        image_pil,
        depth_map_0_1,
        assume_bgr_input=False,
        near_is_foreground=True,
        foreground_depth_is_high=True
    )

    # --- 3. Background Reconstruction ---
    # alpha_no_halo is returned but intentionally unused: the soft mask
    # gave better edges for the animation (see V4 notes in the header).
    final_bg, alpha_no_halo = reconstruct_background(background, mask_hard, image_resized_np)

    # --- 4. Animation ---
    # KEY FIX (V4): pass mask_soft (full-size soft mask) as the alpha.
    multi_layer_frames = create_multi_layer_animation(
        image_original=image_resized_np,
        background_clean=final_bg,
        alpha_mask=mask_soft,
        depth_map=depth_map_0_1,
        n_frames=60,
        parallax_strength=parallax_strength,
        blur_strength=blur_strength,
        direction=animation_direction
    )

    # --- 5. Save GIF and Return Path ---
    print("... (5/5) Saving final GIF")
    # Unix timestamp keeps concurrent requests from clobbering each other
    # (only to 1-second resolution — two requests in the same second collide).
    timestamp = int(time.time())
    output_filename = f'parallax_final_{timestamp}.gif'

    # Written to the SERVER'S disk; Gradio serves it back to the browser.
    # NOTE(review): imageio's GIF writer has interpreted `duration` as
    # seconds historically but as milliseconds in newer plugin versions —
    # confirm against the pinned imageio version in requirements.txt.
    imageio.mimsave(output_filename, multi_layer_frames, duration=0.04, loop=0)
    print(f"--- Processing complete! Saved to {output_filename} ---")

    # Only the GIF filepath is returned (single output component).
    return output_filename
328
+
329
+ # ==================================================================
330
+ # Gradio Interface (Modified)
331
+ # ==================================================================
332
+
333
print("Creating Gradio interface...")

# --- 1. Define Input Components ---
# type="numpy" delivers the upload as a uint8 ndarray to the handler.
input_image = gr.Image(label="1. Upload Your Image", type="numpy")

param_parallax = gr.Slider(
    minimum=0,
    maximum=30,
    value=12,
    step=1,
    label="2. Parallax Strength (px)",
    info="Foreground motion in pixels. Higher = stronger 3D effect."
)

param_blur = gr.Slider(
    minimum=0.0,
    maximum=2.0,
    value=1.0,
    step=0.1,
    label="3. Aperture / Blur Strength",
    info="Controls background blur (bokeh). 0 = no blur, 1 = default, 2 = max blur."
)

param_direction = gr.Dropdown(
    choices=['right', 'left', 'up', 'down'],
    value='right',
    label="4. Animation Direction"
)

# --- 2. Define Output Components ---
# A single gr.Image displays the generated GIF; the component's built-in
# download button (top-right corner) covers the download requirement.
output_gif = gr.Image(label="Generated Parallax GIF")


# --- 4. Create Interface ---
iface = gr.Interface(
    fn=generate_parallax_effect,
    inputs=[input_image, param_parallax, param_blur, param_direction],
    outputs=output_gif,
    title="📸 3D Parallax Photo Animator (CS5330-HW4)",
    description="""
    Upload a photo (ideally with a clear foreground and background) to generate a 3D parallax and depth-of-field animation.

    1. Upload an image.
    2. Adjust the 3 parameters below.
    3. Click "Submit".

    Processing may take 30-60 seconds. You can find the download button in the top-right corner of the generated GIF.
    """,
)

if __name__ == "__main__":
    # share=False: local serving only (Spaces handles public exposure).
    iface.launch(share=False)
cs5330_hw4.py ADDED
@@ -0,0 +1,633 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """CS5330-HW4.ipynb
3
+
4
+ Automatically generated by Colab.
5
+
6
+ Original file is located at
7
+ https://colab.research.google.com/drive/1s2_5NEaW54KPPeoQy2TlhZxD_HHtF9fg
8
+
9
+ # Part 0. Transformer Setup
10
+ """
11
+
12
+ import torch
13
+ import numpy as np
14
+ import matplotlib.pyplot as plt
15
+ from PIL import Image
16
+ from transformers import DPTImageProcessor, DPTForDepthEstimation
17
+ import requests
18
+
19
+ """## Initialize Depth Model"""
20
+
21
# ============================================
# Initialize the Depth Model
# ============================================
print("Loading Intel DPT depth estimation model...")
processor = DPTImageProcessor.from_pretrained("Intel/dpt-large")
model = DPTForDepthEstimation.from_pretrained("Intel/dpt-large")
model.eval()  # Set to evaluation mode (disables dropout/batch-norm updates)

# Use GPU if available for faster processing
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)
print(f"Model loaded on {device}")

"""## Image Preparation"""

# ============================================
# Load and Prepare Your Image
# ============================================
# Option 1: Load from URL
image_url = "https://images.pexels.com/photos/1681010/pexels-photo-1681010.jpeg"
image = Image.open(requests.get(image_url, stream=True).raw).convert('RGB')

# Option 2: Upload from local (in Colab)
# from google.colab import files
# uploaded = files.upload()
# image = Image.open(list(uploaded.keys())[0]).convert('RGB')

# Resize so the longest side is <= 640px for faster depth inference
# (optional but recommended).
max_size = 640
if max(image.size) > max_size:
    ratio = max_size / max(image.size)
    new_size = tuple(int(dim * ratio) for dim in image.size)
    image = image.resize(new_size, Image.LANCZOS)

print(f"Image size: {image.size}")

"""## Depth Map Extraction"""

# ============================================
# Extract Depth Map
# ============================================
# Prepare image for the model
inputs = processor(images=image, return_tensors="pt")
inputs = {k: v.to(device) for k, v in inputs.items()}

# Run depth estimation (inference only, no gradients)
with torch.no_grad():
    outputs = model(**inputs)
    predicted_depth = outputs.predicted_depth

# Interpolate to original size and normalize
# (PIL .size is (width, height); interpolate wants (height, width))
prediction = torch.nn.functional.interpolate(
    predicted_depth.unsqueeze(1),
    size=image.size[::-1],  # (height, width)
    mode="bicubic",
    align_corners=False,
)

# Convert to numpy and normalize to 0-1 range
# (divides by zero if the prediction is perfectly flat — fine for photos)
depth_map = prediction.squeeze().cpu().numpy()
depth_map = (depth_map - depth_map.min()) / (depth_map.max() - depth_map.min())

"""## Image Visualization"""

# ============================================
# Visualize Results
# ============================================
fig, axes = plt.subplots(1, 2, figsize=(12, 5))

# Original image
axes[0].imshow(image)
axes[0].set_title('Original Image')
axes[0].axis('off')

# Depth map
im = axes[1].imshow(depth_map, cmap='plasma')
axes[1].set_title('Depth Map (Yellow=Close, Purple=Far)')
axes[1].axis('off')
plt.colorbar(im, ax=axes[1], fraction=0.046)

plt.tight_layout()
plt.show()

print(f"Depth map shape: {depth_map.shape}")
print(f"Depth range: [{depth_map.min():.3f}, {depth_map.max():.3f}]")
print("Ready for processing!")
# depth_map is now a normalized numpy array where:
# - Values close to 1.0 = near to camera (yellow in visualization)
# - Values close to 0.0 = far from camera (purple in visualization)
# Use this depth_map for all subsequent processing!
111
+
112
+ """# Part 1. Depth-Guided Layer Separation"""
113
+
114
+ import cv2
115
+ import numpy as np
116
+ from matplotlib import pyplot as plt
117
+
118
+ """At this point ,we should have [image] and [depth_map] available"""
119
+
120
+ import numpy as np
121
+ import cv2
122
+
123
def separate_foreground_background(image, depth_map, *,
                                   assume_bgr_input=True,
                                   near_is_foreground=True,
                                   foreground_depth_is_high=True):
    """Split an image into foreground/background layers using a depth map.

    Notebook version of the same helper that ships in app.py; keep the
    two in sync if either is edited.

    Params
    ------
    assume_bgr_input: Whether the input image is in OpenCV's typical BGR format (True converts to RGB; set to False for RGB input)
    near_is_foreground: Whether near objects are the foreground (True is common for "face/subject is closer")
    foreground_depth_is_high: Whether the foreground's "depth value" is higher (In your map: foreground is brighter -> higher value -> True)

    Returns
    -------
    (foreground, background, mask_clean, mask_soft): uint8 FG/BG layers,
    a hard 0/255 uint8 mask, and a float32 [0,1] feathered mask.
    """

    # ---- 1) Unify formats ----
    if not isinstance(image, np.ndarray):
        image = np.array(image)
    if not isinstance(depth_map, np.ndarray):
        depth_map = np.array(depth_map)

    # Only convert to RGB if explicitly BGR (avoids unnecessary BGR<->RGB round-trips causing color shifts)
    if assume_bgr_input and image.ndim == 3 and image.shape[2] == 3:
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    if image.ndim == 2:  # Convert grayscale to 3 channels as well
        image = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
    if depth_map.ndim == 3:
        depth_map = depth_map[:, :, 0]

    # ---- 2) Depth -> Binary Mask (Foreground=1) ----
    depth_norm = cv2.normalize(depth_map, None, 0, 255, cv2.NORM_MINMAX).astype(np.uint8)
    depth_smooth = cv2.GaussianBlur(depth_norm, (5, 5), 0)

    # Key correction:
    # In your example: near=foreground AND foreground depth value is higher (brighter),
    # Therefore, THRESH_BINARY should be used (takes high values as 1), otherwise FG/BG will be inverted.
    # Net effect: same parity of the two flags -> BINARY, opposite -> INV.
    if near_is_foreground and foreground_depth_is_high:
        thresh_flag = cv2.THRESH_BINARY
    elif near_is_foreground and not foreground_depth_is_high:
        thresh_flag = cv2.THRESH_BINARY_INV
    elif (not near_is_foreground) and foreground_depth_is_high:
        thresh_flag = cv2.THRESH_BINARY_INV
    else:  # not near_is_foreground and not foreground_depth_is_high
        thresh_flag = cv2.THRESH_BINARY

    # Otsu chooses the threshold; the passed 0 is ignored.
    _, binary_mask = cv2.threshold(depth_smooth, 0, 255, thresh_flag + cv2.THRESH_OTSU)

    # Cleanup and find largest connected component
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5))
    mask_clean = cv2.morphologyEx(binary_mask, cv2.MORPH_OPEN, kernel, iterations=1)
    mask_clean = cv2.morphologyEx(mask_clean, cv2.MORPH_CLOSE, kernel, iterations=2)

    num_labels, labels, stats, _ = cv2.connectedComponentsWithStats(mask_clean, 8)
    if num_labels > 1:
        largest_label = 1 + np.argmax(stats[1:, cv2.CC_STAT_AREA])
        mask_clean = (labels == largest_label).astype(np.uint8) * 255

    # Soften edges
    mask_soft = cv2.GaussianBlur(mask_clean, (9, 9), 5).astype(np.float32) / 255.0

    # ---- 3) Composite (Foreground=img*mask, Background=img*(1-mask)) ----
    img_f = image.astype(np.float32) / 255.0
    mask_3 = np.dstack([mask_soft]*3)

    foreground = np.clip(img_f * mask_3, 0, 1)
    background = np.clip(img_f * (1.0 - mask_3), 0, 1)

    foreground = (foreground * 255.0).astype(np.uint8)
    background = (background * 255.0).astype(np.uint8)

    return foreground, background, mask_clean, mask_soft
192
+
193
+ from PIL import Image as PILImage
194
+
195
def visualize_results(image, depth_map, foreground, background, mask, mask_soft):
    """Show a 2x3 grid: original, depth map, hard mask, soft mask, FG, BG."""
    # (data, title, colormap) for each panel, in row-major display order.
    panels = [
        (image, 'Original Image', None),
        (depth_map, 'Depth Map', 'plasma'),
        (mask, 'Binary Mask (Cleaned)', 'gray'),
        (mask_soft, 'Soft Mask (Blurred)', 'gray'),
        (foreground, 'Foreground', None),
        (background, 'Background', None),
    ]
    fig, axes = plt.subplots(2, 3, figsize=(15, 10))
    for ax, (data, title, cmap) in zip(axes.flat, panels):
        if cmap is None:
            ax.imshow(data)
        else:
            ax.imshow(data, cmap=cmap)
        ax.set_title(title)
        ax.axis('off')
    plt.tight_layout()
    plt.show()
218
+
219
# Run the separation on the notebook's globals. Input is RGB (loaded via
# PIL), so assume_bgr_input=False; DPT depth is high-for-near, so both
# polarity flags are True.
foreground, background, mask_hard, mask_soft = separate_foreground_background(image, depth_map, assume_bgr_input=False,
                                                                              near_is_foreground=True,
                                                                              foreground_depth_is_high=True)

# separate_foreground_background does not mutate its argument, so `image`
# may still be a PIL Image here; coerce for matplotlib.
if not isinstance(image, np.ndarray):
    image = np.array(image)

visualize_results(image, depth_map, foreground, background, mask_hard, mask_soft)
227
+
228
+ """# Part 3: Intelligent Background Reconstruction"""
229
+
230
# Dilate the foreground hole so inpainting also covers edge bleed.
kernel = np.ones((7,7), np.uint8)
mask_dilated = cv2.dilate(mask_hard, kernel, iterations=1)

# 1️⃣ Inpaint (Telea fast-marching fill of the dilated hole)
bg_inpainted = cv2.inpaint(background, mask_dilated, inpaintRadius=6, flags=cv2.INPAINT_TELEA)

# 2️⃣ Smooth (edge-preserving bilateral filter over the inpainted result)
bg_smooth = cv2.bilateralFilter(bg_inpainted, d=9, sigmaColor=75, sigmaSpace=75)

# 3️⃣ Optional: Only replace in the mask region
final_bg = np.where(mask_dilated[..., None] == 255, bg_smooth, background)

# -- New: Use the hard mask to create an eroded+feathered alpha with no black halo (recommended to pass this)
k3 = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3,3))  # Approximately erode 2px
mask_erode = cv2.erode(mask_hard, k3, iterations=1)
dist = cv2.distanceTransform(mask_erode, cv2.DIST_L2, 5)
alpha_no_halo = dist / 6  # feather≈12, adjustable 8~14
alpha_no_halo = np.clip(alpha_no_halo, 0, 1).astype(np.float32)
alpha_no_halo[mask_erode == 0] = 0.0
alpha_no_halo = alpha_no_halo[..., None]  # HxWx1
original_rgb = image

# 4️⃣ Display
plt.figure(figsize=(10,5))
plt.subplot(1,2,1); plt.title("Original Background with Hole"); plt.imshow(background); plt.axis('off')
plt.subplot(1,2,2); plt.title("Clean Background (Inpainted)"); plt.imshow(final_bg); plt.axis('off')
plt.show()


import matplotlib.pyplot as plt
import numpy as np

# Check if the alpha mask is correct (5-panel diagnostic figure)
plt.figure(figsize=(20, 5))

plt.subplot(151)
plt.imshow(image)
plt.title('Original Image')
plt.axis('off')

plt.subplot(152)
plt.imshow(alpha_no_halo.squeeze(), cmap='gray')
plt.title('Alpha Mask\n(Should cover full person)')
plt.axis('off')

plt.subplot(153)
plt.imshow(final_bg)
plt.title('Clean Background')
plt.axis('off')

# Test composite: alpha-blend original over the clean background to see
# whether the alpha erodes into the subject.
fg_float = image.astype(np.float32) / 255.0
bg_float = final_bg.astype(np.float32) / 255.0
test_composite = fg_float * alpha_no_halo + bg_float * (1.0 - alpha_no_halo)
test_composite = (np.clip(test_composite, 0, 1) * 255).astype(np.uint8)

plt.subplot(154)
plt.imshow(test_composite)
plt.title('Test Composite\n(Does BG eat face?)')
plt.axis('off')

# Highlight areas where alpha < 0.5 (potential lost person areas)
alpha_highlight = image.copy()
low_alpha_mask = alpha_no_halo.squeeze() < 0.5
alpha_highlight[low_alpha_mask] = [255, 0, 0]  # Mark with red

plt.subplot(155)
plt.imshow(alpha_highlight)
plt.title('Red = Alpha < 0.5\n(Lost person areas)')
plt.axis('off')

plt.tight_layout()
plt.show()

# Print diagnostic info
print("Alpha Mask Statistics:")
print(f" Min: {alpha_no_halo.min():.3f}")
print(f" Max: {alpha_no_halo.max():.3f}")
print(f" Shape: {alpha_no_halo.shape}")
print(f" Pixels with alpha > 0.9: {(alpha_no_halo > 0.9).sum()}")
print(f" Pixels with alpha < 0.5: {(alpha_no_halo < 0.5).sum()}")
311
+
312
+ """# Part 4: Depth-Aware Motion Synthesis"""
313
+
314
+ import numpy as np
315
+ import cv2
316
+
317
def create_motion_frames(
    image_original,
    background_clean,
    alpha_mask,
    n_frames=30,
    fg_shift=12,
    bg_shift=4,
    direction='right',
    scale_effect=0.015
):
    """
    Part 4: Generate the frames of a two-layer parallax animation.

    The foreground (selected by ``alpha_mask``) and the clean background
    plate are translated by different per-frame amounts and re-composited.
    Only motion and compositing happen here; depth-of-field blur is added
    later (Part 5).

    Parameters
    ----------
    image_original : np.ndarray
        H x W x 3 source frame containing the foreground subject.
    background_clean : np.ndarray
        H x W x 3 inpainted background plate (subject removed).
    alpha_mask : np.ndarray
        Foreground matte; 2-D, H x W x 1 or H x W x 3, either 0-255 or 0-1.
    n_frames : int
        Number of frames in one full back-and-forth cycle.
    fg_shift, bg_shift : float
        Peak displacement in pixels for foreground / background layers.
    direction : str
        One of 'right', 'left', 'up', 'down' (unknown values fall back
        to rightward motion).
    scale_effect : float
        Extra zoom applied at the motion extremes (0 disables zoom).

    Returns
    -------
    list[np.ndarray]
        ``n_frames`` uint8 composited frames.
    """
    height, width = image_original.shape[:2]

    # Unit direction vector; .get() keeps unknown names from crashing.
    offsets = {
        'right': (1, 0),
        'left': (-1, 0),
        'up': (0, -1),
        'down': (0, 1)
    }
    dx, dy = offsets.get(direction, (1, 0))

    # Normalise the matte into a 3-channel float mask in [0, 1].
    matte = alpha_mask
    if matte.ndim == 2:
        matte = matte[..., None]
    if matte.shape[2] == 1:
        matte = np.repeat(matte, 3, axis=2)
    matte = matte.astype(np.float32)
    if matte.max() > 1:
        matte /= 255.0
    matte = np.clip(matte, 0.0, 1.0)

    frames = []
    print(f"Generating {n_frames} motion frames (Part 4)...")

    for idx in range(n_frames):
        # Sinusoidal easing (0 -> 1 -> 0 -> -1 -> 0) gives smooth
        # back-and-forth motion that loops cleanly as a GIF.
        ease = np.sin((idx / n_frames) * 2 * np.pi)

        # Zoom grows with |ease|: 1.0 at rest, 1.0 + scale_effect at peaks.
        scale = 1.0 + abs(ease) * scale_effect

        # Per-layer displacement for this frame.
        fg_dx, fg_dy = dx * ease * fg_shift, dy * ease * fg_shift
        bg_dx, bg_dy = dx * ease * bg_shift, dy * ease * bg_shift

        # Affine matrices: pure translation per layer plus a shared zoom
        # about the image centre.
        shift_fg = np.float32([[1, 0, fg_dx], [0, 1, fg_dy]])
        shift_bg = np.float32([[1, 0, bg_dx], [0, 1, bg_dy]])
        zoom = cv2.getRotationMatrix2D((width / 2, height / 2), 0, scale)

        # Foreground layer: reflect at borders to avoid black edges.
        fg_layer = cv2.warpAffine(
            image_original, shift_fg, (width, height),
            flags=cv2.INTER_LINEAR, borderMode=cv2.BORDER_REFLECT_101
        )
        fg_layer = cv2.warpAffine(
            fg_layer, zoom, (width, height),
            flags=cv2.INTER_LINEAR, borderMode=cv2.BORDER_REFLECT_101
        )

        # Background layer: replicate edge pixels instead of reflecting.
        bg_layer = cv2.warpAffine(
            background_clean, shift_bg, (width, height),
            flags=cv2.INTER_LINEAR, borderMode=cv2.BORDER_REPLICATE
        )
        bg_layer = cv2.warpAffine(
            bg_layer, zoom, (width, height),
            flags=cv2.INTER_LINEAR, borderMode=cv2.BORDER_REPLICATE
        )

        # The matte must follow the exact same transform as the foreground,
        # with zero padding so shifted-in border areas read as background.
        warped_matte = cv2.warpAffine(
            matte, shift_fg, (width, height),
            flags=cv2.INTER_LINEAR, borderMode=cv2.BORDER_CONSTANT, borderValue=0
        )
        warped_matte = cv2.warpAffine(
            warped_matte, zoom, (width, height),
            flags=cv2.INTER_LINEAR, borderMode=cv2.BORDER_CONSTANT, borderValue=0
        )
        warped_matte = np.clip(warped_matte, 0, 1)

        # Alpha-blend the two layers and convert back to uint8.
        blended = (fg_layer.astype(np.float32) * warped_matte
                   + bg_layer.astype(np.float32) * (1.0 - warped_matte))
        frames.append(np.clip(blended, 0, 255).astype(np.uint8))

    print("Part 4: Motion frames generated.")
    return frames
428
+
429
# Generate the Part 4 (motion-only) preview using globals produced earlier
# in the notebook: `image` (original RGB frame), `final_bg` (inpainted
# clean background) and `alpha_no_halo` (halo-free foreground matte).
# scale_effect=0.00 disables the zoom so only the parallax shift is visible.
motion_frames = create_motion_frames(
    image_original=image,
    background_clean=final_bg,
    alpha_mask=alpha_no_halo,
    n_frames=30,
    fg_shift=12,
    bg_shift=4,
    direction='right',
    scale_effect=0.00
)

# Save a "no blur" GIF to preview the motion.
import imageio.v2 as imageio
output_filename = 'parallax_part4_only.gif'
# duration=0.04 -> ~25 fps; loop=0 -> loop forever; palettesize=192 trims
# the GIF palette to keep the file small.
imageio.mimsave(output_filename, motion_frames, duration=0.04, loop=0, optimize=True, palettesize=192)
print("Part 4 preview GIF saved!")
447
+
448
+ """# Part 5: Depth-of-Field & Bokeh Effects"""
449
+
450
+ import numpy as np
451
+ import cv2
452
+ import imageio.v2 as imageio
453
+
454
def create_multi_layer_animation(
    image_original,
    background_clean,
    alpha_mask, # This is your 'alpha_no_halo' (H x W or H x W x 1; 0-255 or 0-1)
    depth_map, # This is your 'depth_map' (needs to be single-channel, 0-1 float)
    n_frames=60,

    # --- 1. Multi-layer motion settings ---
    fg_shift=12, # Foreground (person) moves 12px
    mid_shift=6, # Mid-ground (near background) moves 6px
    far_shift=2, # Far-ground (far background) moves 2px

    # --- 2. Dynamic zoom settings ---
    zoom_center=1.10,
    zoom_peak=1.05,

    # --- 3. Dynamic Depth-of-Field settings ---
    mid_blur_ksize=(9, 9), # Mid-ground blur (f/5.6)
    far_blur_ksize=(35, 35), # Far-ground blur (f/1.4)

    direction='right'
):
    """
    Generate a 3-layer parallax animation with depth-of-field and zoom.

    The scene is split into foreground (from ``alpha_mask``), mid-ground
    and far-ground (background split at the median of its depth values).
    Each layer is translated by its own shift, shares a per-frame zoom,
    and the two background layers receive different Gaussian blurs to
    simulate depth of field. Masks are warped with their layers and
    re-normalised before compositing to avoid seams.

    Returns a list of ``n_frames`` uint8 frames (H x W x 3).
    """
    print("--- Start generating advanced multi-layer animation ---")

    h, w = image_original.shape[:2]

    # --- 1. Prepare motion ---
    direction_map = {'right': (1, 0), 'left': (-1, 0), 'up': (0, -1), 'down': (0, 1)}
    dx, dy = direction_map.get(direction, (1, 0))

    # --- 2. Prepare base masks (Foreground vs. Background) ---
    # Ensure alpha_mask is a 0-1 float
    if alpha_mask.max() > 1:
        alpha_mask = alpha_mask.astype(np.float32) / 255.0
    if alpha_mask.ndim == 2:
        alpha_mask = alpha_mask[..., None]

    # Create foreground mask (fg_mask) and background mask (bg_mask)
    fg_mask_3ch = np.repeat(alpha_mask, 3, axis=2)
    bg_mask_3ch = 1.0 - fg_mask_3ch

    # --- 3. Create mid/far-ground masks (outside loop) ---
    print("...Analyzing depth map and creating layers...")

    # Ensure depth_map is single-channel
    # NOTE(review): assumes a 3-channel depth map is BGR-ordered — confirm
    # against the producer if depth ever arrives as RGB.
    if depth_map.ndim == 3:
        depth_map = cv2.cvtColor(depth_map, cv2.COLOR_BGR2GRAY)

    # Find depth values in the background region (where alpha < 0.5)
    bg_depth_values = depth_map[alpha_mask[..., 0] < 0.5]

    # Find the 50th percentile (median) of background depth as the split point
    if len(bg_depth_values) > 0:
        bg_split_threshold = np.percentile(bg_depth_values, 50)
    else:
        bg_split_threshold = 0.5 # If no background, just use a default value

    print(f" Background depth split point: {bg_split_threshold:.4f}")

    # Create a raw binary mask (1 = mid-ground, 0 = far-ground)
    # Depth value > threshold = closer = mid-ground
    raw_mid_mask = (depth_map > bg_split_threshold).astype(np.float32)

    # Blur this mask for a smooth transition between mid and far grounds
    # Note: this mask currently includes the person/foreground area
    raw_mid_mask_smooth = cv2.GaussianBlur(raw_mid_mask, (21, 21), 0)
    if raw_mid_mask_smooth.ndim == 2:
        raw_mid_mask_smooth = raw_mid_mask_smooth[..., None]

    # Expand to 3 channels
    raw_mid_mask_smooth_3ch = np.repeat(raw_mid_mask_smooth, 3, axis=2)

    # --- 4. Generate the final 3 mutually exclusive masks ---
    # Mid-ground mask = (smooth mid mask) * (background mask)
    # This "cuts out" the person, leaving only the mid-ground part of the background
    mid_mask_3ch = raw_mid_mask_smooth_3ch * bg_mask_3ch

    # Far-ground mask = (1.0 - smooth mid mask) * (background mask)
    # This "cuts out" the person, leaving only the far-ground part of the background
    far_mask_3ch = (1.0 - raw_mid_mask_smooth_3ch) * bg_mask_3ch

    # fg_mask_3ch (foreground) + mid_mask_3ch (mid-ground) + far_mask_3ch (far-ground)
    # These three masks now sum to 1.0 (the whole image) and are mutually exclusive.

    print("...Layers created. Starting frame generation...")

    frames = []

    # --- 5. Loop to generate each frame ---
    for i in range(n_frames):

        # --- 5a. Calculate motion and scale ---
        # Full sine cycle over n_frames -> seamless looping GIF.
        phase = (i / n_frames) * 2 * np.pi
        ease = np.sin(phase)

        # Dynamic zoom: scale oscillates between zoom_center (at rest)
        # and zoom_peak (at motion extremes).
        zoom_range = zoom_center - zoom_peak
        scale = zoom_center - (zoom_range * abs(ease))
        center = (w / 2, h / 2)
        M_scale = cv2.getRotationMatrix2D(center, 0, scale)

        # --- 5b. Create 3 different transformation matrices ---
        M_fg_trans = np.float32([[1, 0, dx*ease*fg_shift], [0, 1, dy*ease*fg_shift]])
        M_mid_trans = np.float32([[1, 0, dx*ease*mid_shift], [0, 1, dy*ease*mid_shift]])
        M_far_trans = np.float32([[1, 0, dx*ease*far_shift], [0, 1, dy*ease*far_shift]])

        # --- 5c. Transform and blur layers ---

        # --- Foreground (sharp) ---
        fg_warped = cv2.warpAffine(image_original, M_fg_trans, (w,h), borderMode=cv2.BORDER_REFLECT_101)
        fg_final = cv2.warpAffine(fg_warped, M_scale, (w,h), borderMode=cv2.BORDER_REFLECT_101).astype(np.float32)

        # --- Mid-ground (slight blur) ---
        mid_warped = cv2.warpAffine(background_clean, M_mid_trans, (w,h), borderMode=cv2.BORDER_REPLICATE)
        mid_warped_scaled = cv2.warpAffine(mid_warped, M_scale, (w,h), borderMode=cv2.BORDER_REPLICATE)
        mid_final = cv2.GaussianBlur(mid_warped_scaled, mid_blur_ksize, 0).astype(np.float32)

        # --- Far-ground (heavy blur) ---
        far_warped = cv2.warpAffine(background_clean, M_far_trans, (w,h), borderMode=cv2.BORDER_REPLICATE)
        far_warped_scaled = cv2.warpAffine(far_warped, M_scale, (w,h), borderMode=cv2.BORDER_REPLICATE)
        far_final = cv2.GaussianBlur(far_warped_scaled, far_blur_ksize, 0).astype(np.float32)

        # --- 5d. Transform the 3 masks ---
        # Masks must be transformed along with their corresponding layers!
        # (default BORDER_CONSTANT=0 is intentional: warped-in borders get
        # zero weight and are filled by the renormalisation in 5e)
        fg_mask_warped = cv2.warpAffine(fg_mask_3ch, M_fg_trans, (w,h))
        fg_mask_warped = cv2.warpAffine(fg_mask_warped, M_scale, (w,h))

        mid_mask_warped = cv2.warpAffine(mid_mask_3ch, M_mid_trans, (w,h))
        mid_mask_warped = cv2.warpAffine(mid_mask_warped, M_scale, (w,h))

        far_mask_warped = cv2.warpAffine(far_mask_3ch, M_far_trans, (w,h))
        far_mask_warped = cv2.warpAffine(far_mask_warped, M_scale, (w,h))

        # --- 5e. Final composite ---
        # Re-normalize the masks to prevent black borders or tiny gaps after warp
        total_mask = fg_mask_warped + mid_mask_warped + far_mask_warped + 1e-6 # Avoid division by zero
        fg_mask_warped /= total_mask
        mid_mask_warped /= total_mask
        far_mask_warped /= total_mask

        # Add the three layers, weighted by their masks
        composite = (fg_final * fg_mask_warped) + \
                    (mid_final * mid_mask_warped) + \
                    (far_final * far_mask_warped)

        frame = np.clip(composite, 0, 255).astype(np.uint8)
        frames.append(frame)

        if (i + 1) % 10 == 0:
            print(f" ...Frame {i+1}/{n_frames} complete")

    print(f"Advanced animation generation complete. Created {n_frames} frames.")
    return frames
610
+
611
# Normalise the depth map into the 0-1 float range the animator expects;
# maps stored as 0-255 are rescaled, already-normalised maps pass through.
divisor = 255.0 if depth_map.max() > 1 else 1.0
depth_map_0_1 = depth_map.astype(np.float32) / divisor


# Call the new multi-layer function
multi_layer_frames = create_multi_layer_animation(
    image_original=image,
    background_clean=final_bg,
    alpha_mask=alpha_no_halo, # Your foreground mask
    depth_map=depth_map_0_1, # Your 0-1 depth map
    n_frames=60,
    fg_shift=12,
    mid_shift=6,
    far_shift=2,
    zoom_center=1.10,
    zoom_peak=1.05
)

# Save the final GIF (~25 fps, infinite loop)
imageio.mimsave('parallax_final_multi_layer.gif', multi_layer_frames, duration=0.04, loop=0)
print("Final multi-layer parallax animation saved!")
requirements.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ transformers
2
+ torch
3
+ torchvision
4
+ pillow
5
+ matplotlib
6
+ requests
7
+ opencv-python
8
+ imageio
9
+ tqdm
10
+ gradio