Krokodilpirat committed on
Commit
8f4905c
·
verified ·
1 Parent(s): 26c28ee

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +99 -348
app.py CHANGED
@@ -125,279 +125,84 @@ def generate_blip_name(frame: np.ndarray) -> str:
125
  print(f"BLIP error: {e}")
126
  return "video"
127
 
128
- # --- 🎨 NEW: Thumbnail Generation Functions ---
129
 
130
- def create_gradient_thumbnail(rgb_frame, depth_frame, longest_side=1024, add_logo=False):
131
  """
132
- Creates thumbnail with vertical RGB→Depth gradient from real depth map (fallback method)
133
 
134
  Args:
135
- rgb_frame: Original RGB Frame
136
- depth_frame: Real Depth Map from Video Depth Anything Model
137
- longest_side: Max size (1024px)
138
- add_logo: Add depth logo (always True)
139
 
140
  Returns:
141
- np.array: Thumbnail with RGB→Depth gradient
142
  """
143
  try:
144
- print(f"DEBUG: Creating gradient thumbnail from real depth - RGB: {rgb_frame.shape}, Depth: {depth_frame.shape}")
145
-
146
- # 1. Ensure RGB frame is properly sized first
147
- rgb_h, rgb_w = rgb_frame.shape[:2]
148
- if max(rgb_h, rgb_w) > longest_side:
149
- scale = longest_side / max(rgb_h, rgb_w)
150
- new_h, new_w = int(rgb_h * scale), int(rgb_w * scale)
151
- rgb_resized = cv2.resize(rgb_frame, (new_w, new_h))
152
- else:
153
- rgb_resized = rgb_frame.copy()
154
-
155
- print(f"DEBUG: RGB resized to: {rgb_resized.shape}")
156
-
157
- # 2. Process depth map safely
158
- if len(depth_frame.shape) == 2:
159
- # Depth is grayscale (2D)
160
- depth_gray = depth_frame
161
- elif len(depth_frame.shape) == 3:
162
- # Depth is 3-channel, convert to grayscale
163
- if depth_frame.shape[2] == 3:
164
- depth_gray = cv2.cvtColor(depth_frame, cv2.COLOR_RGB2GRAY)
165
- else:
166
- depth_gray = depth_frame[:, :, 0] # Take first channel
167
- else:
168
- raise ValueError(f"Unexpected depth shape: {depth_frame.shape}")
169
-
170
- print(f"DEBUG: Depth processed to grayscale: {depth_gray.shape}")
171
-
172
- # 3. CRITICAL: Resize depth to match RGB dimensions exactly
173
- target_h, target_w = rgb_resized.shape[:2]
174
-
175
- # Ensure depth_gray is valid for resizing
176
- if depth_gray.size == 0 or depth_gray.dtype not in [np.uint8, np.float32, np.float64]:
177
- print(f"DEBUG: Invalid depth data, creating fallback")
178
- depth_resized = np.zeros((target_h, target_w), dtype=np.uint8)
179
- else:
180
- try:
181
- depth_resized = cv2.resize(depth_gray, (target_w, target_h), interpolation=cv2.INTER_LINEAR)
182
- except Exception as resize_error:
183
- print(f"DEBUG: Resize failed: {resize_error}, creating fallback")
184
- depth_resized = np.zeros((target_h, target_w), dtype=np.uint8)
185
-
186
- print(f"DEBUG: Depth resized to match RGB: {depth_resized.shape}")
187
-
188
- # 4. Normalize depth to 0-255 range
189
- if depth_resized.max() > depth_resized.min():
190
- depth_norm = ((depth_resized - depth_resized.min()) / (depth_resized.max() - depth_resized.min()) * 255).astype(np.uint8)
191
- else:
192
- depth_norm = np.zeros_like(depth_resized, dtype=np.uint8)
193
 
194
- # 5. Convert to 3-channel for blending
195
- depth_3ch = np.stack([depth_norm] * 3, axis=-1)
196
-
197
- print(f"DEBUG: Final processing - RGB: {rgb_resized.shape}, Depth: {depth_3ch.shape}")
198
-
199
- # 6. Create gradient zones
200
- height = rgb_resized.shape[0]
201
- rgb_end = int(height * 0.60) # 60% RGB
202
- gradient_start = rgb_end # Gradient starts at 60%
203
- gradient_end = int(height * 0.80) # Gradient ends at 80%
204
-
205
- print(f"DEBUG: Gradient zones - RGB: 0-{rgb_end}, Gradient: {gradient_start}-{gradient_end}, Depth: {gradient_end}-{height}")
206
-
207
- # 7. Assemble thumbnail
208
- result = np.zeros_like(rgb_resized)
209
-
210
- # Top 60%: Pure RGB
211
- result[:rgb_end] = rgb_resized[:rgb_end]
212
 
213
- # 60-80%: Smooth gradient
214
- for y in range(gradient_start, min(gradient_end, height)):
215
- # Gradient factor: 0.0 (RGB) 1.0 (Depth)
216
- factor = (y - gradient_start) / max(1, (gradient_end - gradient_start))
217
-
218
- # Smooth transition (sine curve)
219
- smooth_factor = 0.5 * (1 - np.cos(factor * np.pi))
220
-
221
- # Blend RGB and real depth
222
- rgb_line = rgb_resized[y].astype(np.float32)
223
- depth_line = depth_3ch[y].astype(np.float32)
224
 
225
- blended = (1 - smooth_factor) * rgb_line + smooth_factor * depth_line
226
- result[y] = blended.astype(np.uint8)
227
-
228
- # Bottom 20%: Pure Depth
229
- if gradient_end < height:
230
- result[gradient_end:] = depth_3ch[gradient_end:]
231
-
232
- # 8. Always add depth logo
233
- result = add_depth_logo(result)
234
-
235
- print(f"DEBUG: Real depth gradient thumbnail created successfully: {result.shape}")
236
- return result
237
-
238
- except Exception as e:
239
- print(f"DEBUG: Gradient thumbnail creation failed: {e}")
240
- import traceback
241
- traceback.print_exc()
242
- # Fallback: return RGB frame with logo
243
- try:
244
- fallback = add_depth_logo(rgb_frame)
245
- print("DEBUG: Returned fallback RGB with logo")
246
- return fallback
247
- except:
248
- print("DEBUG: Complete fallback - returning original RGB")
249
- return rgb_frame
250
-
251
- def create_gradient_thumbnail_simple(rgb_frame, depth_frame):
252
- """
253
- Creates gradient thumbnail from perfectly matched RGB and depth frames (from RGBD processing)
254
-
255
- Args:
256
- rgb_frame: RGB frame (already processed and sized)
257
- depth_frame: Depth frame (already processed to match RGB exactly)
258
-
259
- Returns:
260
- np.array: Thumbnail with RGB→Depth gradient and "D" logo
261
- """
262
- try:
263
- print(f"DEBUG: Creating simple gradient thumbnail - RGB: {rgb_frame.shape}, Depth: {depth_frame.shape}")
264
 
265
- # Frames are already perfectly matched - no resizing needed!
266
- height = rgb_frame.shape[0]
267
 
268
- # Create gradient zones
 
269
  rgb_end = int(height * 0.60) # 60% RGB
270
- gradient_start = rgb_end # Gradient starts at 60%
271
- gradient_end = int(height * 0.80) # Gradient ends at 80%
272
-
273
- print(f"DEBUG: Simple gradient zones - RGB: 0-{rgb_end}, Gradient: {gradient_start}-{gradient_end}, Depth: {gradient_end}-{height}")
274
 
275
- # Assemble thumbnail
276
- result = np.zeros_like(rgb_frame)
277
 
278
- # Top 60%: Pure RGB
279
- result[:rgb_end] = rgb_frame[:rgb_end]
280
 
281
- # 60-80%: Smooth gradient
282
  for y in range(gradient_start, min(gradient_end, height)):
283
- # Gradient factor: 0.0 (RGB) → 1.0 (Depth)
284
  factor = (y - gradient_start) / max(1, (gradient_end - gradient_start))
285
 
286
- # Smooth transition (sine curve)
287
  smooth_factor = 0.5 * (1 - np.cos(factor * np.pi))
288
 
289
- # Blend RGB and depth (both are already 3-channel and matched)
290
- rgb_line = rgb_frame[y].astype(np.float32)
291
- depth_line = depth_frame[y].astype(np.float32)
292
 
293
  blended = (1 - smooth_factor) * rgb_line + smooth_factor * depth_line
294
  result[y] = blended.astype(np.uint8)
295
 
296
- # Bottom 20%: Pure Depth
297
  if gradient_end < height:
298
- result[gradient_end:] = depth_frame[gradient_end:]
299
 
300
- # Always add depth logo
301
  result = add_depth_logo(result)
302
 
303
- print(f"DEBUG: Simple gradient thumbnail created successfully: {result.shape}")
304
  return result
305
 
306
  except Exception as e:
307
- print(f"DEBUG: Simple gradient thumbnail creation failed: {e}")
308
  import traceback
309
  traceback.print_exc()
310
- # Fallback: return RGB frame with logo
311
  try:
312
  return add_depth_logo(rgb_frame)
313
  except:
314
  return rgb_frame
315
- """
316
- Erstellt Thumbnail mit vertikalem RGB→Depth Verlauf aus echter Depth-Map
317
-
318
- Args:
319
- rgb_frame: Original RGB Frame
320
- depth_frame: Echte Depth Map vom Video Depth Anything Model
321
- longest_side: Max Größe (1024px)
322
- add_logo: Looking Glass Logo hinzufügen
323
-
324
- Returns:
325
- np.array: Thumbnail mit RGB→Depth Verlauf
326
- """
327
- try:
328
- print(f"DEBUG: Creating gradient thumbnail from real depth - RGB: {rgb_frame.shape}, Depth: {depth_frame.shape}")
329
-
330
- # 1. Frames auf gleiche Größe bringen und auf target size skalieren
331
- h, w = rgb_frame.shape[:2]
332
- if max(h, w) > longest_side:
333
- scale = longest_side / max(h, w)
334
- new_h, new_w = int(h * scale), int(w * scale)
335
- rgb_resized = cv2.resize(rgb_frame, (new_w, new_h))
336
- else:
337
- rgb_resized = rgb_frame.copy()
338
-
339
- # 2. Depth Map verarbeiten (depth_frame kommt als Grayscale Array vom Model)
340
- if len(depth_frame.shape) == 2:
341
- # Depth ist bereits Grayscale
342
- depth_gray = depth_frame
343
- else:
344
- # Falls depth als 3-channel kommt
345
- depth_gray = cv2.cvtColor(depth_frame, cv2.COLOR_BGR2GRAY)
346
-
347
- # Depth auf RGB-Frame-Größe bringen
348
- depth_resized = cv2.resize(depth_gray, (rgb_resized.shape[1], rgb_resized.shape[0]))
349
-
350
- # Depth normalisieren (0-255)
351
- depth_norm = ((depth_resized - depth_resized.min()) / (depth_resized.max() - depth_resized.min()) * 255).astype(np.uint8)
352
-
353
- # Zu 3-channel konvertieren
354
- depth_3ch = np.stack([depth_norm] * 3, axis=-1)
355
-
356
- print(f"DEBUG: Processed frames - RGB: {rgb_resized.shape}, Depth: {depth_3ch.shape}")
357
-
358
- # 3. Verlauf-Parameter (prozentual)
359
- height = rgb_resized.shape[0]
360
- rgb_end = int(height * 0.60) # 60% RGB
361
- gradient_start = rgb_end # Verlauf startet bei 60%
362
- gradient_end = int(height * 0.80) # Verlauf endet bei 80%
363
-
364
- print(f"DEBUG: Gradient zones - RGB: 0-{rgb_end}, Gradient: {gradient_start}-{gradient_end}, Depth: {gradient_end}-{height}")
365
-
366
- # 4. Thumbnail zusammensetzen
367
- result = np.zeros_like(rgb_resized)
368
-
369
- # Obere 60%: Pure RGB
370
- result[:rgb_end] = rgb_resized[:rgb_end]
371
-
372
- # 60-80%: Prozeduraler Verlauf
373
- for y in range(gradient_start, min(gradient_end, height)):
374
- # Verlauf-Factor: 0.0 (RGB) → 1.0 (Depth)
375
- factor = (y - gradient_start) / max(1, (gradient_end - gradient_start))
376
-
377
- # Smooth Transition (sine curve for smoother blend)
378
- smooth_factor = 0.5 * (1 - np.cos(factor * np.pi))
379
-
380
- # Blend RGB und echte Depth
381
- rgb_line = rgb_resized[y].astype(np.float32)
382
- depth_line = depth_3ch[y].astype(np.float32)
383
-
384
- blended = (1 - smooth_factor) * rgb_line + smooth_factor * depth_line
385
- result[y] = blended.astype(np.uint8)
386
-
387
- # Untere 20%: Pure Depth
388
- if gradient_end < height:
389
- result[gradient_end:] = depth_3ch[gradient_end:]
390
-
391
- # 5. Immer "D" Logo hinzufügen
392
- result = add_depth_logo(result)
393
-
394
- print(f"DEBUG: Real depth gradient thumbnail created successfully: {result.shape}")
395
- return result
396
-
397
- except Exception as e:
398
- print(f"DEBUG: Gradient thumbnail creation failed: {e}")
399
- # Fallback: return original RGB frame
400
- return rgb_frame
401
 
402
  def add_depth_logo(thumbnail, position="bottom-right"):
403
  """Adds improved 'D' logo to thumbnail for depth indication"""
@@ -592,8 +397,10 @@ def download_civitai_video(civitai_url):
592
  if '.' in filename_part:
593
  temp_path = f"temp_civitai_{filename_part}"
594
  else:
 
595
  temp_path = f"temp_civitai_{int(time.time())}.webm"
596
  else:
 
597
  temp_path = f"temp_civitai_{int(time.time())}.webm"
598
  except:
599
  import time
@@ -691,9 +498,9 @@ current_video_url = None
691
  blip_generated_name = ""
692
  original_filename = ""
693
 
694
- # --- Main inference function ---
695
  def infer_video_depth_from_source(upload_video, video_url, filename, use_blip, create_thumbnail, *args):
696
- """Process video to generate depth maps and RGBD output"""
697
  try:
698
  max_len, target_fps, max_res, stitch, grayscale, convert_from_color, blur = args
699
 
@@ -702,16 +509,15 @@ def infer_video_depth_from_source(upload_video, video_url, filename, use_blip, c
702
  if not input_path:
703
  return None, None, "Error: No video source provided", None
704
 
705
- # Fix filename at generation time (no more changing after this point)
706
  base_name = filename.strip().replace(" ", "_")[:30] if filename.strip() else "output"
707
-
708
  print(f"DEBUG: Final filename locked in: '{base_name}'")
709
 
710
  # Create output directory
711
  output_dir = "./outputs"
712
  os.makedirs(output_dir, exist_ok=True)
713
 
714
- # Use final names (not temp names!)
715
  vis_video_path = os.path.join(output_dir, base_name + "_vis.mp4")
716
  rgbd_video_path = os.path.join(output_dir, base_name + "_RGBD.mp4")
717
 
@@ -728,10 +534,12 @@ def infer_video_depth_from_source(upload_video, video_url, filename, use_blip, c
728
  depths, fps = video_depth_anything.infer_video_depth(frames, target_fps, input_size=518, device=DEVICE)
729
  print("✅ Depth maps generated successfully")
730
 
731
- # Save depth visualization with final name
732
  save_video(depths, vis_video_path, fps=fps, is_depths=True)
733
 
734
  rgbd_path = None
 
 
735
  if stitch:
736
  print("Creating RGBD stitched video...")
737
  # Read full resolution frames for stitching
@@ -767,11 +575,37 @@ def infer_video_depth_from_source(upload_video, video_url, filename, use_blip, c
767
  depth_resized = cv2.resize(depth_vis, (rgb.shape[1], rgb.shape[0]))
768
  stitched = cv2.hconcat([rgb, depth_resized])
769
  stitched_frames.append(stitched)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
770
 
771
- # Save stitched video with final name
772
  save_video(np.array(stitched_frames), rgbd_video_path, fps=fps)
773
  print("✅ RGBD video created successfully")
774
 
 
 
 
 
 
 
 
 
775
  # Add audio from original video if possible
776
  try:
777
  temp_audio_path = rgbd_video_path.replace('.mp4', '_audio.mp4')
@@ -788,36 +622,23 @@ def infer_video_depth_from_source(upload_video, video_url, filename, use_blip, c
788
  print(f"Audio processing failed: {e}")
789
  rgbd_path = rgbd_video_path
790
 
791
- # 🎯 Thumbnail-Generierung (wenn aktiviert)
792
- thumbnail = None
793
- if create_thumbnail:
794
- print("Creating thumbnail from completed depth data...")
795
- try:
796
- # Erstes Frame für Thumbnail (konsistent mit BLIP)
797
- rgb_frame = get_first_frame_for_blip(input_path, target_size=1024)
798
- # Erstes Depth-Frame (korrespondierend zum RGB-Frame)
799
- first_depth_frame = depths[0] # Erstes Depth vom AI-Model!
800
-
801
- # Gradient-Thumbnail mit echter Depth erstellen (mit "D" Logo)
802
- thumbnail = create_gradient_thumbnail(
803
- rgb_frame,
804
- first_depth_frame,
805
- longest_side=1024,
806
- add_logo=True # Immer Logo hinzufügen
807
- )
808
-
809
- # Thumbnail in beide Videos einbetten
810
- embed_thumbnail_in_video(vis_video_path, thumbnail, base_name)
811
- if rgbd_path:
812
- embed_thumbnail_in_video(rgbd_path, thumbnail, base_name)
813
-
814
- print("✅ Thumbnail created and embedded successfully")
815
-
816
- except Exception as e:
817
- print(f"❌ Thumbnail creation failed: {e}")
818
- thumbnail = None
819
  else:
820
- print("📷 Thumbnail generation skipped (disabled)")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
821
 
822
  # Clean up memory
823
  gc.collect()
@@ -840,7 +661,7 @@ def infer_video_depth_from_source(upload_video, video_url, filename, use_blip, c
840
 
841
  # --- UI event handlers ---
842
  def on_video_upload_change(video_file, use_blip):
843
- """Handle video upload and store video info for toggling - NO early thumbnail generation"""
844
  global current_video_file, blip_generated_name, original_filename, current_video_url
845
 
846
  print(f"DEBUG: Upload handler called with video_file: {video_file}")
@@ -899,92 +720,22 @@ def on_video_upload_change(video_file, use_blip):
899
  blip_generated_name = generate_blip_name(frame)
900
  print(f"DEBUG: BLIP name generated: '{blip_generated_name}'")
901
 
902
- # Return appropriate name based on BLIP setting - NO thumbnail preview yet
903
  final_name = blip_generated_name if (use_blip and blip_generated_name) else original_filename
904
  print(f"DEBUG: Final name returned: '{final_name}' (BLIP: {use_blip})")
905
- print(f"DEBUG: Returning - filename: '{final_name}', clear URL: '', status: 'success' - NO thumbnail yet")
906
  return final_name, "", "Video uploaded successfully!"
907
 
908
  except Exception as e:
909
  error_msg = f"Upload processing failed: {str(e)}"
910
  print(f"DEBUG ERROR: {error_msg}")
911
  return "uploaded_video", gr.update(), error_msg
912
-
913
- if not video_file:
914
- print("DEBUG: No video file - clearing state")
915
- current_video_file = None
916
- blip_generated_name = ""
917
- original_filename = ""
918
- return "", gr.update(), "Upload a video file", None # Clear thumbnail too
919
-
920
- try:
921
- # Store the current video
922
- current_video_file = video_file
923
- current_video_url = None # Clear URL when uploading file
924
-
925
- print(f"DEBUG: Processing upload - video_file type: {type(video_file)}")
926
-
927
- # Generate original filename FIRST - try multiple ways
928
- original_filename = "uploaded_video" # Default fallback
929
-
930
- # Method 1: Check .name attribute
931
- if hasattr(video_file, 'name') and video_file.name:
932
- print(f"DEBUG: video_file.name = '{video_file.name}'")
933
- original_name = os.path.splitext(os.path.basename(video_file.name))[0]
934
- cleaned = "".join(c for c in original_name if c.isalnum() or c in "_-")[:30]
935
- if cleaned:
936
- original_filename = cleaned
937
- print(f"DEBUG: Method 1 success: '{original_filename}'")
938
-
939
- # Method 2: Check .orig_name attribute (Gradio sometimes uses this)
940
- elif hasattr(video_file, 'orig_name') and video_file.orig_name:
941
- print(f"DEBUG: video_file.orig_name = '{video_file.orig_name}'")
942
- original_name = os.path.splitext(os.path.basename(video_file.orig_name))[0]
943
- cleaned = "".join(c for c in original_name if c.isalnum() or c in "_-")[:30]
944
- if cleaned:
945
- original_filename = cleaned
946
- print(f"DEBUG: Method 2 success: '{original_filename}'")
947
-
948
- # Method 3: Try to get filename from the file path itself
949
- elif isinstance(video_file, str):
950
- print(f"DEBUG: video_file is string: '{video_file}'")
951
- original_name = os.path.splitext(os.path.basename(video_file))[0]
952
- cleaned = "".join(c for c in original_name if c.isalnum() or c in "_-")[:30]
953
- if cleaned:
954
- original_filename = cleaned
955
- print(f"DEBUG: Method 3 success: '{original_filename}'")
956
-
957
- print(f"DEBUG: Final original filename set to: '{original_filename}'")
958
-
959
- # Generate BLIP name
960
- blip_generated_name = ""
961
- if use_blip:
962
- print("DEBUG: Starting optimized BLIP processing...")
963
- frame = get_middle_frame_for_blip(video_file, target_size=480)
964
- blip_generated_name = generate_blip_name(frame)
965
- print(f"DEBUG: BLIP name generated: '{blip_generated_name}'")
966
-
967
- # Generate thumbnail preview
968
- thumbnail = update_thumbnail_preview(use_blip, False) # No logo for now
969
-
970
- # Return appropriate name based on BLIP setting
971
- final_name = blip_generated_name if (use_blip and blip_generated_name) else original_filename
972
- print(f"DEBUG: Final name returned: '{final_name}' (BLIP: {use_blip})")
973
- print(f"DEBUG: Returning - filename: '{final_name}', clear URL: '', status: 'success'")
974
- return final_name, "", "Video uploaded successfully!", thumbnail
975
-
976
- except Exception as e:
977
- error_msg = f"Upload processing failed: {str(e)}"
978
- print(f"DEBUG ERROR: {error_msg}")
979
- return "uploaded_video", gr.update(), error_msg, None
980
 
981
  def on_video_url_change(url, use_blip):
982
- """Handle URL input change with support for MJ and Civitai - NO early thumbnail generation"""
983
  global current_video_file, current_video_url, blip_generated_name, original_filename
984
 
985
  if not url or url.strip() == "":
986
  # WICHTIG: Nur State löschen wenn wir kein Upload-Video haben!
987
- # Sonst würde Upload → URL clear → Video verschwinden
988
  if current_video_file is None:
989
  current_video_url = None
990
  blip_generated_name = ""
@@ -1043,10 +794,10 @@ def on_video_url_change(url, use_blip):
1043
  print(f"BLIP naming failed: {e}")
1044
  blip_generated_name = ""
1045
 
1046
- # Return appropriate name - NO thumbnail preview yet
1047
  final_name = blip_generated_name if (use_blip and blip_generated_name) else original_filename
1048
  success_msg = f"✅ {source.title()} video downloaded successfully!"
1049
- print(f"DEBUG: {source.title()} final name returned: '{final_name}' (BLIP: {use_blip}) - NO thumbnail yet")
1050
  return video_path, final_name, success_msg
1051
 
1052
  except Exception as e:
@@ -1132,7 +883,7 @@ with gr.Blocks(analytics_enabled=False, title="Video Depth Anything") as demo:
1132
  show_label=True
1133
  )
1134
 
1135
- # Single row with all input controls and thumbnail preview - FIXED
1136
  with gr.Row():
1137
  video_url = gr.Textbox(
1138
  label="Video URL (MJ, Civitai, or Kling)",
@@ -1165,7 +916,7 @@ with gr.Blocks(analytics_enabled=False, title="Video Depth Anything") as demo:
1165
  scale=1
1166
  )
1167
 
1168
- # Event handlers for input changes - SIMPLIFIED (no thumbnail previews during input)
1169
  video_url.change(
1170
  fn=on_video_url_change,
1171
  inputs=[video_url, use_blip],
@@ -1241,7 +992,7 @@ with gr.Blocks(analytics_enabled=False, title="Video Depth Anything") as demo:
1241
 
1242
  run_btn = gr.Button("Generate Depth Video", variant="primary", size="lg")
1243
 
1244
- # Main processing event - UPDATED to include thumbnail generation and preview
1245
  run_btn.click(
1246
  fn=infer_video_depth_from_source,
1247
  inputs=[
 
125
  print(f"BLIP error: {e}")
126
  return "video"
127
 
128
+ # --- 🎨 NEW: Simple Thumbnail Generation Function ---
129
 
130
def create_simple_gradient_thumbnail(rgb_frame, depth_frame, target_size=1024):
    """
    Build a thumbnail that fades vertically from an RGB frame into its depth map.

    The top 60% of the rows show the RGB image, the rows between 60% and 80%
    are a cosine-eased blend of RGB and depth, and the bottom 20% show the
    depth image. The assembled image is passed through ``add_depth_logo``
    before it is returned.

    Args:
        rgb_frame: RGB frame as a NumPy array (height, width, channels).
        depth_frame: Depth visualisation of the same frame. It is expected to
            already match ``rgb_frame`` in size; a single-channel map is
            expanded to three channels and a differently sized map is resized
            so that the blend cannot fail on a shape mismatch.
        target_size: Maximum length in pixels of the thumbnail's longest side.
            Larger frames are scaled down, smaller frames are left as they are.

    Returns:
        np.ndarray: The gradient thumbnail. If anything goes wrong, the RGB
        frame with the logo is returned instead, or the untouched RGB frame if
        adding the logo fails as well.
    """
    try:
        print(f"DEBUG: Creating simple gradient - RGB: {rgb_frame.shape}, Depth: {depth_frame.shape}")

        # A single-channel depth map can neither be blended with nor assigned
        # into a 3-channel image, so replicate it across the colour channels.
        if depth_frame.ndim == 2:
            depth_frame = np.stack([depth_frame] * 3, axis=-1)
        elif depth_frame.ndim == 3 and depth_frame.shape[2] == 1:
            depth_frame = np.repeat(depth_frame, 3, axis=2)

        # 1. Scale down to thumbnail size, keeping the aspect ratio.
        h, w = rgb_frame.shape[:2]
        longest = max(h, w)
        if longest > target_size:
            scale = target_size / longest
            # Clamp to 1 px so extreme aspect ratios never yield a zero-sized image.
            new_h = max(1, int(h * scale))
            new_w = max(1, int(w * scale))
            rgb_thumb = cv2.resize(rgb_frame, (new_w, new_h))
        else:
            new_h, new_w = h, w
            rgb_thumb = rgb_frame.copy()

        # Bring the depth image to exactly the thumbnail size (covers both the
        # down-scaling case and a depth frame that did not match the RGB frame).
        if depth_frame.shape[:2] != (new_h, new_w):
            depth_thumb = cv2.resize(depth_frame, (new_w, new_h))
        else:
            depth_thumb = depth_frame

        print(f"DEBUG: Thumbnail size - RGB: {rgb_thumb.shape}, Depth: {depth_thumb.shape}")
        print(f"DEBUG: Depth range after resize: {depth_thumb.min()} - {depth_thumb.max()}")

        # 2. Row boundaries of the three zones.
        height = rgb_thumb.shape[0]
        rgb_end = int(height * 0.60)       # rows [0, rgb_end) stay pure RGB
        gradient_start = rgb_end           # blend zone starts where RGB ends
        gradient_end = int(height * 0.80)  # rows [gradient_end, height) are pure depth

        print(f"DEBUG: Zones - RGB: 0-{rgb_end}, Gradient: {gradient_start}-{gradient_end}, Depth: {gradient_end}-{height}")

        # 3. Assemble the thumbnail; start from RGB so the top zone is already done.
        result = rgb_thumb.copy()

        span = gradient_end - gradient_start
        if span > 0:
            # Per-row blend weight: 0.0 (pure RGB) at the top of the zone,
            # approaching 1.0 (pure depth) at the bottom, eased with a cosine
            # curve for a soft transition.
            factor = np.arange(span) / span
            weights = 0.5 * (1 - np.cos(factor * np.pi))

            rgb_band = rgb_thumb[gradient_start:gradient_end].astype(np.float32)
            depth_band = depth_thumb[gradient_start:gradient_end].astype(np.float32)
            # Reshape so each row weight broadcasts over width (and channels).
            weights = weights.reshape((-1,) + (1,) * (rgb_band.ndim - 1))

            blended = (1 - weights) * rgb_band + weights * depth_band
            result[gradient_start:gradient_end] = blended.astype(np.uint8)

        # Bottom zone: pure depth.
        if gradient_end < height:
            result[gradient_end:] = depth_thumb[gradient_end:]

        # Stamp the "D" logo onto the finished thumbnail.
        result = add_depth_logo(result)

        print(f"DEBUG: Simple gradient thumbnail completed: {result.shape}")
        return result

    except Exception as e:
        print(f"ERROR: Simple gradient failed: {e}")
        import traceback
        traceback.print_exc()
        # Best-effort fallback: plain RGB frame with the logo, or the raw frame
        # if even that fails. Catch Exception (not a bare except) so that
        # KeyboardInterrupt / SystemExit still propagate.
        try:
            return add_depth_logo(rgb_frame)
        except Exception:
            return rgb_frame
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
206
 
207
  def add_depth_logo(thumbnail, position="bottom-right"):
208
  """Adds improved 'D' logo to thumbnail for depth indication"""
 
397
  if '.' in filename_part:
398
  temp_path = f"temp_civitai_{filename_part}"
399
  else:
400
+ import time
401
  temp_path = f"temp_civitai_{int(time.time())}.webm"
402
  else:
403
+ import time
404
  temp_path = f"temp_civitai_{int(time.time())}.webm"
405
  except:
406
  import time
 
498
  blip_generated_name = ""
499
  original_filename = ""
500
 
501
+ # --- MAIN INFERENCE FUNCTION - FINAL FIX ---
502
  def infer_video_depth_from_source(upload_video, video_url, filename, use_blip, create_thumbnail, *args):
503
+ """Process video to generate depth maps and RGBD output - FINAL THUMBNAIL FIX"""
504
  try:
505
  max_len, target_fps, max_res, stitch, grayscale, convert_from_color, blur = args
506
 
 
509
  if not input_path:
510
  return None, None, "Error: No video source provided", None
511
 
512
+ # Fix filename at generation time
513
  base_name = filename.strip().replace(" ", "_")[:30] if filename.strip() else "output"
 
514
  print(f"DEBUG: Final filename locked in: '{base_name}'")
515
 
516
  # Create output directory
517
  output_dir = "./outputs"
518
  os.makedirs(output_dir, exist_ok=True)
519
 
520
+ # Use final names
521
  vis_video_path = os.path.join(output_dir, base_name + "_vis.mp4")
522
  rgbd_video_path = os.path.join(output_dir, base_name + "_RGBD.mp4")
523
 
 
534
  depths, fps = video_depth_anything.infer_video_depth(frames, target_fps, input_size=518, device=DEVICE)
535
  print("✅ Depth maps generated successfully")
536
 
537
+ # Save depth visualization
538
  save_video(depths, vis_video_path, fps=fps, is_depths=True)
539
 
540
  rgbd_path = None
541
+ thumbnail = None
542
+
543
  if stitch:
544
  print("Creating RGBD stitched video...")
545
  # Read full resolution frames for stitching
 
575
  depth_resized = cv2.resize(depth_vis, (rgb.shape[1], rgb.shape[0]))
576
  stitched = cv2.hconcat([rgb, depth_resized])
577
  stitched_frames.append(stitched)
578
+
579
+ # 🎯 FINAL FIX: Use the first frame pair for the thumbnail (RGB and depth already match exactly)
580
+ if i == 0 and create_thumbnail:
581
+ print("Creating thumbnail from first perfectly matched RGB+Depth pair...")
582
+ try:
583
+ print(f"DEBUG: Using RGB: {rgb.shape}, Depth: {depth_resized.shape}")
584
+ print(f"DEBUG: Depth range: {depth_resized.min()} - {depth_resized.max()}")
585
+
586
+ # Create the thumbnail from the already perfectly matched frames
587
+ thumbnail = create_simple_gradient_thumbnail(rgb, depth_resized)
588
+
589
+ print("✅ Thumbnail created from first RGBD pair")
590
+
591
+ except Exception as e:
592
+ print(f"❌ Thumbnail creation failed: {e}")
593
+ import traceback
594
+ traceback.print_exc()
595
+ thumbnail = None
596
 
597
+ # Save stitched video
598
  save_video(np.array(stitched_frames), rgbd_video_path, fps=fps)
599
  print("✅ RGBD video created successfully")
600
 
601
+ # Embed thumbnail in videos if created
602
+ if create_thumbnail and thumbnail is not None:
603
+ embed_thumbnail_in_video(vis_video_path, thumbnail, base_name)
604
+ embed_thumbnail_in_video(rgbd_video_path, thumbnail, base_name)
605
+ print("✅ Thumbnail embedded in videos")
606
+ elif create_thumbnail:
607
+ print("❌ No thumbnail to embed")
608
+
609
  # Add audio from original video if possible
610
  try:
611
  temp_audio_path = rgbd_video_path.replace('.mp4', '_audio.mp4')
 
622
  print(f"Audio processing failed: {e}")
623
  rgbd_path = rgbd_video_path
624
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
625
  else:
626
+ # Without RGBD stitching, fall back to a plain RGB thumbnail with the logo
627
+ if create_thumbnail:
628
+ print("Creating fallback thumbnail (no RGBD stitching)...")
629
+ try:
630
+ rgb_frame = get_first_frame_for_blip(input_path, target_size=1024)
631
+ if rgb_frame is not None:
632
+ # Simple RGB thumbnail with logo
633
+ thumbnail = add_depth_logo(rgb_frame)
634
+ embed_thumbnail_in_video(vis_video_path, thumbnail, base_name)
635
+ print("✅ Fallback RGB thumbnail created and embedded")
636
+ else:
637
+ print("❌ Could not create fallback thumbnail")
638
+ thumbnail = None
639
+ except Exception as e:
640
+ print(f"❌ Fallback thumbnail creation failed: {e}")
641
+ thumbnail = None
642
 
643
  # Clean up memory
644
  gc.collect()
 
661
 
662
  # --- UI event handlers ---
663
  def on_video_upload_change(video_file, use_blip):
664
+ """Handle video upload and store video info for toggling"""
665
  global current_video_file, blip_generated_name, original_filename, current_video_url
666
 
667
  print(f"DEBUG: Upload handler called with video_file: {video_file}")
 
720
  blip_generated_name = generate_blip_name(frame)
721
  print(f"DEBUG: BLIP name generated: '{blip_generated_name}'")
722
 
723
+ # Return appropriate name based on BLIP setting
724
  final_name = blip_generated_name if (use_blip and blip_generated_name) else original_filename
725
  print(f"DEBUG: Final name returned: '{final_name}' (BLIP: {use_blip})")
 
726
  return final_name, "", "Video uploaded successfully!"
727
 
728
  except Exception as e:
729
  error_msg = f"Upload processing failed: {str(e)}"
730
  print(f"DEBUG ERROR: {error_msg}")
731
  return "uploaded_video", gr.update(), error_msg
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
732
 
733
  def on_video_url_change(url, use_blip):
734
+ """Handle URL input change with support for MJ and Civitai"""
735
  global current_video_file, current_video_url, blip_generated_name, original_filename
736
 
737
  if not url or url.strip() == "":
738
  # WICHTIG: Nur State löschen wenn wir kein Upload-Video haben!
 
739
  if current_video_file is None:
740
  current_video_url = None
741
  blip_generated_name = ""
 
794
  print(f"BLIP naming failed: {e}")
795
  blip_generated_name = ""
796
 
797
+ # Return appropriate name
798
  final_name = blip_generated_name if (use_blip and blip_generated_name) else original_filename
799
  success_msg = f"✅ {source.title()} video downloaded successfully!"
800
+ print(f"DEBUG: {source.title()} final name returned: '{final_name}' (BLIP: {use_blip})")
801
  return video_path, final_name, success_msg
802
 
803
  except Exception as e:
 
883
  show_label=True
884
  )
885
 
886
+ # Single row with all input controls and thumbnail preview
887
  with gr.Row():
888
  video_url = gr.Textbox(
889
  label="Video URL (MJ, Civitai, or Kling)",
 
916
  scale=1
917
  )
918
 
919
+ # Event handlers for input changes
920
  video_url.change(
921
  fn=on_video_url_change,
922
  inputs=[video_url, use_blip],
 
992
 
993
  run_btn = gr.Button("Generate Depth Video", variant="primary", size="lg")
994
 
995
+ # Main processing event
996
  run_btn.click(
997
  fn=infer_video_depth_from_source,
998
  inputs=[