Spaces:

Krokodilpirat
/

Video-Depth-Anything_RGBD_Zero

Running on Zero

App Files Files Community

Krokodilpirat committed on Jun 28, 2025

Commit

8f4905c

verified ·

1 Parent(s): 26c28ee

Update app.py

Browse files

Files changed (1) hide show

app.py +99 -348

app.py CHANGED Viewed

@@ -125,279 +125,84 @@ def generate_blip_name(frame: np.ndarray) -> str:
         print(f"BLIP error: {e}")
         return "video"
-# --- 🎨 NEW: Thumbnail Generation Functions ---
-def create_gradient_thumbnail(rgb_frame, depth_frame, longest_side=1024, add_logo=False):
     """
-    Creates thumbnail with vertical RGB→Depth gradient from real depth map (fallback method)
     Args:
-        rgb_frame: Original RGB Frame
-        depth_frame: Real Depth Map from Video Depth Anything Model
-        longest_side: Max size (1024px)
-        add_logo: Add depth logo (always True)
     Returns:
-        np.array: Thumbnail with RGB→Depth gradient
     """
     try:
-        print(f"DEBUG: Creating gradient thumbnail from real depth - RGB: {rgb_frame.shape}, Depth: {depth_frame.shape}")
-        # 1. Ensure RGB frame is properly sized first
-        rgb_h, rgb_w = rgb_frame.shape[:2]
-        if max(rgb_h, rgb_w) > longest_side:
-            scale = longest_side / max(rgb_h, rgb_w)
-            new_h, new_w = int(rgb_h * scale), int(rgb_w * scale)
-            rgb_resized = cv2.resize(rgb_frame, (new_w, new_h))
-        else:
-            rgb_resized = rgb_frame.copy()
-        print(f"DEBUG: RGB resized to: {rgb_resized.shape}")
-        # 2. Process depth map safely
-        if len(depth_frame.shape) == 2:
-            # Depth is grayscale (2D)
-            depth_gray = depth_frame
-        elif len(depth_frame.shape) == 3:
-            # Depth is 3-channel, convert to grayscale
-            if depth_frame.shape[2] == 3:
-                depth_gray = cv2.cvtColor(depth_frame, cv2.COLOR_RGB2GRAY)
-            else:
-                depth_gray = depth_frame[:, :, 0]  # Take first channel
-        else:
-            raise ValueError(f"Unexpected depth shape: {depth_frame.shape}")
-        print(f"DEBUG: Depth processed to grayscale: {depth_gray.shape}")
-        # 3. CRITICAL: Resize depth to match RGB dimensions exactly
-        target_h, target_w = rgb_resized.shape[:2]
-        # Ensure depth_gray is valid for resizing
-        if depth_gray.size == 0 or depth_gray.dtype not in [np.uint8, np.float32, np.float64]:
-            print(f"DEBUG: Invalid depth data, creating fallback")
-            depth_resized = np.zeros((target_h, target_w), dtype=np.uint8)
-        else:
-            try:
-                depth_resized = cv2.resize(depth_gray, (target_w, target_h), interpolation=cv2.INTER_LINEAR)
-            except Exception as resize_error:
-                print(f"DEBUG: Resize failed: {resize_error}, creating fallback")
-                depth_resized = np.zeros((target_h, target_w), dtype=np.uint8)
-        print(f"DEBUG: Depth resized to match RGB: {depth_resized.shape}")
-        # 4. Normalize depth to 0-255 range
-        if depth_resized.max() > depth_resized.min():
-            depth_norm = ((depth_resized - depth_resized.min()) / (depth_resized.max() - depth_resized.min()) * 255).astype(np.uint8)
-        else:
-            depth_norm = np.zeros_like(depth_resized, dtype=np.uint8)
-        # 5. Convert to 3-channel for blending
-        depth_3ch = np.stack([depth_norm] * 3, axis=-1)
-        print(f"DEBUG: Final processing - RGB: {rgb_resized.shape}, Depth: {depth_3ch.shape}")
-        # 6. Create gradient zones
-        height = rgb_resized.shape[0]
-        rgb_end = int(height * 0.60)      # 60% RGB
-        gradient_start = rgb_end          # Gradient starts at 60%
-        gradient_end = int(height * 0.80) # Gradient ends at 80%
-        print(f"DEBUG: Gradient zones - RGB: 0-{rgb_end}, Gradient: {gradient_start}-{gradient_end}, Depth: {gradient_end}-{height}")
-        # 7. Assemble thumbnail
-        result = np.zeros_like(rgb_resized)
-        # Top 60%: Pure RGB
-        result[:rgb_end] = rgb_resized[:rgb_end]
-        # 60-80%: Smooth gradient
-        for y in range(gradient_start, min(gradient_end, height)):
-            # Gradient factor: 0.0 (RGB) → 1.0 (Depth)
-            factor = (y - gradient_start) / max(1, (gradient_end - gradient_start))
-            # Smooth transition (sine curve)
-            smooth_factor = 0.5 * (1 - np.cos(factor * np.pi))
-            # Blend RGB and real depth
-            rgb_line = rgb_resized[y].astype(np.float32)
-            depth_line = depth_3ch[y].astype(np.float32)
-            blended = (1 - smooth_factor) * rgb_line + smooth_factor * depth_line
-            result[y] = blended.astype(np.uint8)
-        # Bottom 20%: Pure Depth
-        if gradient_end < height:
-            result[gradient_end:] = depth_3ch[gradient_end:]
-        # 8. Always add depth logo
-        result = add_depth_logo(result)
-        print(f"DEBUG: Real depth gradient thumbnail created successfully: {result.shape}")
-        return result
-    except Exception as e:
-        print(f"DEBUG: Gradient thumbnail creation failed: {e}")
-        import traceback
-        traceback.print_exc()
-        # Fallback: return RGB frame with logo
-        try:
-            fallback = add_depth_logo(rgb_frame)
-            print("DEBUG: Returned fallback RGB with logo")
-            return fallback
-        except:
-            print("DEBUG: Complete fallback - returning original RGB")
-            return rgb_frame
-def create_gradient_thumbnail_simple(rgb_frame, depth_frame):
-    """
-    Creates gradient thumbnail from perfectly matched RGB and depth frames (from RGBD processing)
-    Args:
-        rgb_frame: RGB frame (already processed and sized)
-        depth_frame: Depth frame (already processed to match RGB exactly)
-    Returns:
-        np.array: Thumbnail with RGB→Depth gradient and "D" logo
-    """
-    try:
-        print(f"DEBUG: Creating simple gradient thumbnail - RGB: {rgb_frame.shape}, Depth: {depth_frame.shape}")
-        # Frames are already perfectly matched - no resizing needed!
-        height = rgb_frame.shape[0]
-        # Create gradient zones
         rgb_end = int(height * 0.60)      # 60% RGB
-        gradient_start = rgb_end          # Gradient starts at 60%
-        gradient_end = int(height * 0.80) # Gradient ends at 80%
-        print(f"DEBUG: Simple gradient zones - RGB: 0-{rgb_end}, Gradient: {gradient_start}-{gradient_end}, Depth: {gradient_end}-{height}")
-        # Assemble thumbnail
-        result = np.zeros_like(rgb_frame)
-        # Top 60%: Pure RGB
-        result[:rgb_end] = rgb_frame[:rgb_end]
-        # 60-80%: Smooth gradient
         for y in range(gradient_start, min(gradient_end, height)):
-            # Gradient factor: 0.0 (RGB) → 1.0 (Depth)
             factor = (y - gradient_start) / max(1, (gradient_end - gradient_start))
-            # Smooth transition (sine curve)
             smooth_factor = 0.5 * (1 - np.cos(factor * np.pi))
-            # Blend RGB and depth (both are already 3-channel and matched)
-            rgb_line = rgb_frame[y].astype(np.float32)
-            depth_line = depth_frame[y].astype(np.float32)
             blended = (1 - smooth_factor) * rgb_line + smooth_factor * depth_line
             result[y] = blended.astype(np.uint8)
-        # Bottom 20%: Pure Depth
         if gradient_end < height:
-            result[gradient_end:] = depth_frame[gradient_end:]
-        # Always add depth logo
         result = add_depth_logo(result)
-        print(f"DEBUG: Simple gradient thumbnail created successfully: {result.shape}")
         return result
     except Exception as e:
-        print(f"DEBUG: Simple gradient thumbnail creation failed: {e}")
         import traceback
         traceback.print_exc()
-        # Fallback: return RGB frame with logo
         try:
             return add_depth_logo(rgb_frame)
         except:
             return rgb_frame
-    """
-    Erstellt Thumbnail mit vertikalem RGB→Depth Verlauf aus echter Depth-Map
-    Args:
-        rgb_frame: Original RGB Frame
-        depth_frame: Echte Depth Map vom Video Depth Anything Model
-        longest_side: Max Größe (1024px)
-        add_logo: Looking Glass Logo hinzufügen
-    Returns:
-        np.array: Thumbnail mit RGB→Depth Verlauf
-    """
-    try:
-        print(f"DEBUG: Creating gradient thumbnail from real depth - RGB: {rgb_frame.shape}, Depth: {depth_frame.shape}")
-        # 1. Frames auf gleiche Größe bringen und auf target size skalieren
-        h, w = rgb_frame.shape[:2]
-        if max(h, w) > longest_side:
-            scale = longest_side / max(h, w)
-            new_h, new_w = int(h * scale), int(w * scale)
-            rgb_resized = cv2.resize(rgb_frame, (new_w, new_h))
-        else:
-            rgb_resized = rgb_frame.copy()
-        # 2. Depth Map verarbeiten (depth_frame kommt als Grayscale Array vom Model)
-        if len(depth_frame.shape) == 2:
-            # Depth ist bereits Grayscale
-            depth_gray = depth_frame
-        else:
-            # Falls depth als 3-channel kommt
-            depth_gray = cv2.cvtColor(depth_frame, cv2.COLOR_BGR2GRAY)
-        # Depth auf RGB-Frame-Größe bringen
-        depth_resized = cv2.resize(depth_gray, (rgb_resized.shape[1], rgb_resized.shape[0]))
-        # Depth normalisieren (0-255)
-        depth_norm = ((depth_resized - depth_resized.min()) / (depth_resized.max() - depth_resized.min()) * 255).astype(np.uint8)
-        # Zu 3-channel konvertieren
-        depth_3ch = np.stack([depth_norm] * 3, axis=-1)
-        print(f"DEBUG: Processed frames - RGB: {rgb_resized.shape}, Depth: {depth_3ch.shape}")
-        # 3. Verlauf-Parameter (prozentual)
-        height = rgb_resized.shape[0]
-        rgb_end = int(height * 0.60)      # 60% RGB
-        gradient_start = rgb_end          # Verlauf startet bei 60%
-        gradient_end = int(height * 0.80) # Verlauf endet bei 80%
-        print(f"DEBUG: Gradient zones - RGB: 0-{rgb_end}, Gradient: {gradient_start}-{gradient_end}, Depth: {gradient_end}-{height}")
-        # 4. Thumbnail zusammensetzen
-        result = np.zeros_like(rgb_resized)
-        # Obere 60%: Pure RGB
-        result[:rgb_end] = rgb_resized[:rgb_end]
-        # 60-80%: Prozeduraler Verlauf
-        for y in range(gradient_start, min(gradient_end, height)):
-            # Verlauf-Factor: 0.0 (RGB) → 1.0 (Depth)
-            factor = (y - gradient_start) / max(1, (gradient_end - gradient_start))
-            # Smooth Transition (sine curve for smoother blend)
-            smooth_factor = 0.5 * (1 - np.cos(factor * np.pi))
-            # Blend RGB und echte Depth
-            rgb_line = rgb_resized[y].astype(np.float32)
-            depth_line = depth_3ch[y].astype(np.float32)
-            blended = (1 - smooth_factor) * rgb_line + smooth_factor * depth_line
-            result[y] = blended.astype(np.uint8)
-        # Untere 20%: Pure Depth
-        if gradient_end < height:
-            result[gradient_end:] = depth_3ch[gradient_end:]
-        # 5. Immer "D" Logo hinzufügen
-        result = add_depth_logo(result)
-        print(f"DEBUG: Real depth gradient thumbnail created successfully: {result.shape}")
-        return result
-    except Exception as e:
-        print(f"DEBUG: Gradient thumbnail creation failed: {e}")
-        # Fallback: return original RGB frame
-        return rgb_frame
 def add_depth_logo(thumbnail, position="bottom-right"):
     """Adds improved 'D' logo to thumbnail for depth indication"""
@@ -592,8 +397,10 @@ def download_civitai_video(civitai_url):
                 if '.' in filename_part:
                     temp_path = f"temp_civitai_{filename_part}"
                 else:
                     temp_path = f"temp_civitai_{int(time.time())}.webm"
             else:
                 temp_path = f"temp_civitai_{int(time.time())}.webm"
         except:
             import time
@@ -691,9 +498,9 @@ current_video_url = None
 blip_generated_name = ""
 original_filename = ""
-# --- Main inference function ---
 def infer_video_depth_from_source(upload_video, video_url, filename, use_blip, create_thumbnail, *args):
-    """Process video to generate depth maps and RGBD output"""
     try:
         max_len, target_fps, max_res, stitch, grayscale, convert_from_color, blur = args
@@ -702,16 +509,15 @@ def infer_video_depth_from_source(upload_video, video_url, filename, use_blip, c
         if not input_path:
             return None, None, "Error: No video source provided", None
-        # Fix filename at generation time (no more changing after this point)
         base_name = filename.strip().replace(" ", "_")[:30] if filename.strip() else "output"
         print(f"DEBUG: Final filename locked in: '{base_name}'")
         # Create output directory
         output_dir = "./outputs"
         os.makedirs(output_dir, exist_ok=True)
-        # Use final names (not temp names!)
         vis_video_path = os.path.join(output_dir, base_name + "_vis.mp4")
         rgbd_video_path = os.path.join(output_dir, base_name + "_RGBD.mp4")
@@ -728,10 +534,12 @@ def infer_video_depth_from_source(upload_video, video_url, filename, use_blip, c
         depths, fps = video_depth_anything.infer_video_depth(frames, target_fps, input_size=518, device=DEVICE)
         print("✅ Depth maps generated successfully")
-        # Save depth visualization with final name
         save_video(depths, vis_video_path, fps=fps, is_depths=True)
         rgbd_path = None
         if stitch:
             print("Creating RGBD stitched video...")
             # Read full resolution frames for stitching
@@ -767,11 +575,37 @@ def infer_video_depth_from_source(upload_video, video_url, filename, use_blip, c
                 depth_resized = cv2.resize(depth_vis, (rgb.shape[1], rgb.shape[0]))
                 stitched = cv2.hconcat([rgb, depth_resized])
                 stitched_frames.append(stitched)
-            # Save stitched video with final name
             save_video(np.array(stitched_frames), rgbd_video_path, fps=fps)
             print("✅ RGBD video created successfully")
             # Add audio from original video if possible
             try:
                 temp_audio_path = rgbd_video_path.replace('.mp4', '_audio.mp4')
@@ -788,36 +622,23 @@ def infer_video_depth_from_source(upload_video, video_url, filename, use_blip, c
                 print(f"Audio processing failed: {e}")
                 rgbd_path = rgbd_video_path
-        # 🎯 Thumbnail-Generierung (wenn aktiviert)
-        thumbnail = None
-        if create_thumbnail:
-            print("Creating thumbnail from completed depth data...")
-            try:
-                # Erstes Frame für Thumbnail (konsistent mit BLIP)
-                rgb_frame = get_first_frame_for_blip(input_path, target_size=1024)
-                # Erstes Depth-Frame (korrespondierend zum RGB-Frame)
-                first_depth_frame = depths[0]  # Erstes Depth vom AI-Model!
-                # Gradient-Thumbnail mit echter Depth erstellen (mit "D" Logo)
-                thumbnail = create_gradient_thumbnail(
-                    rgb_frame,
-                    first_depth_frame,
-                    longest_side=1024,
-                    add_logo=True  # Immer Logo hinzufügen
-                )
-                # Thumbnail in beide Videos einbetten
-                embed_thumbnail_in_video(vis_video_path, thumbnail, base_name)
-                if rgbd_path:
-                    embed_thumbnail_in_video(rgbd_path, thumbnail, base_name)
-                print("✅ Thumbnail created and embedded successfully")
-            except Exception as e:
-                print(f"❌ Thumbnail creation failed: {e}")
-                thumbnail = None
         else:
-            print("📷 Thumbnail generation skipped (disabled)")
         # Clean up memory
         gc.collect()
@@ -840,7 +661,7 @@ def infer_video_depth_from_source(upload_video, video_url, filename, use_blip, c
 # --- UI event handlers ---
 def on_video_upload_change(video_file, use_blip):
-    """Handle video upload and store video info for toggling - NO early thumbnail generation"""
     global current_video_file, blip_generated_name, original_filename, current_video_url
     print(f"DEBUG: Upload handler called with video_file: {video_file}")
@@ -899,92 +720,22 @@ def on_video_upload_change(video_file, use_blip):
             blip_generated_name = generate_blip_name(frame)
             print(f"DEBUG: BLIP name generated: '{blip_generated_name}'")
-        # Return appropriate name based on BLIP setting - NO thumbnail preview yet
         final_name = blip_generated_name if (use_blip and blip_generated_name) else original_filename
         print(f"DEBUG: Final name returned: '{final_name}' (BLIP: {use_blip})")
-        print(f"DEBUG: Returning - filename: '{final_name}', clear URL: '', status: 'success' - NO thumbnail yet")
         return final_name, "", "Video uploaded successfully!"
     except Exception as e:
         error_msg = f"Upload processing failed: {str(e)}"
         print(f"DEBUG ERROR: {error_msg}")
         return "uploaded_video", gr.update(), error_msg
-    if not video_file:
-        print("DEBUG: No video file - clearing state")
-        current_video_file = None
-        blip_generated_name = ""
-        original_filename = ""
-        return "", gr.update(), "Upload a video file", None  # Clear thumbnail too
-    try:
-        # Store the current video
-        current_video_file = video_file
-        current_video_url = None  # Clear URL when uploading file
-        print(f"DEBUG: Processing upload - video_file type: {type(video_file)}")
-        # Generate original filename FIRST - try multiple ways
-        original_filename = "uploaded_video"  # Default fallback
-        # Method 1: Check .name attribute
-        if hasattr(video_file, 'name') and video_file.name:
-            print(f"DEBUG: video_file.name = '{video_file.name}'")
-            original_name = os.path.splitext(os.path.basename(video_file.name))[0]
-            cleaned = "".join(c for c in original_name if c.isalnum() or c in "_-")[:30]
-            if cleaned:
-                original_filename = cleaned
-                print(f"DEBUG: Method 1 success: '{original_filename}'")
-        # Method 2: Check .orig_name attribute (Gradio sometimes uses this)
-        elif hasattr(video_file, 'orig_name') and video_file.orig_name:
-            print(f"DEBUG: video_file.orig_name = '{video_file.orig_name}'")
-            original_name = os.path.splitext(os.path.basename(video_file.orig_name))[0]
-            cleaned = "".join(c for c in original_name if c.isalnum() or c in "_-")[:30]
-            if cleaned:
-                original_filename = cleaned
-                print(f"DEBUG: Method 2 success: '{original_filename}'")
-        # Method 3: Try to get filename from the file path itself
-        elif isinstance(video_file, str):
-            print(f"DEBUG: video_file is string: '{video_file}'")
-            original_name = os.path.splitext(os.path.basename(video_file))[0]
-            cleaned = "".join(c for c in original_name if c.isalnum() or c in "_-")[:30]
-            if cleaned:
-                original_filename = cleaned
-                print(f"DEBUG: Method 3 success: '{original_filename}'")
-        print(f"DEBUG: Final original filename set to: '{original_filename}'")
-        # Generate BLIP name
-        blip_generated_name = ""
-        if use_blip:
-            print("DEBUG: Starting optimized BLIP processing...")
-            frame = get_middle_frame_for_blip(video_file, target_size=480)
-            blip_generated_name = generate_blip_name(frame)
-            print(f"DEBUG: BLIP name generated: '{blip_generated_name}'")
-        # Generate thumbnail preview
-        thumbnail = update_thumbnail_preview(use_blip, False)  # No logo for now
-        # Return appropriate name based on BLIP setting
-        final_name = blip_generated_name if (use_blip and blip_generated_name) else original_filename
-        print(f"DEBUG: Final name returned: '{final_name}' (BLIP: {use_blip})")
-        print(f"DEBUG: Returning - filename: '{final_name}', clear URL: '', status: 'success'")
-        return final_name, "", "Video uploaded successfully!", thumbnail
-    except Exception as e:
-        error_msg = f"Upload processing failed: {str(e)}"
-        print(f"DEBUG ERROR: {error_msg}")
-        return "uploaded_video", gr.update(), error_msg, None
 def on_video_url_change(url, use_blip):
-    """Handle URL input change with support for MJ and Civitai - NO early thumbnail generation"""
     global current_video_file, current_video_url, blip_generated_name, original_filename
     if not url or url.strip() == "":
         # WICHTIG: Nur State löschen wenn wir kein Upload-Video haben!
-        # Sonst würde Upload → URL clear → Video verschwinden
         if current_video_file is None:
             current_video_url = None
             blip_generated_name = ""
@@ -1043,10 +794,10 @@ def on_video_url_change(url, use_blip):
                 print(f"BLIP naming failed: {e}")
                 blip_generated_name = ""
-        # Return appropriate name - NO thumbnail preview yet
         final_name = blip_generated_name if (use_blip and blip_generated_name) else original_filename
         success_msg = f"✅ {source.title()} video downloaded successfully!"
-        print(f"DEBUG: {source.title()} final name returned: '{final_name}' (BLIP: {use_blip}) - NO thumbnail yet")
         return video_path, final_name, success_msg
     except Exception as e:
@@ -1132,7 +883,7 @@ with gr.Blocks(analytics_enabled=False, title="Video Depth Anything") as demo:
                 show_label=True
             )
-    # Single row with all input controls and thumbnail preview - FIXED
     with gr.Row():
         video_url = gr.Textbox(
             label="Video URL (MJ, Civitai, or Kling)",
@@ -1165,7 +916,7 @@ with gr.Blocks(analytics_enabled=False, title="Video Depth Anything") as demo:
             scale=1
         )
-    # Event handlers for input changes - SIMPLIFIED (no thumbnail previews during input)
     video_url.change(
         fn=on_video_url_change,
         inputs=[video_url, use_blip],
@@ -1241,7 +992,7 @@ with gr.Blocks(analytics_enabled=False, title="Video Depth Anything") as demo:
     run_btn = gr.Button("Generate Depth Video", variant="primary", size="lg")
-    # Main processing event - UPDATED to include thumbnail generation and preview
     run_btn.click(
         fn=infer_video_depth_from_source,
         inputs=[

         print(f"BLIP error: {e}")
         return "video"
+# --- 🎨 NEW: Simple Thumbnail Generation Function ---
+def create_simple_gradient_thumbnail(rgb_frame, depth_frame):
     """
+    Erstellt Gradient-Thumbnail aus bereits perfekt passenden RGB und Depth Frames
     Args:
+        rgb_frame: Original RGB Frame (volle Auflösung)
+        depth_frame: Depth Frame (bereits auf RGB-Größe angepasst und verarbeitet)
     Returns:
+        np.array: Thumbnail mit RGB→Depth Gradient
     """
     try:
+        print(f"DEBUG: Creating simple gradient - RGB: {rgb_frame.shape}, Depth: {depth_frame.shape}")
+        # 1. Skaliere auf Thumbnail-Größe (beide Frames gleichzeitig)
+        target_size = 1024
+        h, w = rgb_frame.shape[:2]
+        if max(h, w) > target_size:
+            scale = target_size / max(h, w)
+            new_h, new_w = int(h * scale), int(w * scale)
+            rgb_thumb = cv2.resize(rgb_frame, (new_w, new_h))
+            depth_thumb = cv2.resize(depth_frame, (new_w, new_h))
+        else:
+            rgb_thumb = rgb_frame.copy()
+            depth_thumb = depth_frame.copy()
+        print(f"DEBUG: Thumbnail size - RGB: {rgb_thumb.shape}, Depth: {depth_thumb.shape}")
+        print(f"DEBUG: Depth range after resize: {depth_thumb.min()} - {depth_thumb.max()}")
+        # 2. Erstelle Gradient-Bereiche
+        height = rgb_thumb.shape[0]
         rgb_end = int(height * 0.60)      # 60% RGB
+        gradient_start = rgb_end          # Gradient startet bei 60%
+        gradient_end = int(height * 0.80) # Gradient endet bei 80%
+        print(f"DEBUG: Zones - RGB: 0-{rgb_end}, Gradient: {gradient_start}-{gradient_end}, Depth: {gradient_end}-{height}")
+        # 3. Baue Thumbnail zusammen
+        result = rgb_thumb.copy()
+        # 60-80%: Smooth Gradient
         for y in range(gradient_start, min(gradient_end, height)):
+            # Gradient-Faktor: 0.0 (RGB) → 1.0 (Depth)
             factor = (y - gradient_start) / max(1, (gradient_end - gradient_start))
+            # Smooth Transition (Sinus-Kurve für weichen Übergang)
             smooth_factor = 0.5 * (1 - np.cos(factor * np.pi))
+            # Blende RGB und Depth
+            rgb_line = rgb_thumb[y].astype(np.float32)
+            depth_line = depth_thumb[y].astype(np.float32)
             blended = (1 - smooth_factor) * rgb_line + smooth_factor * depth_line
             result[y] = blended.astype(np.uint8)
+        # 80-100%: Pure Depth
         if gradient_end < height:
+            result[gradient_end:] = depth_thumb[gradient_end:]
+        # Füge "D" Logo hinzu
         result = add_depth_logo(result)
+        print(f"DEBUG: Simple gradient thumbnail completed: {result.shape}")
         return result
     except Exception as e:
+        print(f"ERROR: Simple gradient failed: {e}")
         import traceback
         traceback.print_exc()
+        # Fallback: RGB mit Logo
         try:
             return add_depth_logo(rgb_frame)
         except:
             return rgb_frame
 def add_depth_logo(thumbnail, position="bottom-right"):
     """Adds improved 'D' logo to thumbnail for depth indication"""
                 if '.' in filename_part:
                     temp_path = f"temp_civitai_{filename_part}"
                 else:
+                    import time
                     temp_path = f"temp_civitai_{int(time.time())}.webm"
             else:
+                import time
                 temp_path = f"temp_civitai_{int(time.time())}.webm"
         except:
             import time
 blip_generated_name = ""
 original_filename = ""
+# --- MAIN INFERENCE FUNCTION - FINAL FIX ---
 def infer_video_depth_from_source(upload_video, video_url, filename, use_blip, create_thumbnail, *args):
+    """Process video to generate depth maps and RGBD output - FINAL THUMBNAIL FIX"""
     try:
         max_len, target_fps, max_res, stitch, grayscale, convert_from_color, blur = args
         if not input_path:
             return None, None, "Error: No video source provided", None
+        # Fix filename at generation time
         base_name = filename.strip().replace(" ", "_")[:30] if filename.strip() else "output"
         print(f"DEBUG: Final filename locked in: '{base_name}'")
         # Create output directory
         output_dir = "./outputs"
         os.makedirs(output_dir, exist_ok=True)
+        # Use final names
         vis_video_path = os.path.join(output_dir, base_name + "_vis.mp4")
         rgbd_video_path = os.path.join(output_dir, base_name + "_RGBD.mp4")
         depths, fps = video_depth_anything.infer_video_depth(frames, target_fps, input_size=518, device=DEVICE)
         print("✅ Depth maps generated successfully")
+        # Save depth visualization
         save_video(depths, vis_video_path, fps=fps, is_depths=True)
         rgbd_path = None
+        thumbnail = None
         if stitch:
             print("Creating RGBD stitched video...")
             # Read full resolution frames for stitching
                 depth_resized = cv2.resize(depth_vis, (rgb.shape[1], rgb.shape[0]))
                 stitched = cv2.hconcat([rgb, depth_resized])
                 stitched_frames.append(stitched)
+                # 🎯 FINAL FIX: Nutze erstes Frame-Pair für Thumbnail (bereits perfekt passend)
+                if i == 0 and create_thumbnail:
+                    print("Creating thumbnail from first perfectly matched RGB+Depth pair...")
+                    try:
+                        print(f"DEBUG: Using RGB: {rgb.shape}, Depth: {depth_resized.shape}")
+                        print(f"DEBUG: Depth range: {depth_resized.min()} - {depth_resized.max()}")
+                        # Erstelle Thumbnail mit den bereits perfekt passenden Frames
+                        thumbnail = create_simple_gradient_thumbnail(rgb, depth_resized)
+                        print("✅ Thumbnail created from first RGBD pair")
+                    except Exception as e:
+                        print(f"❌ Thumbnail creation failed: {e}")
+                        import traceback
+                        traceback.print_exc()
+                        thumbnail = None
+            # Save stitched video
             save_video(np.array(stitched_frames), rgbd_video_path, fps=fps)
             print("✅ RGBD video created successfully")
+            # Embed thumbnail in videos if created
+            if create_thumbnail and thumbnail is not None:
+                embed_thumbnail_in_video(vis_video_path, thumbnail, base_name)
+                embed_thumbnail_in_video(rgbd_video_path, thumbnail, base_name)
+                print("✅ Thumbnail embedded in videos")
+            elif create_thumbnail:
+                print("❌ No thumbnail to embed")
             # Add audio from original video if possible
             try:
                 temp_audio_path = rgbd_video_path.replace('.mp4', '_audio.mp4')
                 print(f"Audio processing failed: {e}")
                 rgbd_path = rgbd_video_path
         else:
+            # If no RGBD stitching, fallback to old thumbnail method
+            if create_thumbnail:
+                print("Creating fallback thumbnail (no RGBD stitching)...")
+                try:
+                    rgb_frame = get_first_frame_for_blip(input_path, target_size=1024)
+                    if rgb_frame is not None:
+                        # Simple RGB thumbnail with logo
+                        thumbnail = add_depth_logo(rgb_frame)
+                        embed_thumbnail_in_video(vis_video_path, thumbnail, base_name)
+                        print("✅ Fallback RGB thumbnail created and embedded")
+                    else:
+                        print("❌ Could not create fallback thumbnail")
+                        thumbnail = None
+                except Exception as e:
+                    print(f"❌ Fallback thumbnail creation failed: {e}")
+                    thumbnail = None
         # Clean up memory
         gc.collect()
 # --- UI event handlers ---
 def on_video_upload_change(video_file, use_blip):
+    """Handle video upload and store video info for toggling"""
     global current_video_file, blip_generated_name, original_filename, current_video_url
     print(f"DEBUG: Upload handler called with video_file: {video_file}")
             blip_generated_name = generate_blip_name(frame)
             print(f"DEBUG: BLIP name generated: '{blip_generated_name}'")
+        # Return appropriate name based on BLIP setting
         final_name = blip_generated_name if (use_blip and blip_generated_name) else original_filename
         print(f"DEBUG: Final name returned: '{final_name}' (BLIP: {use_blip})")
         return final_name, "", "Video uploaded successfully!"
     except Exception as e:
         error_msg = f"Upload processing failed: {str(e)}"
         print(f"DEBUG ERROR: {error_msg}")
         return "uploaded_video", gr.update(), error_msg
 def on_video_url_change(url, use_blip):
+    """Handle URL input change with support for MJ and Civitai"""
     global current_video_file, current_video_url, blip_generated_name, original_filename
     if not url or url.strip() == "":
         # WICHTIG: Nur State löschen wenn wir kein Upload-Video haben!
         if current_video_file is None:
             current_video_url = None
             blip_generated_name = ""
                 print(f"BLIP naming failed: {e}")
                 blip_generated_name = ""
+        # Return appropriate name
         final_name = blip_generated_name if (use_blip and blip_generated_name) else original_filename
         success_msg = f"✅ {source.title()} video downloaded successfully!"
+        print(f"DEBUG: {source.title()} final name returned: '{final_name}' (BLIP: {use_blip})")
         return video_path, final_name, success_msg
     except Exception as e:
                 show_label=True
             )
+    # Single row with all input controls and thumbnail preview
     with gr.Row():
         video_url = gr.Textbox(
             label="Video URL (MJ, Civitai, or Kling)",
             scale=1
         )
+    # Event handlers for input changes
     video_url.change(
         fn=on_video_url_change,
         inputs=[video_url, use_blip],
     run_btn = gr.Button("Generate Depth Video", variant="primary", size="lg")
+    # Main processing event
     run_btn.click(
         fn=infer_video_depth_from_source,
         inputs=[