# NOTE(review): the lines below were paste residue from the Hugging Face Spaces
# status banner ("Spaces: Runtime error") and are preserved here as a comment so
# the file remains valid Python.
# Compatibility shim for Python 3.13+: the stdlib "audioop" module was removed,
# but pydub (pulled in transitively by gradio) still imports "audioop" and
# "pyaudioop" at import time. Registering empty placeholder modules up front
# lets those imports succeed without touching real audio functionality.
import sys
import types

for _shim_name in ("audioop", "pyaudioop"):
    # setdefault only installs the stub when no real module is already loaded,
    # matching the original "if name not in sys.modules" guard.
    sys.modules.setdefault(_shim_name, types.ModuleType(_shim_name))
| import gradio as gr | |
| import torch | |
| import numpy as np | |
| import cv2 | |
| from PIL import Image | |
| from transformers import AutoImageProcessor, AutoModelForDepthEstimation | |
| import tempfile | |
| import os | |
# Load the depth-estimation model once at import time; inference runs on CPU.
# The same checkpoint id feeds both the preprocessor and the model, so it is
# named once to keep the two in sync.
_MODEL_ID = "depth-anything/Depth-Anything-V2-Small-hf"
print("Loading Depth Anything V2 Small...")
processor = AutoImageProcessor.from_pretrained(_MODEL_ID)
model = AutoModelForDepthEstimation.from_pretrained(_MODEL_ID)
model.eval()  # inference mode: disables dropout / training-only behavior
print("Model loaded.")
def estimate_depth(frame_rgb: np.ndarray) -> np.ndarray:
    """Estimate a depth map for a single RGB frame.

    The frame is shrunk to a 256-pixel-wide thumbnail (aspect ratio kept)
    before inference to keep CPU cost low, then the predicted depth is
    min-max scaled to 0-255 and upsampled back to the frame's original size.

    Args:
        frame_rgb: RGB image as an (H, W, 3) uint8 array.

    Returns:
        Grayscale depth map as an (H, W) uint8 array (0 = near/far per model
        convention; only relative depth is meaningful after normalization).
    """
    height, width = frame_rgb.shape[:2]
    # cv2.resize takes (width, height); fix width at 256 and scale height.
    thumb = cv2.resize(frame_rgb, (256, int(256 * height / width)))
    inputs = processor(images=Image.fromarray(thumb), return_tensors="pt")
    with torch.no_grad():
        prediction = model(**inputs).predicted_depth
    raw_depth = prediction.squeeze().numpy()
    # Min-max normalize into the displayable/encodable 0-255 uint8 range.
    scaled = cv2.normalize(raw_depth, None, 0, 255, cv2.NORM_MINMAX).astype(np.uint8)
    # Upsample back to the source frame resolution for the video writers.
    return cv2.resize(scaled, (width, height), interpolation=cv2.INTER_LINEAR)
def process_video(video_path, fps_out, max_frames, progress=gr.Progress()):
    """Convert an input video into a grayscale depth-map video plus previews.

    Samples up to ``max_frames`` frames from the video at roughly ``fps_out``
    effective frame rate, runs depth estimation on each, and writes:
      * a grayscale depth-map mp4,
      * a side-by-side (original | colorized depth) preview mp4,
      * a PNG of the first processed frame's depth map.

    Args:
        video_path: Path to the uploaded video file, or None.
        fps_out: Frame rate of the output videos.
        max_frames: Maximum number of frames to process.
        progress: Gradio progress tracker (injected by the UI).

    Returns:
        Tuple of (depth_video_path, preview_video_path, first_frame_png_path),
        or (None, None, None) when no video was provided.

    Raises:
        gr.Error: If the file cannot be opened or no frames can be decoded.
    """
    if video_path is None:
        return None, None, None
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        raise gr.Error("Could not open video file.")
    try:
        # Some containers report 0 fps; fall back to a sane default.
        src_fps = cap.get(cv2.CAP_PROP_FPS) or 24
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        # Slider values may arrive as floats; range() needs ints.
        max_frames = int(max_frames)
        step = max(1, int(src_fps / fps_out))
        frame_indices = list(range(0, min(total_frames, max_frames * step), step))[:max_frames]
        if not frame_indices:
            raise gr.Error("Video appears to contain no frames.")

        tmp_dir = tempfile.mkdtemp()
        depth_path = os.path.join(tmp_dir, "depth.mp4")
        preview_path = os.path.join(tmp_dir, "preview.mp4")
        fourcc = cv2.VideoWriter_fourcc(*"mp4v")
        # isColor=False: the depth video is single-channel grayscale.
        depth_writer = cv2.VideoWriter(depth_path, fourcc, fps_out, (w, h), isColor=False)
        preview_writer = cv2.VideoWriter(preview_path, fourcc, fps_out, (w * 2, h))
        first_depth_frame = None
        frames_written = 0
        try:
            for i, idx in enumerate(frame_indices):
                progress(i / len(frame_indices), desc=f"Processing frame {i+1}/{len(frame_indices)}")
                cap.set(cv2.CAP_PROP_POS_FRAMES, idx)
                ret, frame_bgr = cap.read()
                if not ret:
                    # Seek/decode failure on this index; keep going.
                    continue
                frame_rgb = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB)
                depth = estimate_depth(frame_rgb)
                if first_depth_frame is None:
                    first_depth_frame = depth
                depth_writer.write(depth)
                # Colorize depth for the human-readable side-by-side preview.
                depth_color = cv2.applyColorMap(depth, cv2.COLORMAP_INFERNO)
                depth_color_rgb = cv2.cvtColor(depth_color, cv2.COLOR_BGR2RGB)
                side = np.concatenate([frame_rgb, depth_color_rgb], axis=1)
                preview_writer.write(cv2.cvtColor(side, cv2.COLOR_RGB2BGR))
                frames_written += 1
        finally:
            # Release writers even if estimation fails mid-way, so partial
            # files are finalized and handles are not leaked.
            depth_writer.release()
            preview_writer.release()
    finally:
        cap.release()

    if frames_written == 0:
        # Without this check the UI would receive paths to empty/broken mp4s.
        raise gr.Error("Could not decode any frames from the video.")

    png_path = os.path.join(tmp_dir, "first_frame.png")
    Image.fromarray(first_depth_frame).save(png_path)
    return depth_path, preview_path, png_path
# Dark monospace "terminal" theme injected into the Gradio app via
# gr.Blocks(css=...); the accent color #c8ff00 matches the in-page HTML below.
css = """
body { background: #0a0a0a; color: #e0e0e0; font-family: 'Space Mono', monospace; }
.gradio-container { max-width: 900px; margin: 0 auto; }
h1 { color: #c8ff00; letter-spacing: 0.08em; font-size: 1.6rem; }
button.primary { background: #c8ff00 !important; color: #0a0a0a !important; font-weight: 700; border-radius: 2px !important; }
button.primary:hover { background: #b0e000 !important; }
.footer { color: #444; font-size: 0.7rem; text-align: center; margin-top: 2rem; }
"""
# ---------------------------------------------------------------------------
# UI layout: two columns — inputs (video + sampling controls) on the left,
# the three generated outputs on the right. NOTE(review): indentation was
# reconstructed; the original paste had it stripped.
# ---------------------------------------------------------------------------
with gr.Blocks(css=css, title="DepthShift — Depth Map Generator") as demo:
    # Header: loads the Space Mono webfont and links out to the companion app.
    gr.HTML("""
<link href="https://fonts.googleapis.com/css2?family=Space+Mono:wght@400;700&display=swap" rel="stylesheet">
<h1>⬛ DEPTHSHIFT / DEPTH GENERATOR</h1>
<p style="color:#888; font-size:0.85rem; margin-top:-0.5rem;">
Upload an MP4 → get a grayscale depth map video ready for
<a href="https://spatial-index.vercel.app" target="_blank" style="color:#c8ff00;">Spatial Index</a>
</p>
""")
    with gr.Row():
        with gr.Column():
            # Input side: source video plus sampling controls that map to the
            # fps_out / max_frames parameters of process_video.
            video_in = gr.Video(label="Input Video (MP4)", interactive=True)
            with gr.Row():
                fps_slider = gr.Slider(6, 24, value=12, step=1, label="Output FPS")
                frames_slider = gr.Slider(10, 60, value=30, step=5, label="Max Frames")
            run_btn = gr.Button("Generate Depth Map", variant="primary")
        with gr.Column():
            # Output side: the three artifacts returned by process_video,
            # in the same order as its return tuple.
            depth_out = gr.Video(label="Depth Map (grayscale) — use this in DepthShift")
            preview_out = gr.Video(label="Preview (original | depth side-by-side)")
            frame_out = gr.Image(label="First Frame Depth PNG")
    # Static usage instructions rendered below the columns.
    gr.HTML("""
<div class="footer">
<b style="color:#c8ff00">HOW TO USE</b><br>
1. Upload your MP4 →
2. Download the depth map video →
3. Load both into <a href="https://spatial-index.vercel.app" style="color:#c8ff00">Spatial Index / DepthShift</a>
<br><br>Processing runs on CPU — keep Max Frames ≤ 30 for reasonable wait times (~1–2 min).
</div>
""")
    # Wire the button: inputs/outputs are positional and must match
    # process_video's parameter order and return-tuple order.
    run_btn.click(
        fn=process_video,
        inputs=[video_in, fps_slider, frames_slider],
        outputs=[depth_out, preview_out, frame_out],
    )
demo.launch()