Spaces:

niye4
/

depthmap

Build error

App Files Files Community

niye4 committed on Nov 29, 2025

Commit

cc36089

verified ·

1 Parent(s): 293f87c

Update app.py

Browse files

Files changed (1) hide show

app.py +93 -47

app.py CHANGED Viewed

@@ -1,22 +1,66 @@
 import os
 import shutil
-import subprocess
-import gradio as gr
-from gradio_imageslider import ImageSlider
-from PIL import Image
 import cv2
 import numpy as np
-# Output folder
-OUTPUT_DIR = "output"
-os.makedirs(OUTPUT_DIR, exist_ok=True)
-# Slider preview: always contains at least 1 frame
 def generate_slider_from_video(video_path, max_frames=30):
     frames = []
     cap = cv2.VideoCapture(video_path)
     if not cap.isOpened() or int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) == 0:
-        frames.append(Image.new("RGB", (256,256), color=(0,0,0)))
         return frames
     total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
@@ -27,64 +71,66 @@ def generate_slider_from_video(video_path, max_frames=30):
         if not ret:
             break
         if idx % step == 0:
-            frames.append(Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)))
         idx += 1
     cap.release()
     if len(frames) == 0:
-        frames.append(Image.new("RGB", (256,256), color=(0,0,0)))
     return frames
-# Process the uploaded video
 def process_video(video_file):
-    # Copy the uploaded file into the local folder
     video_dest = os.path.join(OUTPUT_DIR, os.path.basename(video_file.name))
     shutil.copy(video_file.name, video_dest)
-    # Output video path
     output_video = os.path.join(OUTPUT_DIR, os.path.basename(video_dest).replace(".mp4","_depth.mp4"))
-    # Run run_video.py
-    cmd = [
-        "python", "run_video.py",
-        "--encoder", "vitb",
-        "--video-path", video_dest,
-        "--outdir", OUTPUT_DIR,
-        "--grayscale",
-        "--pred-only"
-    ]
-    try:
-        subprocess.run(cmd, check=True)
-    except subprocess.CalledProcessError:
-        print("run_video.py failed, will use dummy video")
-    # If the output video does not exist or has size 0, create a 1-frame dummy video
-    if not os.path.exists(output_video) or os.path.getsize(output_video) == 0:
-        dummy_path = os.path.join(OUTPUT_DIR, "dummy.mp4")
-        if not os.path.exists(dummy_path):
-            fourcc = cv2.VideoWriter_fourcc(*'mp4v')
-            out = cv2.VideoWriter(dummy_path, fourcc, 1.0, (256,256))
-            frame = np.zeros((256,256,3), np.uint8)
-            out.write(frame)
-            out.release()
-        output_video = dummy_path
-    # Slider preview
-    slider_images = generate_slider_from_video(output_video, max_frames=30)
     return slider_images, output_video
 # Gradio UI
-with gr.Blocks() as demo:
-    gr.Markdown("# Depth Anything V2 - Video Demo")
-    gr.Markdown("Upload an MP4 video and get a DepthMap video automatically.")
     video_input = gr.File(label="Upload MP4", file_types=['.mp4'])
-    depth_slider = ImageSlider(label="Depth Map Slider")
     video_output = gr.Video(label="DepthMap Video")
     submit = gr.Button("Render DepthMap")
     submit.click(fn=process_video, inputs=[video_input], outputs=[depth_slider, video_output])
 if __name__ == "__main__":
-    demo.queue().launch()

 import os
 import shutil
 import cv2
 import numpy as np
+import torch
+from PIL import Image
+import gradio as gr
+from gradio_imageslider import ImageSlider
+from huggingface_hub import hf_hub_download
+from depth_anything_v2.dpt import DepthAnythingV2
+# CSS kept as in the original
+css = """
+#img-display-container {
+    max-height: 100vh;
+}
+#img-display-input {
+    max-height: 80vh;
+}
+#img-display-output {
+    max-height: 80vh;
+}
+#download {
+    height: 62px;
+}
+"""
+# Use CUDA when torch reports it as available, otherwise fall back to the CPU
+DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
+# Load model
+# Keyword arguments passed to the DepthAnythingV2 constructor, keyed by encoder name
+model_configs = {
+    'vits': {'encoder': 'vits', 'features': 64, 'out_channels': [48, 96, 192, 384]},
+    'vitb': {'encoder': 'vitb', 'features': 128, 'out_channels': [96, 192, 384, 768]},
+    'vitl': {'encoder': 'vitl', 'features': 256, 'out_channels': [256, 512, 1024, 1024]},
+    'vitg': {'encoder': 'vitg', 'features': 384, 'out_channels': [1536, 1536, 1536, 1536]}
+}
+# Maps an encoder key to the size suffix used in the Hub repo id below
+encoder2name = {'vits': 'Small', 'vitb': 'Base', 'vitl': 'Large', 'vitg': 'Giant'}
+# The encoder key selects both the constructor config and the checkpoint file name
+encoder = 'vitl'
+model_name = encoder2name[encoder]
+model = DepthAnythingV2(**model_configs[encoder])
+# Fetch the checkpoint via hf_hub_download; the returned path is loaded just below
+filepath = hf_hub_download(
+    repo_id=f"depth-anything/Depth-Anything-V2-{model_name}",
+    filename=f"depth_anything_v2_{encoder}.pth",
+    repo_type="model"
+)
+# Map the checkpoint to the CPU on load; the model is moved to DEVICE afterwards
+state_dict = torch.load(filepath, map_location="cpu")
+model.load_state_dict(state_dict)
+model = model.to(DEVICE).eval()
+# Markdown strings for the page heading and description
+title = "# Depth Anything V2"
+description = "Upload a video to get Grayscale DepthMap video automatically."
+# Predict depth for one frame
+def predict_depth(frame_rgb):
+    return model.infer_image(frame_rgb)
+# Generate slider from video
 def generate_slider_from_video(video_path, max_frames=30):
     frames = []
     cap = cv2.VideoCapture(video_path)
     if not cap.isOpened() or int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) == 0:
+        frames.append(Image.new("L", (256,256), color=0))
         return frames
     total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
         if not ret:
             break
         if idx % step == 0:
+            # Convert to grayscale for slider
+            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
+            frames.append(Image.fromarray(gray))
         idx += 1
     cap.release()
     if len(frames) == 0:
+        frames.append(Image.new("L", (256,256), color=0))
     return frames
+# Process video upload
 def process_video(video_file):
+    OUTPUT_DIR = "output"
+    os.makedirs(OUTPUT_DIR, exist_ok=True)
     video_dest = os.path.join(OUTPUT_DIR, os.path.basename(video_file.name))
     shutil.copy(video_file.name, video_dest)
     output_video = os.path.join(OUTPUT_DIR, os.path.basename(video_dest).replace(".mp4","_depth.mp4"))
+    cap = cv2.VideoCapture(video_dest)
+    if not cap.isOpened() or int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) == 0:
+        # Fallback dummy video
+        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
+        out = cv2.VideoWriter(output_video, fourcc, 1.0, (256,256), isColor=False)
+        frame = np.zeros((256,256), np.uint8)
+        out.write(frame)
+        out.release()
+        return generate_slider_from_video(output_video), output_video
+    fps = cap.get(cv2.CAP_PROP_FPS)
+    width  = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
+    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
+    out = cv2.VideoWriter(output_video, fourcc, fps, (width,height), isColor=False)
+    while True:
+        ret, frame = cap.read()
+        if not ret:
+            break
+        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+        depth_map = predict_depth(frame_rgb)
+        # Normalize to 0-255 and convert to uint8
+        depth_gray = ((depth_map - depth_map.min()) / (depth_map.max() - depth_map.min()) * 255.0).astype(np.uint8)
+        out.write(depth_gray)
+    cap.release()
+    out.release()
+    slider_images = generate_slider_from_video(output_video)
     return slider_images, output_video
 # Gradio UI
+# Layout: heading and description, a file upload, a slider preview, the rendered video and a trigger button
+with gr.Blocks(css=css) as demo:
+    gr.Markdown(title)
+    gr.Markdown(description)
     video_input = gr.File(label="Upload MP4", file_types=['.mp4'])
+    # NOTE(review): process_video returns a list of frames for this component; ImageSlider is presumably a two-image comparison widget - confirm it accepts a list
+    depth_slider = ImageSlider(label="DepthMap Slider")
     video_output = gr.Video(label="DepthMap Video")
     submit = gr.Button("Render DepthMap")
+    # Clicking the button calls process_video with the upload; its two return values fill the slider and the video player
     submit.click(fn=process_video, inputs=[video_input], outputs=[depth_slider, video_output])
 if __name__ == "__main__":
+    # NOTE(review): share=True requests a public share link; presumably unnecessary when the app is hosted on Spaces - confirm
+    demo.queue().launch(share=True)