Update app.py
Browse files
app.py
CHANGED
|
@@ -6,10 +6,9 @@ import torch
|
|
| 6 |
from PIL import Image
|
| 7 |
import gradio as gr
|
| 8 |
from gradio_imageslider import ImageSlider
|
| 9 |
-
from huggingface_hub import hf_hub_download
|
| 10 |
from depth_anything_v2.dpt import DepthAnythingV2
|
| 11 |
|
| 12 |
-
# CSS
|
| 13 |
css = """
|
| 14 |
#img-display-container {
|
| 15 |
max-height: 100vh;
|
|
@@ -27,31 +26,26 @@ css = """
|
|
| 27 |
|
| 28 |
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
|
| 29 |
|
| 30 |
-
#
|
| 31 |
model_configs = {
|
| 32 |
'vits': {'encoder': 'vits', 'features': 64, 'out_channels': [48, 96, 192, 384]},
|
| 33 |
'vitb': {'encoder': 'vitb', 'features': 128, 'out_channels': [96, 192, 384, 768]},
|
| 34 |
'vitl': {'encoder': 'vitl', 'features': 256, 'out_channels': [256, 512, 1024, 1024]},
|
| 35 |
'vitg': {'encoder': 'vitg', 'features': 384, 'out_channels': [1536, 1536, 1536, 1536]}
|
| 36 |
}
|
| 37 |
-
encoder2name = {'vits': 'Small', 'vitb': 'Base', 'vitl': 'Large', 'vitg': 'Giant'}
|
| 38 |
-
encoder = 'vitl'
|
| 39 |
-
model_name = encoder2name[encoder]
|
| 40 |
|
|
|
|
|
|
|
| 41 |
model = DepthAnythingV2(**model_configs[encoder])
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
filename=f"depth_anything_v2_{encoder}.pth",
|
| 45 |
-
repo_type="model"
|
| 46 |
-
)
|
| 47 |
-
state_dict = torch.load(filepath, map_location="cpu")
|
| 48 |
model.load_state_dict(state_dict)
|
| 49 |
model = model.to(DEVICE).eval()
|
| 50 |
|
| 51 |
title = "# Depth Anything V2"
|
| 52 |
description = "Upload a video to get Grayscale DepthMap video automatically."
|
| 53 |
|
| 54 |
-
# Predict depth for
|
| 55 |
def predict_depth(frame_rgb):
|
| 56 |
return model.infer_image(frame_rgb)
|
| 57 |
|
|
@@ -71,7 +65,6 @@ def generate_slider_from_video(video_path, max_frames=30):
|
|
| 71 |
if not ret:
|
| 72 |
break
|
| 73 |
if idx % step == 0:
|
| 74 |
-
# Convert to grayscale for slider
|
| 75 |
gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
|
| 76 |
frames.append(Image.fromarray(gray))
|
| 77 |
idx += 1
|
|
@@ -91,7 +84,7 @@ def process_video(video_file):
|
|
| 91 |
|
| 92 |
cap = cv2.VideoCapture(video_dest)
|
| 93 |
if not cap.isOpened() or int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) == 0:
|
| 94 |
-
#
|
| 95 |
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
|
| 96 |
out = cv2.VideoWriter(output_video, fourcc, 1.0, (256,256), isColor=False)
|
| 97 |
frame = np.zeros((256,256), np.uint8)
|
|
@@ -111,7 +104,6 @@ def process_video(video_file):
|
|
| 111 |
break
|
| 112 |
frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
|
| 113 |
depth_map = predict_depth(frame_rgb)
|
| 114 |
-
# Normalize to 0-255 and convert to uint8
|
| 115 |
depth_gray = ((depth_map - depth_map.min()) / (depth_map.max() - depth_map.min()) * 255.0).astype(np.uint8)
|
| 116 |
out.write(depth_gray)
|
| 117 |
cap.release()
|
|
@@ -121,7 +113,9 @@ def process_video(video_file):
|
|
| 121 |
return slider_images, output_video
|
| 122 |
|
| 123 |
# Gradio UI
|
| 124 |
-
with gr.Blocks(
|
|
|
|
|
|
|
| 125 |
gr.Markdown(title)
|
| 126 |
gr.Markdown(description)
|
| 127 |
|
|
|
|
| 6 |
from PIL import Image
|
| 7 |
import gradio as gr
|
| 8 |
from gradio_imageslider import ImageSlider
|
|
|
|
| 9 |
from depth_anything_v2.dpt import DepthAnythingV2
|
| 10 |
|
| 11 |
+
# CSS – injected via an HTML <style> tag instead of gr.Blocks(css=...)
|
| 12 |
css = """
|
| 13 |
#img-display-container {
|
| 14 |
max-height: 100vh;
|
|
|
|
| 26 |
|
| 27 |
# Run on GPU when available, otherwise fall back to CPU.
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

# Per-encoder DPT hyper-parameters: decoder feature width and the four
# out-channel sizes, one entry per DepthAnythingV2 backbone variant.
model_configs = {
    'vits': {'encoder': 'vits', 'features': 64,  'out_channels': [48, 96, 192, 384]},
    'vitb': {'encoder': 'vitb', 'features': 128, 'out_channels': [96, 192, 384, 768]},
    'vitl': {'encoder': 'vitl', 'features': 256, 'out_channels': [256, 512, 1024, 1024]},
    'vitg': {'encoder': 'vitg', 'features': 384, 'out_channels': [1536, 1536, 1536, 1536]},
}
|
|
|
|
|
|
|
|
|
|
| 36 |
|
| 37 |
+
# --- Model setup --------------------------------------------------------
# Instantiate the ViT-B variant and restore weights from a local checkpoint.
encoder = 'vitb'
model = DepthAnythingV2(**model_configs[encoder])

# NOTE(review): torch.load performs full pickle deserialization by default;
# if this checkpoint could ever come from an untrusted source, consider
# weights_only=True (torch >= 1.13) — confirm the project's torch version.
checkpoint_path = f"checkpoints/depth_anything_v2_{encoder}.pth"
model.load_state_dict(torch.load(checkpoint_path, map_location="cpu"))
model = model.to(DEVICE).eval()

title = "# Depth Anything V2"
description = "Upload a video to get Grayscale DepthMap video automatically."
|
| 47 |
|
| 48 |
+
# Predict depth for a single frame.
def predict_depth(frame_rgb):
    """Return the raw depth map the global ``model`` infers for one RGB frame."""
    depth = model.infer_image(frame_rgb)
    return depth
|
| 51 |
|
|
|
|
| 65 |
if not ret:
|
| 66 |
break
|
| 67 |
if idx % step == 0:
|
|
|
|
| 68 |
gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
|
| 69 |
frames.append(Image.fromarray(gray))
|
| 70 |
idx += 1
|
|
|
|
| 84 |
|
| 85 |
cap = cv2.VideoCapture(video_dest)
|
| 86 |
if not cap.isOpened() or int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) == 0:
|
| 87 |
+
# fallback dummy video
|
| 88 |
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
|
| 89 |
out = cv2.VideoWriter(output_video, fourcc, 1.0, (256,256), isColor=False)
|
| 90 |
frame = np.zeros((256,256), np.uint8)
|
|
|
|
| 104 |
break
|
| 105 |
frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
|
| 106 |
depth_map = predict_depth(frame_rgb)
|
|
|
|
| 107 |
depth_gray = ((depth_map - depth_map.min()) / (depth_map.max() - depth_map.min()) * 255.0).astype(np.uint8)
|
| 108 |
out.write(depth_gray)
|
| 109 |
cap.release()
|
|
|
|
| 113 |
return slider_images, output_video
|
| 114 |
|
| 115 |
# Gradio UI
|
| 116 |
+
with gr.Blocks() as demo:
|
| 117 |
+
gr.HTML(f"<style>{css}</style>") # inject CSS
|
| 118 |
+
|
| 119 |
gr.Markdown(title)
|
| 120 |
gr.Markdown(description)
|
| 121 |
|