Update app.py
Browse files
app.py
CHANGED
|
@@ -6,10 +6,9 @@ import torch
|
|
| 6 |
from PIL import Image
|
| 7 |
import gradio as gr
|
| 8 |
from gradio_imageslider import ImageSlider
|
| 9 |
-
from huggingface_hub import hf_hub_download
|
| 10 |
from depth_anything_v2.dpt import DepthAnythingV2
|
| 11 |
|
| 12 |
-
# CSS
|
| 13 |
css = """
|
| 14 |
#img-display-container {
|
| 15 |
max-height: 100vh;
|
|
@@ -27,31 +26,26 @@ css = """
|
|
| 27 |
|
| 28 |
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
|
| 29 |
|
| 30 |
-
#
|
| 31 |
model_configs = {
|
| 32 |
'vits': {'encoder': 'vits', 'features': 64, 'out_channels': [48, 96, 192, 384]},
|
| 33 |
'vitb': {'encoder': 'vitb', 'features': 128, 'out_channels': [96, 192, 384, 768]},
|
| 34 |
'vitl': {'encoder': 'vitl', 'features': 256, 'out_channels': [256, 512, 1024, 1024]},
|
| 35 |
'vitg': {'encoder': 'vitg', 'features': 384, 'out_channels': [1536, 1536, 1536, 1536]}
|
| 36 |
}
|
| 37 |
-
encoder2name = {'vits': 'Small', 'vitb': 'Base', 'vitl': 'Large', 'vitg': 'Giant'}
|
| 38 |
-
encoder = 'vitl'
|
| 39 |
-
model_name = encoder2name[encoder]
|
| 40 |
|
|
|
|
|
|
|
| 41 |
model = DepthAnythingV2(**model_configs[encoder])
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
filename=f"depth_anything_v2_{encoder}.pth",
|
| 45 |
-
repo_type="model"
|
| 46 |
-
)
|
| 47 |
-
state_dict = torch.load(filepath, map_location="cpu")
|
| 48 |
model.load_state_dict(state_dict)
|
| 49 |
model = model.to(DEVICE).eval()
|
| 50 |
|
| 51 |
title = "# Depth Anything V2"
|
| 52 |
description = "Upload a video to get Grayscale DepthMap video automatically."
|
| 53 |
|
| 54 |
-
# Predict depth for
|
| 55 |
def predict_depth(frame_rgb):
|
| 56 |
return model.infer_image(frame_rgb)
|
| 57 |
|
|
@@ -71,7 +65,6 @@ def generate_slider_from_video(video_path, max_frames=30):
|
|
| 71 |
if not ret:
|
| 72 |
break
|
| 73 |
if idx % step == 0:
|
| 74 |
-
# Convert to grayscale for slider
|
| 75 |
gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
|
| 76 |
frames.append(Image.fromarray(gray))
|
| 77 |
idx += 1
|
|
@@ -91,7 +84,7 @@ def process_video(video_file):
|
|
| 91 |
|
| 92 |
cap = cv2.VideoCapture(video_dest)
|
| 93 |
if not cap.isOpened() or int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) == 0:
|
| 94 |
-
#
|
| 95 |
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
|
| 96 |
out = cv2.VideoWriter(output_video, fourcc, 1.0, (256,256), isColor=False)
|
| 97 |
frame = np.zeros((256,256), np.uint8)
|
|
@@ -111,7 +104,6 @@ def process_video(video_file):
|
|
| 111 |
break
|
| 112 |
frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
|
| 113 |
depth_map = predict_depth(frame_rgb)
|
| 114 |
-
# Normalize to 0-255 and convert to uint8
|
| 115 |
depth_gray = ((depth_map - depth_map.min()) / (depth_map.max() - depth_map.min()) * 255.0).astype(np.uint8)
|
| 116 |
out.write(depth_gray)
|
| 117 |
cap.release()
|
|
@@ -121,7 +113,9 @@ def process_video(video_file):
|
|
| 121 |
return slider_images, output_video
|
| 122 |
|
| 123 |
# Gradio UI
|
| 124 |
-
with gr.Blocks(
|
|
|
|
|
|
|
| 125 |
gr.Markdown(title)
|
| 126 |
gr.Markdown(description)
|
| 127 |
|
|
|
|
| 6 |
from PIL import Image
|
| 7 |
import gradio as gr
|
| 8 |
from gradio_imageslider import ImageSlider
|
|
|
|
| 9 |
from depth_anything_v2.dpt import DepthAnythingV2
|
| 10 |
|
| 11 |
+
# CSS – injected via an HTML <style> tag instead of gr.Blocks(css=...)
|
| 12 |
css = """
|
| 13 |
#img-display-container {
|
| 14 |
max-height: 100vh;
|
|
|
|
| 26 |
|
| 27 |
# Run on GPU when available, otherwise fall back to CPU.
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

# Per-encoder DPT hyper-parameters: decoder feature width and the four
# out-channel sizes, one entry per DepthAnythingV2 backbone variant.
model_configs = {
    'vits': {'encoder': 'vits', 'features': 64,  'out_channels': [48, 96, 192, 384]},
    'vitb': {'encoder': 'vitb', 'features': 128, 'out_channels': [96, 192, 384, 768]},
    'vitl': {'encoder': 'vitl', 'features': 256, 'out_channels': [256, 512, 1024, 1024]},
    'vitg': {'encoder': 'vitg', 'features': 384, 'out_channels': [1536, 1536, 1536, 1536]},
}
|
|
|
|
|
|
|
|
|
|
| 36 |
|
| 37 |
+
# --- Model setup --------------------------------------------------------
# Instantiate the ViT-B variant and restore weights from a local checkpoint.
encoder = 'vitb'
model = DepthAnythingV2(**model_configs[encoder])

# NOTE(review): torch.load performs full pickle deserialization by default;
# if this checkpoint could ever come from an untrusted source, consider
# weights_only=True (torch >= 1.13) — confirm the project's torch version.
checkpoint_path = f"checkpoints/depth_anything_v2_{encoder}.pth"
model.load_state_dict(torch.load(checkpoint_path, map_location="cpu"))
model = model.to(DEVICE).eval()

title = "# Depth Anything V2"
description = "Upload a video to get Grayscale DepthMap video automatically."
|
| 47 |
|
| 48 |
+
# Predict depth for a single frame.
def predict_depth(frame_rgb):
    """Return the raw depth map the global ``model`` infers for one RGB frame."""
    depth = model.infer_image(frame_rgb)
    return depth
|
| 51 |
|
|
|
|
| 65 |
if not ret:
|
| 66 |
break
|
| 67 |
if idx % step == 0:
|
|
|
|
| 68 |
gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
|
| 69 |
frames.append(Image.fromarray(gray))
|
| 70 |
idx += 1
|
|
|
|
| 84 |
|
| 85 |
cap = cv2.VideoCapture(video_dest)
|
| 86 |
if not cap.isOpened() or int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) == 0:
|
| 87 |
+
# fallback dummy video
|
| 88 |
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
|
| 89 |
out = cv2.VideoWriter(output_video, fourcc, 1.0, (256,256), isColor=False)
|
| 90 |
frame = np.zeros((256,256), np.uint8)
|
|
|
|
| 104 |
break
|
| 105 |
frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
|
| 106 |
depth_map = predict_depth(frame_rgb)
|
|
|
|
| 107 |
depth_gray = ((depth_map - depth_map.min()) / (depth_map.max() - depth_map.min()) * 255.0).astype(np.uint8)
|
| 108 |
out.write(depth_gray)
|
| 109 |
cap.release()
|
|
|
|
| 113 |
return slider_images, output_video
|
| 114 |
|
| 115 |
# Gradio UI
|
| 116 |
+
with gr.Blocks() as demo:
|
| 117 |
+
gr.HTML(f"<style>{css}</style>") # inject CSS
|
| 118 |
+
|
| 119 |
gr.Markdown(title)
|
| 120 |
gr.Markdown(description)
|
| 121 |
|