niye4 committed on
Commit
cc36089
·
verified ·
1 Parent(s): 293f87c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +93 -47
app.py CHANGED
@@ -1,22 +1,66 @@
1
  import os
2
  import shutil
3
- import subprocess
4
- import gradio as gr
5
- from gradio_imageslider import ImageSlider
6
- from PIL import Image
7
  import cv2
8
  import numpy as np
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
 
10
- # Output folder
11
- OUTPUT_DIR = "output"
12
- os.makedirs(OUTPUT_DIR, exist_ok=True)
13
 
14
- # Slider preview: luôn ít nhất 1 frame
 
 
 
 
15
  def generate_slider_from_video(video_path, max_frames=30):
16
  frames = []
17
  cap = cv2.VideoCapture(video_path)
18
  if not cap.isOpened() or int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) == 0:
19
- frames.append(Image.new("RGB", (256,256), color=(0,0,0)))
20
  return frames
21
 
22
  total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
@@ -27,64 +71,66 @@ def generate_slider_from_video(video_path, max_frames=30):
27
  if not ret:
28
  break
29
  if idx % step == 0:
30
- frames.append(Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)))
 
 
31
  idx += 1
32
  cap.release()
33
-
34
  if len(frames) == 0:
35
- frames.append(Image.new("RGB", (256,256), color=(0,0,0)))
36
  return frames
37
 
38
- # Xử video upload
39
  def process_video(video_file):
40
- # Copy file upload vào local folder
 
41
  video_dest = os.path.join(OUTPUT_DIR, os.path.basename(video_file.name))
42
  shutil.copy(video_file.name, video_dest)
43
 
44
- # Output video path
45
  output_video = os.path.join(OUTPUT_DIR, os.path.basename(video_dest).replace(".mp4","_depth.mp4"))
46
 
47
- # Chạy run_video.py
48
- cmd = [
49
- "python", "run_video.py",
50
- "--encoder", "vitb",
51
- "--video-path", video_dest,
52
- "--outdir", OUTPUT_DIR,
53
- "--grayscale",
54
- "--pred-only"
55
- ]
56
- try:
57
- subprocess.run(cmd, check=True)
58
- except subprocess.CalledProcessError:
59
- print("run_video.py failed, will use dummy video")
60
-
61
- # Nếu video output không tồn tại hoặc size=0 tạo dummy video 1 frame
62
- if not os.path.exists(output_video) or os.path.getsize(output_video) == 0:
63
- dummy_path = os.path.join(OUTPUT_DIR, "dummy.mp4")
64
- if not os.path.exists(dummy_path):
65
- fourcc = cv2.VideoWriter_fourcc(*'mp4v')
66
- out = cv2.VideoWriter(dummy_path, fourcc, 1.0, (256,256))
67
- frame = np.zeros((256,256,3), np.uint8)
68
- out.write(frame)
69
- out.release()
70
- output_video = dummy_path
71
-
72
- # Slider preview
73
- slider_images = generate_slider_from_video(output_video, max_frames=30)
74
 
 
75
  return slider_images, output_video
76
 
77
  # Gradio UI
78
- with gr.Blocks() as demo:
79
- gr.Markdown("# Depth Anything V2 - Video Demo")
80
- gr.Markdown("Upload an MP4 video and get a DepthMap video automatically.")
81
 
82
  video_input = gr.File(label="Upload MP4", file_types=['.mp4'])
83
- depth_slider = ImageSlider(label="Depth Map Slider")
84
  video_output = gr.Video(label="DepthMap Video")
85
  submit = gr.Button("Render DepthMap")
86
 
87
  submit.click(fn=process_video, inputs=[video_input], outputs=[depth_slider, video_output])
88
 
89
  if __name__ == "__main__":
90
- demo.queue().launch()
 
1
import os
import shutil

import cv2
import numpy as np
import torch
from PIL import Image
import gradio as gr
from gradio_imageslider import ImageSlider
from huggingface_hub import hf_hub_download
from depth_anything_v2.dpt import DepthAnythingV2

# CSS kept from the original Depth-Anything demo layout.
css = """
#img-display-container {
max-height: 100vh;
}
#img-display-input {
max-height: 80vh;
}
#img-display-output {
max-height: 80vh;
}
#download {
height: 62px;
}
"""

# Run inference on GPU when available, otherwise fall back to CPU.
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

# Per-encoder architecture hyperparameters for DepthAnythingV2.
model_configs = {
'vits': {'encoder': 'vits', 'features': 64, 'out_channels': [48, 96, 192, 384]},
'vitb': {'encoder': 'vitb', 'features': 128, 'out_channels': [96, 192, 384, 768]},
'vitl': {'encoder': 'vitl', 'features': 256, 'out_channels': [256, 512, 1024, 1024]},
'vitg': {'encoder': 'vitg', 'features': 384, 'out_channels': [1536, 1536, 1536, 1536]}
}
# Maps encoder key -> checkpoint repo-name suffix on the Hub.
encoder2name = {'vits': 'Small', 'vitb': 'Base', 'vitl': 'Large', 'vitg': 'Giant'}
encoder = 'vitl'
model_name = encoder2name[encoder]

# Download the matching checkpoint from the Hugging Face Hub and load it.
# NOTE(review): torch.load is called without weights_only=True; the file
# comes from a trusted official repo, but consider weights_only if the
# installed torch version supports it.
model = DepthAnythingV2(**model_configs[encoder])
filepath = hf_hub_download(
repo_id=f"depth-anything/Depth-Anything-V2-{model_name}",
filename=f"depth_anything_v2_{encoder}.pth",
repo_type="model"
)
state_dict = torch.load(filepath, map_location="cpu")
model.load_state_dict(state_dict)
model = model.to(DEVICE).eval()

title = "# Depth Anything V2"
description = "Upload a video to get Grayscale DepthMap video automatically."
 
53
 
54
def predict_depth(frame_rgb):
    """Run DepthAnythingV2 inference on a single RGB frame.

    frame_rgb: an RGB image array (as produced by cv2.cvtColor BGR->RGB).
    Returns the raw output of ``model.infer_image`` — presumably an
    H x W float depth array; confirm against the DepthAnythingV2 API.
    """
    depth = model.infer_image(frame_rgb)
    return depth
57
+
58
+ # Generate slider from video
59
  def generate_slider_from_video(video_path, max_frames=30):
60
  frames = []
61
  cap = cv2.VideoCapture(video_path)
62
  if not cap.isOpened() or int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) == 0:
63
+ frames.append(Image.new("L", (256,256), color=0))
64
  return frames
65
 
66
  total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
 
71
  if not ret:
72
  break
73
  if idx % step == 0:
74
+ # Convert to grayscale for slider
75
+ gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
76
+ frames.append(Image.fromarray(gray))
77
  idx += 1
78
  cap.release()
 
79
  if len(frames) == 0:
80
+ frames.append(Image.new("L", (256,256), color=0))
81
  return frames
82
 
83
# Process video upload
def process_video(video_file):
    """Render a grayscale depth-map video for an uploaded MP4.

    Parameters
    ----------
    video_file : uploaded file object exposing a ``.name`` path attribute
        (as provided by ``gr.File``).

    Returns
    -------
    tuple
        ``(slider_images, output_video)`` — preview frames for the slider
        component and the filesystem path of the rendered depth video.
    """
    OUTPUT_DIR = "output"
    os.makedirs(OUTPUT_DIR, exist_ok=True)
    video_dest = os.path.join(OUTPUT_DIR, os.path.basename(video_file.name))
    shutil.copy(video_file.name, video_dest)

    # Bug fix: the old str.replace(".mp4", "_depth.mp4") silently no-ops on
    # uploads whose extension is not exactly ".mp4" (e.g. ".MP4"), which made
    # output_video collide with — and the writer overwrite — the copied input.
    root, _ext = os.path.splitext(os.path.basename(video_dest))
    output_video = os.path.join(OUTPUT_DIR, root + "_depth.mp4")

    cap = cv2.VideoCapture(video_dest)
    if not cap.isOpened() or int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) == 0:
        cap.release()  # bug fix: the capture handle was leaked on this path
        # Fallback: emit a one-frame black dummy video so the UI still
        # receives a playable output.
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter(output_video, fourcc, 1.0, (256,256), isColor=False)
        out.write(np.zeros((256,256), np.uint8))
        out.release()
        return generate_slider_from_video(output_video), output_video

    fps = cap.get(cv2.CAP_PROP_FPS)
    if not fps or fps <= 0:
        # Bug fix: some containers report 0 fps, which would create an
        # invalid VideoWriter; fall back to a sane default.
        fps = 25.0
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_video, fourcc, fps, (width,height), isColor=False)

    while True:
        ret, frame = cap.read()
        if not ret:
            break
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        depth_map = predict_depth(frame_rgb)
        # Normalize to 0-255 and convert to uint8. Bug fix: a constant depth
        # map made max == min and previously divided by zero (NaN frame).
        d_min, d_max = depth_map.min(), depth_map.max()
        scale = float(d_max - d_min) or 1.0
        depth_gray = ((depth_map - d_min) / scale * 255.0).astype(np.uint8)
        out.write(depth_gray)
    cap.release()
    out.release()

    slider_images = generate_slider_from_video(output_video)
    return slider_images, output_video
122
 
123
# Gradio UI
with gr.Blocks(css=css) as demo:
    gr.Markdown(title)
    gr.Markdown(description)

    video_input = gr.File(label="Upload MP4", file_types=['.mp4'])
    # NOTE(review): gradio_imageslider's ImageSlider normally takes a
    # (before, after) image pair, while process_video returns a list of up
    # to 30 frames — confirm the component accepts that value shape.
    depth_slider = ImageSlider(label="DepthMap Slider")
    video_output = gr.Video(label="DepthMap Video")
    submit = gr.Button("Render DepthMap")

    # process_video -> (slider frames, rendered depth-video path).
    submit.click(fn=process_video, inputs=[video_input], outputs=[depth_slider, video_output])

if __name__ == "__main__":
    # NOTE(review): share=True requests a public tunnel link — unnecessary
    # when the app is hosted on HF Spaces; confirm the deployment target.
    demo.queue().launch(share=True)