niye4 committed on
Commit
c2dcb0d
·
verified ·
1 Parent(s): cc36089

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -17
app.py CHANGED
@@ -6,10 +6,9 @@ import torch
6
  from PIL import Image
7
  import gradio as gr
8
  from gradio_imageslider import ImageSlider
9
- from huggingface_hub import hf_hub_download
10
  from depth_anything_v2.dpt import DepthAnythingV2
11
 
12
- # CSS giữ nguyên gốc
13
  css = """
14
  #img-display-container {
15
  max-height: 100vh;
@@ -27,31 +26,26 @@ css = """
27
 
28
  DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
29
 
30
- # Load model
31
  model_configs = {
32
  'vits': {'encoder': 'vits', 'features': 64, 'out_channels': [48, 96, 192, 384]},
33
  'vitb': {'encoder': 'vitb', 'features': 128, 'out_channels': [96, 192, 384, 768]},
34
  'vitl': {'encoder': 'vitl', 'features': 256, 'out_channels': [256, 512, 1024, 1024]},
35
  'vitg': {'encoder': 'vitg', 'features': 384, 'out_channels': [1536, 1536, 1536, 1536]}
36
  }
37
- encoder2name = {'vits': 'Small', 'vitb': 'Base', 'vitl': 'Large', 'vitg': 'Giant'}
38
- encoder = 'vitl'
39
- model_name = encoder2name[encoder]
40
 
 
 
41
  model = DepthAnythingV2(**model_configs[encoder])
42
- filepath = hf_hub_download(
43
- repo_id=f"depth-anything/Depth-Anything-V2-{model_name}",
44
- filename=f"depth_anything_v2_{encoder}.pth",
45
- repo_type="model"
46
- )
47
- state_dict = torch.load(filepath, map_location="cpu")
48
  model.load_state_dict(state_dict)
49
  model = model.to(DEVICE).eval()
50
 
51
  title = "# Depth Anything V2"
52
  description = "Upload a video to get Grayscale DepthMap video automatically."
53
 
54
- # Predict depth for one frame
55
  def predict_depth(frame_rgb):
56
  return model.infer_image(frame_rgb)
57
 
@@ -71,7 +65,6 @@ def generate_slider_from_video(video_path, max_frames=30):
71
  if not ret:
72
  break
73
  if idx % step == 0:
74
- # Convert to grayscale for slider
75
  gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
76
  frames.append(Image.fromarray(gray))
77
  idx += 1
@@ -91,7 +84,7 @@ def process_video(video_file):
91
 
92
  cap = cv2.VideoCapture(video_dest)
93
  if not cap.isOpened() or int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) == 0:
94
- # Fallback dummy video
95
  fourcc = cv2.VideoWriter_fourcc(*'mp4v')
96
  out = cv2.VideoWriter(output_video, fourcc, 1.0, (256,256), isColor=False)
97
  frame = np.zeros((256,256), np.uint8)
@@ -111,7 +104,6 @@ def process_video(video_file):
111
  break
112
  frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
113
  depth_map = predict_depth(frame_rgb)
114
- # Normalize to 0-255 and convert to uint8
115
  depth_gray = ((depth_map - depth_map.min()) / (depth_map.max() - depth_map.min()) * 255.0).astype(np.uint8)
116
  out.write(depth_gray)
117
  cap.release()
@@ -121,7 +113,9 @@ def process_video(video_file):
121
  return slider_images, output_video
122
 
123
  # Gradio UI
124
- with gr.Blocks(css=css) as demo:
 
 
125
  gr.Markdown(title)
126
  gr.Markdown(description)
127
 
 
6
  from PIL import Image
7
  import gradio as gr
8
  from gradio_imageslider import ImageSlider
 
9
  from depth_anything_v2.dpt import DepthAnythingV2
10
 
11
+ # CSS sẽ nhúng bằng HTML thay vì gr.Blocks(css=...)
12
  css = """
13
  #img-display-container {
14
  max-height: 100vh;
 
26
 
27
  DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
28
 
29
+ # Model configs
30
  model_configs = {
31
  'vits': {'encoder': 'vits', 'features': 64, 'out_channels': [48, 96, 192, 384]},
32
  'vitb': {'encoder': 'vitb', 'features': 128, 'out_channels': [96, 192, 384, 768]},
33
  'vitl': {'encoder': 'vitl', 'features': 256, 'out_channels': [256, 512, 1024, 1024]},
34
  'vitg': {'encoder': 'vitg', 'features': 384, 'out_channels': [1536, 1536, 1536, 1536]}
35
  }
 
 
 
36
 
37
+ # Load vitb local checkpoint
38
+ encoder = 'vitb'
39
  model = DepthAnythingV2(**model_configs[encoder])
40
+ checkpoint_path = f"checkpoints/depth_anything_v2_{encoder}.pth"
41
+ state_dict = torch.load(checkpoint_path, map_location="cpu")
 
 
 
 
42
  model.load_state_dict(state_dict)
43
  model = model.to(DEVICE).eval()
44
 
45
  title = "# Depth Anything V2"
46
  description = "Upload a video to get Grayscale DepthMap video automatically."
47
 
48
+ # Predict depth for single frame
49
  def predict_depth(frame_rgb):
50
  return model.infer_image(frame_rgb)
51
 
 
65
  if not ret:
66
  break
67
  if idx % step == 0:
 
68
  gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
69
  frames.append(Image.fromarray(gray))
70
  idx += 1
 
84
 
85
  cap = cv2.VideoCapture(video_dest)
86
  if not cap.isOpened() or int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) == 0:
87
+ # fallback dummy video
88
  fourcc = cv2.VideoWriter_fourcc(*'mp4v')
89
  out = cv2.VideoWriter(output_video, fourcc, 1.0, (256,256), isColor=False)
90
  frame = np.zeros((256,256), np.uint8)
 
104
  break
105
  frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
106
  depth_map = predict_depth(frame_rgb)
 
107
  depth_gray = ((depth_map - depth_map.min()) / (depth_map.max() - depth_map.min()) * 255.0).astype(np.uint8)
108
  out.write(depth_gray)
109
  cap.release()
 
113
  return slider_images, output_video
114
 
115
  # Gradio UI
116
+ with gr.Blocks() as demo:
117
+ gr.HTML(f"<style>{css}</style>") # inject CSS
118
+
119
  gr.Markdown(title)
120
  gr.Markdown(description)
121