# depth_all / app.py
# VanNguyen1214's picture
# Update app.py
# a5b3059 verified
import gradio as gr
import os
from typing import Optional
from gradio_client import Client, handle_file
class VideoDepthApp:
    """Client-side helper that sends videos to the Video-Depth-Anything API.

    The instance holds a lazily created ``gradio_client.Client`` plus a table
    of named parameter presets.  ``process_video`` is the entry point wired to
    the UI; it never raises, reporting failures through its message string.

    NOTE(review): every non-``None`` ``custom_*`` argument overrides the
    preset value.  A caller that always supplies all four custom values
    therefore makes the preset choice purely informational -- confirm that
    this is intended.
    """

    # Preset used when the requested preset name is not in ``self.presets``.
    DEFAULT_PRESET = "Balanced"

    def __init__(self):
        """Set the API endpoint, defer the connection, and define the presets."""
        self.client_url = "depth-anything/Video-Depth-Anything"
        # Created on first use by connect_to_api().
        self.client = None
        self.presets = {
            "Fast": {
                "max_len": 100,
                "target_fps": 10,
                "max_res": 640,
                "grayscale": True,
                "description": "Fast processing with lower quality",
            },
            "Balanced": {
                "max_len": 300,
                "target_fps": 15,
                "max_res": 1024,
                "grayscale": False,
                "description": "Balanced quality and speed",
            },
            "High_quality": {
                "max_len": 500,
                "target_fps": 24,
                "max_res": 1920,
                "grayscale": False,
                "description": "High quality processing (slower)",
            },
            "Mobile": {
                "max_len": 200,
                "target_fps": 12,
                "max_res": 720,
                "grayscale": False,
                "description": "Optimized for mobile devices",
            },
        }

    def connect_to_api(self):
        """Create the API client if it does not exist yet.

        Returns:
            A ``(success, message)`` tuple.  ``success`` is ``False`` and the
            message carries the exception text when the client cannot be
            created.
        """
        try:
            if not self.client:
                self.client = Client(self.client_url)
            return True, "Connected to API successfully!"
        except Exception as e:
            return False, f"Connected Error : {str(e)}"

    def _resolve_params(
        self,
        preset,
        custom_max_len,
        custom_target_fps,
        custom_max_res,
        custom_grayscale,
    ):
        """Return a fresh parameter dict: preset values plus custom overrides.

        An unknown ``preset`` falls back to ``DEFAULT_PRESET``.  The stored
        preset dict is copied, so the overrides never mutate ``self.presets``.
        """
        if preset not in self.presets:
            # The original fallback looked up "balanced" (lower case), which
            # is not a key of self.presets and raised KeyError.
            preset = self.DEFAULT_PRESET
        params = self.presets[preset].copy()

        overrides = {
            "max_len": custom_max_len,
            "target_fps": custom_target_fps,
            "max_res": custom_max_res,
            "grayscale": custom_grayscale,
        }
        # None means "keep the preset value"; False/0 are real overrides.
        params.update(
            {key: value for key, value in overrides.items() if value is not None}
        )
        return params

    @staticmethod
    def _extract_video_path(item):
        """Return the video path carried by one API result entry, or ``None``.

        Accepts a dict with a ``"video"`` key or a plain non-empty string
        path.  NOTE(review): the plain-string form is assumed to be a file
        path -- confirm against the API's actual return shape.
        """
        if isinstance(item, dict):
            return item.get("video")
        if isinstance(item, str) and item:
            return item
        return None

    def process_video(
        self,
        video_file,
        preset: str = "Balanced",
        custom_max_len: Optional[int] = 500,
        custom_target_fps: Optional[int] = 30,
        custom_max_res: Optional[int] = 1080,
        custom_grayscale: Optional[bool] = None
    ):
        """Run depth estimation on ``video_file`` through the remote API.

        Args:
            video_file: Path of the uploaded video, or ``None`` if nothing
                was uploaded.
            preset: Name of an entry in ``self.presets``; unknown names fall
                back to ``DEFAULT_PRESET``.
            custom_max_len: Overrides the preset ``max_len`` unless ``None``.
            custom_target_fps: Overrides the preset ``target_fps`` unless
                ``None``.
            custom_max_res: Overrides the preset ``max_res`` unless ``None``.
            custom_grayscale: Overrides the preset ``grayscale`` unless
                ``None``.

        Returns:
            A ``(source_video, depth_video, message)`` tuple.  Both videos
            are ``None`` on failure, in which case ``message`` describes the
            problem; any exception is caught and reported there.
        """
        if video_file is None:
            return None, None, "❌Please upload a video file."
        try:
            # Connect to the API.
            success, message = self.connect_to_api()
            if not success:
                return None, None, message

            # Take the preset values, then apply the custom overrides.
            params = self._resolve_params(
                preset,
                custom_max_len,
                custom_target_fps,
                custom_max_res,
                custom_grayscale,
            )

            # Process the video.
            result = self.client.predict(
                input_video={"video": handle_file(video_file)},
                max_len=params["max_len"],
                target_fps=params["target_fps"],
                max_res=params["max_res"],
                grayscale=params["grayscale"],
                api_name="/infer_video_depth"
            )

            # The first entry is read as the source video and the second as
            # the depth video.
            src_video = None
            depth_video = None
            if isinstance(result, (tuple, list)) and len(result) >= 2:
                src_video = self._extract_video_path(result[0])
                depth_video = self._extract_video_path(result[1])

            # Build the result summary.
            info = (
                "\n"
                "✅ **Success!**\n"
                "**Information:**\n"
                f"- Preset: {preset}\n"
                f"- Max Length: {params['max_len']} frames\n"
                f"- Target FPS: {params['target_fps']}\n"
                f"- Max Resolution: {params['max_res']}\n"
                f"- Grayscale: {'Yes' if params['grayscale'] else 'No'}\n"
                "**Generate Video:**\n"
                f"- Source: {'✅' if src_video else '❌'}\n"
                f"- Video Depth: {'✅' if depth_video else '❌'}\n"
            )
            # Return both videos together with the summary.
            return src_video, depth_video, info
        except Exception as e:
            error_msg = f"❌ **Video generate error:**\n\n{str(e)}**\nCould be :\n- Server busy\n- Video verry highh\n- Internet connection unstable"
            return None, None, error_msg

    def get_preset_info(self, preset_name: str):
        """Return a Markdown summary of ``preset_name``.

        Returns ``"Preset not found."`` when the name is not a known preset.
        """
        if preset_name in self.presets:
            preset = self.presets[preset_name]
            return (
                "\n"
                f"**{preset_name.upper()}:**\n"
                f"- {preset['description']}\n"
                f"- Max Length: {preset['max_len']} frames\n"
                f"- Target FPS: {preset['target_fps']}\n"
                f"- Max Resolution: {preset['max_res']}\n"
                f"- Grayscale: {'Yes' if preset['grayscale'] else 'No'}\n"
            )
        return "Preset not found."
def create_interface():
    """Assemble the Gradio Blocks UI for the video depth estimation app.

    The page has an input column (video upload, preset selector, custom
    parameter controls, process button), an output column (source video,
    depth video, result text) and a list of example videos.  Changing the
    preset refreshes the preset description; clicking the button calls
    ``VideoDepthApp.process_video`` with the video, the preset name and the
    four custom controls.

    Returns:
        The constructed ``gr.Blocks`` object; it is not launched here.
    """
    depth_app = VideoDepthApp()
    initial_preset = "Balanced"

    intro_text = (
        "\n"
        "# 🎬 Video Depth Estimation\n"
        "App uses the **Video-Depth-Anything** API to estimate depth from videos.\n"
        "## 🚀 How to use\n"
        "1. Upload a video file (MP4 format recommended).\n"
        "2. Select a preset configuration or customize parameters.\n"
        "3. Click \"Process Video\" to start depth estimation.\n"
        "4. Wait for the processing to complete (may take a few minutes).\n"
        "5. View the output videos: original and depth estimation.\n"
    )

    example_files = (
        "davis_rollercoaster.mp4",
        "Tokyo-Walk_rgb.mp4",
        "4158877-uhd_3840_2160_30fps_rgb.mp4",
        "4511004-uhd_3840_2160_24fps_rgb.mp4",
        "1753029-hd_1920_1080_30fps.mp4",
        "davis_burnout.mp4",
        "example_5473765-l.mp4",
        "Istanbul-26920.mp4",
        "obj_1.mp4",
        "sheep_cut1.mp4",
    )
    # One single-column row per example, each pointing into the videos/ folder.
    example_rows = [[f"videos/{file_name}"] for file_name in example_files]

    with gr.Blocks(title="Video Depth Estimation", theme=gr.themes.Soft()) as demo:
        gr.Markdown(intro_text)

        with gr.Row():
            # Left column: upload, settings and the trigger button.
            with gr.Column(scale=1):
                gr.Markdown("### 📤 Input")
                upload = gr.Video(label="Upload Video", height=500)

                gr.Markdown("### ⚙️ Settings")
                preset_picker = gr.Dropdown(
                    choices=list(depth_app.presets),
                    value=initial_preset,
                    label="Select Preset",
                    info="Select a preset configuration for processing the video.",
                )
                preset_summary = gr.Markdown(
                    depth_app.get_preset_info(initial_preset)
                )

                # The values of these four controls are passed to
                # process_video on every click.
                with gr.Accordion("🔧 Custom parameter ", open=False):
                    max_len_slider = gr.Slider(
                        minimum=50, maximum=1000, step=50, value=500,
                        label="Max Length (frames)",
                        info="Maximum length of the video in frames (default: 500)",
                    )
                    fps_slider = gr.Slider(
                        minimum=5, maximum=30, step=1, value=15,
                        label="Target FPS",
                        info="Target frames per second for processing (default: 15)",
                    )
                    max_res_slider = gr.Slider(
                        minimum=480, maximum=1920, step=160, value=1280,
                        label="Max Resolution",
                        info="Maximum resolution for processing (default: 1280)",
                    )
                    grayscale_box = gr.Checkbox(
                        label="Grayscale",
                        info="Output video in grayscale (default: False)",
                        value=False,
                    )

                run_button = gr.Button("🚀 Process Video", variant="primary", size="lg")

            # Right column: the two result videos and the status text.
            with gr.Column(scale=1):
                gr.Markdown("### 📥 Output")
                with gr.Row():
                    source_view = gr.Video(label="Video Source", height=200)
                    depth_view = gr.Video(label="Video Depth Estimation", height=200)
                status_text = gr.Markdown("### 📊 Result Information")

        gr.Markdown("### 📋 Examples")
        gr.Examples(
            examples=example_rows,
            inputs=[upload],
            label="Video Examples",
        )

        # Refresh the preset description whenever the selection changes.
        preset_picker.change(
            fn=depth_app.get_preset_info,
            inputs=[preset_picker],
            outputs=[preset_summary],
        )

        # Send the video and all settings to the API on click.
        click_inputs = [
            upload,
            preset_picker,
            max_len_slider,
            fps_slider,
            max_res_slider,
            grayscale_box,
        ]
        click_outputs = [source_view, depth_view, status_text]
        run_button.click(
            fn=depth_app.process_video,
            inputs=click_inputs,
            outputs=click_outputs,
        )

    return demo
if __name__ == "__main__":
    # Build the application and start serving it on 0.0.0.0:7860 with
    # sharing disabled.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,
    }
    interface = create_interface()
    interface.launch(**launch_options)