"""Gradio front-end for the hosted Video-Depth-Anything depth-estimation Space."""

import os
from typing import Any, Dict, Optional, Tuple

import gradio as gr
from gradio_client import Client, handle_file

# Preset used for the initial UI state and as the fallback for unknown names.
DEFAULT_PRESET = "Balanced"

INTRO_MARKDOWN = """
# 🎬 Video Depth Estimation

App uses the **Video-Depth-Anything** API to estimate depth from videos.

## 🚀 How to use
1. Upload a video file (MP4 format recommended).
2. Select a preset configuration or customize parameters.
3. Click "Process Video" to start depth estimation.
4. Wait for the processing to complete (may take a few minutes).
5. View the output videos: original and depth estimation.
"""


class VideoDepthApp:
    """Thin wrapper around the remote depth-estimation API plus its presets.

    The remote client is created lazily on the first request so that building
    the UI does not require network access.
    """

    def __init__(self):
        # Identifier handed to ``gradio_client.Client``.
        self.client_url = "depth-anything/Video-Depth-Anything"
        # Created on demand by ``connect_to_api``.
        self.client = None
        # Named parameter bundles offered in the UI dropdown.
        self.presets = {
            "Fast": {
                "max_len": 100,
                "target_fps": 10,
                "max_res": 640,
                "grayscale": True,
                "description": "Fast processing with lower quality",
            },
            "Balanced": {
                "max_len": 300,
                "target_fps": 15,
                "max_res": 1024,
                "grayscale": False,
                "description": "Balanced quality and speed",
            },
            "High_quality": {
                "max_len": 500,
                "target_fps": 24,
                "max_res": 1920,
                "grayscale": False,
                "description": "High quality processing (slower)",
            },
            "Mobile": {
                "max_len": 200,
                "target_fps": 12,
                "max_res": 720,
                "grayscale": False,
                "description": "Optimized for mobile devices",
            },
        }

    def connect_to_api(self) -> Tuple[bool, str]:
        """Create the API client if it does not exist yet.

        Returns:
            ``(True, message)`` when a client is available, otherwise
            ``(False, message)`` with the error text.
        """
        try:
            if self.client is None:
                self.client = Client(self.client_url)
            return True, "Connected to API successfully!"
        except Exception as e:
            return False, f"Connected Error : {str(e)}"

    @staticmethod
    def _extract_video_path(item: Any) -> Optional[str]:
        """Return the video path from one API output entry, or ``None``.

        Accepts both the ``{"video": path, ...}`` dict form and a bare path
        string, so a change in the client's output format does not raise.
        """
        if not item:
            return None
        if isinstance(item, dict):
            return item.get("video") or None
        if isinstance(item, str):
            return item
        return None

    def _resolve_params(
        self,
        preset: str,
        custom_max_len: Optional[int],
        custom_target_fps: Optional[int],
        custom_max_res: Optional[int],
        custom_grayscale: Optional[bool],
        use_custom: bool,
    ) -> Tuple[str, Dict[str, Any]]:
        """Return the effective preset name and the parameters to send."""
        # Unknown preset names fall back to the default instead of raising.
        preset_name = preset if preset in self.presets else DEFAULT_PRESET
        params = self.presets[preset_name].copy()

        if use_custom:
            # A ``None`` custom value means "keep the preset value".
            if custom_max_len is not None:
                params["max_len"] = int(custom_max_len)
            if custom_target_fps is not None:
                params["target_fps"] = int(custom_target_fps)
            if custom_max_res is not None:
                params["max_res"] = int(custom_max_res)
            if custom_grayscale is not None:
                params["grayscale"] = bool(custom_grayscale)

        return preset_name, params

    def process_video(
        self,
        video_file,
        preset: str = "Balanced",
        custom_max_len: Optional[int] = 500,
        custom_target_fps: Optional[int] = 30,
        custom_max_res: Optional[int] = 1080,
        custom_grayscale: Optional[bool] = None,
        use_custom: bool = True,
    ):
        """Send a video to the depth API and return the resulting videos.

        Args:
            video_file: Path of the uploaded video, or ``None`` if nothing
                was uploaded.
            preset: Name of a preset in ``self.presets``; unknown names fall
                back to ``DEFAULT_PRESET``.
            custom_max_len: Override for the maximum number of frames.
            custom_target_fps: Override for the target frame rate.
            custom_max_res: Override for the maximum resolution.
            custom_grayscale: Override for grayscale output.
            use_custom: When ``False`` the ``custom_*`` values are ignored and
                the preset is used as-is. The UI always supplies slider
                values, so without this flag a preset could never take
                effect. Defaults to ``True`` to keep the behavior of existing
                callers.

        Returns:
            ``(source_video, depth_video, info_markdown)``. Both videos are
            ``None`` when processing fails; the message then describes the
            error.
        """
        if video_file is None:
            return None, None, "❌Please upload a video file."

        try:
            # Connect to the API (no-op if already connected).
            success, message = self.connect_to_api()
            if not success:
                return None, None, message

            preset_name, params = self._resolve_params(
                preset,
                custom_max_len,
                custom_target_fps,
                custom_max_res,
                custom_grayscale,
                use_custom,
            )

            # Run the remote inference.
            result = self.client.predict(
                input_video={"video": handle_file(video_file)},
                max_len=params["max_len"],
                target_fps=params["target_fps"],
                max_res=params["max_res"],
                grayscale=params["grayscale"],
                api_name="/infer_video_depth",
            )

            # The API returns two entries: the source video, then the depth video.
            src_video = None
            depth_video = None
            if isinstance(result, (tuple, list)) and len(result) >= 2:
                src_video = self._extract_video_path(result[0])
                depth_video = self._extract_video_path(result[1])

            # Build the result summary shown next to the output videos.
            info = "\n".join(
                [
                    "✅ **Success!**",
                    "",
                    "**Information:**",
                    f"- Preset: {preset_name}",
                    f"- Max Length: {params['max_len']} frames",
                    f"- Target FPS: {params['target_fps']}",
                    f"- Max Resolution: {params['max_res']}",
                    f"- Grayscale: {'Yes' if params['grayscale'] else 'No'}",
                    "",
                    "**Generate Video:**",
                    f"- Source: {'✅' if src_video else '❌'}",
                    f"- Video Depth: {'✅' if depth_video else '❌'}",
                ]
            )

            return src_video, depth_video, info

        except Exception as e:
            # UI boundary: report the failure as text instead of crashing.
            error_msg = (
                f"❌ **Video generate error:**\n\n{str(e)}\n\n"
                "Could be:\n"
                "- Server busy\n"
                "- Video resolution or length too high\n"
                "- Internet connection unstable"
            )
            return None, None, error_msg

    def get_preset_info(self, preset_name: str):
        """Return a Markdown summary of a preset, or a not-found message."""
        if preset_name in self.presets:
            preset = self.presets[preset_name]
            return "\n".join(
                [
                    f"**{preset_name.upper()}:**",
                    f"- {preset['description']}",
                    f"- Max Length: {preset['max_len']} frames",
                    f"- Target FPS: {preset['target_fps']}",
                    f"- Max Resolution: {preset['max_res']}",
                    f"- Grayscale: {'Yes' if preset['grayscale'] else 'No'}",
                ]
            )
        return "Preset not found."


def create_interface():
    """Build and return the Gradio Blocks interface for the app."""
    app = VideoDepthApp()

    with gr.Blocks(title="Video Depth Estimation", theme=gr.themes.Soft()) as interface:
        gr.Markdown(INTRO_MARKDOWN)

        with gr.Row():
            with gr.Column(scale=1):
                # Input
                gr.Markdown("### 📤 Input")
                video_input = gr.Video(
                    label="Upload Video",
                    height=500,
                )

                # Preset selection
                gr.Markdown("### ⚙️ Settings")
                preset_dropdown = gr.Dropdown(
                    choices=list(app.presets.keys()),
                    value=DEFAULT_PRESET,
                    label="Select Preset",
                    info="Select a preset configuration for processing the video.",
                )
                preset_info = gr.Markdown(
                    app.get_preset_info(DEFAULT_PRESET)
                )

                # Custom parameters
                with gr.Accordion("🔧 Custom parameter ", open=False):
                    # Off by default so the selected preset is what gets used.
                    use_custom = gr.Checkbox(
                        label="Use custom parameters",
                        info="When enabled, the values below override the selected preset.",
                        value=False,
                    )
                    custom_max_len = gr.Slider(
                        minimum=50,
                        maximum=1000,
                        step=50,
                        value=500,
                        label="Max Length (frames)",
                        info="Maximum length of the video in frames (default: 500)",
                    )
                    custom_target_fps = gr.Slider(
                        minimum=5,
                        maximum=30,
                        step=1,
                        value=15,
                        label="Target FPS",
                        info="Target frames per second for processing (default: 15)",
                    )
                    custom_max_res = gr.Slider(
                        minimum=480,
                        maximum=1920,
                        step=160,
                        value=1280,
                        label="Max Resolution",
                        info="Maximum resolution for processing (default: 1280)",
                    )
                    custom_grayscale = gr.Checkbox(
                        label="Grayscale",
                        info="Output video in grayscale (default: False)",
                        value=False,
                    )

                # Process button
                process_btn = gr.Button(
                    "🚀 Process Video",
                    variant="primary",
                    size="lg",
                )

            with gr.Column(scale=1):
                # Output
                gr.Markdown("### 📥 Output")
                with gr.Row():
                    video_src_output = gr.Video(
                        label="Video Source",
                        height=200,
                    )
                    video_depth_output = gr.Video(
                        label="Video Depth Estimation",
                        height=200,
                    )
                result_info = gr.Markdown(
                    "### 📊 Result Information"
                )

        # Examples
        gr.Markdown("### 📋 Examples")
        gr.Examples(
            examples=[
                ['videos/davis_rollercoaster.mp4'],
                ['videos/Tokyo-Walk_rgb.mp4'],
                ['videos/4158877-uhd_3840_2160_30fps_rgb.mp4'],
                ['videos/4511004-uhd_3840_2160_24fps_rgb.mp4'],
                ['videos/1753029-hd_1920_1080_30fps.mp4'],
                ['videos/davis_burnout.mp4'],
                ['videos/example_5473765-l.mp4'],
                ['videos/Istanbul-26920.mp4'],
                ['videos/obj_1.mp4'],
                ['videos/sheep_cut1.mp4'],
            ],
            inputs=[video_input],
            label="Video Examples",
        )

        # Event handlers
        preset_dropdown.change(
            fn=app.get_preset_info,
            inputs=[preset_dropdown],
            outputs=[preset_info],
        )

        process_btn.click(
            fn=app.process_video,
            inputs=[
                video_input,
                preset_dropdown,
                custom_max_len,
                custom_target_fps,
                custom_max_res,
                custom_grayscale,
                use_custom,
            ],
            outputs=[video_src_output, video_depth_output, result_info],
        )

    return interface


if __name__ == "__main__":
    # Build and launch the application.
    interface = create_interface()
    interface.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False,
    )