File size: 10,066 Bytes
09704f9
c20a590
 
 
09704f9
c20a590
 
 
 
 
 
 
 
 
725fc19
c20a590
 
 
 
09704f9
c20a590
725fc19
c20a590
 
 
 
09704f9
c20a590
725fc19
c20a590
 
 
 
09704f9
c20a590
725fc19
c20a590
 
 
 
09704f9
c20a590
 
 
 
 
 
 
09704f9
c20a590
09704f9
c20a590
 
 
 
725fc19
09704f9
 
 
 
c20a590
09704f9
c20a590
09704f9
c20a590
 
 
 
 
af74578
c20a590
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
af74578
 
 
 
 
 
 
 
 
 
 
c20a590
 
09704f9
c20a590
09704f9
c20a590
 
 
 
09704f9
c20a590
09704f9
 
 
c20a590
 
af74578
 
c20a590
 
09704f9
af74578
c20a590
 
 
 
 
 
 
 
 
 
 
09704f9
c20a590
09704f9
c20a590
 
 
09704f9
c20a590
 
 
09704f9
c20a590
 
 
 
09704f9
 
 
 
 
 
 
c20a590
 
 
 
 
 
 
 
725fc19
c20a590
 
 
09704f9
c20a590
 
09704f9
 
 
c20a590
 
 
725fc19
c20a590
 
 
09704f9
c20a590
 
 
 
ab40e41
c20a590
09704f9
c20a590
 
 
 
 
ab40e41
c20a590
09704f9
c20a590
 
 
 
 
ab40e41
c20a590
09704f9
c20a590
 
 
09704f9
 
c20a590
 
 
 
09704f9
c20a590
 
 
 
 
 
 
af74578
 
 
09704f9
af74578
 
 
 
 
 
c20a590
 
09704f9
c20a590
 
 
09704f9
c20a590
 
a5b3059
 
 
 
 
 
 
 
 
 
c20a590
 
09704f9
c20a590
 
a5b3059
c20a590
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
af74578
c20a590
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296


import gradio as gr
import os
from typing import Optional
from gradio_client import Client, handle_file


class VideoDepthApp:
    """Client wrapper around the ``depth-anything/Video-Depth-Anything`` Space.

    The instance holds a lazily created ``gradio_client.Client`` plus a table
    of named processing presets. Every public method reports failures through
    its return value (a status string) instead of raising, so the methods can
    be wired directly to Gradio event handlers.
    """

    # Preset used whenever a caller supplies a name that is not in ``presets``.
    DEFAULT_PRESET = "Balanced"

    def __init__(self):
        """Record the Space identifier and define the presets; no network I/O."""
        self.client_url = "depth-anything/Video-Depth-Anything"
        # Created on first use by connect_to_api().
        self.client = None
        self.presets = {
            "Fast": {
                "max_len": 100,
                "target_fps": 10,
                "max_res": 640,
                "grayscale": True,
                "description": "Fast processing with lower quality"
            },
            "Balanced": {
                "max_len": 300,
                "target_fps": 15,
                "max_res": 1024,
                "grayscale": False,
                "description": "Balanced quality and speed"
            },
            "High_quality": {
                "max_len": 500,
                "target_fps": 24,
                "max_res": 1920,
                "grayscale": False,
                "description": "High quality processing (slower)"
            },
            "Mobile": {
                "max_len": 200,
                "target_fps": 12,
                "max_res": 720,
                "grayscale": False,
                "description": "Optimized for mobile devices"
            }
        }

    def connect_to_api(self):
        """Create the API client on first use and report the outcome.

        Returns:
            A ``(success, message)`` tuple. ``success`` is False when building
            the client raised; ``message`` then carries the exception text.
        """
        try:
            if not self.client:
                self.client = Client(self.client_url)
            return True, "Connected to API successfully!"
        except Exception as e:
            return False, f"Connected Error : {str(e)}"

    def _resolve_params(
        self,
        preset,
        custom_max_len,
        custom_target_fps,
        custom_max_res,
        custom_grayscale
    ):
        """Return a copy of the preset with every non-None custom value applied."""
        # An unknown preset name falls back to the default preset instead of
        # raising KeyError.
        base = self.presets.get(preset, self.presets[self.DEFAULT_PRESET])
        # Copy so the shared preset table is never mutated by the overrides.
        params = base.copy()
        overrides = {
            "max_len": custom_max_len,
            "target_fps": custom_target_fps,
            "max_res": custom_max_res,
            "grayscale": custom_grayscale,
        }
        for key, value in overrides.items():
            if value is not None:
                params[key] = value
        return params

    @staticmethod
    def _extract_video_path(item):
        """Return the video path held by one API output item, or None."""
        if isinstance(item, dict):
            return item.get("video")
        # Tolerate a bare file path; an ``in`` test on a string would be a
        # substring check and indexing it with 'video' would raise TypeError.
        if isinstance(item, str) and item:
            return item
        return None

    def process_video(
        self,
        video_file,
        preset: str = "Balanced",
        custom_max_len: Optional[int] = 500,
        custom_target_fps: Optional[int] = 30,
        custom_max_res: Optional[int] = 1080,
        custom_grayscale: Optional[bool] = None
    ):
        """Send a video to the depth-estimation endpoint and collect the outputs.

        Args:
            video_file: Path of the uploaded video, or None when nothing was
                uploaded.
            preset: Name of an entry in ``self.presets``; an unknown name
                falls back to ``DEFAULT_PRESET``.
            custom_max_len: Overrides the preset's ``max_len`` unless None.
            custom_target_fps: Overrides the preset's ``target_fps`` unless None.
            custom_max_res: Overrides the preset's ``max_res`` unless None.
            custom_grayscale: Overrides the preset's ``grayscale`` unless None.

        Note:
            The first three custom parameters have non-None defaults, so a
            call that omits them still overrides the preset's values; pass
            None explicitly to keep the preset value.

        Returns:
            A ``(source_video, depth_video, info)`` tuple. Both videos are None
            on failure and ``info`` is a Markdown status or error message.
        """
        if video_file is None:
            return None, None, "❌Please upload a video file."

        # Broad catch on purpose: this is the UI boundary, and any failure
        # must be shown to the user as a message rather than raised.
        try:
            # Connect to the API.
            success, message = self.connect_to_api()
            if not success:
                return None, None, message

            # Start from the preset, then apply the custom overrides.
            params = self._resolve_params(
                preset,
                custom_max_len,
                custom_target_fps,
                custom_max_res,
                custom_grayscale
            )

            # Run the remote inference.
            result = self.client.predict(
                input_video={"video": handle_file(video_file)},
                max_len=params["max_len"],
                target_fps=params["target_fps"],
                max_res=params["max_res"],
                grayscale=params["grayscale"],
                api_name="/infer_video_depth"
            )

            # The API is expected to return two videos: result[0] is the
            # source video and result[1] is the depth video.
            src_video = None
            depth_video = None
            if isinstance(result, (tuple, list)) and len(result) >= 2:
                src_video = self._extract_video_path(result[0])
                depth_video = self._extract_video_path(result[1])

            # Build the result summary.
            info = f"""
✅ **Success!**

**Information:**
- Preset: {preset}
- Max Length: {params['max_len']} frames
- Target FPS: {params['target_fps']}
- Max Resolution: {params['max_res']}
- Grayscale: {'Yes' if params['grayscale'] else 'No'}

**Generate Video:**
- Source: {'✅' if src_video else '❌'}
- Video Depth: {'✅' if depth_video else '❌'}
"""

            # Return both videos together with the summary.
            return src_video, depth_video, info

        except Exception as e:
            error_msg = f"❌ **Video generate error:**\n\n{str(e)}\nCould be :\n- Server busy\n- Video verry highh\n- Internet connection unstable"
            return None, None, error_msg

    def get_preset_info(self, preset_name: str):
        """Return a Markdown summary of the named preset.

        Returns ``"Preset not found."`` when ``preset_name`` is not a key of
        ``self.presets``.
        """
        if preset_name in self.presets:
            preset = self.presets[preset_name]
            return f"""
**{preset_name.upper()}:**
- {preset['description']}
- Max Length: {preset['max_len']} frames
- Target FPS: {preset['target_fps']}
- Max Resolution: {preset['max_res']}
- Grayscale: {'Yes' if preset['grayscale'] else 'No'}
"""
        return "Preset not found."


def create_interface():
    """Assemble the Gradio Blocks UI for the depth-estimation app and return it.

    The layout is a two-column row (inputs and settings on the left, the two
    result videos and the status text on the right) followed by a gallery of
    example videos. Changing the preset refreshes the preset summary; the
    process button forwards the video, the preset name and all four custom
    controls to ``VideoDepthApp.process_video``.
    """
    depth_app = VideoDepthApp()

    # Sample clips offered below the main row; each one fills the video input.
    example_paths = (
        'videos/davis_rollercoaster.mp4',
        'videos/Tokyo-Walk_rgb.mp4',
        'videos/4158877-uhd_3840_2160_30fps_rgb.mp4',
        'videos/4511004-uhd_3840_2160_24fps_rgb.mp4',
        'videos/1753029-hd_1920_1080_30fps.mp4',
        'videos/davis_burnout.mp4',
        'videos/example_5473765-l.mp4',
        'videos/Istanbul-26920.mp4',
        'videos/obj_1.mp4',
        'videos/sheep_cut1.mp4',
    )

    with gr.Blocks(title="Video Depth Estimation", theme=gr.themes.Soft()) as demo:

        gr.Markdown("""
        # 🎬 Video Depth Estimation
        
        App uses the **Video-Depth-Anything** API to estimate depth from videos.
        ## 🚀 How to use
        1. Upload a video file (MP4 format recommended).
        2. Select a preset configuration or customize parameters.
        3. Click "Process Video" to start depth estimation.
        4. Wait for the processing to complete (may take a few minutes).
        5. View the output videos: original and depth estimation.
        """)

        with gr.Row():
            # Left column: upload, preset choice, custom overrides, run button.
            with gr.Column(scale=1):
                gr.Markdown("### 📤 Input")
                upload = gr.Video(label="Upload Video", height=500)

                gr.Markdown("### ⚙️ Settings")
                preset_choice = gr.Dropdown(
                    label="Select Preset",
                    info="Select a preset configuration for processing the video.",
                    choices=list(app_presets for app_presets in depth_app.presets),
                    value="Balanced",
                )

                preset_summary = gr.Markdown(depth_app.get_preset_info("Balanced"))

                # Collapsed by default; the values are still sent on every run.
                with gr.Accordion("🔧 Custom parameter ", open=False):
                    max_len_slider = gr.Slider(
                        label="Max Length (frames)",
                        info="Maximum length of the video in frames (default: 500)",
                        minimum=50, maximum=1000, step=50, value=500,
                    )
                    fps_slider = gr.Slider(
                        label="Target FPS",
                        info="Target frames per second for processing (default: 15)",
                        minimum=5, maximum=30, step=1, value=15,
                    )
                    max_res_slider = gr.Slider(
                        label="Max Resolution",
                        info="Maximum resolution for processing (default: 1280)",
                        minimum=480, maximum=1920, step=160, value=1280,
                    )
                    grayscale_box = gr.Checkbox(
                        value=False,
                        label="Grayscale",
                        info="Output video in grayscale (default: False)",
                    )

                run_button = gr.Button("🚀 Process Video", variant="primary", size="lg")

            # Right column: the two result videos side by side, then the status.
            with gr.Column(scale=1):
                gr.Markdown("### 📥 Output")

                with gr.Row():
                    source_out = gr.Video(label="Video Source", height=200)
                    depth_out = gr.Video(label="Video Depth Estimation", height=200)

                status_md = gr.Markdown("### 📊 Result Information")

        gr.Markdown("### 📋 Examples")
        gr.Examples(
            examples=[[path] for path in example_paths],
            inputs=[upload],
            label="Video Examples",
        )

        # Event wiring.
        preset_choice.change(
            fn=depth_app.get_preset_info,
            inputs=[preset_choice],
            outputs=[preset_summary],
        )

        run_button.click(
            fn=depth_app.process_video,
            inputs=[
                upload,
                preset_choice,
                max_len_slider,
                fps_slider,
                max_res_slider,
                grayscale_box,
            ],
            outputs=[source_out, depth_out, status_md],
        )

    return demo


if __name__ == "__main__":
    # Build the app, then serve it on 0.0.0.0:7860 with share=False.
    interface = create_interface()
    interface.launch(share=False, server_port=7860, server_name="0.0.0.0")