Tohru127 commited on
Commit
1cfa754
·
verified ·
1 Parent(s): 6fe6662

Delete app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -591
app.py DELETED
@@ -1,591 +0,0 @@
1
- """
2
- Insta360 3D Reconstruction - Hugging Face Space Version
3
- Optimized for longer videos with intelligent frame sampling
4
- """
5
-
6
- import gradio as gr
7
- import numpy as np
8
- import torch
9
- from PIL import Image
10
- from transformers import DPTForDepthEstimation, DPTImageProcessor
11
- import open3d as o3d
12
- import plotly.graph_objects as go
13
- import cv2
14
- import tempfile
15
- from pathlib import Path
16
- import time
17
- import warnings
18
- from scipy import ndimage
19
- from scipy.ndimage import gaussian_filter
20
-
21
- warnings.filterwarnings('ignore')
22
-
23
- # Load model
24
- print("🔄 Loading depth estimation model...")
25
- try:
26
- dpt_processor = DPTImageProcessor.from_pretrained("Intel/dpt-large")
27
- dpt_model = DPTForDepthEstimation.from_pretrained("Intel/dpt-large")
28
- if torch.cuda.is_available():
29
- dpt_model = dpt_model.cuda()
30
- print("✓ GPU detected and enabled")
31
- else:
32
- print("ℹ Running on CPU (slower but works)")
33
- dpt_model.eval()
34
- print("✅ Model loaded successfully!")
35
- except Exception as e:
36
- print(f"❌ Error loading model: {e}")
37
- dpt_processor = None
38
- dpt_model = None
39
-
40
- # Enhanced depth processing functions
41
- def bilateral_filter_depth(depth_map, d=9, sigma_color=75, sigma_space=75):
42
- """Apply bilateral filter to preserve edges while smoothing depth"""
43
- depth_norm = ((depth_map - depth_map.min()) / (depth_map.max() - depth_map.min()) * 255).astype(np.uint8)
44
- filtered = cv2.bilateralFilter(depth_norm, d, sigma_color, sigma_space)
45
- filtered = filtered.astype(np.float32) / 255.0
46
- filtered = filtered * (depth_map.max() - depth_map.min()) + depth_map.min()
47
- return filtered
48
-
49
- def multi_scale_depth_refinement(depth_map, scales=[1.0, 0.5]):
50
- """Process depth at multiple scales and fuse"""
51
- h, w = depth_map.shape
52
- refined_depths = []
53
- weights = []
54
-
55
- for scale in scales:
56
- if scale == 1.0:
57
- scaled_depth = depth_map
58
- else:
59
- new_h, new_w = int(h * scale), int(w * scale)
60
- scaled_depth = cv2.resize(depth_map, (new_w, new_h), interpolation=cv2.INTER_LINEAR)
61
- scaled_depth = cv2.resize(scaled_depth, (w, h), interpolation=cv2.INTER_LINEAR)
62
-
63
- filtered_depth = bilateral_filter_depth(scaled_depth)
64
- refined_depths.append(filtered_depth)
65
- weights.append(scale)
66
-
67
- weights = np.array(weights)
68
- weights = weights / weights.sum()
69
-
70
- final_depth = np.zeros_like(depth_map)
71
- for depth, weight in zip(refined_depths, weights):
72
- final_depth += depth * weight
73
-
74
- return final_depth
75
-
76
- def estimate_depth_confidence(depth_map):
77
- """Estimate confidence map based on depth consistency"""
78
- grad_x = cv2.Sobel(depth_map, cv2.CV_64F, 1, 0, ksize=3)
79
- grad_y = cv2.Sobel(depth_map, cv2.CV_64F, 0, 1, ksize=3)
80
- grad_mag = np.sqrt(grad_x**2 + grad_y**2)
81
- confidence = 1.0 / (1.0 + grad_mag / grad_mag.max())
82
- confidence = gaussian_filter(confidence, sigma=2)
83
- return confidence
84
-
85
- def intelligent_frame_sampling(video_path, target_frames=6, max_frames=100):
86
- """
87
- Intelligently sample frames from video based on motion and content
88
- For long videos, this prevents processing too many similar frames
89
- """
90
- cap = cv2.VideoCapture(video_path)
91
- total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
92
- fps = cap.get(cv2.CAP_PROP_FPS)
93
- duration = total_frames / fps if fps > 0 else 0
94
-
95
- # For very long videos, sample more intelligently
96
- if duration > 120: # 2 minutes
97
- # Sample every N seconds instead of uniformly
98
- sample_interval = max(int(fps * 15), 1) # Every 15 seconds
99
- frame_indices = list(range(0, total_frames, sample_interval))
100
- else:
101
- # Uniform sampling
102
- frame_indices = np.linspace(0, total_frames - 1, min(target_frames, total_frames), dtype=int)
103
-
104
- # Limit to max_frames to prevent timeout
105
- if len(frame_indices) > max_frames:
106
- frame_indices = frame_indices[::len(frame_indices)//max_frames][:max_frames]
107
-
108
- cap.release()
109
- return frame_indices, total_frames, fps, duration
110
-
111
- def extract_frames_smart(video_path, target_frames=6):
112
- """Extract frames intelligently based on video length"""
113
- frame_indices, total_frames, fps, duration = intelligent_frame_sampling(video_path, target_frames)
114
-
115
- cap = cv2.VideoCapture(video_path)
116
- frames = []
117
-
118
- for idx in frame_indices:
119
- cap.set(cv2.CAP_PROP_POS_FRAMES, idx)
120
- ret, frame = cap.read()
121
- if ret:
122
- frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
123
- frames.append(frame_rgb)
124
-
125
- cap.release()
126
-
127
- info = {
128
- 'total_frames': total_frames,
129
- 'extracted_frames': len(frames),
130
- 'fps': fps,
131
- 'duration': duration,
132
- 'frame_indices': frame_indices
133
- }
134
-
135
- return frames, info
136
-
137
- def equirectangular_to_perspective(equirect_img, fov=90, theta=0, phi=0, height=512, width=512):
138
- """Convert equirectangular image to perspective view"""
139
- equ_h, equ_w = equirect_img.shape[:2]
140
-
141
- y, x = np.meshgrid(np.arange(height), np.arange(width), indexing='ij')
142
- x_norm = (2.0 * x / width - 1.0)
143
- y_norm = (2.0 * y / height - 1.0)
144
-
145
- fov_rad = np.radians(fov)
146
- focal = 0.5 * width / np.tan(0.5 * fov_rad)
147
-
148
- z_cam = focal
149
- x_cam = x_norm * width / 2.0
150
- y_cam = y_norm * height / 2.0
151
-
152
- norm = np.sqrt(x_cam**2 + y_cam**2 + z_cam**2)
153
- x_cam /= norm
154
- y_cam /= norm
155
- z_cam /= norm
156
-
157
- theta_rad = np.radians(theta)
158
- phi_rad = np.radians(phi)
159
-
160
- rot_y = np.array([
161
- [np.cos(theta_rad), 0, np.sin(theta_rad)],
162
- [0, 1, 0],
163
- [-np.sin(theta_rad), 0, np.cos(theta_rad)]
164
- ])
165
-
166
- rot_x = np.array([
167
- [1, 0, 0],
168
- [0, np.cos(phi_rad), -np.sin(phi_rad)],
169
- [0, np.sin(phi_rad), np.cos(phi_rad)]
170
- ])
171
-
172
- rot = rot_y @ rot_x
173
- rays = np.stack([x_cam, y_cam, z_cam], axis=-1)
174
- rays_rot = rays @ rot.T
175
-
176
- x_rot = rays_rot[..., 0]
177
- y_rot = rays_rot[..., 1]
178
- z_rot = rays_rot[..., 2]
179
-
180
- lon = np.arctan2(x_rot, z_rot)
181
- lat = np.arcsin(np.clip(y_rot, -1, 1))
182
-
183
- equ_x = (lon / np.pi + 1) * 0.5 * (equ_w - 1)
184
- equ_y = (0.5 - lat / np.pi) * (equ_h - 1)
185
-
186
- equ_x = np.clip(equ_x, 0, equ_w - 1)
187
- equ_y = np.clip(equ_y, 0, equ_h - 1)
188
-
189
- perspective_img = np.zeros((height, width, equirect_img.shape[2]), dtype=equirect_img.dtype)
190
-
191
- for c in range(equirect_img.shape[2]):
192
- perspective_img[..., c] = ndimage.map_coordinates(
193
- equirect_img[..., c],
194
- [equ_y, equ_x],
195
- order=1,
196
- mode='wrap'
197
- )
198
-
199
- return perspective_img
200
-
201
- def estimate_depth_enhanced(image, processor, model):
202
- """Enhanced depth estimation with multi-scale processing"""
203
- inputs = processor(images=image, return_tensors="pt")
204
-
205
- if torch.cuda.is_available():
206
- inputs = {k: v.cuda() for k, v in inputs.items()}
207
-
208
- with torch.no_grad():
209
- outputs = model(**inputs)
210
- predicted_depth = outputs.predicted_depth
211
-
212
- prediction = torch.nn.functional.interpolate(
213
- predicted_depth.unsqueeze(1),
214
- size=image.shape[:2],
215
- mode="bicubic",
216
- align_corners=False,
217
- )
218
-
219
- depth_map = prediction.squeeze().cpu().numpy()
220
- depth_map = multi_scale_depth_refinement(depth_map)
221
- confidence = estimate_depth_confidence(depth_map)
222
-
223
- return depth_map, confidence
224
-
225
- def depth_to_point_cloud_enhanced(depth, color, confidence, camera_params):
226
- """Enhanced point cloud generation with confidence weighting"""
227
- height, width = depth.shape
228
- fx, fy = camera_params['fx'], camera_params['fy']
229
- cx, cy = camera_params['cx'], camera_params['cy']
230
- R_matrix = camera_params.get('R', np.eye(3))
231
- t_vector = camera_params.get('t', np.zeros(3))
232
-
233
- u, v = np.meshgrid(np.arange(width), np.arange(height))
234
-
235
- z = depth
236
- x = (u - cx) * z / fx
237
- y = (v - cy) * z / fy
238
-
239
- points_cam = np.stack([x, y, z], axis=-1)
240
- points_world = points_cam @ R_matrix.T + t_vector
241
-
242
- conf_threshold = np.percentile(confidence, 30)
243
- valid_mask = confidence > conf_threshold
244
-
245
- points = points_world[valid_mask]
246
- colors = color[valid_mask]
247
-
248
- return points, colors
249
-
250
- def create_realistic_mesh(points, colors, progress_callback):
251
- """Create high-quality mesh using Poisson reconstruction"""
252
- progress_callback("🎨 Creating realistic mesh...")
253
-
254
- pcd = o3d.geometry.PointCloud()
255
- pcd.points = o3d.utility.Vector3dVector(points)
256
- pcd.colors = o3d.utility.Vector3dVector(colors / 255.0)
257
-
258
- progress_callback(" • Removing outliers...")
259
- pcd, _ = pcd.remove_statistical_outlier(nb_neighbors=20, std_ratio=2.0)
260
-
261
- progress_callback(" • Estimating normals...")
262
- pcd.estimate_normals(
263
- search_param=o3d.geometry.KDTreeSearchParamHybrid(radius=0.1, max_nn=30)
264
- )
265
- pcd.orient_normals_consistent_tangent_plane(k=15)
266
-
267
- progress_callback(" • Performing Poisson reconstruction...")
268
- mesh, densities = o3d.geometry.TriangleMesh.create_from_point_cloud_poisson(
269
- pcd, depth=9, width=0, scale=1.1, linear_fit=False
270
- )
271
-
272
- progress_callback(" • Cleaning mesh...")
273
- densities = np.asarray(densities)
274
- density_threshold = np.percentile(densities, 10)
275
- vertices_to_remove = densities < density_threshold
276
- mesh.remove_vertices_by_mask(vertices_to_remove)
277
-
278
- mesh = mesh.filter_smooth_simple(number_of_iterations=5)
279
- mesh.compute_vertex_normals()
280
-
281
- # Transfer colors
282
- mesh_points = np.asarray(mesh.vertices)
283
- pcd_tree = o3d.geometry.KDTreeFlann(pcd)
284
- pcd_colors = np.asarray(pcd.colors)
285
-
286
- mesh_colors = np.zeros_like(mesh_points)
287
- for i, point in enumerate(mesh_points):
288
- [_, idx, _] = pcd_tree.search_knn_vector_3d(point, 1)
289
- mesh_colors[i] = pcd_colors[idx[0]]
290
-
291
- mesh.vertex_colors = o3d.utility.Vector3dVector(mesh_colors)
292
-
293
- return mesh
294
-
295
- def process_video(video_path, num_frames, num_views, quality, progress=gr.Progress()):
296
- """Main processing function optimized for Hugging Face"""
297
- if dpt_model is None:
298
- return None, None, None, "❌ Model not loaded properly", None
299
-
300
- if video_path is None:
301
- return None, None, None, "❌ Please upload a video first", None
302
-
303
- status = []
304
- start_time = time.time()
305
-
306
- def update_status(msg):
307
- status.append(msg)
308
- progress(0.1, desc=msg)
309
- return "\n".join(status)
310
-
311
- try:
312
- status_text = update_status("="*60)
313
- status_text = update_status("🎬 STARTING REALISTIC 3D RECONSTRUCTION")
314
- status_text = update_status("="*60)
315
-
316
- # Check video
317
- cap = cv2.VideoCapture(video_path)
318
- if not cap.isOpened():
319
- return None, None, None, "❌ Cannot open video file", None
320
-
321
- total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
322
- fps = cap.get(cv2.CAP_PROP_FPS)
323
- duration = total_frames / fps if fps > 0 else 0
324
- cap.release()
325
-
326
- status_text = update_status(f"\n📹 Video Info:")
327
- status_text = update_status(f" • Duration: {duration:.1f}s ({total_frames} frames)")
328
- status_text = update_status(f" • FPS: {fps:.1f}")
329
-
330
- # Warn about long videos
331
- if duration > 300:
332
- status_text = update_status(f"\n⚠️ WARNING: Very long video ({duration:.0f}s)")
333
- status_text = update_status(f" • Processing will be slower")
334
- status_text = update_status(f" • Consider using a shorter clip")
335
-
336
- # Extract frames intelligently
337
- status_text = update_status(f"\n📹 Extracting frames intelligently...")
338
- frames, video_info = extract_frames_smart(video_path, num_frames)
339
-
340
- if not frames:
341
- return None, None, None, "❌ Failed to extract frames", None
342
-
343
- status_text = update_status(f"✅ Extracted {len(frames)} frames")
344
- status_text = update_status(f" • Sampling strategy: {'Intelligent (long video)' if duration > 120 else 'Uniform'}")
345
-
346
- preview_img = Image.fromarray(frames[0])
347
-
348
- # Quality settings
349
- quality_configs = {
350
- 'low': {'resolution': 384, 'fov': 90},
351
- 'medium': {'resolution': 512, 'fov': 90},
352
- 'high': {'resolution': 640, 'fov': 85}
353
- }
354
- config = quality_configs[quality]
355
-
356
- status_text = update_status(f"\n⚙️ Settings: {len(frames)} frames × {num_views} views × {config['resolution']}px")
357
-
358
- # Process frames
359
- all_points = []
360
- all_colors = []
361
-
362
- total_views = len(frames) * num_views
363
- processed_views = 0
364
-
365
- for frame_idx, frame in enumerate(frames):
366
- progress((frame_idx + 1) / len(frames), desc=f"Processing frame {frame_idx+1}/{len(frames)}")
367
-
368
- status_text = update_status(f"\n📐 Frame {frame_idx + 1}/{len(frames)}:")
369
-
370
- # Generate view angles
371
- view_angles = [(360.0 / num_views * i, 0) for i in range(num_views)]
372
-
373
- frame_points = []
374
- frame_colors = []
375
-
376
- for view_idx, (theta, phi) in enumerate(view_angles):
377
- # Convert to perspective
378
- persp_img = equirectangular_to_perspective(
379
- frame, fov=config['fov'], theta=theta, phi=phi,
380
- height=config['resolution'], width=config['resolution']
381
- )
382
-
383
- # Depth estimation
384
- depth_map, confidence = estimate_depth_enhanced(persp_img, dpt_processor, dpt_model)
385
-
386
- # Camera params
387
- focal = config['resolution'] / (2 * np.tan(np.radians(config['fov']) / 2))
388
- from scipy.spatial.transform import Rotation as R
389
- rot = R.from_euler('yz', [theta, phi], degrees=True)
390
- R_matrix = rot.as_matrix()
391
-
392
- camera_params = {
393
- 'fx': focal, 'fy': focal,
394
- 'cx': config['resolution'] / 2,
395
- 'cy': config['resolution'] / 2,
396
- 'R': R_matrix,
397
- 't': np.zeros(3)
398
- }
399
-
400
- # Generate points
401
- points, colors = depth_to_point_cloud_enhanced(
402
- depth_map, persp_img, confidence, camera_params
403
- )
404
-
405
- frame_points.append(points)
406
- frame_colors.append(colors)
407
-
408
- processed_views += 1
409
-
410
- if (view_idx + 1) % 2 == 0:
411
- status_text = update_status(f" • Processed {view_idx + 1}/{num_views} views")
412
-
413
- all_points.append(np.vstack(frame_points))
414
- all_colors.append(np.vstack(frame_colors))
415
-
416
- # Combine all
417
- status_text = update_status(f"\n🔗 Combining {len(frames)} frames...")
418
- final_points = np.vstack(all_points)
419
- final_colors = np.vstack(all_colors)
420
-
421
- status_text = update_status(f"✅ Total points: {len(final_points):,}")
422
-
423
- # Filter
424
- status_text = update_status(f"\n🎯 Filtering and cleaning...")
425
-
426
- # Remove duplicates
427
- unique_indices = np.unique(final_points, axis=0, return_index=True)[1]
428
- final_points = final_points[unique_indices]
429
- final_colors = final_colors[unique_indices]
430
-
431
- # Statistical outlier removal
432
- pcd_temp = o3d.geometry.PointCloud()
433
- pcd_temp.points = o3d.utility.Vector3dVector(final_points)
434
- pcd_temp, inlier_indices = pcd_temp.remove_statistical_outlier(nb_neighbors=30, std_ratio=2.0)
435
- final_points = final_points[inlier_indices]
436
- final_colors = final_colors[inlier_indices]
437
-
438
- status_text = update_status(f"✅ Filtered to {len(final_points):,} points")
439
-
440
- # Downsample if huge
441
- if len(final_points) > 500000:
442
- keep_ratio = 500000 / len(final_points)
443
- keep_indices = np.random.choice(len(final_points), size=int(len(final_points) * keep_ratio), replace=False)
444
- final_points = final_points[keep_indices]
445
- final_colors = final_colors[keep_indices]
446
- status_text = update_status(f" • Downsampled to {len(final_points):,} points")
447
-
448
- # Visualization
449
- status_text = update_status(f"\n📊 Creating 3D visualization...")
450
-
451
- vis_sample = min(50000, len(final_points))
452
- vis_indices = np.random.choice(len(final_points), vis_sample, replace=False)
453
- vis_points = final_points[vis_indices]
454
- vis_colors = final_colors[vis_indices]
455
-
456
- fig = go.Figure(data=[go.Scatter3d(
457
- x=vis_points[:, 0], y=vis_points[:, 1], z=vis_points[:, 2],
458
- mode='markers',
459
- marker=dict(
460
- size=2,
461
- color=[f'rgb({int(c[0])},{int(c[1])},{int(c[2])})' for c in vis_colors],
462
- opacity=0.8
463
- )
464
- )])
465
-
466
- fig.update_layout(
467
- title=f"3D Reconstruction ({len(final_points):,} points)",
468
- scene=dict(xaxis_title='X', yaxis_title='Y', zaxis_title='Z', aspectmode='data'),
469
- height=700
470
- )
471
-
472
- # Save point cloud
473
- status_text = update_status(f"\n💾 Saving outputs...")
474
- pcd = o3d.geometry.PointCloud()
475
- pcd.points = o3d.utility.Vector3dVector(final_points)
476
- pcd.colors = o3d.utility.Vector3dVector(final_colors / 255.0)
477
- pcd.estimate_normals(search_param=o3d.geometry.KDTreeSearchParamHybrid(radius=0.1, max_nn=30))
478
-
479
- ply_path = Path(tempfile.mkdtemp()) / "reconstruction.ply"
480
- o3d.io.write_point_cloud(str(ply_path), pcd)
481
- status_text = update_status(f"✅ Point cloud saved")
482
-
483
- # Create mesh
484
- obj_path = None
485
- elapsed = time.time() - start_time
486
- if elapsed < 180: # Only if under 3 minutes so far
487
- try:
488
- def mesh_progress(msg):
489
- nonlocal status_text
490
- status_text = update_status(msg)
491
-
492
- mesh = create_realistic_mesh(final_points, final_colors, mesh_progress)
493
- obj_path = Path(tempfile.mkdtemp()) / "reconstruction.obj"
494
- o3d.io.write_triangle_mesh(str(obj_path), mesh)
495
- status_text = update_status(f"✅ Mesh created: {len(mesh.vertices):,} vertices")
496
- except Exception as e:
497
- status_text = update_status(f"⚠️ Mesh generation failed: {str(e)}")
498
- else:
499
- status_text = update_status("⚠️ Mesh skipped (time limit)")
500
-
501
- # Final stats
502
- elapsed = time.time() - start_time
503
- status_text = update_status(f"\n{'='*60}")
504
- status_text = update_status(f"🎉 SUCCESS! Completed in {elapsed:.1f}s")
505
- status_text = update_status(f"📊 Final: {len(final_points):,} points")
506
- status_text = update_status(f"{'='*60}")
507
-
508
- return fig, ply_path, obj_path, status_text, preview_img
509
-
510
- except Exception as e:
511
- import traceback
512
- error_msg = f"❌ ERROR: {str(e)}\n\n{traceback.format_exc()}"
513
- return None, None, None, error_msg, None
514
-
515
- # Create Gradio interface
516
- with gr.Blocks(title="Insta360 3D Reconstruction", theme=gr.themes.Soft()) as demo:
517
- gr.Markdown("""
518
- # 🌍 Insta360 3D Reconstruction
519
- ### Transform 360° videos into realistic 3D models
520
-
521
- **Optimized for videos of any length** - Uses intelligent frame sampling for longer videos
522
- """)
523
-
524
- gr.Markdown("""
525
- ### ⚠️ For 8-Minute Videos:
526
- - Processing will take 10-15 minutes
527
- - Uses intelligent frame sampling (every 15 seconds)
528
- - Recommended: Use lower quality settings first
529
- - Consider trimming to 1-2 minutes for faster results
530
- """)
531
-
532
- with gr.Row():
533
- with gr.Column():
534
- video_input = gr.Video(label="Upload 360° Video")
535
-
536
- with gr.Accordion("⚙️ Settings", open=True):
537
- num_frames = gr.Slider(
538
- minimum=4, maximum=12, value=6, step=2,
539
- label="Target Frames (auto-adjusted for long videos)"
540
- )
541
- num_views = gr.Slider(
542
- minimum=4, maximum=8, value=6, step=2,
543
- label="Views per Frame"
544
- )
545
- quality = gr.Radio(
546
- choices=['low', 'medium', 'high'],
547
- value='medium',
548
- label="Quality (Start with 'medium' for 8-min videos)"
549
- )
550
-
551
- process_btn = gr.Button("🚀 Start Reconstruction", variant="primary", size="lg")
552
-
553
- with gr.Column():
554
- status_output = gr.Textbox(label="Processing Status", lines=20, max_lines=25)
555
- preview_output = gr.Image(label="Video Preview")
556
-
557
- with gr.Row():
558
- visualization_output = gr.Plot(label="3D Visualization")
559
-
560
- with gr.Row():
561
- ply_output = gr.File(label="📦 Download Point Cloud (.ply)")
562
- obj_output = gr.File(label="📦 Download Mesh (.obj)")
563
-
564
- process_btn.click(
565
- fn=process_video,
566
- inputs=[video_input, num_frames, num_views, quality],
567
- outputs=[visualization_output, ply_output, obj_output, status_output, preview_output]
568
- )
569
-
570
- gr.Markdown("""
571
- ### 💡 Tips for Best Results
572
-
573
- **For 8-minute videos:**
574
- - Start with Medium quality (faster)
575
- - Uses intelligent sampling (~ every 15 seconds)
576
- - Total processing: 10-15 minutes
577
- - Or trim to 1-2 minutes for 3-5 min processing
578
-
579
- **Quality Guide:**
580
- - **Low**: 2-4 min (quick preview)
581
- - **Medium**: 5-10 min (good balance)
582
- - **High**: 10-20 min (best quality)
583
-
584
- **Video Requirements:**
585
- - Format: MP4 (equirectangular 360°)
586
- - Aspect Ratio: 2:1
587
- - Any length (optimized for long videos)
588
- """)
589
-
590
- if __name__ == "__main__":
591
- demo.launch()