Tohru127 commited on
Commit
f271a29
Β·
verified Β·
1 Parent(s): a11860d

Upload 2 files

Browse files
Files changed (2) hide show
  1. app-8.py +593 -0
  2. requirements-6.txt +10 -0
app-8.py ADDED
@@ -0,0 +1,593 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Insta360 3D Reconstruction - Hugging Face Space Version
3
+ Optimized for longer videos with intelligent frame sampling
4
+ """
5
+
6
+ import gradio as gr
7
+ import numpy as np
8
+ import torch
9
+ from PIL import Image
10
+ from transformers import DPTForDepthEstimation, DPTImageProcessor
11
+ import open3d as o3d
12
+ import plotly.graph_objects as go
13
+ import cv2
14
+ import tempfile
15
+ from pathlib import Path
16
+ import time
17
+ import warnings
18
+ from scipy import ndimage
19
+ from scipy.ndimage import gaussian_filter
20
+
21
+ warnings.filterwarnings('ignore')
22
+
23
+ # Load model
24
+ print("πŸ”„ Loading depth estimation model...")
25
+ try:
26
+ dpt_processor = DPTImageProcessor.from_pretrained("Intel/dpt-large")
27
+ dpt_model = DPTForDepthEstimation.from_pretrained("Intel/dpt-large")
28
+ if torch.cuda.is_available():
29
+ dpt_model = dpt_model.cuda()
30
+ print("βœ“ GPU detected and enabled")
31
+ else:
32
+ print("β„Ή Running on CPU (slower but works)")
33
+ dpt_model.eval()
34
+ print("βœ… Model loaded successfully!")
35
+ except Exception as e:
36
+ print(f"❌ Error loading model: {e}")
37
+ dpt_processor = None
38
+ dpt_model = None
39
+
40
+ # Enhanced depth processing functions
41
+ def bilateral_filter_depth(depth_map, d=9, sigma_color=75, sigma_space=75):
42
+ """Apply bilateral filter to preserve edges while smoothing depth"""
43
+ depth_norm = ((depth_map - depth_map.min()) / (depth_map.max() - depth_map.min()) * 255).astype(np.uint8)
44
+ filtered = cv2.bilateralFilter(depth_norm, d, sigma_color, sigma_space)
45
+ filtered = filtered.astype(np.float32) / 255.0
46
+ filtered = filtered * (depth_map.max() - depth_map.min()) + depth_map.min()
47
+ return filtered
48
+
49
+ def multi_scale_depth_refinement(depth_map, scales=[1.0, 0.5]):
50
+ """Process depth at multiple scales and fuse"""
51
+ h, w = depth_map.shape
52
+ refined_depths = []
53
+ weights = []
54
+
55
+ for scale in scales:
56
+ if scale == 1.0:
57
+ scaled_depth = depth_map
58
+ else:
59
+ new_h, new_w = int(h * scale), int(w * scale)
60
+ scaled_depth = cv2.resize(depth_map, (new_w, new_h), interpolation=cv2.INTER_LINEAR)
61
+ scaled_depth = cv2.resize(scaled_depth, (w, h), interpolation=cv2.INTER_LINEAR)
62
+
63
+ filtered_depth = bilateral_filter_depth(scaled_depth)
64
+ refined_depths.append(filtered_depth)
65
+ weights.append(scale)
66
+
67
+ weights = np.array(weights)
68
+ weights = weights / weights.sum()
69
+
70
+ final_depth = np.zeros_like(depth_map)
71
+ for depth, weight in zip(refined_depths, weights):
72
+ final_depth += depth * weight
73
+
74
+ return final_depth
75
+
76
+ def estimate_depth_confidence(depth_map):
77
+ """Estimate confidence map based on depth consistency"""
78
+ grad_x = cv2.Sobel(depth_map, cv2.CV_64F, 1, 0, ksize=3)
79
+ grad_y = cv2.Sobel(depth_map, cv2.CV_64F, 0, 1, ksize=3)
80
+ grad_mag = np.sqrt(grad_x**2 + grad_y**2)
81
+ confidence = 1.0 / (1.0 + grad_mag / grad_mag.max())
82
+ confidence = gaussian_filter(confidence, sigma=2)
83
+ return confidence
84
+
85
+ def intelligent_frame_sampling(video_path, target_frames=6, max_frames=100):
86
+ """
87
+ Intelligently sample frames from video based on motion and content
88
+ For long videos, this prevents processing too many similar frames
89
+ """
90
+ cap = cv2.VideoCapture(video_path)
91
+ total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
92
+ fps = cap.get(cv2.CAP_PROP_FPS)
93
+ duration = total_frames / fps if fps > 0 else 0
94
+
95
+ # For very long videos, sample more intelligently
96
+ if duration > 120: # 2 minutes
97
+ # Sample every N seconds instead of uniformly
98
+ sample_interval = max(int(fps * 15), 1) # Every 15 seconds
99
+ frame_indices = list(range(0, total_frames, sample_interval))
100
+ else:
101
+ # Uniform sampling
102
+ frame_indices = np.linspace(0, total_frames - 1, min(target_frames, total_frames), dtype=int)
103
+
104
+ # Limit to max_frames to prevent timeout
105
+ if len(frame_indices) > max_frames:
106
+ frame_indices = frame_indices[::len(frame_indices)//max_frames][:max_frames]
107
+
108
+ cap.release()
109
+ return frame_indices, total_frames, fps, duration
110
+
111
+ def extract_frames_smart(video_path, target_frames=6):
112
+ """Extract frames intelligently based on video length"""
113
+ frame_indices, total_frames, fps, duration = intelligent_frame_sampling(video_path, target_frames)
114
+
115
+ cap = cv2.VideoCapture(video_path)
116
+ frames = []
117
+
118
+ for idx in frame_indices:
119
+ cap.set(cv2.CAP_PROP_POS_FRAMES, idx)
120
+ ret, frame = cap.read()
121
+ if ret:
122
+ frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
123
+ frames.append(frame_rgb)
124
+
125
+ cap.release()
126
+
127
+ info = {
128
+ 'total_frames': total_frames,
129
+ 'extracted_frames': len(frames),
130
+ 'fps': fps,
131
+ 'duration': duration,
132
+ 'frame_indices': frame_indices
133
+ }
134
+
135
+ return frames, info
136
+
137
+ def equirectangular_to_perspective(equirect_img, fov=90, theta=0, phi=0, height=512, width=512):
138
+ """Convert equirectangular image to perspective view"""
139
+ equ_h, equ_w = equirect_img.shape[:2]
140
+
141
+ y, x = np.meshgrid(np.arange(height), np.arange(width), indexing='ij')
142
+ x_norm = (2.0 * x / width - 1.0)
143
+ y_norm = (2.0 * y / height - 1.0)
144
+
145
+ fov_rad = np.radians(fov)
146
+ focal = 0.5 * width / np.tan(0.5 * fov_rad)
147
+
148
+ z_cam = focal
149
+ x_cam = x_norm * width / 2.0
150
+ y_cam = y_norm * height / 2.0
151
+
152
+ norm = np.sqrt(x_cam**2 + y_cam**2 + z_cam**2)
153
+ x_cam /= norm
154
+ y_cam /= norm
155
+ z_cam /= norm
156
+
157
+ theta_rad = np.radians(theta)
158
+ phi_rad = np.radians(phi)
159
+
160
+ rot_y = np.array([
161
+ [np.cos(theta_rad), 0, np.sin(theta_rad)],
162
+ [0, 1, 0],
163
+ [-np.sin(theta_rad), 0, np.cos(theta_rad)]
164
+ ])
165
+
166
+ rot_x = np.array([
167
+ [1, 0, 0],
168
+ [0, np.cos(phi_rad), -np.sin(phi_rad)],
169
+ [0, np.sin(phi_rad), np.cos(phi_rad)]
170
+ ])
171
+
172
+ rot = rot_y @ rot_x
173
+ rays = np.stack([x_cam, y_cam, z_cam], axis=-1)
174
+ rays_rot = rays @ rot.T
175
+
176
+ x_rot = rays_rot[..., 0]
177
+ y_rot = rays_rot[..., 1]
178
+ z_rot = rays_rot[..., 2]
179
+
180
+ lon = np.arctan2(x_rot, z_rot)
181
+ lat = np.arcsin(np.clip(y_rot, -1, 1))
182
+
183
+ equ_x = (lon / np.pi + 1) * 0.5 * (equ_w - 1)
184
+ equ_y = (0.5 - lat / np.pi) * (equ_h - 1)
185
+
186
+ equ_x = np.clip(equ_x, 0, equ_w - 1)
187
+ equ_y = np.clip(equ_y, 0, equ_h - 1)
188
+
189
+ perspective_img = np.zeros((height, width, equirect_img.shape[2]), dtype=equirect_img.dtype)
190
+
191
+ for c in range(equirect_img.shape[2]):
192
+ perspective_img[..., c] = ndimage.map_coordinates(
193
+ equirect_img[..., c],
194
+ [equ_y, equ_x],
195
+ order=1,
196
+ mode='wrap'
197
+ )
198
+
199
+ return perspective_img
200
+
201
+ def estimate_depth_enhanced(image, processor, model):
202
+ """Enhanced depth estimation with multi-scale processing"""
203
+ inputs = processor(images=image, return_tensors="pt")
204
+
205
+ if torch.cuda.is_available():
206
+ inputs = {k: v.cuda() for k, v in inputs.items()}
207
+
208
+ with torch.no_grad():
209
+ outputs = model(**inputs)
210
+ predicted_depth = outputs.predicted_depth
211
+
212
+ prediction = torch.nn.functional.interpolate(
213
+ predicted_depth.unsqueeze(1),
214
+ size=image.shape[:2],
215
+ mode="bicubic",
216
+ align_corners=False,
217
+ )
218
+
219
+ depth_map = prediction.squeeze().cpu().numpy()
220
+ depth_map = multi_scale_depth_refinement(depth_map)
221
+ confidence = estimate_depth_confidence(depth_map)
222
+
223
+ return depth_map, confidence
224
+
225
+ def depth_to_point_cloud_enhanced(depth, color, confidence, camera_params):
226
+ """Enhanced point cloud generation with confidence weighting"""
227
+ height, width = depth.shape
228
+ fx, fy = camera_params['fx'], camera_params['fy']
229
+ cx, cy = camera_params['cx'], camera_params['cy']
230
+ R_matrix = camera_params.get('R', np.eye(3))
231
+ t_vector = camera_params.get('t', np.zeros(3))
232
+
233
+ u, v = np.meshgrid(np.arange(width), np.arange(height))
234
+
235
+ z = depth
236
+ x = (u - cx) * z / fx
237
+ y = (v - cy) * z / fy
238
+
239
+ points_cam = np.stack([x, y, z], axis=-1)
240
+ points_world = points_cam @ R_matrix.T + t_vector
241
+
242
+ conf_threshold = np.percentile(confidence, 30)
243
+ valid_mask = confidence > conf_threshold
244
+
245
+ points = points_world[valid_mask]
246
+ colors = color[valid_mask]
247
+
248
+ return points, colors
249
+
250
+ def create_realistic_mesh(points, colors, progress_callback):
251
+ """Create high-quality mesh using Poisson reconstruction"""
252
+ progress_callback("🎨 Creating realistic mesh...")
253
+
254
+ pcd = o3d.geometry.PointCloud()
255
+ pcd.points = o3d.utility.Vector3dVector(points)
256
+ pcd.colors = o3d.utility.Vector3dVector(colors / 255.0)
257
+
258
+ progress_callback(" β€’ Removing outliers...")
259
+ pcd, _ = pcd.remove_statistical_outlier(nb_neighbors=20, std_ratio=2.0)
260
+
261
+ progress_callback(" β€’ Estimating normals...")
262
+ pcd.estimate_normals(
263
+ search_param=o3d.geometry.KDTreeSearchParamHybrid(radius=0.1, max_nn=30)
264
+ )
265
+ pcd.orient_normals_consistent_tangent_plane(k=15)
266
+
267
+ progress_callback(" β€’ Performing Poisson reconstruction...")
268
+ mesh, densities = o3d.geometry.TriangleMesh.create_from_point_cloud_poisson(
269
+ pcd, depth=9, width=0, scale=1.1, linear_fit=False
270
+ )
271
+
272
+ progress_callback(" β€’ Cleaning mesh...")
273
+ densities = np.asarray(densities)
274
+ density_threshold = np.percentile(densities, 10)
275
+ vertices_to_remove = densities < density_threshold
276
+ mesh.remove_vertices_by_mask(vertices_to_remove)
277
+
278
+ mesh = mesh.filter_smooth_simple(number_of_iterations=5)
279
+ mesh.compute_vertex_normals()
280
+
281
+ # Transfer colors
282
+ mesh_points = np.asarray(mesh.vertices)
283
+ pcd_tree = o3d.geometry.KDTreeFlann(pcd)
284
+ pcd_colors = np.asarray(pcd.colors)
285
+
286
+ mesh_colors = np.zeros_like(mesh_points)
287
+ for i, point in enumerate(mesh_points):
288
+ [_, idx, _] = pcd_tree.search_knn_vector_3d(point, 1)
289
+ mesh_colors[i] = pcd_colors[idx[0]]
290
+
291
+ mesh.vertex_colors = o3d.utility.Vector3dVector(mesh_colors)
292
+
293
+ return mesh
294
+
295
+ def process_video(video_path, num_frames, num_views, quality, progress=gr.Progress()):
296
+ """Main processing function optimized for Hugging Face"""
297
+ if dpt_model is None:
298
+ return None, None, None, "❌ Model not loaded properly", None
299
+
300
+ if video_path is None:
301
+ return None, None, None, "❌ Please upload a video first", None
302
+
303
+ status = []
304
+ start_time = time.time()
305
+
306
+ def update_status(msg):
307
+ status.append(msg)
308
+ progress(0.1, desc=msg)
309
+ return "\n".join(status)
310
+
311
+ try:
312
+ status_text = update_status("="*60)
313
+ status_text = update_status("🎬 STARTING REALISTIC 3D RECONSTRUCTION")
314
+ status_text = update_status("="*60)
315
+
316
+ # Check video
317
+ cap = cv2.VideoCapture(video_path)
318
+ if not cap.isOpened():
319
+ return None, None, None, "❌ Cannot open video file", None
320
+
321
+ total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
322
+ fps = cap.get(cv2.CAP_PROP_FPS)
323
+ duration = total_frames / fps if fps > 0 else 0
324
+ cap.release()
325
+
326
+ status_text = update_status(f"\nπŸ“Ή Video Info:")
327
+ status_text = update_status(f" β€’ Duration: {duration:.1f}s ({total_frames} frames)")
328
+ status_text = update_status(f" β€’ FPS: {fps:.1f}")
329
+
330
+ # Warn about long videos
331
+ if duration > 300:
332
+ status_text = update_status(f"\n⚠️ WARNING: Very long video ({duration:.0f}s)")
333
+ status_text = update_status(f" β€’ Processing will be slower")
334
+ status_text = update_status(f" β€’ Consider using a shorter clip")
335
+
336
+ # Extract frames intelligently
337
+ status_text = update_status(f"\nπŸ“Ή Extracting frames intelligently...")
338
+ frames, video_info = extract_frames_smart(video_path, num_frames)
339
+
340
+ if not frames:
341
+ return None, None, None, "❌ Failed to extract frames", None
342
+
343
+ status_text = update_status(f"βœ… Extracted {len(frames)} frames")
344
+ status_text = update_status(f" β€’ Sampling strategy: {'Intelligent (long video)' if duration > 120 else 'Uniform'}")
345
+
346
+ preview_img = Image.fromarray(frames[0])
347
+
348
+ # Quality settings
349
+ quality_configs = {
350
+ 'low': {'resolution': 384, 'fov': 90},
351
+ 'medium': {'resolution': 512, 'fov': 90},
352
+ 'high': {'resolution': 640, 'fov': 85}
353
+ }
354
+ config = quality_configs[quality]
355
+
356
+ status_text = update_status(f"\nβš™οΈ Settings: {len(frames)} frames Γ— {num_views} views Γ— {config['resolution']}px")
357
+
358
+ # Process frames
359
+ all_points = []
360
+ all_colors = []
361
+
362
+ total_views = len(frames) * num_views
363
+ processed_views = 0
364
+
365
+ for frame_idx, frame in enumerate(frames):
366
+ progress((frame_idx + 1) / len(frames), desc=f"Processing frame {frame_idx+1}/{len(frames)}")
367
+
368
+ status_text = update_status(f"\nπŸ“ Frame {frame_idx + 1}/{len(frames)}:")
369
+
370
+ # Generate view angles
371
+ view_angles = [(360.0 / num_views * i, 0) for i in range(num_views)]
372
+
373
+ frame_points = []
374
+ frame_colors = []
375
+
376
+ for view_idx, (theta, phi) in enumerate(view_angles):
377
+ # Convert to perspective
378
+ persp_img = equirectangular_to_perspective(
379
+ frame, fov=config['fov'], theta=theta, phi=phi,
380
+ height=config['resolution'], width=config['resolution']
381
+ )
382
+
383
+ # Depth estimation
384
+ depth_map, confidence = estimate_depth_enhanced(persp_img, dpt_processor, dpt_model)
385
+
386
+ # Camera params
387
+ focal = config['resolution'] / (2 * np.tan(np.radians(config['fov']) / 2))
388
+ from scipy.spatial.transform import Rotation as R
389
+ rot = R.from_euler('yz', [theta, phi], degrees=True)
390
+ R_matrix = rot.as_matrix()
391
+
392
+ camera_params = {
393
+ 'fx': focal, 'fy': focal,
394
+ 'cx': config['resolution'] / 2,
395
+ 'cy': config['resolution'] / 2,
396
+ 'R': R_matrix,
397
+ 't': np.zeros(3)
398
+ }
399
+
400
+ # Generate points
401
+ points, colors = depth_to_point_cloud_enhanced(
402
+ depth_map, persp_img, confidence, camera_params
403
+ )
404
+
405
+ frame_points.append(points)
406
+ frame_colors.append(colors)
407
+
408
+ processed_views += 1
409
+
410
+ if (view_idx + 1) % 2 == 0:
411
+ status_text = update_status(f" β€’ Processed {view_idx + 1}/{num_views} views")
412
+
413
+ all_points.append(np.vstack(frame_points))
414
+ all_colors.append(np.vstack(frame_colors))
415
+
416
+ # Combine all
417
+ status_text = update_status(f"\nπŸ”— Combining {len(frames)} frames...")
418
+ final_points = np.vstack(all_points)
419
+ final_colors = np.vstack(all_colors)
420
+
421
+ status_text = update_status(f"βœ… Total points: {len(final_points):,}")
422
+
423
+ # Filter
424
+ status_text = update_status(f"\n🎯 Filtering and cleaning...")
425
+
426
+ # Remove duplicates
427
+ unique_indices = np.unique(final_points, axis=0, return_index=True)[1]
428
+ final_points = final_points[unique_indices]
429
+ final_colors = final_colors[unique_indices]
430
+
431
+ # Statistical outlier removal
432
+ pcd_temp = o3d.geometry.PointCloud()
433
+ pcd_temp.points = o3d.utility.Vector3dVector(final_points)
434
+ pcd_temp, inlier_indices = pcd_temp.remove_statistical_outlier(nb_neighbors=30, std_ratio=2.0)
435
+ final_points = final_points[inlier_indices]
436
+ final_colors = final_colors[inlier_indices]
437
+
438
+ status_text = update_status(f"βœ… Filtered to {len(final_points):,} points")
439
+
440
+ # Downsample if huge
441
+ if len(final_points) > 500000:
442
+ keep_ratio = 500000 / len(final_points)
443
+ keep_indices = np.random.choice(len(final_points), size=int(len(final_points) * keep_ratio), replace=False)
444
+ final_points = final_points[keep_indices]
445
+ final_colors = final_colors[keep_indices]
446
+ status_text = update_status(f" β€’ Downsampled to {len(final_points):,} points")
447
+
448
+ # Visualization
449
+ status_text = update_status(f"\nπŸ“Š Creating 3D visualization...")
450
+
451
+ vis_sample = min(50000, len(final_points))
452
+ vis_indices = np.random.choice(len(final_points), vis_sample, replace=False)
453
+ vis_points = final_points[vis_indices]
454
+ vis_colors = final_colors[vis_indices]
455
+
456
+ fig = go.Figure(data=[go.Scatter3d(
457
+ x=vis_points[:, 0], y=vis_points[:, 1], z=vis_points[:, 2],
458
+ mode='markers',
459
+ marker=dict(
460
+ size=2,
461
+ color=[f'rgb({int(c[0])},{int(c[1])},{int(c[2])})' for c in vis_colors],
462
+ opacity=0.8
463
+ )
464
+ )])
465
+
466
+ fig.update_layout(
467
+ title=f"3D Reconstruction ({len(final_points):,} points)",
468
+ scene=dict(xaxis_title='X', yaxis_title='Y', zaxis_title='Z', aspectmode='data'),
469
+ height=700
470
+ )
471
+
472
+ # Save point cloud
473
+ status_text = update_status(f"\nπŸ’Ύ Saving outputs...")
474
+ pcd = o3d.geometry.PointCloud()
475
+ pcd.points = o3d.utility.Vector3dVector(final_points)
476
+ pcd.colors = o3d.utility.Vector3dVector(final_colors / 255.0)
477
+ pcd.estimate_normals(search_param=o3d.geometry.KDTreeSearchParamHybrid(radius=0.1, max_nn=30))
478
+
479
+ ply_path = Path(tempfile.mkdtemp()) / "reconstruction.ply"
480
+ o3d.io.write_point_cloud(str(ply_path), pcd)
481
+ ply_path = str(ply_path) # Convert Path to string for Gradio
482
+ status_text = update_status(f"βœ… Point cloud saved")
483
+
484
+ # Create mesh
485
+ obj_path = None
486
+ elapsed = time.time() - start_time
487
+ if elapsed < 180: # Only if under 3 minutes so far
488
+ try:
489
+ def mesh_progress(msg):
490
+ nonlocal status_text
491
+ status_text = update_status(msg)
492
+
493
+ mesh = create_realistic_mesh(final_points, final_colors, mesh_progress)
494
+ obj_path = Path(tempfile.mkdtemp()) / "reconstruction.obj"
495
+ o3d.io.write_triangle_mesh(str(obj_path), mesh)
496
+ obj_path = str(obj_path) # Convert Path to string for Gradio
497
+ status_text = update_status(f"βœ… Mesh created: {len(mesh.vertices):,} vertices")
498
+ except Exception as e:
499
+ status_text = update_status(f"⚠️ Mesh generation failed: {str(e)}")
500
+ else:
501
+ status_text = update_status("⚠️ Mesh skipped (time limit)")
502
+
503
+ # Final stats
504
+ elapsed = time.time() - start_time
505
+ status_text = update_status(f"\n{'='*60}")
506
+ status_text = update_status(f"πŸŽ‰ SUCCESS! Completed in {elapsed:.1f}s")
507
+ status_text = update_status(f"πŸ“Š Final: {len(final_points):,} points")
508
+ status_text = update_status(f"{'='*60}")
509
+
510
+ return fig, ply_path, obj_path, status_text, preview_img
511
+
512
+ except Exception as e:
513
+ import traceback
514
+ error_msg = f"❌ ERROR: {str(e)}\n\n{traceback.format_exc()}"
515
+ return None, None, None, error_msg, None
516
+
517
+ # Create Gradio interface
518
+ with gr.Blocks(title="Insta360 3D Reconstruction", theme=gr.themes.Soft()) as demo:
519
+ gr.Markdown("""
520
+ # 🌍 Insta360 3D Reconstruction
521
+ ### Transform 360Β° videos into realistic 3D models
522
+
523
+ **Optimized for videos of any length** - Uses intelligent frame sampling for longer videos
524
+ """)
525
+
526
+ gr.Markdown("""
527
+ ### ⚠️ For 8-Minute Videos:
528
+ - Processing will take 10-15 minutes
529
+ - Uses intelligent frame sampling (every 15 seconds)
530
+ - Recommended: Use lower quality settings first
531
+ - Consider trimming to 1-2 minutes for faster results
532
+ """)
533
+
534
+ with gr.Row():
535
+ with gr.Column():
536
+ video_input = gr.Video(label="Upload 360Β° Video")
537
+
538
+ with gr.Accordion("βš™οΈ Settings", open=True):
539
+ num_frames = gr.Slider(
540
+ minimum=4, maximum=12, value=6, step=2,
541
+ label="Target Frames (auto-adjusted for long videos)"
542
+ )
543
+ num_views = gr.Slider(
544
+ minimum=4, maximum=8, value=6, step=2,
545
+ label="Views per Frame"
546
+ )
547
+ quality = gr.Radio(
548
+ choices=['low', 'medium', 'high'],
549
+ value='medium',
550
+ label="Quality (Start with 'medium' for 8-min videos)"
551
+ )
552
+
553
+ process_btn = gr.Button("πŸš€ Start Reconstruction", variant="primary", size="lg")
554
+
555
+ with gr.Column():
556
+ status_output = gr.Textbox(label="Processing Status", lines=20, max_lines=25)
557
+ preview_output = gr.Image(label="Video Preview")
558
+
559
+ with gr.Row():
560
+ visualization_output = gr.Plot(label="3D Visualization")
561
+
562
+ with gr.Row():
563
+ ply_output = gr.File(label="πŸ“¦ Download Point Cloud (.ply)")
564
+ obj_output = gr.File(label="πŸ“¦ Download Mesh (.obj)")
565
+
566
+ process_btn.click(
567
+ fn=process_video,
568
+ inputs=[video_input, num_frames, num_views, quality],
569
+ outputs=[visualization_output, ply_output, obj_output, status_output, preview_output]
570
+ )
571
+
572
+ gr.Markdown("""
573
+ ### πŸ’‘ Tips for Best Results
574
+
575
+ **For 8-minute videos:**
576
+ - Start with Medium quality (faster)
577
+ - Uses intelligent sampling (~ every 15 seconds)
578
+ - Total processing: 10-15 minutes
579
+ - Or trim to 1-2 minutes for 3-5 min processing
580
+
581
+ **Quality Guide:**
582
+ - **Low**: 2-4 min (quick preview)
583
+ - **Medium**: 5-10 min (good balance)
584
+ - **High**: 10-20 min (best quality)
585
+
586
+ **Video Requirements:**
587
+ - Format: MP4 (equirectangular 360Β°)
588
+ - Aspect Ratio: 2:1
589
+ - Any length (optimized for long videos)
590
+ """)
591
+
592
+ if __name__ == "__main__":
593
+ demo.launch()
requirements-6.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ gradio==4.8.0
2
+ torch==2.1.2
3
+ torchvision==0.16.2
4
+ transformers==4.37.0
5
+ open3d==0.18.0
6
+ plotly==5.18.0
7
+ opencv-python-headless==4.8.1.78
8
+ scipy==1.11.4
9
+ numpy==1.24.3
10
+ pillow==10.1.0