|
|
""" |
|
|
Insta360 Video Complete 3D Reconstruction with Responsible AI Features (OPTIMIZED) |
|
|
|
|
|
This tool processes Insta360 360-degree videos to create complete 3D reconstructions |
|
|
by extracting frames, estimating depth from multiple viewpoints, and fusing point clouds. |
|
|
|
|
|
OPTIMIZATIONS: |
|
|
- Reduced default processing parameters |
|
|
- Added timeout handling |
|
|
- Batch processing for efficiency |
|
|
- Progress tracking |
|
|
- Early stopping options |
|
|
""" |
|
|
|
|
|
import gradio as gr |
|
|
import numpy as np |
|
|
import torch |
|
|
from PIL import Image |
|
|
from transformers import DPTForDepthEstimation, DPTImageProcessor |
|
|
import open3d as o3d |
|
|
import plotly.graph_objects as go |
|
|
import matplotlib.pyplot as plt |
|
|
import io |
|
|
import json |
|
|
import time |
|
|
from pathlib import Path |
|
|
import tempfile |
|
|
import zipfile |
|
|
import hashlib |
|
|
from datetime import datetime |
|
|
import cv2 |
|
|
from scipy.spatial.transform import Rotation as R |
|
|
from scipy import ndimage |
|
|
import warnings |
|
|
warnings.filterwarnings('ignore') |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Markdown notice rendered at the top of the Reconstruction tab; the user must
# accept these guidelines (via the consent checkbox) before processing runs.
RESPONSIBLE_AI_NOTICE = """
## β οΈ Responsible Use Guidelines for 360Β° Video Reconstruction

### Privacy & Consent
- **Do not upload videos containing identifiable people without their explicit consent**
- **Do not use for surveillance, tracking, or monitoring individuals**
- 360Β° videos capture wide areas - extra privacy considerations apply
- Remove metadata that may contain location or personal information
- Consider privacy of all individuals visible in 360Β° footage

### Ethical Use
- This tool is for **educational, research, and creative purposes only**
- **Prohibited uses:**
- Creating misleading 3D reconstructions
- Unauthorized documentation of private property
- Circumventing security systems
- Surveillance or tracking applications
- Commercial use without proper rights to source videos

### Limitations & Bias
- Models trained primarily on standard camera perspectives
- 360Β° content may have distortions at poles (top/bottom)
- Scale is relative, not absolute
- Reconstruction quality depends on camera motion and scene complexity

### Data Usage
- Videos are processed locally during your session
- No videos are stored or transmitted to external servers
- You retain all rights to your uploaded videos and generated 3D models

**By using this tool, you agree to these responsible use guidelines.**
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def check_video_safety(video_path):
    """Run basic sanity/safety checks on an uploaded video.

    Args:
        video_path: Filesystem path to the video file.

    Returns:
        (ok, message): ok is False only when the file cannot be opened;
        message aggregates any notes/warnings produced by the checks.
    """
    # Renamed from `warnings` to avoid shadowing the stdlib warnings module.
    notes = []

    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        return False, "Unable to open video file"

    frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    fps = cap.get(cv2.CAP_PROP_FPS)
    duration = frame_count / fps if fps > 0 else 0

    # Long videos are allowed but warned about (processing scales with length).
    if duration > 300:
        notes.append("β οΈ Very long video - processing may take significant time. Consider using shorter clips.")

    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    # Guard against missing metadata (height == 0) instead of crashing;
    # equirectangular 360Β° footage is typically 2:1.
    aspect_ratio = width / height if height > 0 else 0
    if 1.8 < aspect_ratio < 2.2:
        notes.append("β Detected equirectangular 360Β° format")
    else:
        notes.append("β οΈ Video aspect ratio suggests this may not be 360Β° footage")

    cap.release()

    return True, "\n".join(notes) if notes else "Video checks passed"
|
|
|
|
|
def generate_session_id():
    """Return a short anonymous identifier for this processing session."""
    # Hash the current timestamp so the id carries no user information.
    stamp = str(datetime.now()).encode()
    return hashlib.sha256(stamp).hexdigest()[:16]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _load_dpt():
    """Load the Intel DPT-Large depth model; return (processor, model) or (None, None)."""
    print("Loading DPT depth estimation model (optimized for 360Β°)...")
    try:
        proc = DPTImageProcessor.from_pretrained("Intel/dpt-large")
        net = DPTForDepthEstimation.from_pretrained("Intel/dpt-large")
        if torch.cuda.is_available():
            # Move to GPU when one is present; inference stays identical.
            net = net.cuda()
            print("β Using GPU acceleration")
        net.eval()
        print("β DPT model loaded successfully!")
        return proc, net
    except Exception as e:
        # Leave both globals as None; the pipeline reports this later.
        print(f"Error loading model: {e}")
        return None, None


dpt_processor, dpt_model = _load_dpt()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def extract_frames_from_video(video_path, max_frames=30, sample_method='uniform'):
    """
    Extract RGB frames from a video for reconstruction.

    Args:
        video_path: Path to the video file.
        max_frames: Maximum number of frames to extract.
        sample_method: 'uniform' (fixed stride through the video); any other
            value selects evenly spaced indices via linspace (includes the
            first and last frame).

    Returns:
        (frames, info) on success, where frames is a list of RGB arrays and
        info is a metadata dict; (None, error_message) on failure.
    """
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        return None, "Failed to open video"

    frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    fps = cap.get(cv2.CAP_PROP_FPS)

    # Guard: without a positive frame count we cannot choose sample indices
    # (linspace(0, -1, ...) would otherwise produce invalid negative indices).
    if frame_count <= 0:
        cap.release()
        return None, "Video reports no frames"

    frames = []
    frame_indices = []

    if sample_method == 'uniform':
        # Fixed stride through the video, capped at max_frames.
        step = max(1, frame_count // max_frames)
        indices = range(0, frame_count, step)[:max_frames]
    else:
        # Evenly spaced indices including the first and last frame.
        indices = np.linspace(0, frame_count - 1, max_frames, dtype=int)

    for idx in indices:
        cap.set(cv2.CAP_PROP_POS_FRAMES, idx)
        ret, frame = cap.read()
        if ret:
            # OpenCV decodes as BGR; the rest of the pipeline expects RGB.
            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frames.append(frame_rgb)
            frame_indices.append(idx)

    cap.release()

    info = {
        'total_frames': frame_count,
        'extracted_frames': len(frames),
        'fps': fps,
        'duration': frame_count / fps if fps > 0 else 0,
        'frame_indices': frame_indices
    }

    return frames, info
|
|
|
|
|
def equirectangular_to_perspective(equirect_img, fov=90, theta=0, phi=0, height=512, width=512):
    """
    Render a pinhole-perspective view from an equirectangular image.

    Casts a ray for every output pixel, rotates it by (theta, phi), converts
    the ray direction to longitude/latitude, and samples the equirectangular
    source at the corresponding location via cv2.remap.

    Args:
        equirect_img: Equirectangular image (H, W, 3)
        fov: Field of view in degrees
        theta: Horizontal rotation (azimuth) in degrees
        phi: Vertical rotation (elevation) in degrees
        height, width: Output image size

    Returns:
        Perspective view of shape (height, width, 3), same dtype as input.
    """
    equ_h, equ_w = equirect_img.shape[:2]

    # Pixel grid for the output view; 'ij' gives y rows, x columns.
    y, x = np.meshgrid(np.arange(height), np.arange(width), indexing='ij')

    # Normalize pixel coordinates to [-1, 1] in both axes.
    x_norm = (2.0 * x / width - 1.0)
    y_norm = (2.0 * y / height - 1.0)

    fov_rad = np.radians(fov)
    focal = 0.5 * width / np.tan(0.5 * fov_rad)

    # Build un-normalized ray directions in camera space (z forward).
    # NOTE(review): x_3d/y_3d are scaled by the full width/height rather than
    # width/2 (or focal * tan), so the rendered view's effective FOV is wider
    # than the `fov` argument — confirm whether this is intentional.
    z = focal
    x_3d = x_norm * width
    y_3d = y_norm * height

    # Normalize rays to unit length.
    norm = np.sqrt(x_3d**2 + y_3d**2 + z**2)
    x_3d /= norm
    y_3d /= norm
    z_3d = z / norm

    theta_rad = np.radians(theta)
    phi_rad = np.radians(phi)

    # Yaw: rotate rays about the vertical (y) axis by theta.
    x_rot = x_3d * np.cos(theta_rad) + z_3d * np.sin(theta_rad)
    y_rot = y_3d
    z_rot = -x_3d * np.sin(theta_rad) + z_3d * np.cos(theta_rad)

    # Pitch: rotate rays about the horizontal (x) axis by phi.
    x_final = x_rot
    y_final = y_rot * np.cos(phi_rad) - z_rot * np.sin(phi_rad)
    z_final = y_rot * np.sin(phi_rad) + z_rot * np.cos(phi_rad)

    # Ray direction -> spherical angles. Rays are unit length, so y_final is
    # already in [-1, 1]; the clip only guards float round-off.
    longitude = np.arctan2(x_final, z_final)
    latitude = np.arcsin(np.clip(y_final, -1, 1))

    # Spherical angles -> equirectangular pixel coordinates.
    u = (longitude / (2 * np.pi) + 0.5) * equ_w
    v = (0.5 - latitude / np.pi) * equ_h

    # cv2.remap requires float32 maps within the source image bounds.
    u = np.clip(u, 0, equ_w - 1).astype(np.float32)
    v = np.clip(v, 0, equ_h - 1).astype(np.float32)

    perspective = cv2.remap(equirect_img, u, v, cv2.INTER_LINEAR)

    return perspective
|
|
|
|
|
def estimate_depth_dpt(image_rgb, processor, model):
    """
    Run monocular depth estimation with a DPT model.

    Args:
        image_rgb: RGB image array of shape (H, W, 3).
        processor: DPT image processor (prepares model inputs).
        model: DPT depth-estimation model.

    Returns:
        Depth map of shape (H, W), min-max normalized to [0, 1].
    """
    with torch.no_grad():
        batch = processor(images=image_rgb, return_tensors="pt")

        if torch.cuda.is_available():
            batch = {name: tensor.cuda() for name, tensor in batch.items()}

        prediction = model(**batch).predicted_depth

        # Upsample the model's low-resolution prediction back to input size.
        resized = torch.nn.functional.interpolate(
            prediction.unsqueeze(1),
            size=image_rgb.shape[:2],
            mode="bicubic",
            align_corners=False,
        ).squeeze()

    depth_map = resized.cpu().numpy()

    # Min-max normalize to [0, 1]; the epsilon avoids division by zero on a
    # constant depth map.
    span = depth_map.max() - depth_map.min()
    return (depth_map - depth_map.min()) / (span + 1e-8)
|
|
|
|
|
def depth_to_pointcloud(depth, color_image, fov=90, max_points=50000):
    """
    Back-project a depth map into a colored 3D point cloud.

    Args:
        depth: Depth map of shape (H, W), values expected in [0, 1].
        color_image: RGB image of shape (H, W, 3), uint8.
        fov: Horizontal field of view of the virtual camera, in degrees.
        max_points: Upper bound on points; larger maps are resized down.

    Returns:
        (points, colors): arrays of shape (N, 3); colors are in [0, 1].
    """
    h, w = depth.shape

    # Resize both maps down when they would exceed the point budget.
    if h * w > max_points:
        scale = np.sqrt(max_points / (h * w))
        new_h, new_w = int(h * scale), int(w * scale)
        depth = cv2.resize(depth, (new_w, new_h), interpolation=cv2.INTER_LINEAR)
        color_image = cv2.resize(color_image, (new_w, new_h), interpolation=cv2.INTER_LINEAR)
        h, w = new_h, new_w

    # Pinhole intrinsics derived from the field of view.
    fov_rad = np.radians(fov)
    focal = 0.5 * w / np.tan(0.5 * fov_rad)
    cx, cy = w / 2, h / 2

    rows, cols = np.meshgrid(np.arange(h), np.arange(w), indexing='ij')

    # Back-project: X = (u - cx) * Z / f, Y = (v - cy) * Z / f.
    zs = depth
    xs = (cols - cx) * zs / focal
    ys = (rows - cy) * zs / focal

    points = np.stack([xs.ravel(), ys.ravel(), zs.ravel()], axis=1)
    colors = color_image.reshape(-1, 3) / 255.0

    # Drop points at the extreme ends of the normalized depth range.
    keep = (points[:, 2] > 0.01) & (points[:, 2] < 0.99)
    return points[keep], colors[keep]
|
|
|
|
|
def create_point_cloud_o3d(points, colors):
    """Wrap (N, 3) point and color arrays in an Open3D PointCloud."""
    cloud = o3d.geometry.PointCloud()
    cloud.points = o3d.utility.Vector3dVector(points)
    cloud.colors = o3d.utility.Vector3dVector(colors)
    return cloud
|
|
|
|
|
def align_point_clouds_simple(source_pcd, target_pcd):
    """
    Placeholder alignment that deliberately skips ICP for speed.

    Both arguments are ignored; the 4x4 identity matrix is returned, i.e.
    the clouds are kept in their initial relative pose.
    """
    return np.eye(4)
|
|
|
|
|
def visualize_point_cloud_plotly(points, colors, max_points=10000, title="3D Reconstruction"):
    """
    Build an interactive Plotly scatter plot of a colored point cloud.

    Args:
        points: Point coordinates, shape (N, 3).
        colors: Per-point RGB colors in [0, 1], shape (N, 3).
        max_points: Display budget; denser clouds are randomly subsampled.
        title: Figure title.

    Returns:
        plotly.graph_objects.Figure with a single Scatter3d trace.
    """
    # Randomly subsample very dense clouds to keep the browser responsive.
    if len(points) > max_points:
        keep = np.random.choice(len(points), max_points, replace=False)
        points = points[keep]
        colors = colors[keep]

    # Plotly expects CSS-style 'rgb(r,g,b)' strings with 0-255 channels.
    css_colors = [f'rgb({int(c[0]*255)},{int(c[1]*255)},{int(c[2]*255)})' for c in colors]

    trace = go.Scatter3d(
        x=points[:, 0],
        y=points[:, 1],
        z=points[:, 2],
        mode='markers',
        marker=dict(
            size=2,
            color=css_colors,
        ),
        text=[f'Point {i}' for i in range(len(points))],
        hoverinfo='text'
    )

    fig = go.Figure(data=[trace])
    fig.update_layout(
        title=title,
        scene=dict(
            xaxis_title='X',
            yaxis_title='Y',
            zaxis_title='Z',
            aspectmode='data'
        ),
        height=600,
        margin=dict(l=0, r=0, b=0, t=30)
    )

    return fig
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def process_insta360_video(video_path, num_frames=4, num_views=4, quality='low', timeout=180):
    """
    Complete reconstruction pipeline (OPTIMIZED FOR SPEED).

    Extracts frames from a 360Β° video, renders perspective views per frame,
    estimates depth for each view, back-projects to point clouds, merges and
    filters them, then exports a PLY file (and an OBJ mesh if time permits).

    Args:
        video_path: Path to 360Β° video
        num_frames: Number of frames to extract (reduced default)
        num_views: Number of views per frame (reduced default)
        quality: 'low', 'medium', or 'high'
        timeout: Maximum processing time in seconds

    Returns:
        Visualization, PLY file, OBJ file, status message, preview image
        (the first three are None on failure).
    """
    start_time = time.time()
    # Anonymous id; reserved for session-scoped logging.
    session_id = generate_session_id()

    status_messages = []

    def add_status(msg):
        # Prefix each message with elapsed time for easy profiling.
        status_messages.append(f"[{time.time()-start_time:.1f}s] {msg}")
        print(msg)
        return "\n".join(status_messages)

    def check_timeout():
        # Raise instead of returning so any call site aborts the pipeline.
        if time.time() - start_time > timeout:
            raise TimeoutError(f"Processing exceeded {timeout}s timeout")

    def _make_temp(suffix):
        # Secure replacement for the deprecated tempfile.mktemp(): creates
        # the file, closes the handle, and returns its path for Open3D I/O.
        tmp = tempfile.NamedTemporaryFile(suffix=suffix, delete=False)
        tmp.close()
        return tmp.name

    try:
        # Fail fast when the depth model never loaded at startup.
        if dpt_model is None or dpt_processor is None:
            return None, None, None, "β Depth model is not available - check startup logs.", None

        add_status("π Running safety checks...")
        is_safe, safety_msg = check_video_safety(video_path)
        if not is_safe:
            return None, None, None, f"β Safety check failed: {safety_msg}", None
        add_status(f"β Safety checks passed\n{safety_msg}")

        check_timeout()

        add_status(f"πΉ Extracting {num_frames} frames from video...")
        frames, info = extract_frames_from_video(video_path, max_frames=num_frames)
        if frames is None:
            return None, None, None, f"β {info}", None

        add_status(f"β Extracted {info['extracted_frames']} frames from {info['duration']:.1f}s video")

        preview_img = Image.fromarray(frames[0])

        check_timeout()

        # Quality preset controls view resolution and voxel downsampling.
        if quality == 'low':
            view_size = 256
            voxel_size = 0.05
        elif quality == 'medium':
            view_size = 320
            voxel_size = 0.03
        else:
            view_size = 384
            voxel_size = 0.02

        add_status(f"π Processing {num_frames} frames Γ {num_views} views = {num_frames * num_views} total depth maps...")
        add_status(f"βοΈ Quality: {quality} ({view_size}px per view)")

        all_points = []
        all_colors = []

        # (theta, phi) camera angles covering the sphere for each frame.
        if num_views == 4:
            angles = [(0, 0), (90, 0), (180, 0), (270, 0)]
        elif num_views == 6:
            angles = [(0, 0), (90, 0), (180, 0), (270, 0), (0, 30), (0, -30)]
        else:
            angles = [(0, 0), (45, 0), (90, 0), (135, 0), (180, 0), (225, 0), (270, 0), (315, 0)]

        for frame_idx, frame in enumerate(frames):
            check_timeout()

            add_status(f"  Frame {frame_idx+1}/{len(frames)}...")

            for view_idx, (theta, phi) in enumerate(angles):
                check_timeout()

                # Render a pinhole view of the equirectangular frame.
                perspective = equirectangular_to_perspective(
                    frame, fov=90, theta=theta, phi=phi,
                    height=view_size, width=view_size
                )

                depth = estimate_depth_dpt(perspective, dpt_processor, dpt_model)

                points, colors = depth_to_pointcloud(depth, perspective, fov=90, max_points=30000)

                # Rotate each view's points back into the frame's world pose.
                theta_rad = np.radians(theta)
                phi_rad = np.radians(phi)

                R_y = np.array([
                    [np.cos(theta_rad), 0, np.sin(theta_rad)],
                    [0, 1, 0],
                    [-np.sin(theta_rad), 0, np.cos(theta_rad)]
                ])

                R_x = np.array([
                    [1, 0, 0],
                    [0, np.cos(phi_rad), -np.sin(phi_rad)],
                    [0, np.sin(phi_rad), np.cos(phi_rad)]
                ])

                R_total = R_y @ R_x
                points = points @ R_total.T

                # Crude per-frame offset so successive frames don't overlap.
                points[:, 2] += frame_idx * 0.5

                all_points.append(points)
                all_colors.append(colors)

        check_timeout()

        add_status(f"π Merging {len(all_points)} point clouds...")
        merged_points = np.vstack(all_points)
        merged_colors = np.vstack(all_colors)

        add_status(f"β Total points before filtering: {len(merged_points):,}")

        check_timeout()

        add_status(f"π§Ή Downsampling with voxel size {voxel_size}...")
        pcd = create_point_cloud_o3d(merged_points, merged_colors)
        pcd = pcd.voxel_down_sample(voxel_size=voxel_size)

        # Outlier removal is optional: skip it when the time budget is tight.
        if time.time() - start_time < timeout - 30:
            add_status("π§Ή Removing outliers...")
            pcd, _ = pcd.remove_statistical_outlier(nb_neighbors=20, std_ratio=2.0)

        final_points = np.asarray(pcd.points)
        final_colors = np.asarray(pcd.colors)

        add_status(f"β Final point cloud: {len(final_points):,} points")

        check_timeout()

        add_status("π Creating 3D visualization...")
        fig = visualize_point_cloud_plotly(final_points, final_colors, max_points=15000,
                                           title=f"3D Reconstruction ({len(final_points):,} points)")

        check_timeout()

        add_status("πΎ Exporting PLY file...")
        ply_path = _make_temp('.ply')
        o3d.io.write_point_cloud(ply_path, pcd)

        # Mesh generation is also conditional on the remaining time budget.
        obj_path = None
        if time.time() - start_time < timeout - 20:
            add_status("πΎ Generating mesh (Poisson)...")
            try:
                # estimate_normals() mutates the cloud in place and returns
                # None, so it must not be unpacked; Poisson reconstruction
                # requires the normals to exist first.
                pcd.estimate_normals(search_param=o3d.geometry.KDTreeSearchParamHybrid(radius=0.1, max_nn=30))
                mesh, densities = o3d.geometry.TriangleMesh.create_from_point_cloud_poisson(pcd, depth=8)

                obj_path = _make_temp('.obj')
                o3d.io.write_triangle_mesh(obj_path, mesh)
                add_status("β OBJ mesh exported")
            except Exception as e:
                add_status(f"β οΈ Mesh generation skipped: {str(e)}")
        else:
            add_status("β οΈ Mesh generation skipped due to time limit")

        elapsed = time.time() - start_time
        add_status(f"\nπ SUCCESS! Processing completed in {elapsed:.1f}s")
        add_status(f"π Final Stats:")
        add_status(f"  β’ Frames processed: {len(frames)}")
        add_status(f"  β’ Views per frame: {num_views}")
        add_status(f"  β’ Total depth maps: {len(frames) * num_views}")
        add_status(f"  β’ Final points: {len(final_points):,}")

        return fig, ply_path, obj_path, "\n".join(status_messages), preview_img

    except TimeoutError as e:
        return None, None, None, f"β±οΈ TIMEOUT: {str(e)}\n\nTry reducing:\nβ’ Number of frames\nβ’ Number of views\nβ’ Quality setting", None
    except Exception as e:
        import traceback
        error_msg = f"β ERROR: {str(e)}\n\n{traceback.format_exc()}"
        return None, None, None, error_msg, None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def create_interface():
    """Build and return the Gradio Blocks UI for the reconstruction tool."""

    with gr.Blocks(title="Insta360 3D Reconstruction (Optimized)", theme=gr.themes.Soft()) as demo:

        gr.Markdown("# π Insta360 Complete 3D Reconstruction (OPTIMIZED)")
        gr.Markdown("### Transform 360Β° Videos into Full 3D Point Clouds and Meshes")
        gr.Markdown("**Optimized Version**: Faster processing with timeout handling")

        with gr.Tab("π₯ Reconstruction"):
            # Responsible-use notice rendered above the controls.
            gr.Markdown(RESPONSIBLE_AI_NOTICE)

            with gr.Row():
                with gr.Column(scale=1):
                    # The user must tick this before processing is allowed
                    # (enforced in check_and_process below).
                    consent_checkbox = gr.Checkbox(
                        label="β I have read and agree to the Responsible Use Guidelines",
                        value=False
                    )
                    video_input = gr.Video(
                        label="Upload 360Β° Video",
                        height=300
                    )

                    with gr.Accordion("βοΈ Settings (OPTIMIZED)", open=True):
                        num_frames = gr.Slider(
                            minimum=2, maximum=8, value=4, step=2,
                            label="Number of Frames (reduced for speed)"
                        )
                        num_views = gr.Slider(
                            minimum=4, maximum=8, value=4, step=2,
                            label="Views per Frame (reduced for speed)"
                        )
                        quality = gr.Radio(
                            choices=['low', 'medium', 'high'],
                            value='low',
                            label="Reconstruction Quality (start with 'low')"
                        )
                        timeout_slider = gr.Slider(
                            minimum=60, maximum=600, value=180, step=30,
                            label="Max Processing Time (seconds)"
                        )

                    reconstruct_btn = gr.Button("π Start Reconstruction", variant="primary", size="lg")

                with gr.Column(scale=1):
                    status_output = gr.Textbox(label="Status", lines=15)
                    preview_output = gr.Image(label="Video Preview")

            with gr.Row():
                visualization_output = gr.Plot(label="3D Visualization")

            with gr.Row():
                ply_output = gr.File(label="π¦ Download Point Cloud (.ply)")
                obj_output = gr.File(label="π¦ Download Mesh (.obj)")

            def check_and_process(video, consent, frames, views, qual, timeout):
                # Gate processing on consent and a provided video before
                # delegating to the pipeline.
                if not consent:
                    return None, None, None, "β Please agree to the Responsible Use Guidelines first.", None
                if video is None:
                    return None, None, None, "β Please upload a video first.", None
                return process_insta360_video(video, frames, views, qual, timeout)

            reconstruct_btn.click(
                fn=check_and_process,
                inputs=[video_input, consent_checkbox, num_frames, num_views, quality, timeout_slider],
                outputs=[visualization_output, ply_output, obj_output, status_output, preview_output]
            )

        with gr.Tab("π Optimization Guide"):
            gr.Markdown("""
## How to Avoid Timeouts

### Quick Start (Fast Processing)
- **Frames**: 2-4
- **Views**: 4
- **Quality**: Low
- **Expected time**: 30-60 seconds

### Balanced (Medium Processing)
- **Frames**: 4-6
- **Views**: 6
- **Quality**: Medium
- **Expected time**: 1-2 minutes

### Best Quality (Slow Processing)
- **Frames**: 6-8
- **Views**: 8
- **Quality**: High
- **Expected time**: 3-5 minutes

### Key Optimizations

1. **Reduced Defaults**: Default settings are now much faster
2. **Timeout Handling**: Processing stops gracefully if time limit exceeded
3. **No ICP Alignment**: Removed slow alignment algorithm
4. **Downsampling**: Automatic point reduction for large scenes
5. **Conditional Mesh**: Mesh generation skipped if running out of time

### Tips for Success

β **Start with low settings** and increase gradually
β **Use shorter videos** (<30 seconds works best)
β **Increase timeout** if you have time to wait
β **GPU helps** if available (automatic detection)

β **Don't start with max settings** - will timeout
β **Don't use very long videos** - extract clips first
β **Don't expect instant results** - 3D reconstruction is complex

### Understanding the Process

- Each frame Γ view combination requires one depth estimation
- 4 frames Γ 4 views = 16 depth estimations (fast)
- 8 frames Γ 8 views = 64 depth estimations (slow)

The more frames and views, the better quality but longer processing time.
""")

        with gr.Tab("π Ethics & Privacy"):
            gr.Markdown("""
## Ethical Considerations for 360Β° Reconstruction

### Enhanced Privacy Concerns
360Β° videos capture significantly more information than standard videos:
- **Full sphere visibility**: Everything around the camera is recorded
- **Bystander capture**: People may be recorded unintentionally
- **Private spaces**: Entire rooms and spaces are documented

### Your Responsibilities

1. **Obtain Consent**
- Get explicit permission from everyone visible in the video
- Inform people that 3D reconstruction will be performed
- Consider privacy implications of complete spatial capture

2. **Respect Private Property**
- Only record spaces you have permission to document
- Be aware of intellectual property in architectural designs
- Don't reconstruct commercial spaces without authorization

3. **Data Security**
- 3D models can reveal sensitive spatial information
- Store reconstructions securely
- Be cautious about sharing 3D models publicly

4. **Prohibited Uses**
- Surveillance or monitoring without consent
- Creating unauthorized digital twins of spaces
- Bypassing security through spatial understanding
- Any deceptive or manipulative applications

### Transparency

This tool processes all data locally. No videos or reconstructions are stored on external servers.
You maintain full ownership and control of your data.
""")

    return demo
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Startup banner summarizing the optimizations in this build.
    separator = "=" * 60
    banner = [
        separator,
        "INSTA360 3D RECONSTRUCTION (OPTIMIZED)",
        separator,
        "β Faster processing with reduced defaults",
        "β Timeout handling",
        "β Progress tracking",
        "β Graceful degradation",
        separator,
    ]
    for line in banner:
        print(line)

    demo = create_interface()
    demo.launch(share=True)