Spaces:

Tohru127
/

3d-360

Sleeping

App Files Files Community

3d-360 / app.py

Tohru127

Update app.py

a6f791b verified 6 months ago

raw

history blame contribute delete

19.7 kB

	"""
	360° Video Frame Extraction + 3D Reconstruction for Outdoor Scenes
	Robust version with better error handling
	"""

	import gradio as gr
	import numpy as np
	import cv2
	from PIL import Image
	import tempfile
	import zipfile
	import os
	from datetime import datetime
	import torch
	from transformers import DPTForDepthEstimation, DPTImageProcessor
	import open3d as o3d
	import plotly.graph_objects as go
	import warnings
	import traceback
	warnings.filterwarnings('ignore')

	# ============================================================================
	# MODEL LOADING
	# ============================================================================

	# Load the DPT depth-estimation model once at import time.
	# MODEL_LOADED records whether loading succeeded; on any failure both
	# handles are reset to None so the rest of the module can test for them.
	_DPT_CHECKPOINT = "Intel/dpt-large"

	print("Loading depth estimation model...")
	MODEL_LOADED = False
	try:
	    dpt_processor = DPTImageProcessor.from_pretrained(_DPT_CHECKPOINT)
	    dpt_model = DPTForDepthEstimation.from_pretrained(_DPT_CHECKPOINT)
	    if torch.cuda.is_available():
	        # Move the weights to the GPU when CUDA is available.
	        dpt_model = dpt_model.cuda()
	        print("✓ Using GPU")
	    dpt_model.eval()
	    MODEL_LOADED = True
	    print("✓ Model loaded!")
	except Exception as load_error:
	    print(f"⚠️ Model loading failed: {load_error}")
	    dpt_processor = dpt_model = None

	# ============================================================================
	# FRAME EXTRACTION
	# ============================================================================

	def extract_frames_from_360_video(video_path, frame_step=30, max_frames=150):
	    """Save every ``frame_step``-th frame of a video as a JPEG in a temp directory.

	    Args:
	        video_path: Path of the video file to read.
	        frame_step: Keep one frame out of every ``frame_step`` decoded frames.
	            Values below 1 are treated as 1.
	        max_frames: Stop once this many frames have been saved.

	    Returns:
	        A 5-tuple ``(frame_paths, frames_dir, fps, total_frames, status)``.
	        ``status`` is ``"Success"`` when at least one frame was written;
	        otherwise it is an ``"Error..."`` message, ``frame_paths`` is empty
	        and ``frames_dir`` is ``None``. Exceptions are reported through
	        ``status`` rather than raised.
	    """
	    import shutil  # function-scope import: only needed to clean up on failure

	    cap = None
	    frames_dir = None
	    keep_frames_dir = False
	    try:
	        if not os.path.exists(video_path):
	            return [], None, 0, 0, f"Error: Video file not found at {video_path}"

	        cap = cv2.VideoCapture(video_path)
	        if not cap.isOpened():
	            return [], None, 0, 0, "Error: Could not open video file. Check format (MP4 recommended)"

	        fps = cap.get(cv2.CAP_PROP_FPS)
	        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
	        if fps == 0 or total_frames == 0:
	            return [], None, 0, 0, "Error: Invalid video file"

	        # A step of 0 would make the modulo below fail; clamp to "every frame".
	        step = max(1, int(frame_step))

	        frames_dir = tempfile.mkdtemp()
	        extracted_frames = []
	        frame_index = 0

	        while cap.isOpened() and len(extracted_frames) < max_frames:
	            # grab() advances the stream without decoding; only the frames
	            # that are actually kept pay for a full decode via retrieve().
	            if not cap.grab():
	                break

	            if frame_index % step == 0:
	                ok, frame = cap.retrieve()
	                if ok:
	                    frame_filename = os.path.join(
	                        frames_dir, f"frame_{len(extracted_frames):04d}.jpg"
	                    )
	                    if cv2.imwrite(frame_filename, frame, [cv2.IMWRITE_JPEG_QUALITY, 95]):
	                        extracted_frames.append(frame_filename)

	            frame_index += 1

	        if not extracted_frames:
	            return [], None, fps, total_frames, "Error: No frames could be extracted"

	        # Ownership of the directory passes to the caller only on success.
	        keep_frames_dir = True
	        return extracted_frames, frames_dir, fps, total_frames, "Success"

	    except Exception as e:
	        return [], None, 0, 0, f"Error during extraction: {str(e)}"

	    finally:
	        # Runs on every exit path, including the early returns above.
	        if cap is not None:
	            cap.release()
	        if frames_dir is not None and not keep_frames_dir:
	            shutil.rmtree(frames_dir, ignore_errors=True)

	# ============================================================================
	# 3D RECONSTRUCTION
	# ============================================================================

	def estimate_depth(image, processor, model):
	    """Estimate a normalised depth map for a single image.

	    Args:
	        image: Array whose first two dimensions are (height, width); the
	            depth map is resized to ``image.shape[:2]``.
	        processor: Callable invoked as ``processor(images=image,
	            return_tensors="pt")`` that returns a mapping of tensors.
	        model: Callable invoked as ``model(**inputs)`` whose result exposes
	            ``predicted_depth``.

	    Returns:
	        A 2-D NumPy array scaled to the range [0, 1], or ``None`` if any step
	        fails (the error is printed). A depth map with no variation is
	        returned as all zeros instead of NaN.
	    """
	    try:
	        inputs = processor(images=image, return_tensors="pt")

	        # Send the inputs to wherever the model lives; fall back to the
	        # CUDA-availability rule when the model does not expose `.device`.
	        device = getattr(model, "device", None)
	        if device is None:
	            device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
	        inputs = {k: v.to(device) for k, v in inputs.items()}

	        with torch.no_grad():
	            predicted_depth = model(**inputs).predicted_depth

	            prediction = torch.nn.functional.interpolate(
	                predicted_depth.unsqueeze(1),
	                size=image.shape[:2],
	                mode="bicubic",
	                align_corners=False,
	            )

	        # Index instead of squeeze() so 1-pixel-high/wide images stay 2-D.
	        depth = prediction[0, 0].cpu().numpy()

	        low = depth.min()
	        span = depth.max() - low
	        if span > 0:
	            return (depth - low) / span

	        # Flat depth map: min-max scaling would divide by zero.
	        return np.zeros_like(depth)
	    except Exception as e:
	        print(f"Depth estimation error: {e}")
	        return None

	def depth_to_point_cloud(image, depth):
	    """Turn a depth map and its image into point coordinates and colours.

	    Each pixel becomes one point ``(column, row, depth)`` in row-major
	    order. Colours are the pixel values divided by 255; a single-channel
	    image has its value repeated across three channels.

	    Returns:
	        ``(points, colors)``, two arrays with one row per pixel.
	    """
	    rows, cols = depth.shape

	    # Row and column index grids as floats, laid out like `depth`.
	    row_grid, col_grid = np.indices((rows, cols), dtype=np.float64)
	    points = np.column_stack(
	        (col_grid.ravel(), row_grid.ravel(), depth.ravel())
	    )

	    if image.ndim == 3:
	        colors = image.reshape(-1, 3) / 255.0
	    else:
	        gray = image.ravel() / 255.0
	        colors = np.column_stack((gray, gray, gray))

	    return points, colors

	def create_3d_model(frames, max_frames_for_3d=5):
	    """Build a coloured point cloud preview from the first extracted frames.

	    Each frame is resized to 512x256, run through the depth model and turned
	    into points; successive frames are shifted 600 units along X so they sit
	    side by side rather than overlapping.

	    Args:
	        frames: Sequence of image file paths.
	        max_frames_for_3d: Number of leading frames to use. Non-integer
	            values (e.g. from a UI slider) are truncated to an int.

	    Returns:
	        ``(figure, ply_path, status)``. ``figure`` is a Plotly figure and
	        ``ply_path`` the written point-cloud file. On failure both are
	        ``None`` and ``status`` explains why. If only the PLY export fails
	        the figure is still returned with ``ply_path`` set to ``None``.
	    """
	    if not MODEL_LOADED or dpt_model is None or dpt_processor is None:
	        return None, None, "❌ Depth model not loaded. Use Quick Mode instead."

	    frame_spacing = 600      # X offset between consecutive frames
	    max_points = 100000      # cap on plotted/exported points

	    try:
	        all_points = []
	        all_colors = []

	        # Slice bounds must be integers; a slider may hand over a float.
	        frames_to_process = frames[:max(0, int(max_frames_for_3d))]

	        for idx, frame_path in enumerate(frames_to_process):
	            print(f"Processing frame {idx+1}/{len(frames_to_process)}...")

	            if not os.path.exists(frame_path):
	                continue

	            img = cv2.imread(frame_path)
	            if img is None:
	                continue

	            img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
	            img_small = cv2.resize(img_rgb, (512, 256))

	            depth = estimate_depth(img_small, dpt_processor, dpt_model)
	            if depth is None:
	                continue

	            points, colors = depth_to_point_cloud(img_small, depth)
	            points[:, 0] += idx * frame_spacing

	            all_points.append(points)
	            all_colors.append(colors)

	        if not all_points:
	            return None, None, "❌ No frames could be processed"

	        final_points = np.vstack(all_points)
	        final_colors = np.vstack(all_colors)

	        # Downsample
	        if len(final_points) > max_points:
	            indices = np.random.choice(len(final_points), max_points, replace=False)
	            final_points = final_points[indices]
	            final_colors = final_colors[indices]

	        # Create visualization
	        fig = go.Figure(data=[go.Scatter3d(
	            x=final_points[:, 0],
	            y=final_points[:, 1],
	            z=final_points[:, 2],
	            mode='markers',
	            marker=dict(size=1, color=final_colors, opacity=0.8)
	        )])

	        fig.update_layout(
	            title="3D Reconstruction",
	            scene=dict(xaxis_title="X", yaxis_title="Y", zaxis_title="Depth"),
	            width=800,
	            height=600
	        )

	        # Save PLY. mkstemp guarantees a unique file, so two requests in the
	        # same second cannot overwrite each other's model.
	        stamp = datetime.now().strftime('%Y%m%d_%H%M%S')
	        handle, ply_path = tempfile.mkstemp(prefix=f"3d_model_{stamp}_", suffix=".ply")
	        os.close(handle)

	        pcd = o3d.geometry.PointCloud()
	        pcd.points = o3d.utility.Vector3dVector(final_points)
	        pcd.colors = o3d.utility.Vector3dVector(final_colors)

	        # write_point_cloud reports failure through its return value.
	        if not o3d.io.write_point_cloud(ply_path, pcd):
	            try:
	                os.remove(ply_path)
	            except OSError:
	                pass
	            return fig, None, f"⚠️ Created {len(final_points):,} points, but the PLY file could not be written"

	        return fig, ply_path, f"✅ Created {len(final_points):,} points"

	    except Exception as e:
	        return None, None, f"❌ 3D creation error: {str(e)}"

	# ============================================================================
	# PACKAGE CREATION
	# ============================================================================

	def create_download_package(frames_dir, video_info):
	    """Bundle the extracted JPEG frames and a README into a ZIP archive.

	    Args:
	        frames_dir: Directory whose ``*.jpg`` files are added under
	            ``frames/`` in the archive (in sorted name order).
	        video_info: Mapping with the keys ``'fps'``, ``'extracted_frames'``
	            and ``'frame_step'`` used to fill in the README.

	    Returns:
	        Path of the ZIP file created in the system temp directory, or
	        ``None`` if anything fails (the error is printed and any partial
	        archive is removed).
	    """
	    zip_path = None
	    try:
	        readme_content = f"""360° OUTDOOR PHOTOGRAMMETRY PACKAGE
	Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}

	VIDEO INFO:
	- FPS: {video_info['fps']:.2f}
	- Extracted Frames: {video_info['extracted_frames']}
	- Interval: ~{video_info['frame_step']/video_info['fps']:.2f}s

	METASHAPE WORKFLOW:
	1. Import Photos
	2. Set Camera Type to "Spherical"
	3. Align Photos (High accuracy, Sequential)
	4. Build Dense Cloud
	5. Build Mesh
	6. Build Texture

	SOFTWARE: Agisoft Metashape ($179)
	Good luck! 🌍📸
	"""

	        # List first so a bad directory fails before any file is created.
	        frame_files = sorted(
	            name for name in os.listdir(frames_dir)
	            if name.endswith('.jpg') and os.path.isfile(os.path.join(frames_dir, name))
	        )

	        # Unique archive name: a timestamp alone collides when two requests
	        # arrive within the same second.
	        stamp = datetime.now().strftime('%Y%m%d_%H%M%S')
	        handle, zip_path = tempfile.mkstemp(prefix=f"360_frames_{stamp}_", suffix=".zip")
	        os.close(handle)

	        with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
	            # Write the README straight into the archive (encoded as UTF-8)
	            # instead of going through a shared README.txt in the temp dir.
	            zipf.writestr("README.txt", readme_content)

	            for frame_file in frame_files:
	                zipf.write(os.path.join(frames_dir, frame_file), f"frames/{frame_file}")

	        return zip_path
	    except Exception as e:
	        print(f"ZIP creation error: {e}")
	        if zip_path is not None:
	            # Do not leave a truncated archive behind.
	            try:
	                os.remove(zip_path)
	            except OSError:
	                pass
	        return None

	# ============================================================================
	# MAIN PROCESSING FUNCTIONS
	# ============================================================================

	def process_video_frames_only(video_file, frame_interval_seconds, max_frames):
	    """Quick mode: extract frames from a video and package them as a ZIP.

	    Args:
	        video_file: Path of the uploaded video, or ``None`` if nothing was
	            uploaded.
	        frame_interval_seconds: Desired spacing between saved frames, in
	            seconds; converted to a frame step using the video's FPS.
	        max_frames: Upper bound on the number of frames to save.

	    Returns:
	        ``(preview_image, status_text, zip_path)``. On failure the preview
	        and/or ZIP entries are ``None`` and ``status_text`` describes the
	        problem. Exceptions are converted into an error status that
	        includes the traceback.
	    """
	    import shutil  # function-scope import: used only to remove temporary frames

	    frames_dir = None
	    try:
	        print(f"Starting frame extraction. Video: {video_file}")

	        if video_file is None:
	            return None, "⚠️ Please upload a video file", None

	        # Check file exists and size
	        if not os.path.exists(video_file):
	            return None, f"❌ Video file not found: {video_file}", None

	        file_size = os.path.getsize(video_file) / (1024*1024)  # MB
	        print(f"Video file size: {file_size:.2f} MB")

	        if file_size > 1000:
	            return None, f"❌ Video too large ({file_size:.0f}MB). Max 1GB. Please compress the video.", None

	        status = f"📹 Processing video ({file_size:.1f}MB)...\n\n"

	        # Get video info; the capture is released on every path.
	        cap = cv2.VideoCapture(video_file)
	        try:
	            if not cap.isOpened():
	                return None, status + "❌ Could not open video. Try MP4 format.", None
	            fps = cap.get(cv2.CAP_PROP_FPS)
	            total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
	        finally:
	            cap.release()

	        # `not fps > 0` also rejects negative and NaN frame rates.
	        if not fps > 0:
	            return None, status + "❌ Invalid video file", None

	        duration = total_frames / fps
	        status += f"✓ Video: {duration:.1f}s, {fps:.1f} FPS, {total_frames} frames\n\n"

	        # UI sliders may deliver floats; keep the count an integer.
	        max_frames = int(max_frames)
	        frame_step = max(1, int(fps * frame_interval_seconds))
	        estimated_frames = min(max_frames, total_frames // frame_step)

	        status += f"⚙️ Extracting ~{estimated_frames} frames...\n"
	        status += f" (every {frame_step} frames = ~{frame_interval_seconds}s interval)\n\n"

	        # Extract frames
	        extracted_frames, frames_dir, video_fps, _, extract_status = extract_frames_from_360_video(
	            video_file, frame_step=frame_step, max_frames=max_frames
	        )

	        if extract_status != "Success":
	            return None, status + f"❌ {extract_status}", None

	        status += f"✓ Extracted {len(extracted_frames)} frames\n\n"

	        # Create preview (held in memory, so it survives the cleanup below)
	        first_frame = cv2.imread(extracted_frames[0])
	        if first_frame is None:
	            preview_img = None
	        else:
	            preview_img = Image.fromarray(cv2.cvtColor(first_frame, cv2.COLOR_BGR2RGB))

	        # Create ZIP
	        status += "📦 Creating download package...\n"

	        video_info = {
	            'fps': video_fps,
	            'total_frames': total_frames,
	            'extracted_frames': len(extracted_frames),
	            'frame_step': frame_step
	        }

	        zip_path = create_download_package(frames_dir, video_info)

	        if zip_path is None:
	            return preview_img, status + "❌ Could not create ZIP", None

	        zip_size = os.path.getsize(zip_path) / (1024*1024)

	        result = f"""✅ SUCCESS!

	📊 Summary:
	• Extracted: {len(extracted_frames)} frames
	• Interval: ~{frame_interval_seconds}s
	• ZIP size: {zip_size:.1f}MB

	📦 Download ZIP below
	🎯 Import to Metashape for 3D model

	Next: Use Agisoft Metashape ($179) to create professional 3D model
	"""

	        return preview_img, status + result, zip_path

	    except Exception as e:
	        error_trace = traceback.format_exc()
	        return None, f"❌ ERROR:\n{str(e)}\n\n{error_trace}", None

	    finally:
	        # The loose JPEGs are no longer needed once the ZIP exists (or the
	        # run has failed); remove them so repeated runs do not fill the disk.
	        if frames_dir:
	            shutil.rmtree(frames_dir, ignore_errors=True)

	def process_video_with_3d(video_file, frame_interval_seconds, max_frames, max_frames_3d):
	    """Full mode: extract frames, build a 3D preview and package the frames.

	    Args:
	        video_file: Path of the uploaded video, or ``None`` if nothing was
	            uploaded.
	        frame_interval_seconds: Desired spacing between saved frames, in
	            seconds; converted to a frame step using the video's FPS.
	        max_frames: Upper bound on the number of frames to save.
	        max_frames_3d: Number of leading frames passed to the 3D step.

	    Returns:
	        ``(preview_image, status_text, zip_path, figure, ply_path)``.
	        Entries that could not be produced are ``None`` and ``status_text``
	        describes what happened. Exceptions are converted into an error
	        status that includes the traceback.
	    """
	    import shutil  # function-scope import: used only to remove temporary frames

	    frames_dir = None
	    try:
	        print(f"Starting full 3D processing. Video: {video_file}")

	        if video_file is None:
	            return None, "⚠️ Please upload a video file", None, None, None

	        if not MODEL_LOADED:
	            return None, "❌ 3D model not loaded. Use Quick Mode instead.", None, None, None

	        if not os.path.exists(video_file):
	            return None, f"❌ Video file not found: {video_file}", None, None, None

	        file_size = os.path.getsize(video_file) / (1024*1024)

	        if file_size > 500:
	            return None, f"❌ Video too large for 3D mode ({file_size:.0f}MB). Max 500MB. Use Quick Mode or compress video.", None, None, None

	        status = f"📹 Full 3D Processing ({file_size:.1f}MB)...\n\n"

	        # Read the video properties; the capture is released on every path.
	        cap = cv2.VideoCapture(video_file)
	        try:
	            if not cap.isOpened():
	                return None, status + "❌ Could not open video. Try MP4 format.", None, None, None
	            fps = cap.get(cv2.CAP_PROP_FPS)
	            total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
	        finally:
	            cap.release()

	        # `not fps > 0` also rejects negative and NaN frame rates.
	        if not fps > 0:
	            return None, status + "❌ Invalid video file", None, None, None

	        # UI sliders may deliver floats; counts and slice bounds must be ints.
	        max_frames = int(max_frames)
	        frames_for_3d = int(max_frames_3d)
	        frame_step = max(1, int(fps * frame_interval_seconds))

	        status += "⚙️ Step 1/3: Extracting frames...\n"

	        extracted_frames, frames_dir, video_fps, _, extract_status = extract_frames_from_360_video(
	            video_file, frame_step=frame_step, max_frames=max_frames
	        )

	        if extract_status != "Success":
	            return None, status + f"❌ {extract_status}", None, None, None

	        status += f"✓ Extracted {len(extracted_frames)} frames\n\n"

	        # Preview (held in memory, so it survives the cleanup below)
	        first_frame = cv2.imread(extracted_frames[0])
	        if first_frame is None:
	            preview_img = None
	        else:
	            preview_img = Image.fromarray(cv2.cvtColor(first_frame, cv2.COLOR_BGR2RGB))

	        # Create 3D
	        status += f"⚙️ Step 2/3: Creating 3D model (using {min(frames_for_3d, len(extracted_frames))} frames)...\n"
	        status += "This may take 5-10 minutes...\n\n"

	        fig, ply_path, model_status = create_3d_model(extracted_frames, frames_for_3d)

	        status += f"{model_status}\n\n"

	        # Create ZIP
	        status += "⚙️ Step 3/3: Creating frame package...\n"

	        video_info = {
	            'fps': video_fps,
	            'total_frames': total_frames,
	            'extracted_frames': len(extracted_frames),
	            'frame_step': frame_step
	        }

	        zip_path = create_download_package(frames_dir, video_info)

	        if zip_path is None:
	            # Report the failure instead of silently returning no download.
	            status += "❌ Could not create ZIP\n\n"

	        result = f"""✅ COMPLETE!

	📊 Results:
	• Frames: {len(extracted_frames)}
	• 3D points: {model_status}

	📦 Downloads:
	• ZIP: Frames for Metashape
	• PLY: 3D point cloud

	Note: This is a basic preview. Use Metashape for professional quality!
	"""

	        return preview_img, status + result, zip_path, fig, ply_path

	    except Exception as e:
	        error_trace = traceback.format_exc()
	        return None, f"❌ ERROR:\n{str(e)}\n\n{error_trace}", None, None, None

	    finally:
	        # The loose JPEGs have been consumed by the 3D step and the ZIP (or
	        # the run has failed); remove them so repeated runs do not fill the disk.
	        if frames_dir:
	            shutil.rmtree(frames_dir, ignore_errors=True)

	# ============================================================================
	# INTERFACE
	# ============================================================================

	# Gradio UI: one tab per processing mode, each wiring its inputs to the
	# matching process_* function, followed by a shared tips section.
	with gr.Blocks(title="360° Outdoor Photogrammetry + 3D") as demo:

	    gr.Markdown("# 🌍 360° Video: Frame Extraction + 3D Reconstruction")
	    gr.Markdown("Two modes: Quick frames (30s) OR Full 3D (5-10min)")
	    # Plain "|" here: "\|" is not a valid Python escape sequence.
	    gr.Markdown("⚠️ Max file size: Quick Mode: 1GB | Full 3D: 500MB | 8-minute videos OK!")

	    with gr.Tabs():
	        # ---- Quick mode: frames + ZIP only ----
	        with gr.Tab("🚀 Quick - Frames Only (RECOMMENDED)"):
	            gr.Markdown("""
	### Fast & Free!
	- Extract frames in 30-60 seconds
	- Works on FREE tier
	- Best for professional Metashape workflow
	""")

	            with gr.Row():
	                with gr.Column():
	                    video1 = gr.Video(label="Upload 360° Video (MP4 recommended, max 1GB - 8 min videos OK!)")
	                    interval1 = gr.Slider(0.5, 5.0, 2.0, step=0.5, label="Frame Interval (seconds) - 2s good for 8min videos")
	                    max_frames1 = gr.Slider(20, 500, 150, step=10, label="Max Frames - 150-200 good for 8min")
	                    btn1 = gr.Button("🎬 Extract Frames", variant="primary", size="lg")

	                with gr.Column():
	                    status1 = gr.Textbox(label="Status", lines=15)
	                    preview1 = gr.Image(label="Preview (First Frame)")

	            download1 = gr.File(label="📦 Download Frames (ZIP)")

	            # Output order matches the tuple returned by the handler.
	            btn1.click(
	                fn=process_video_frames_only,
	                inputs=[video1, interval1, max_frames1],
	                outputs=[preview1, status1, download1]
	            )

	        # ---- Full mode: frames + depth-based 3D preview ----
	        with gr.Tab("🎨 Full - Frames + 3D (SLOW, NEEDS GPU)"):
	            gr.Markdown("""
	### Creates 3D Preview
	- Takes 5-10 minutes
	- Requires GPU upgrade ($0.60/hour)
	- Basic quality (Metashape is better!)
	""")

	            with gr.Row():
	                with gr.Column():
	                    video2 = gr.Video(label="Upload 360° Video (MP4, max 500MB - compress long videos)")
	                    interval2 = gr.Slider(0.5, 5.0, 2.0, step=0.5, label="Frame Interval (seconds)")
	                    max_frames2 = gr.Slider(20, 100, 30, step=10, label="Max Frames")
	                    max_3d = gr.Slider(2, 8, 4, step=1, label="Frames for 3D (fewer = faster)")
	                    btn2 = gr.Button("🎨 Extract + Create 3D", variant="primary")

	                with gr.Column():
	                    status2 = gr.Textbox(label="Status", lines=15)
	                    preview2 = gr.Image(label="Preview")

	            with gr.Row():
	                viz = gr.Plot(label="3D Visualization")

	            with gr.Row():
	                download2 = gr.File(label="📦 Frames (ZIP)")
	                ply_download = gr.File(label="📦 3D Model (PLY)")

	            # Output order matches the tuple returned by the handler.
	            btn2.click(
	                fn=process_video_with_3d,
	                inputs=[video2, interval2, max_frames2, max_3d],
	                outputs=[preview2, status2, download2, viz, ply_download]
	            )

	    gr.Markdown("""
	---
	### 💡 Tips for 8-Minute Videos:
	- Quick Mode - Handles up to 1GB (8 min at 5K: ~400-600MB)
	- Frame interval: 2-3 seconds - Gets 160-240 frames from 8 min
	- Use MP4 format - Best compatibility
	- If over 1GB - Compress with HandBrake (target 5-8 Mbps)
	- For best 3D quality - Use Metashape with extracted frames

	### 📐 Expected Frames from 8-Min Video:
	- 1s interval: ~480 frames (very dense, slow processing)
	- 2s interval: ~240 frames (recommended for outdoor)
	- 3s interval: ~160 frames (good for large landscapes)

	Made for outdoor photogrammetry! 🏔️
	""")

	# Start the web server only when run as a script, not on import.
	if __name__ == "__main__":
	    demo.launch()