Spaces:

Tohru127
/

3d-insta360

Sleeping

App Files Files Community

Tohru127 committed on Nov 4, 2025

Commit

081a183

verified ·

1 Parent(s): ab6e931

Create app.py

Browse files

Files changed (1) hide show

app.py +210 -0

app.py ADDED Viewed

	@@ -0,0 +1,210 @@

+"""
+Hugging Face Space Application for Insta360 3D Reconstruction
+"""
+import gradio as gr
+import torch
+import cv2
+import numpy as np
+from PIL import Image
+import os
+import tempfile
+from pathlib import Path
+from tqdm import tqdm
+from transformers import pipeline
+import zipfile
+import shutil
+class Insta360Reconstructor:
+    def __init__(self):
+        self.device = "cuda" if torch.cuda.is_available() else "cpu"
+        print(f"Initializing on device: {self.device}")
+        # Load depth estimation model
+        self.depth_estimator = pipeline(
+            "depth-estimation",
+            model="depth-anything/Depth-Anything-V2-Large-hf",
+            device=0 if self.device == "cuda" else -1
+        )
+    def process_video(self, video_path, sample_rate=30, max_frames=100):
+        """Process video and return depth maps and point cloud"""
+        # Create temporary directories
+        temp_dir = tempfile.mkdtemp()
+        frames_dir = os.path.join(temp_dir, "frames")
+        depth_dir = os.path.join(temp_dir, "depth")
+        os.makedirs(frames_dir, exist_ok=True)
+        os.makedirs(depth_dir, exist_ok=True)
+        # Extract frames
+        cap = cv2.VideoCapture(video_path)
+        fps = cap.get(cv2.CAP_PROP_FPS)
+        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+        frame_paths = []
+        frame_count = 0
+        saved_count = 0
+        print(f"Extracting frames (every {sample_rate} frames)...")
+        while cap.isOpened() and saved_count < max_frames:
+            ret, frame = cap.read()
+            if not ret:
+                break
+            if frame_count % sample_rate == 0:
+                frame_path = os.path.join(frames_dir, f"frame_{saved_count:04d}.jpg")
+                cv2.imwrite(frame_path, frame)
+                frame_paths.append(frame_path)
+                saved_count += 1
+            frame_count += 1
+        cap.release()
+        # Process depth estimation
+        print(f"Processing {len(frame_paths)} frames for depth estimation...")
+        depth_outputs = []
+        sample_images = []
+        for i, frame_path in enumerate(frame_paths):
+            # Load image
+            image = Image.open(frame_path)
+            # Estimate depth
+            depth_result = self.depth_estimator(image)
+            depth_map = depth_result["depth"]
+            # Save depth visualization
+            depth_vis_path = os.path.join(depth_dir, f"depth_{i:04d}.jpg")
+            depth_map.save(depth_vis_path)
+            # Collect samples for display (first 9)
+            if i < 9:
+                sample_images.append(depth_vis_path)
+            # Save depth as numpy array
+            depth_npy_path = os.path.join(depth_dir, f"depth_{i:04d}.npy")
+            np.save(depth_npy_path, np.array(depth_map))
+            depth_outputs.append(depth_npy_path)
+            # Clear cache periodically
+            if i % 10 == 0 and self.device == "cuda":
+                torch.cuda.empty_cache()
+        # Create ZIP file with all outputs
+        zip_path = os.path.join(temp_dir, "reconstruction_output.zip")
+        with zipfile.ZipFile(zip_path, 'w') as zipf:
+            # Add frames
+            for frame_path in frame_paths:
+                zipf.write(frame_path, os.path.join("frames", os.path.basename(frame_path)))
+            # Add depth maps
+            for depth_path in Path(depth_dir).glob("*.jpg"):
+                zipf.write(depth_path, os.path.join("depth_maps", depth_path.name))
+            for depth_path in Path(depth_dir).glob("*.npy"):
+                zipf.write(depth_path, os.path.join("depth_arrays", depth_path.name))
+        return sample_images, zip_path, f"Processed {len(frame_paths)} frames successfully!"
+# Module-level singleton: the depth model is loaded once here and reused by every request.
+reconstructor = Insta360Reconstructor()
+def process_video_interface(video, sample_rate, max_frames, progress=gr.Progress()):
+    """Gradio interface function"""
+    if video is None:
+        return None, None, "Please upload a video file"
+    progress(0, desc="Starting processing...")
+    try:
+        # Process video
+        sample_images, zip_path, status_msg = reconstructor.process_video(
+            video,
+            sample_rate=int(sample_rate),
+            max_frames=int(max_frames)
+        )
+        progress(1.0, desc="Complete!")
+        return sample_images, zip_path, status_msg
+    except Exception as e:
+        return None, None, f"Error: {str(e)}"
+# Build the Gradio UI: instructions, input controls, outputs, gallery, and the click handler.
+with gr.Blocks(title="Insta360 3D Reconstruction") as demo:
+    gr.Markdown("""
+    # 🎥 Insta360 Video 3D Reconstruction
+    Upload your Insta360 outdoor video for depth estimation and 3D reconstruction.
+    **Note:** For large videos (7+ GB), processing may take significant time.
+    Adjust sample rate and max frames to control processing time.
+    ### Instructions:
+    1. Upload your Insta360 video
+    2. Set sample rate (higher = faster but fewer frames)
+    3. Set max frames to process (fewer = faster)
+    4. Click "Process Video"
+    5. Download the ZIP file with all outputs
+    """)
+    with gr.Row():  # one row holding two columns: inputs first, results second
+        with gr.Column():  # input controls
+            video_input = gr.Video(label="Upload Insta360 Video")
+            sample_rate = gr.Slider(
+                minimum=1,
+                maximum=120,
+                value=30,
+                step=1,
+                label="Sample Rate (process every N frames)",
+                info="Higher values = faster processing but fewer frames"
+            )
+            max_frames = gr.Slider(
+                minimum=10,
+                maximum=500,
+                value=100,
+                step=10,
+                label="Maximum Frames to Process",
+                info="Limit total frames for faster processing"
+            )
+            process_btn = gr.Button("🚀 Process Video", variant="primary")
+        with gr.Column():  # status message and downloadable archive
+            status_output = gr.Textbox(label="Status", lines=2)
+            download_output = gr.File(label="Download Results (ZIP)")
+    gallery_output = gr.Gallery(  # created after the row, so outside both columns
+        label="Sample Depth Maps (first 9 frames)",
+        columns=3,
+        rows=3,
+        height="auto"
+    )
+    process_btn.click(  # outputs are listed in the order process_video_interface returns them
+        fn=process_video_interface,
+        inputs=[video_input, sample_rate, max_frames],
+        outputs=[gallery_output, download_output, status_output]
+    )
+    gr.Markdown("""
+    ### Output Contents:
+    - **frames/**: Extracted RGB frames
+    - **depth_maps/**: Visualized depth maps (JPG)
+    - **depth_arrays/**: Raw depth data (NumPy arrays)
+    ### Tips for Large Videos:
+    - Start with sample_rate=60 and max_frames=50 for testing
+    - Gradually increase for full processing
+    - Each frame takes ~2-5 seconds to process
+    """)
+if __name__ == "__main__":  # launch only when run as a script, not when imported
+    demo.launch()