Tohru127 committed on
Commit
081a183
·
verified ·
1 Parent(s): ab6e931

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +210 -0
app.py ADDED
@@ -0,0 +1,210 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Hugging Face Space Application for Insta360 3D Reconstruction
3
+ """
4
+
5
+ import gradio as gr
6
+ import torch
7
+ import cv2
8
+ import numpy as np
9
+ from PIL import Image
10
+ import os
11
+ import tempfile
12
+ from pathlib import Path
13
+ from tqdm import tqdm
14
+ from transformers import pipeline
15
+ import zipfile
16
+ import shutil
17
+
18
class Insta360Reconstructor:
    """Runs monocular depth estimation over frames sampled from a video.

    Loads the Depth-Anything-V2 model once at construction time and exposes
    ``process_video``, which extracts frames, estimates a per-frame depth map,
    and packages everything into a downloadable ZIP archive.
    """

    def __init__(self):
        # Prefer GPU when available; the HF pipeline expects a device index
        # (0 = first CUDA device, -1 = CPU).
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        print(f"Initializing on device: {self.device}")

        # Load depth estimation model
        self.depth_estimator = pipeline(
            "depth-estimation",
            model="depth-anything/Depth-Anything-V2-Large-hf",
            device=0 if self.device == "cuda" else -1
        )

    def process_video(self, video_path, sample_rate=30, max_frames=100):
        """Process a video and return sample depth images plus a results ZIP.

        Args:
            video_path: Path to the input video file.
            sample_rate: Keep every N-th frame (must be >= 1).
            max_frames: Upper bound on the number of frames processed.

        Returns:
            Tuple of (sample depth-map image paths, ZIP file path,
            status message string).

        Raises:
            ValueError: If sample_rate < 1 or the video cannot be opened.
        """
        if sample_rate < 1:
            raise ValueError("sample_rate must be >= 1")

        # NOTE: the temp directory is intentionally left on disk — the
        # returned ZIP and gallery images live inside it and are served
        # to the client after this call returns.
        temp_dir = tempfile.mkdtemp()
        frames_dir = os.path.join(temp_dir, "frames")
        depth_dir = os.path.join(temp_dir, "depth")
        os.makedirs(frames_dir, exist_ok=True)
        os.makedirs(depth_dir, exist_ok=True)

        frame_paths = self._extract_frames(
            video_path, frames_dir, sample_rate, max_frames
        )
        sample_images = self._estimate_depths(frame_paths, depth_dir)
        zip_path = self._build_zip(temp_dir, frame_paths, depth_dir)

        return sample_images, zip_path, f"Processed {len(frame_paths)} frames successfully!"

    def _extract_frames(self, video_path, frames_dir, sample_rate, max_frames):
        """Decode the video and save every ``sample_rate``-th frame as JPEG.

        Returns the list of saved frame file paths, in order.
        """
        cap = cv2.VideoCapture(video_path)
        # Fail loudly instead of silently producing zero frames.
        if not cap.isOpened():
            raise ValueError(f"Could not open video: {video_path}")

        print(f"Extracting frames (every {sample_rate} frames)...")
        frame_paths = []
        frame_count = 0
        try:
            while cap.isOpened() and len(frame_paths) < max_frames:
                ret, frame = cap.read()
                if not ret:
                    break
                if frame_count % sample_rate == 0:
                    frame_path = os.path.join(
                        frames_dir, f"frame_{len(frame_paths):04d}.jpg"
                    )
                    cv2.imwrite(frame_path, frame)
                    frame_paths.append(frame_path)
                frame_count += 1
        finally:
            # Always release the decoder handle, even if imwrite raises.
            cap.release()
        return frame_paths

    def _estimate_depths(self, frame_paths, depth_dir):
        """Run depth estimation per frame; save JPG + NPY outputs.

        Returns up to the first 9 depth visualization paths for the gallery.
        """
        print(f"Processing {len(frame_paths)} frames for depth estimation...")
        sample_images = []

        for i, frame_path in enumerate(frame_paths):
            # Close the source image promptly to avoid file-handle buildup.
            with Image.open(frame_path) as image:
                depth_map = self.depth_estimator(image)["depth"]

            # Save depth visualization (PIL image returned by the pipeline).
            depth_vis_path = os.path.join(depth_dir, f"depth_{i:04d}.jpg")
            depth_map.save(depth_vis_path)

            # Collect samples for display (first 9)
            if i < 9:
                sample_images.append(depth_vis_path)

            # Save raw depth values alongside the visualization.
            np.save(os.path.join(depth_dir, f"depth_{i:04d}.npy"), np.array(depth_map))

            # Periodically release cached GPU memory to avoid OOM on long runs.
            if i % 10 == 0 and self.device == "cuda":
                torch.cuda.empty_cache()

        return sample_images

    def _build_zip(self, temp_dir, frame_paths, depth_dir):
        """Bundle RGB frames, depth JPEGs, and raw depth arrays into one ZIP."""
        zip_path = os.path.join(temp_dir, "reconstruction_output.zip")
        with zipfile.ZipFile(zip_path, 'w') as zipf:
            # Add frames
            for frame_path in frame_paths:
                zipf.write(frame_path, os.path.join("frames", os.path.basename(frame_path)))
            # Add depth maps (visualizations, then raw arrays)
            for depth_path in Path(depth_dir).glob("*.jpg"):
                zipf.write(depth_path, os.path.join("depth_maps", depth_path.name))
            for depth_path in Path(depth_dir).glob("*.npy"):
                zipf.write(depth_path, os.path.join("depth_arrays", depth_path.name))
        return zip_path
111
+
112
# Module-level singleton: the depth model is loaded once at import time so
# every Gradio request reuses the same pipeline instead of reloading weights.
reconstructor = Insta360Reconstructor()
114
+
115
def process_video_interface(video, sample_rate, max_frames, progress=gr.Progress()):
    """Gradio callback: run reconstruction and map failures to a status string.

    Args:
        video: Path of the uploaded video (None when nothing was uploaded).
        sample_rate: Slider value — process every N-th frame.
        max_frames: Slider value — cap on the number of frames processed.
        progress: Gradio progress tracker (injected by Gradio at call time).

    Returns:
        Tuple of (gallery image paths, ZIP file path, status message); the
        first two are None on failure so the UI components stay empty.
    """
    if video is None:
        return None, None, "Please upload a video file"

    progress(0, desc="Starting processing...")

    try:
        # Sliders deliver floats; the reconstructor expects ints.
        sample_images, zip_path, status_msg = reconstructor.process_video(
            video,
            sample_rate=int(sample_rate),
            max_frames=int(max_frames)
        )
    except Exception as e:
        # Surface the message to the UI, but keep the full traceback in the
        # server log so failures are debuggable (it was silently lost before).
        import traceback
        traceback.print_exc()
        return None, None, f"Error: {str(e)}"

    progress(1.0, desc="Complete!")
    return sample_images, zip_path, status_msg
137
+
138
# Create Gradio interface: inputs (video upload + tuning sliders) in the left
# column, status/results in the right column, wired together at the bottom.
with gr.Blocks(title="Insta360 3D Reconstruction") as demo:
    gr.Markdown("""
    # 🎥 Insta360 Video 3D Reconstruction

    Upload your Insta360 outdoor video for depth estimation and 3D reconstruction.

    **Note:** For large videos (7+ GB), processing may take significant time.
    Adjust sample rate and max frames to control processing time.

    ### Instructions:
    1. Upload your Insta360 video
    2. Set sample rate (higher = faster but fewer frames)
    3. Set max frames to process (fewer = faster)
    4. Click "Process Video"
    5. Download the ZIP file with all outputs
    """)

    with gr.Row():
        with gr.Column():
            # Left column: upload widget plus processing-cost controls.
            video_input = gr.Video(label="Upload Insta360 Video")

            # Frame-skipping control: trades output density for speed.
            sample_rate = gr.Slider(
                minimum=1,
                maximum=120,
                value=30,
                step=1,
                label="Sample Rate (process every N frames)",
                info="Higher values = faster processing but fewer frames"
            )

            # Hard cap on processed frames, independent of video length.
            max_frames = gr.Slider(
                minimum=10,
                maximum=500,
                value=100,
                step=10,
                label="Maximum Frames to Process",
                info="Limit total frames for faster processing"
            )

            process_btn = gr.Button("🚀 Process Video", variant="primary")

        with gr.Column():
            # Right column: textual status, downloadable ZIP, depth previews.
            status_output = gr.Textbox(label="Status", lines=2)
            download_output = gr.File(label="Download Results (ZIP)")

            gallery_output = gr.Gallery(
                label="Sample Depth Maps (first 9 frames)",
                columns=3,
                rows=3,
                height="auto"
            )

    # Wire the button to the processing callback; outputs map 1:1 onto the
    # (gallery, zip, status) tuple returned by process_video_interface.
    process_btn.click(
        fn=process_video_interface,
        inputs=[video_input, sample_rate, max_frames],
        outputs=[gallery_output, download_output, status_output]
    )

    gr.Markdown("""
    ### Output Contents:
    - **frames/**: Extracted RGB frames
    - **depth_maps/**: Visualized depth maps (JPG)
    - **depth_arrays/**: Raw depth data (NumPy arrays)

    ### Tips for Large Videos:
    - Start with sample_rate=60 and max_frames=50 for testing
    - Gradually increase for full processing
    - Each frame takes ~2-5 seconds to process
    """)
208
+
209
if __name__ == "__main__":
    # Launch the Gradio server when run directly as a script.
    demo.launch()