EdBianchi committed on
Commit
a7bcb92
·
verified ·
1 Parent(s): bbca361

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +346 -0
app.py ADDED
@@ -0,0 +1,346 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import av
3
+ import numpy as np
4
+ from PIL import Image
5
+ import tempfile
6
+ import os
7
+
8
def sample_frame_indices(num_frames, fps, total_frames):
    """
    Pick frame indices spread uniformly over the whole video.

    Used as the simple fallback when segment-based sampling is not applicable.

    Args:
        num_frames (int): How many frame indices are requested
        fps (float): Frames per second; accepted for signature compatibility
            but not used by this implementation
        total_frames (int): Number of frames available in the video

    Returns:
        list: Frame indices in ascending order. When the video has no more
            than num_frames frames, every index 0..total_frames-1 is returned,
            so the result may be shorter than num_frames.
    """
    if total_frames <= num_frames:
        # Not enough frames to subsample: hand back all of them.
        every_frame = range(total_frames)
        return [*every_frame]

    # Evenly spaced positions from the first to the last frame, truncated to ints.
    evenly_spaced = np.linspace(
        start=0, stop=total_frames - 1, num=num_frames, dtype=int
    )
    return evenly_spaced.tolist()
26
+
27
def sample_frame_indices_efficient_segments(num_frames, segment_duration, num_segments, container):
    """
    Sample frame indices from several temporal segments spread across the video.

    The video is covered by ``num_segments`` windows whose start times are
    evenly spaced between 0 and the last position where a full window still
    fits. The requested frames are split as evenly as possible between the
    windows (the first ``num_frames % num_segments`` windows get one extra)
    and sampled uniformly inside each window.

    Args:
        num_frames (int): Total number of frames to sample
        segment_duration (float): Desired duration of each segment in seconds;
            capped at 80% of (video duration / num_segments)
        num_segments (int): Number of segments to sample from
        container (av.container): PyAV container object; only
            ``container.streams.video[0].average_rate`` and ``.frames`` are read

    Returns:
        list: Sorted frame indices. The segment path returns exactly
            num_frames indices (repeating indices if a segment is too short).
            When the stream reports no usable frame rate or frame count, the
            video has fewer frames than requested, num_segments < 1, or the
            effective segment would be shorter than 0.5 s, the result of
            sample_frame_indices() is returned instead, which may contain
            fewer than num_frames indices. Returns [] when num_frames <= 0.
    """
    if num_frames <= 0:
        return []

    # Get video properties
    video_stream = container.streams.video[0]
    total_video_frames = video_stream.frames

    # The stream may report no average rate (None) or a zero rate; treat both
    # as "unknown duration" instead of raising TypeError / ZeroDivisionError.
    average_rate = video_stream.average_rate
    video_fps = float(average_rate) if average_rate else 0.0
    video_duration = total_video_frames / video_fps if video_fps > 0 else 0.0

    # Fall back to uniform sampling if the video is too short, its metadata is
    # unusable, or the segment count cannot be divided by.
    if num_segments < 1 or total_video_frames < num_frames or video_duration <= 0:
        return sample_frame_indices(num_frames, video_fps, total_video_frames)

    # Split num_frames across segments; the first `extra_frames` segments get one more.
    base_frames_per_segment = num_frames // num_segments
    extra_frames = num_frames % num_segments

    # Cap the segment length so that segments leave some buffer between them.
    max_segment_duration = video_duration / num_segments * 0.8
    effective_segment_duration = min(segment_duration, max_segment_duration)

    # Segments shorter than half a second are not worth sampling individually.
    if effective_segment_duration < 0.5:
        return sample_frame_indices(num_frames, video_fps, total_video_frames)

    # Segment start times, evenly distributed without running past the video end.
    if num_segments == 1:
        segment_starts = [0]
    else:
        max_start_time = max(0, video_duration - effective_segment_duration)
        segment_starts = np.linspace(0, max_start_time, num_segments)

    all_indices = []
    for i, start_time in enumerate(segment_starts):
        segment_frames = base_frames_per_segment + (1 if i < extra_frames else 0)
        if segment_frames == 0:
            continue

        # Convert the time window to a half-open frame range [start_frame, end_frame).
        start_frame = int(start_time * video_fps)
        end_frame = min(
            int((start_time + effective_segment_duration) * video_fps),
            total_video_frames,
        )
        if start_frame >= end_frame:
            # Degenerate window: widen it to half a second, still bounded by the video.
            end_frame = min(start_frame + int(0.5 * video_fps), total_video_frames)

        if segment_frames == 1:
            # Single frame: take the middle of the segment.
            frame_idx = start_frame + (end_frame - start_frame) // 2
            segment_indices = [min(frame_idx, total_video_frames - 1)]
        elif end_frame - start_frame <= segment_frames:
            # Segment has too few frames: take them all and repeat to fill the quota.
            available_frames = list(range(start_frame, end_frame))
            while len(available_frames) < segment_frames and available_frames:
                available_frames.extend(
                    available_frames[:segment_frames - len(available_frames)]
                )
            segment_indices = available_frames[:segment_frames]
        else:
            # Multiple frames: distribute evenly within the segment.
            segment_indices = np.linspace(
                start_frame, end_frame - 1, segment_frames, dtype=int
            ).tolist()

        all_indices.extend(segment_indices)
        if len(all_indices) >= num_frames:
            break

    if not all_indices:
        # Nothing could be sampled from any segment: use uniform sampling.
        return sample_frame_indices(num_frames, video_fps, total_video_frames)

    # Each segment yields at most its quota, so the total can only fall short,
    # never exceed num_frames. Pad by cycling through the collected indices.
    collected = len(all_indices)
    shortfall = num_frames - collected
    if shortfall > 0:
        all_indices.extend([all_indices[i % collected] for i in range(shortfall)])

    # Clamp into the valid range and restore temporal order.
    last_valid = total_video_frames - 1
    return sorted(min(max(idx, 0), last_valid) for idx in all_indices)
148
+
149
def extract_frames_at_indices(video_path, frame_indices):
    """
    Extract frames from video at specified indices.

    The video is decoded sequentially from the start and decoding stops as
    soon as every requested index has been seen.

    Args:
        video_path (str): Path to video file
        frame_indices (list): List of frame indices to extract (0-based decode
            order); may contain duplicates

    Returns:
        list: List of PIL Images, one per entry of frame_indices and in the
            same order, so the result lines up with frame_indices. A repeated
            index yields the same image object again. Indices that are never
            reached while decoding are skipped.
    """
    pending = set(frame_indices)
    if not pending:
        # Nothing requested: avoid opening and decoding the whole video.
        return []

    decoded = {}
    # The context manager closes the container even if decoding raises.
    with av.open(video_path) as container:
        for frame_idx, frame in enumerate(container.decode(video=0)):
            if frame_idx not in pending:
                continue
            decoded[frame_idx] = frame.to_image()
            pending.discard(frame_idx)
            if not pending:
                # All requested frames collected; skip the rest of the video.
                break

    return [decoded[idx] for idx in frame_indices if idx in decoded]
185
+
186
def process_video(video_file, num_frames, segment_duration, num_segments):
    """
    Main processing function for Gradio interface.

    Reads the video metadata, chooses frame indices with
    sample_frame_indices_efficient_segments(), extracts those frames and
    builds a Markdown summary.

    Args:
        video_file: Uploaded video file (passed to av.open); None when nothing
            was uploaded
        num_frames (int): Number of frames to sample
        segment_duration (float): Duration of each segment in seconds
        num_segments (int): Number of segments

    Returns:
        tuple: (frames list, info string, indices list). The frames list holds
            (PIL image, caption) pairs, one per distinct sampled index. On a
            missing upload or any processing error the lists are empty and the
            info string carries the message.
    """
    if video_file is None:
        return [], "Please upload a video file", []

    try:
        # Slider values are used as counts below; normalise them to int in
        # case the UI delivers whole numbers as floats.
        num_frames = int(num_frames)
        num_segments = int(num_segments)

        # The context manager closes the container even if sampling raises.
        with av.open(video_file) as container:
            video_stream = container.streams.video[0]

            # Get video info; a missing or zero average rate is reported as 0 fps.
            average_rate = video_stream.average_rate
            video_fps = float(average_rate) if average_rate else 0.0
            total_frames = video_stream.frames

            # Get frame indices using the sampling function
            frame_indices = sample_frame_indices_efficient_segments(
                num_frames, segment_duration, num_segments, container
            )

        video_duration = total_frames / video_fps if video_fps > 0 else 0

        # The sampler may repeat indices to reach num_frames. Extract each
        # distinct frame once, in temporal order, so every image is paired
        # with the index it was actually decoded from.
        unique_indices = sorted(set(frame_indices))
        frames = extract_frames_at_indices(video_file, unique_indices)

        # Create info string
        info = f"""
        **Video Information:**
        - Total frames: {total_frames}
        - FPS: {video_fps:.2f}
        - Duration: {video_duration:.2f} seconds

        **Sampling Configuration:**
        - Frames to sample: {num_frames}
        - Number of segments: {num_segments}
        - Segment duration: {segment_duration:.2f} seconds

        **Results:**
        - Sampled frame indices: {frame_indices}
        - Number of frames extracted: {len(frames)}
        """

        # Pair each image (copied so the gallery owns its own object) with a caption.
        labeled_frames = [
            (frame.copy(), f"Frame {idx} (Sample {i+1}/{num_frames})")
            for i, (frame, idx) in enumerate(zip(frames, unique_indices))
        ]

        return labeled_frames, info, frame_indices

    except Exception as e:
        # UI boundary: report the failure in the info panel instead of raising.
        return [], f"Error processing video: {str(e)}", []
251
+
252
# Create Gradio interface
# NOTE(review): the nesting below is reconstructed; the placement of the
# Examples, click wiring and help text directly under gr.Blocks (rather than
# inside the Row) is assumed - confirm against the deployed layout.
with gr.Blocks(title="Video Frame Sampling Tool") as demo:
    # Page header and short description of the tool.
    gr.Markdown("""
    # Video Frame Sampling Tool

    This tool uses an enhanced frame sampling strategy that distributes frames across temporal segments
    of the video for better temporal coverage and content diversity.

    Upload a video and configure the sampling parameters to extract representative frames.
    """)

    with gr.Row():
        with gr.Column(scale=1):
            # Input components
            video_input = gr.Video(label="Upload Video")

            gr.Markdown("### Sampling Parameters")
            # Forwarded to process_video as num_frames.
            num_frames = gr.Slider(
                minimum=1,
                maximum=50,
                value=8,
                step=1,
                label="Number of Frames to Sample",
                info="Total number of frames to extract from the video"
            )

            # Forwarded to process_video as num_segments; minimum=1 keeps the
            # value positive for the per-segment division in the sampler.
            num_segments = gr.Slider(
                minimum=1,
                maximum=20,
                value=4,
                step=1,
                label="Number of Segments",
                info="Number of temporal segments to divide the video into"
            )

            # Forwarded to process_video as segment_duration (seconds).
            segment_duration = gr.Slider(
                minimum=0.5,
                maximum=10.0,
                value=2.0,
                step=0.5,
                label="Segment Duration (seconds)",
                info="Duration of each segment for sampling"
            )

            process_btn = gr.Button("Process Video", variant="primary")

        with gr.Column(scale=2):
            # Output components
            info_output = gr.Markdown(label="Processing Information")
            gallery_output = gr.Gallery(
                label="Sampled Frames",
                show_label=True,
                elem_id="gallery",
                columns=4,
                rows=3,
                height="auto"
            )
            # Receives the raw index list from process_video; created hidden (visible=False).
            indices_output = gr.JSON(label="Frame Indices", visible=False)

    # Examples
    # Each row is [num_frames, num_segments, segment_duration], matching `inputs` below.
    gr.Examples(
        examples=[
            [8, 4, 2.0],
            [16, 8, 1.5],
            [4, 2, 3.0],
            [24, 6, 2.5],
        ],
        inputs=[num_frames, num_segments, segment_duration],
        label="Example Configurations"
    )

    # Connect the processing function
    # Input order follows process_video's parameters (video, num_frames,
    # segment_duration, num_segments); output order follows its returned
    # (frames, info, indices) tuple.
    process_btn.click(
        fn=process_video,
        inputs=[video_input, num_frames, segment_duration, num_segments],
        outputs=[gallery_output, info_output, indices_output]
    )

    # Static help text shown below the controls.
    gr.Markdown("""
    ### How it works:
    1. The video is divided into the specified number of segments
    2. Each segment has a maximum duration as specified
    3. Frames are sampled evenly from within each segment
    4. The algorithm ensures exactly the requested number of frames are returned
    5. If the video is too short, it falls back to uniform sampling

    ### Tips:
    - Use more segments for longer videos to get better temporal coverage
    - Adjust segment duration based on the pace of content in your video
    - For short videos, use fewer segments with shorter durations
    """)

# Launch the app
# Only start the server when this file is run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()