Spaces:
Runtime error
Runtime error
Miquel Farre
committed on
Commit
·
c7fdc4d
1
Parent(s):
fc0912b
- app.py +11 -142
- video_highlight_detector.py +1 -6
app.py
CHANGED
|
@@ -60,15 +60,15 @@ def create_ui(examples_path: str):
|
|
| 60 |
label=f"Highlights ({format_duration(example['highlights']['duration_seconds'])})",
|
| 61 |
interactive=False
|
| 62 |
)
|
| 63 |
-
with gr.Accordion("
|
| 64 |
-
gr.Markdown(f"
|
| 65 |
-
gr.Markdown(f"
|
| 66 |
|
| 67 |
gr.Markdown("## Try It Yourself!")
|
| 68 |
with gr.Row():
|
| 69 |
with gr.Column(scale=1):
|
| 70 |
input_video = gr.Video(
|
| 71 |
-
label="Upload your video (max
|
| 72 |
interactive=True
|
| 73 |
)
|
| 74 |
process_btn = gr.Button("Process Video", variant="primary")
|
|
@@ -83,7 +83,7 @@ def create_ui(examples_path: str):
|
|
| 83 |
status = gr.Markdown()
|
| 84 |
|
| 85 |
analysis_accordion = gr.Accordion(
|
| 86 |
-
"
|
| 87 |
open=True,
|
| 88 |
visible=False
|
| 89 |
)
|
|
@@ -106,9 +106,9 @@ def create_ui(examples_path: str):
|
|
| 106 |
|
| 107 |
try:
|
| 108 |
duration = get_video_duration_seconds(video)
|
| 109 |
-
if duration >
|
| 110 |
yield [
|
| 111 |
-
"Video must be shorter than
|
| 112 |
"",
|
| 113 |
"",
|
| 114 |
gr.update(visible=False),
|
|
@@ -122,14 +122,14 @@ def create_ui(examples_path: str):
|
|
| 122 |
"",
|
| 123 |
"",
|
| 124 |
gr.update(visible=False),
|
| 125 |
-
gr.update(visible=
|
| 126 |
]
|
| 127 |
|
| 128 |
model, processor = load_model()
|
| 129 |
detector = BatchedVideoHighlightDetector(
|
| 130 |
model,
|
| 131 |
processor,
|
| 132 |
-
batch_size=
|
| 133 |
)
|
| 134 |
|
| 135 |
yield [
|
|
@@ -141,7 +141,7 @@ def create_ui(examples_path: str):
|
|
| 141 |
]
|
| 142 |
|
| 143 |
video_desc = detector.analyze_video_content(video)
|
| 144 |
-
formatted_desc = f"
|
| 145 |
|
| 146 |
yield [
|
| 147 |
"Determining highlight types...",
|
|
@@ -152,7 +152,7 @@ def create_ui(examples_path: str):
|
|
| 152 |
]
|
| 153 |
|
| 154 |
highlights = detector.determine_highlights(video_desc)
|
| 155 |
-
formatted_highlights = f"
|
| 156 |
|
| 157 |
# Get all segments
|
| 158 |
segments = get_fixed_30s_segments(video)
|
|
@@ -223,137 +223,6 @@ def create_ui(examples_path: str):
|
|
| 223 |
)
|
| 224 |
|
| 225 |
return app
|
| 226 |
-
# gr.Markdown("## Try It Yourself!")
|
| 227 |
-
# with gr.Row():
|
| 228 |
-
# with gr.Column(scale=1):
|
| 229 |
-
# input_video = gr.Video(
|
| 230 |
-
# label="Upload your video (max 20 minutes)",
|
| 231 |
-
# interactive=True
|
| 232 |
-
# )
|
| 233 |
-
# process_btn = gr.Button("Process Video", variant="primary")
|
| 234 |
-
|
| 235 |
-
# with gr.Column(scale=1):
|
| 236 |
-
# output_video = gr.Video(
|
| 237 |
-
# label="Highlight Video",
|
| 238 |
-
# visible=False,
|
| 239 |
-
# interactive=False,
|
| 240 |
-
# )
|
| 241 |
-
|
| 242 |
-
# status = gr.Markdown()
|
| 243 |
-
|
| 244 |
-
# analysis_accordion = gr.Accordion(
|
| 245 |
-
# "Model chain of thought details",
|
| 246 |
-
# open=True,
|
| 247 |
-
# visible=False
|
| 248 |
-
# )
|
| 249 |
-
|
| 250 |
-
# with analysis_accordion:
|
| 251 |
-
# video_description = gr.Markdown("", elem_id="video_desc")
|
| 252 |
-
# highlight_types = gr.Markdown("", elem_id="highlight_types")
|
| 253 |
-
|
| 254 |
-
# @spaces.GPU
|
| 255 |
-
# def on_process(video):
|
| 256 |
-
# if not video:
|
| 257 |
-
# yield [
|
| 258 |
-
# "Please upload a video", # status
|
| 259 |
-
# "", # video_description
|
| 260 |
-
# "", # highlight_types
|
| 261 |
-
# gr.update(visible=False), # output_video
|
| 262 |
-
# gr.update(visible=False) # analysis_accordion
|
| 263 |
-
# ]
|
| 264 |
-
# return
|
| 265 |
-
|
| 266 |
-
# try:
|
| 267 |
-
# duration = get_video_duration_seconds(video)
|
| 268 |
-
# if duration > 1200: # 20 minutes
|
| 269 |
-
# yield [
|
| 270 |
-
# "Video must be shorter than 20 minutes",
|
| 271 |
-
# "",
|
| 272 |
-
# "",
|
| 273 |
-
# gr.update(visible=False),
|
| 274 |
-
# gr.update(visible=False)
|
| 275 |
-
# ]
|
| 276 |
-
# return
|
| 277 |
-
|
| 278 |
-
# # Make accordion visible as soon as processing starts
|
| 279 |
-
# yield [
|
| 280 |
-
# "Loading model...",
|
| 281 |
-
# "",
|
| 282 |
-
# "",
|
| 283 |
-
# gr.update(visible=False),
|
| 284 |
-
# gr.update(visible=True)
|
| 285 |
-
# ]
|
| 286 |
-
|
| 287 |
-
# model, processor = load_model()
|
| 288 |
-
# detector = BatchedVideoHighlightDetector(model, processor, batch_size=8)
|
| 289 |
-
|
| 290 |
-
# yield [
|
| 291 |
-
# "Analyzing video content...",
|
| 292 |
-
# "",
|
| 293 |
-
# "",
|
| 294 |
-
# gr.update(visible=False),
|
| 295 |
-
# gr.update(visible=True)
|
| 296 |
-
# ]
|
| 297 |
-
|
| 298 |
-
# video_desc = detector.analyze_video_content(video)
|
| 299 |
-
# formatted_desc = f"#Summary: {video_desc[:500] + '...' if len(video_desc) > 500 else video_desc}"
|
| 300 |
-
|
| 301 |
-
# # Update description as soon as it's available
|
| 302 |
-
# yield [
|
| 303 |
-
# "Determining highlight types...",
|
| 304 |
-
# formatted_desc,
|
| 305 |
-
# "",
|
| 306 |
-
# gr.update(visible=False),
|
| 307 |
-
# gr.update(visible=True)
|
| 308 |
-
# ]
|
| 309 |
-
|
| 310 |
-
# highlights = detector.determine_highlights(video_desc)
|
| 311 |
-
# formatted_highlights = f"#Highlights to search for: {highlights[:500] + '...' if len(highlights) > 500 else highlights}"
|
| 312 |
-
|
| 313 |
-
# # Update highlights as soon as they're available
|
| 314 |
-
# yield [
|
| 315 |
-
# "Detecting and extracting highlights...",
|
| 316 |
-
# formatted_desc,
|
| 317 |
-
# formatted_highlights,
|
| 318 |
-
# gr.update(visible=False),
|
| 319 |
-
# gr.update(visible=True)
|
| 320 |
-
# ]
|
| 321 |
-
|
| 322 |
-
# with tempfile.NamedTemporaryFile(suffix='.mp4', delete=False) as tmp_file:
|
| 323 |
-
# temp_output = tmp_file.name
|
| 324 |
-
# detector.create_highlight_video(video, temp_output)
|
| 325 |
-
|
| 326 |
-
# yield [
|
| 327 |
-
# "Processing complete!",
|
| 328 |
-
# formatted_desc,
|
| 329 |
-
# formatted_highlights,
|
| 330 |
-
# gr.update(value=temp_output, visible=True),
|
| 331 |
-
# gr.update(visible=True)
|
| 332 |
-
# ]
|
| 333 |
-
|
| 334 |
-
# except Exception as e:
|
| 335 |
-
# yield [
|
| 336 |
-
# f"Error processing video: {str(e)}",
|
| 337 |
-
# "",
|
| 338 |
-
# "",
|
| 339 |
-
# gr.update(visible=False),
|
| 340 |
-
# gr.update(visible=False)
|
| 341 |
-
# ]
|
| 342 |
-
|
| 343 |
-
# process_btn.click(
|
| 344 |
-
# on_process,
|
| 345 |
-
# inputs=[input_video],
|
| 346 |
-
# outputs=[
|
| 347 |
-
# status,
|
| 348 |
-
# video_description,
|
| 349 |
-
# highlight_types,
|
| 350 |
-
# output_video,
|
| 351 |
-
# analysis_accordion
|
| 352 |
-
# ],
|
| 353 |
-
# queue=True,
|
| 354 |
-
# )
|
| 355 |
-
|
| 356 |
-
# return app
|
| 357 |
|
| 358 |
if __name__ == "__main__":
|
| 359 |
# Initialize CUDA
|
|
|
|
| 60 |
label=f"Highlights ({format_duration(example['highlights']['duration_seconds'])})",
|
| 61 |
interactive=False
|
| 62 |
)
|
| 63 |
+
with gr.Accordion("Chain of thought details", open=False):
|
| 64 |
+
gr.Markdown(f"### Summary: {example['analysis']['video_description']}")
|
| 65 |
+
gr.Markdown(f"### Highlights to search for: {example['analysis']['highlight_types']}")
|
| 66 |
|
| 67 |
gr.Markdown("## Try It Yourself!")
|
| 68 |
with gr.Row():
|
| 69 |
with gr.Column(scale=1):
|
| 70 |
input_video = gr.Video(
|
| 71 |
+
label="Upload your video (max 30 minutes)",
|
| 72 |
interactive=True
|
| 73 |
)
|
| 74 |
process_btn = gr.Button("Process Video", variant="primary")
|
|
|
|
| 83 |
status = gr.Markdown()
|
| 84 |
|
| 85 |
analysis_accordion = gr.Accordion(
|
| 86 |
+
"Chain of thought details",
|
| 87 |
open=True,
|
| 88 |
visible=False
|
| 89 |
)
|
|
|
|
| 106 |
|
| 107 |
try:
|
| 108 |
duration = get_video_duration_seconds(video)
|
| 109 |
+
if duration > 1800: # 30 minutes
|
| 110 |
yield [
|
| 111 |
+
"Video must be shorter than 30 minutes",
|
| 112 |
"",
|
| 113 |
"",
|
| 114 |
gr.update(visible=False),
|
|
|
|
| 122 |
"",
|
| 123 |
"",
|
| 124 |
gr.update(visible=False),
|
| 125 |
+
gr.update(visible=False)
|
| 126 |
]
|
| 127 |
|
| 128 |
model, processor = load_model()
|
| 129 |
detector = BatchedVideoHighlightDetector(
|
| 130 |
model,
|
| 131 |
processor,
|
| 132 |
+
batch_size=12
|
| 133 |
)
|
| 134 |
|
| 135 |
yield [
|
|
|
|
| 141 |
]
|
| 142 |
|
| 143 |
video_desc = detector.analyze_video_content(video)
|
| 144 |
+
formatted_desc = f"### Summary: {video_desc[:500] + '...' if len(video_desc) > 500 else video_desc}"
|
| 145 |
|
| 146 |
yield [
|
| 147 |
"Determining highlight types...",
|
|
|
|
| 152 |
]
|
| 153 |
|
| 154 |
highlights = detector.determine_highlights(video_desc)
|
| 155 |
+
formatted_highlights = f"### Highlights to search for: {highlights[:500] + '...' if len(highlights) > 500 else highlights}"
|
| 156 |
|
| 157 |
# Get all segments
|
| 158 |
segments = get_fixed_30s_segments(video)
|
|
|
|
| 223 |
)
|
| 224 |
|
| 225 |
return app
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 226 |
|
| 227 |
if __name__ == "__main__":
|
| 228 |
# Initialize CUDA
|
video_highlight_detector.py
CHANGED
|
@@ -318,7 +318,6 @@ class BatchedVideoHighlightDetector:
|
|
| 318 |
batch_size=8,
|
| 319 |
max_frames_per_segment=32,
|
| 320 |
target_fps=1.0,
|
| 321 |
-
progress_callback=None
|
| 322 |
):
|
| 323 |
self.model = model
|
| 324 |
self.processor = processor
|
|
@@ -326,7 +325,6 @@ class BatchedVideoHighlightDetector:
|
|
| 326 |
self.batch_size = batch_size
|
| 327 |
self.max_frames_per_segment = max_frames_per_segment
|
| 328 |
self.target_fps = target_fps
|
| 329 |
-
self.progress_callback = progress_callback
|
| 330 |
|
| 331 |
def _extract_frames_batch(
|
| 332 |
self,
|
|
@@ -498,10 +496,7 @@ class BatchedVideoHighlightDetector:
|
|
| 498 |
for output in outputs
|
| 499 |
]
|
| 500 |
|
| 501 |
-
|
| 502 |
-
if self.progress_callback:
|
| 503 |
-
self.progress_callback(segments_processed + len(segments), total_segments)
|
| 504 |
-
|
| 505 |
# Check for "yes" in responses
|
| 506 |
return ["yes" in response for response in responses]
|
| 507 |
|
|
|
|
| 318 |
batch_size=8,
|
| 319 |
max_frames_per_segment=32,
|
| 320 |
target_fps=1.0,
|
|
|
|
| 321 |
):
|
| 322 |
self.model = model
|
| 323 |
self.processor = processor
|
|
|
|
| 325 |
self.batch_size = batch_size
|
| 326 |
self.max_frames_per_segment = max_frames_per_segment
|
| 327 |
self.target_fps = target_fps
|
|
|
|
| 328 |
|
| 329 |
def _extract_frames_batch(
|
| 330 |
self,
|
|
|
|
| 496 |
for output in outputs
|
| 497 |
]
|
| 498 |
|
| 499 |
+
|
|
|
|
|
|
|
|
|
|
| 500 |
# Check for "yes" in responses
|
| 501 |
return ["yes" in response for response in responses]
|
| 502 |
|