Chaitanya-aitf committed on
Commit
0ef3eb8
·
verified ·
1 Parent(s): a1b611f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +688 -98
app.py CHANGED
@@ -7,17 +7,22 @@ Features:
7
  - Domain-optimized presets
8
  - Person-specific filtering (optional)
9
  - Scene-aware clip cutting
 
10
  """
11
 
12
  import os
13
  import sys
14
  import tempfile
15
  import shutil
 
 
16
  from pathlib import Path
17
  import time
18
  import traceback
 
19
 
20
  import gradio as gr
 
21
 
22
  # Add project root to path
23
  sys.path.insert(0, str(Path(__file__).parent))
@@ -33,13 +38,18 @@ except Exception:
33
  logger = logging.getLogger("app")
34
 
35
 
36
- def build_metrics_output(result, domain: str) -> str:
 
 
 
 
37
  """
38
  Build formatted metrics output for testing and evaluation.
39
 
40
  Args:
41
  result: PipelineResult object
42
  domain: Content domain used for processing
 
43
 
44
  Returns:
45
  Formatted string with all metrics
@@ -58,6 +68,7 @@ def build_metrics_output(result, domain: str) -> str:
58
  lines.append(f"scenes_detected: {len(result.scenes)}")
59
  lines.append(f"audio_segments_analyzed: {len(result.audio_features)}")
60
  lines.append(f"domain: {domain}")
 
61
 
62
  # Count hooks from scores (estimate based on high-scoring segments)
63
  hooks_detected = sum(1 for s in result.scores if s.combined_score > 0.7) if result.scores else 0
@@ -119,6 +130,10 @@ def build_metrics_output(result, domain: str) -> str:
119
  return "\n".join(lines)
120
 
121
 
 
 
 
 
122
  def process_video(
123
  video_file,
124
  domain,
@@ -129,7 +144,7 @@ def process_video(
129
  progress=gr.Progress()
130
  ):
131
  """
132
- Main video processing function.
133
 
134
  Args:
135
  video_file: Uploaded video file path
@@ -239,7 +254,7 @@ def process_video(
239
  status = f"Successfully extracted {len(clip_paths)} highlight clips!\nProcessing time: {result.processing_time:.1f}s"
240
 
241
  # Build metrics output
242
- metrics_output = build_metrics_output(result, domain_value)
243
 
244
  pipeline.cleanup()
245
  progress(1.0, desc="Done!")
@@ -263,7 +278,417 @@ def process_video(
263
  return error_msg, None, None, None, "\n".join(log_messages), ""
264
 
265
 
266
- # Build Gradio interface
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
267
  with gr.Blocks(
268
  title="ShortSmith v2",
269
  theme=gr.themes.Soft(),
@@ -274,129 +699,294 @@ with gr.Blocks(
274
  ) as demo:
275
 
276
  gr.Markdown("""
277
- # 🎬 ShortSmith v2
278
  ### AI-Powered Video Highlight Extractor
279
 
280
  Upload a video and automatically extract the most engaging highlight clips using AI analysis.
281
  """)
282
 
283
- with gr.Row():
284
- # Left column - Inputs
285
- with gr.Column(scale=1):
286
- gr.Markdown("### 📤 Input")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
287
 
288
- video_input = gr.Video(
289
- label="Upload Video",
290
- sources=["upload"],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
291
  )
292
 
293
- with gr.Accordion("⚙️ Settings", open=True):
294
- domain_dropdown = gr.Dropdown(
295
- choices=["Sports", "Vlogs", "Music Videos", "Podcasts", "Gaming", "General"],
296
- value="General",
297
- label="Content Domain",
298
- info="Select the type of content for optimized scoring"
299
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
300
 
301
- with gr.Row():
302
- num_clips_slider = gr.Slider(
303
  minimum=1,
304
  maximum=3,
305
  value=3,
306
  step=1,
307
- label="Number of Clips",
308
- info="How many highlight clips to extract"
309
  )
310
- duration_slider = gr.Slider(
311
- minimum=5,
312
- maximum=30,
313
- value=15,
314
- step=1,
315
- label="Clip Duration (seconds)",
316
- info="Target duration for each clip"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
317
  )
318
 
319
- with gr.Accordion("👤 Person Filtering (Optional)", open=False):
320
- reference_image = gr.Image(
321
- label="Reference Image",
322
- type="filepath",
323
- sources=["upload"],
324
- )
325
- gr.Markdown("*Upload a photo of a person to prioritize clips featuring them.*")
326
 
327
- with gr.Accordion("📝 Custom Instructions (Optional)", open=False):
328
- custom_prompt = gr.Textbox(
329
- label="Additional Instructions",
330
- placeholder="E.g., 'Focus on crowd reactions' or 'Prioritize action scenes'",
331
- lines=2,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
332
  )
333
 
334
- process_btn = gr.Button(
335
- "🚀 Extract Highlights",
336
- variant="primary",
337
- size="lg"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
338
  )
339
 
340
- # Right column - Outputs
341
- with gr.Column(scale=1):
342
- gr.Markdown("### 📥 Output")
343
-
344
- status_output = gr.Textbox(
345
- label="Status",
346
- lines=2,
347
- interactive=False
348
  )
349
 
350
- gr.Markdown("#### Extracted Clips")
351
- clip1_output = gr.Video(label="Clip 1", elem_classes=["output-video"])
352
- clip2_output = gr.Video(label="Clip 2", elem_classes=["output-video"])
353
- clip3_output = gr.Video(label="Clip 3", elem_classes=["output-video"])
354
-
355
- with gr.Accordion("📋 Processing Log", open=True):
356
- log_output = gr.Textbox(
357
- label="Log",
358
- lines=10,
359
- interactive=False,
360
- show_copy_button=True
361
- )
362
-
363
- with gr.Accordion("📊 Automated Metrics (System-Generated)", open=True):
364
- metrics_output = gr.Textbox(
365
- label="Metrics for Testing",
366
- lines=20,
367
- interactive=False,
368
- show_copy_button=True,
369
- info="Copy these metrics for evaluation spreadsheets"
370
- )
371
-
372
  gr.Markdown("""
373
  ---
374
  **ShortSmith v2** | Powered by Qwen2-VL, InsightFace, and Librosa |
375
  [GitHub](https://github.com) | Built with Gradio
376
  """)
377
 
378
- # Connect the button to the processing function
379
- process_btn.click(
380
- fn=process_video,
381
- inputs=[
382
- video_input,
383
- domain_dropdown,
384
- num_clips_slider,
385
- duration_slider,
386
- reference_image,
387
- custom_prompt
388
- ],
389
- outputs=[
390
- status_output,
391
- clip1_output,
392
- clip2_output,
393
- clip3_output,
394
- log_output,
395
- metrics_output
396
- ],
397
- show_progress="full"
398
- )
399
-
400
  # Launch the app
401
  if __name__ == "__main__":
402
  demo.queue()
 
7
  - Domain-optimized presets
8
  - Person-specific filtering (optional)
9
  - Scene-aware clip cutting
10
+ - Batch testing with parameter variations
11
  """
12
 
13
  import os
14
  import sys
15
  import tempfile
16
  import shutil
17
+ import json
18
+ import zipfile
19
  from pathlib import Path
20
  import time
21
  import traceback
22
+ from typing import List, Dict, Any, Optional
23
 
24
  import gradio as gr
25
+ import pandas as pd
26
 
27
  # Add project root to path
28
  sys.path.insert(0, str(Path(__file__).parent))
 
38
  logger = logging.getLogger("app")
39
 
40
 
41
+ # =============================================================================
42
+ # Shared Utilities
43
+ # =============================================================================
44
+
45
+ def build_metrics_output(result, domain: str, custom_prompt: Optional[str] = None) -> str:
46
  """
47
  Build formatted metrics output for testing and evaluation.
48
 
49
  Args:
50
  result: PipelineResult object
51
  domain: Content domain used for processing
52
+ custom_prompt: Custom prompt used (if any)
53
 
54
  Returns:
55
  Formatted string with all metrics
 
68
  lines.append(f"scenes_detected: {len(result.scenes)}")
69
  lines.append(f"audio_segments_analyzed: {len(result.audio_features)}")
70
  lines.append(f"domain: {domain}")
71
+ lines.append(f"custom_prompt: {custom_prompt if custom_prompt else 'none'}")
72
 
73
  # Count hooks from scores (estimate based on high-scoring segments)
74
  hooks_detected = sum(1 for s in result.scores if s.combined_score > 0.7) if result.scores else 0
 
130
  return "\n".join(lines)
131
 
132
 
133
+ # =============================================================================
134
+ # Single Video Processing
135
+ # =============================================================================
136
+
137
  def process_video(
138
  video_file,
139
  domain,
 
144
  progress=gr.Progress()
145
  ):
146
  """
147
+ Main video processing function for single video mode.
148
 
149
  Args:
150
  video_file: Uploaded video file path
 
254
  status = f"Successfully extracted {len(clip_paths)} highlight clips!\nProcessing time: {result.processing_time:.1f}s"
255
 
256
  # Build metrics output
257
+ metrics_output = build_metrics_output(result, domain_value, custom_prompt.strip() if custom_prompt else None)
258
 
259
  pipeline.cleanup()
260
  progress(1.0, desc="Done!")
 
278
  return error_msg, None, None, None, "\n".join(log_messages), ""
279
 
280
 
281
+ # =============================================================================
282
+ # Batch Testing Functions
283
+ # =============================================================================
284
+
285
def generate_test_queue(
    videos: List[str],
    domains: List[str],
    durations: List[int],
    num_clips: int,
    ref_image: Optional[str],
    prompts: List[str],
    include_no_prompt: bool
) -> List[Dict[str, Any]]:
    """Build the full cartesian-product list of test configurations.

    Each entry describes one pipeline run: one video crossed with one
    domain, one clip duration, and one custom prompt (None means the
    no-prompt baseline). Test IDs are assigned sequentially from 1.
    """
    # Assemble the prompt axis: optional None baseline plus non-blank prompts.
    prompt_axis: List[Optional[str]] = [None] if include_no_prompt else []
    prompt_axis += [text.strip() for text in prompts if text and text.strip()]
    if not prompt_axis:
        prompt_axis = [None]  # always run at least the no-prompt case

    # UI display name -> internal pipeline domain key.
    display_to_value = {
        "Sports": "sports",
        "Vlogs": "vlogs",
        "Music Videos": "music",
        "Podcasts": "podcasts",
        "Gaming": "gaming",
        "General": "general",
    }

    queue: List[Dict[str, Any]] = []
    next_id = 1
    for video_path in videos:
        name = Path(video_path).name if video_path else "unknown"
        for display_domain in domains:
            internal_domain = display_to_value.get(display_domain, "general")
            for clip_len in durations:
                for prompt_text in prompt_axis:
                    queue.append({
                        "test_id": next_id,
                        "video_path": video_path,
                        "video_name": name,
                        "domain": display_domain,
                        "domain_value": internal_domain,
                        "clip_duration": clip_len,
                        "num_clips": num_clips,
                        "reference_image": ref_image,
                        "custom_prompt": prompt_text,
                    })
                    next_id += 1
    return queue
336
+
337
+
338
def run_single_batch_test(config: Dict[str, Any], output_base_dir: Path) -> Dict[str, Any]:
    """
    Run a single test from the batch queue.

    Args:
        config: One entry produced by generate_test_queue (video path/name,
            domain, clip duration, num clips, reference image, prompt).
        output_base_dir: Base directory under which this test's clips are
            copied into a per-test subfolder.

    Returns:
        A dict with the test's parameters, status ("success"/"failed"),
        timing/metric counters, per-clip score data, and copied clip paths.
        Never raises: failures are reported via result["error"].
    """
    # Imported lazily so the module can load without the heavy pipeline
    # dependencies; they are only needed when a batch actually runs.
    from utils.helpers import validate_video_file
    from pipeline.orchestrator import PipelineOrchestrator

    test_id = config["test_id"]
    video_path = config["video_path"]
    video_name = config["video_name"]
    domain_value = config["domain_value"]
    duration = config["clip_duration"]
    num_clips = config["num_clips"]
    ref_image = config["reference_image"]
    custom_prompt = config["custom_prompt"]

    # Create unique output folder for this test.
    # NOTE(review): str hash() is salted per process (PYTHONHASHSEED), so the
    # prompt suffix differs between app restarts; it is stable within one run.
    prompt_suffix = "no_prompt" if not custom_prompt else f"prompt_{hash(custom_prompt) % 1000}"
    test_folder = f"{Path(video_name).stem}_{domain_value}_{duration}s_{prompt_suffix}"
    output_dir = output_base_dir / test_folder
    output_dir.mkdir(parents=True, exist_ok=True)

    # Pessimistic defaults; overwritten on success.
    result_data = {
        "test_id": test_id,
        "video_name": video_name,
        "domain": domain_value,
        "clip_duration": duration,
        "custom_prompt": custom_prompt if custom_prompt else "none",
        "num_clips": num_clips,
        "status": "failed",
        "error": None,
        "processing_time": 0,
        "frames_analyzed": 0,
        "scenes_detected": 0,
        "hooks_detected": 0,
        "clips": [],
        "clip_paths": [],
    }

    try:
        # Validate video before spending time on the pipeline
        validation = validate_video_file(video_path)
        if not validation.is_valid:
            result_data["error"] = validation.error_message
            return result_data

        # Initialize and run pipeline
        pipeline = PipelineOrchestrator()
        try:
            result = pipeline.process(
                video_path=video_path,
                num_clips=num_clips,
                clip_duration=float(duration),
                domain=domain_value,
                reference_image=ref_image,
                custom_prompt=custom_prompt,
            )

            if result.success:
                result_data["status"] = "success"
                result_data["processing_time"] = round(result.processing_time, 2)
                result_data["frames_analyzed"] = len(result.visual_features)
                result_data["scenes_detected"] = len(result.scenes)
                result_data["hooks_detected"] = sum(1 for s in result.scores if s.combined_score > 0.7) if result.scores else 0

                # Copy clips out of the pipeline workspace and collect per-clip data
                for i, clip in enumerate(result.clips):
                    if clip.clip_path.exists():
                        clip_output = output_dir / f"clip_{i+1}.mp4"
                        shutil.copy2(clip.clip_path, clip_output)
                        result_data["clip_paths"].append(str(clip_output))

                        # Find hook type for this clip: first score segment whose
                        # start is within 1s of the clip's start, classified by
                        # which component score dominates.
                        hook_type = "none"
                        hook_confidence = 0.0
                        for score in result.scores:
                            if abs(score.start_time - clip.start_time) < 1.0:
                                if score.combined_score > 0.7:
                                    hook_confidence = score.combined_score
                                    if score.audio_score > score.visual_score and score.audio_score > score.motion_score:
                                        hook_type = "audio_peak"
                                    elif score.motion_score > score.visual_score:
                                        hook_type = "motion_spike"
                                    else:
                                        hook_type = "visual_highlight"
                                break

                        result_data["clips"].append({
                            "clip_id": i + 1,
                            "start_time": round(clip.start_time, 2),
                            "end_time": round(clip.end_time, 2),
                            "duration": round(clip.duration, 2),
                            "hype_score": round(clip.hype_score, 4),
                            "visual_score": round(clip.visual_score, 4),
                            "audio_score": round(clip.audio_score, 4),
                            "motion_score": round(clip.motion_score, 4),
                            "hook_type": hook_type,
                            "hook_confidence": round(hook_confidence, 4),
                        })
            else:
                result_data["error"] = result.error_message
        finally:
            # Fix: cleanup previously ran only on the no-exception path,
            # leaking pipeline resources when process() raised mid-run.
            pipeline.cleanup()

    except Exception as e:
        result_data["error"] = str(e)
        logger.exception(f"Batch test {test_id} failed")

    return result_data
444
+
445
+
446
def results_to_dataframe(results: List[Dict[str, Any]]) -> pd.DataFrame:
    """Flatten batch results into a pandas DataFrame for table display."""
    records = []
    for entry in results:
        prompt = entry["custom_prompt"]
        # Truncate long prompts so the table column stays readable.
        display_prompt = (prompt[:20] + "...") if len(prompt) > 20 else prompt
        record = {
            "Test ID": entry["test_id"],
            "Video": entry["video_name"],
            "Domain": entry["domain"],
            "Duration": f"{entry['clip_duration']}s",
            "Prompt": display_prompt,
            "Status": entry["status"],
            "Time (s)": entry["processing_time"],
            "Frames": entry["frames_analyzed"],
            "Hooks": entry["hooks_detected"],
        }
        # Append up to three per-clip hype scores alongside the summary columns.
        for idx, clip in enumerate(entry.get("clips", [])[:3], start=1):
            record[f"Clip {idx} Hype"] = clip.get("hype_score", 0)
        records.append(record)
    return pd.DataFrame(records)
466
+
467
+
468
def results_to_csv(results: List[Dict[str, Any]]) -> str:
    """Serialize batch results (summary plus up to 3 clips each) to CSV text."""
    # (column suffix, key in the clip dict) — defines per-clip column order.
    clip_fields = [
        ("start", "start_time"),
        ("end", "end_time"),
        ("hype", "hype_score"),
        ("visual", "visual_score"),
        ("audio", "audio_score"),
        ("motion", "motion_score"),
        ("hook_type", "hook_type"),
    ]
    rows = []
    for entry in results:
        row = {
            "test_id": entry["test_id"],
            "video_name": entry["video_name"],
            "domain": entry["domain"],
            "clip_duration": entry["clip_duration"],
            "custom_prompt": entry["custom_prompt"],
            "num_clips": entry["num_clips"],
            "status": entry["status"],
            "error": entry.get("error", ""),
            "processing_time": entry["processing_time"],
            "frames_analyzed": entry["frames_analyzed"],
            "scenes_detected": entry["scenes_detected"],
            "hooks_detected": entry["hooks_detected"],
        }
        # Always emit three clip slots; missing clips get blank cells so
        # every row has an identical set of columns.
        clips = entry.get("clips", [])
        for slot in range(3):
            clip = clips[slot] if slot < len(clips) else None
            for suffix, key in clip_fields:
                row[f"clip_{slot + 1}_{suffix}"] = clip[key] if clip is not None else ""
        rows.append(row)

    return pd.DataFrame(rows).to_csv(index=False)
509
+
510
+
511
def results_to_json(results: List[Dict[str, Any]]) -> str:
    """Serialize results to pretty-printed JSON.

    The clip_paths entries point at temporary local files, so they are
    stripped from the export; the input dicts are not mutated.
    """
    sanitized = [
        {key: value for key, value in entry.items() if key != "clip_paths"}
        for entry in results
    ]
    return json.dumps(sanitized, indent=2)
520
+
521
+
522
def create_clips_zip(results: List[Dict[str, Any]]) -> Optional[str]:
    """Bundle every successful test's clips into a single ZIP file.

    Each test gets its own folder inside the archive, named after its
    video, domain, duration, and (when a prompt was used) a short prompt
    tag, so clips from different prompt variants do not collide.

    Args:
        results: Per-test result dicts from run_single_batch_test.

    Returns:
        Path to the created ZIP as a string, or None if it does not exist.
    """
    zip_path = Path(tempfile.mkdtemp()) / "batch_clips.zip"

    with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zf:
        for r in results:
            if r["status"] == "success":
                folder_name = f"{Path(r['video_name']).stem}_{r['domain']}_{r['clip_duration']}s"
                if r["custom_prompt"] != "none":
                    # Fix: a bare "_prompt" suffix gave every prompt variant of the
                    # same video/domain/duration the same folder, so their clip_N.mp4
                    # arcnames collided and overwrote each other on extraction.
                    # Use the same short prompt hash as the on-disk test folders.
                    # NOTE(review): str hash() is salted per process; stable within a run.
                    folder_name += f"_prompt_{hash(r['custom_prompt']) % 1000}"
                for clip_path in r.get("clip_paths", []):
                    if Path(clip_path).exists():
                        arcname = f"{folder_name}/{Path(clip_path).name}"
                        zf.write(clip_path, arcname)

    return str(zip_path) if zip_path.exists() else None
538
+
539
+
540
# Batch state (module level for simplicity)
# Shared between run_batch_tests (writer) and cancel_batch (sets the
# cancellation flag). Not thread-safe beyond simple flag toggling —
# assumes at most one batch runs at a time.
batch_state = {
    "is_running": False,      # True while a batch is executing
    "should_cancel": False,   # set by cancel_batch; checked between tests
    "results": [],            # per-test result dicts, appended as tests finish
    "output_dir": None,       # Path to the temp dir holding clips/CSV/JSON
}
547
+
548
+
549
def run_batch_tests(
    videos,
    domains,
    durations,
    num_clips,
    reference_image,
    include_no_prompt,
    prompt1,
    prompt2,
    prompt3,
    progress=gr.Progress()
):
    """Main batch testing function.

    Builds the cartesian test queue, runs each test sequentially (checking
    the shared cancellation flag between tests), then writes CSV/JSON
    result files and a clips ZIP to a fresh temp directory.

    Args:
        videos: List of uploaded video file paths.
        domains: Selected domain display names.
        durations: Selected clip durations (seconds).
        num_clips: Clips to extract per test.
        reference_image: Optional reference image path applied to all tests.
        include_no_prompt: Whether to include a no-prompt baseline run.
        prompt1, prompt2, prompt3: Optional custom prompt variants.
        progress: Gradio progress tracker (injected by Gradio).

    Returns:
        7-tuple matching the batch tab outputs: (status message, results
        DataFrame, log text, JSON text, CSV path, JSON path, ZIP path).
        On validation failure the non-status slots are None/"".
    """
    global batch_state

    # Validate inputs
    if not videos:
        return "Please upload at least one video.", None, "", "", None, None, None

    if not domains:
        return "Please select at least one domain.", None, "", "", None, None, None

    if not durations:
        return "Please select at least one duration.", None, "", "", None, None, None

    # Collect prompts (blank textboxes are dropped)
    prompts = [p for p in [prompt1, prompt2, prompt3] if p and p.strip()]

    # Generate test queue
    queue = generate_test_queue(
        videos=videos,
        domains=domains,
        durations=durations,
        num_clips=int(num_clips),
        ref_image=reference_image,
        prompts=prompts,
        include_no_prompt=include_no_prompt,
    )

    if not queue:
        return "No tests to run. Please check your configuration.", None, "", "", None, None, None

    # Initialize batch state (fresh temp dir per batch run)
    batch_state["is_running"] = True
    batch_state["should_cancel"] = False
    batch_state["results"] = []
    batch_state["output_dir"] = Path(tempfile.mkdtemp(prefix="shortsmith_batch_"))

    total_tests = len(queue)
    log_messages = []

    # Timestamped log helper: mirrors to the UI log box and the app logger.
    def log(msg):
        log_messages.append(f"[{time.strftime('%H:%M:%S')}] {msg}")
        logger.info(msg)

    log(f"Starting batch testing: {total_tests} tests")
    log(f"Videos: {len(videos)}, Domains: {len(domains)}, Durations: {len(durations)}, Prompts: {len(prompts) + (1 if include_no_prompt else 0)}")

    # Run tests sequentially
    for i, test_config in enumerate(queue):
        # Cancellation is cooperative: checked only between tests, so the
        # in-flight test always runs to completion.
        if batch_state["should_cancel"]:
            log("Batch cancelled by user")
            break

        test_id = test_config["test_id"]
        video_name = test_config["video_name"]
        domain = test_config["domain_value"]
        duration = test_config["clip_duration"]
        prompt = test_config["custom_prompt"] or "no-prompt"

        log(f"[{i+1}/{total_tests}] Testing: {video_name} | {domain} | {duration}s | {prompt[:30]}...")
        progress((i + 1) / total_tests, desc=f"Test {i+1}/{total_tests}: {video_name}")

        # Run the test (never raises; failures land in result["error"])
        result = run_single_batch_test(test_config, batch_state["output_dir"])
        batch_state["results"].append(result)

        if result["status"] == "success":
            log(f" ✓ Completed in {result['processing_time']}s")
        else:
            log(f" ✗ Failed: {result.get('error', 'Unknown error')}")

    # Finalize
    batch_state["is_running"] = False
    completed = len([r for r in batch_state["results"] if r["status"] == "success"])
    failed = len([r for r in batch_state["results"] if r["status"] == "failed"])

    log(f"Batch complete: {completed} succeeded, {failed} failed")

    # Generate outputs in the three export formats
    results_df = results_to_dataframe(batch_state["results"])
    csv_content = results_to_csv(batch_state["results"])
    json_content = results_to_json(batch_state["results"])

    # Save CSV and JSON to files for download
    csv_path = batch_state["output_dir"] / "results.csv"
    json_path = batch_state["output_dir"] / "results.json"
    csv_path.write_text(csv_content)
    json_path.write_text(json_content)

    # Create ZIP of clips
    zip_path = create_clips_zip(batch_state["results"])

    status = f"Batch complete: {completed}/{total_tests} tests succeeded"

    return (
        status,
        results_df,
        "\n".join(log_messages),
        json_content,
        str(csv_path),
        str(json_path),
        zip_path,
    )
663
+
664
+
665
def cancel_batch():
    """Cancel the running batch.

    Sets the shared should_cancel flag; run_batch_tests checks it between
    tests, so the currently-running test is allowed to finish first.

    Returns:
        A status message for display in the UI.
    """
    global batch_state
    batch_state["should_cancel"] = True
    return "Cancelling batch... (will stop after current test completes)"
670
+
671
+
672
def calculate_queue_size(videos, domains, durations, include_no_prompt, prompt1, prompt2, prompt3):
    """Return a markdown summary of how many tests the current config would run."""
    num_videos = len(videos) if videos else 0
    num_domains = len(domains) if domains else 0
    num_durations = len(durations) if durations else 0

    # Count non-blank prompts plus the optional no-prompt baseline;
    # an empty selection still runs a single no-prompt test.
    filled = [p for p in (prompt1, prompt2, prompt3) if p and p.strip()]
    num_prompts = len(filled) + (1 if include_no_prompt else 0)
    num_prompts = num_prompts or 1

    total = num_videos * num_domains * num_durations * num_prompts

    return f"Queue: {num_videos} video(s) × {num_domains} domain(s) × {num_durations} duration(s) × {num_prompts} prompt(s) = **{total} tests**"
686
+
687
+
688
+ # =============================================================================
689
+ # Build Gradio Interface
690
+ # =============================================================================
691
+
692
  with gr.Blocks(
693
  title="ShortSmith v2",
694
  theme=gr.themes.Soft(),
 
699
  ) as demo:
700
 
701
  gr.Markdown("""
702
+ # ShortSmith v2
703
  ### AI-Powered Video Highlight Extractor
704
 
705
  Upload a video and automatically extract the most engaging highlight clips using AI analysis.
706
  """)
707
 
708
+ with gr.Tabs():
709
+ # =================================================================
710
+ # Tab 1: Single Video
711
+ # =================================================================
712
+ with gr.TabItem("Single Video"):
713
+ with gr.Row():
714
+ # Left column - Inputs
715
+ with gr.Column(scale=1):
716
+ gr.Markdown("### Input")
717
+
718
+ video_input = gr.Video(
719
+ label="Upload Video",
720
+ sources=["upload"],
721
+ )
722
+
723
+ with gr.Accordion("Settings", open=True):
724
+ domain_dropdown = gr.Dropdown(
725
+ choices=["Sports", "Vlogs", "Music Videos", "Podcasts", "Gaming", "General"],
726
+ value="General",
727
+ label="Content Domain",
728
+ info="Select the type of content for optimized scoring"
729
+ )
730
+
731
+ with gr.Row():
732
+ num_clips_slider = gr.Slider(
733
+ minimum=1,
734
+ maximum=3,
735
+ value=3,
736
+ step=1,
737
+ label="Number of Clips",
738
+ info="How many highlight clips to extract"
739
+ )
740
+ duration_slider = gr.Slider(
741
+ minimum=5,
742
+ maximum=30,
743
+ value=15,
744
+ step=1,
745
+ label="Clip Duration (seconds)",
746
+ info="Target duration for each clip"
747
+ )
748
+
749
+ with gr.Accordion("Person Filtering (Optional)", open=False):
750
+ reference_image = gr.Image(
751
+ label="Reference Image",
752
+ type="filepath",
753
+ sources=["upload"],
754
+ )
755
+ gr.Markdown("*Upload a photo of a person to prioritize clips featuring them.*")
756
+
757
+ with gr.Accordion("Custom Instructions (Optional)", open=False):
758
+ custom_prompt = gr.Textbox(
759
+ label="Additional Instructions",
760
+ placeholder="E.g., 'Focus on crowd reactions' or 'Prioritize action scenes'",
761
+ lines=2,
762
+ )
763
+
764
+ process_btn = gr.Button(
765
+ "Extract Highlights",
766
+ variant="primary",
767
+ size="lg"
768
+ )
769
+
770
+ # Right column - Outputs
771
+ with gr.Column(scale=1):
772
+ gr.Markdown("### Output")
773
 
774
+ status_output = gr.Textbox(
775
+ label="Status",
776
+ lines=2,
777
+ interactive=False
778
+ )
779
+
780
+ gr.Markdown("#### Extracted Clips")
781
+ clip1_output = gr.Video(label="Clip 1", elem_classes=["output-video"])
782
+ clip2_output = gr.Video(label="Clip 2", elem_classes=["output-video"])
783
+ clip3_output = gr.Video(label="Clip 3", elem_classes=["output-video"])
784
+
785
+ with gr.Accordion("Processing Log", open=True):
786
+ log_output = gr.Textbox(
787
+ label="Log",
788
+ lines=10,
789
+ interactive=False,
790
+ show_copy_button=True
791
+ )
792
+
793
+ with gr.Accordion("Automated Metrics (System-Generated)", open=True):
794
+ metrics_output = gr.Textbox(
795
+ label="Metrics for Testing",
796
+ lines=20,
797
+ interactive=False,
798
+ show_copy_button=True,
799
+ info="Copy these metrics for evaluation spreadsheets"
800
+ )
801
+
802
+ # Connect single video processing
803
+ process_btn.click(
804
+ fn=process_video,
805
+ inputs=[
806
+ video_input,
807
+ domain_dropdown,
808
+ num_clips_slider,
809
+ duration_slider,
810
+ reference_image,
811
+ custom_prompt
812
+ ],
813
+ outputs=[
814
+ status_output,
815
+ clip1_output,
816
+ clip2_output,
817
+ clip3_output,
818
+ log_output,
819
+ metrics_output
820
+ ],
821
+ show_progress="full"
822
  )
823
 
824
+ # =================================================================
825
+ # Tab 2: Batch Testing
826
+ # =================================================================
827
+ with gr.TabItem("Batch Testing"):
828
+ with gr.Row():
829
+ # Left column - Configuration
830
+ with gr.Column(scale=1):
831
+ gr.Markdown("### Batch Configuration")
832
+
833
+ batch_videos = gr.File(
834
+ label="Upload Video(s)",
835
+ file_count="multiple",
836
+ file_types=["video"],
837
+ )
838
+
839
+ gr.Markdown("#### Domains to Test")
840
+ batch_domains = gr.CheckboxGroup(
841
+ choices=["Sports", "Vlogs", "Music Videos", "Podcasts", "Gaming", "General"],
842
+ value=["General"],
843
+ label="Select domains",
844
+ )
845
+
846
+ gr.Markdown("#### Clip Durations to Test")
847
+ batch_durations = gr.CheckboxGroup(
848
+ choices=[10, 15, 20, 30],
849
+ value=[15],
850
+ label="Select durations (seconds)",
851
+ )
852
 
853
+ batch_num_clips = gr.Slider(
 
854
  minimum=1,
855
  maximum=3,
856
  value=3,
857
  step=1,
858
+ label="Number of Clips per Test",
 
859
  )
860
+
861
+ with gr.Accordion("Custom Prompts", open=True):
862
+ batch_no_prompt = gr.Checkbox(
863
+ label="Include no-prompt baseline",
864
+ value=True,
865
+ info="Test without any custom prompt for comparison"
866
+ )
867
+ batch_prompt1 = gr.Textbox(
868
+ label="Prompt 1",
869
+ placeholder="E.g., 'Focus on action moments'",
870
+ lines=1,
871
+ )
872
+ batch_prompt2 = gr.Textbox(
873
+ label="Prompt 2",
874
+ placeholder="E.g., 'Find crowd reactions'",
875
+ lines=1,
876
+ )
877
+ batch_prompt3 = gr.Textbox(
878
+ label="Prompt 3",
879
+ placeholder="E.g., 'Prioritize emotional moments'",
880
+ lines=1,
881
+ )
882
+
883
+ with gr.Accordion("Reference Image (Optional)", open=False):
884
+ batch_ref_image = gr.Image(
885
+ label="Reference Image (applies to all tests)",
886
+ type="filepath",
887
+ sources=["upload"],
888
+ )
889
+
890
+ # Queue size indicator
891
+ queue_info = gr.Markdown("Queue: 0 tests")
892
+
893
+ with gr.Row():
894
+ batch_start_btn = gr.Button(
895
+ "Start Batch",
896
+ variant="primary",
897
+ size="lg"
898
+ )
899
+ batch_cancel_btn = gr.Button(
900
+ "Cancel",
901
+ variant="secondary",
902
+ size="lg"
903
+ )
904
+
905
+ # Right column - Results
906
+ with gr.Column(scale=1):
907
+ gr.Markdown("### Results")
908
+
909
+ batch_status = gr.Textbox(
910
+ label="Status",
911
+ lines=2,
912
+ interactive=False
913
  )
914
 
915
+ batch_results_table = gr.Dataframe(
916
+ label="Test Results",
917
+ headers=["Test ID", "Video", "Domain", "Duration", "Prompt", "Status", "Time (s)", "Frames", "Hooks"],
918
+ interactive=False,
919
+ )
 
 
920
 
921
+ with gr.Accordion("Processing Log", open=True):
922
+ batch_log = gr.Textbox(
923
+ label="Log",
924
+ lines=15,
925
+ interactive=False,
926
+ show_copy_button=True
927
+ )
928
+
929
+ with gr.Accordion("Full Results (JSON)", open=False):
930
+ batch_json = gr.Textbox(
931
+ label="JSON Output",
932
+ lines=10,
933
+ interactive=False,
934
+ show_copy_button=True
935
+ )
936
+
937
+ gr.Markdown("#### Download Results")
938
+ with gr.Row():
939
+ csv_download = gr.File(label="CSV Results")
940
+ json_download = gr.File(label="JSON Results")
941
+ zip_download = gr.File(label="All Clips (ZIP)")
942
+
943
+ # Update queue size when inputs change
944
+ queue_inputs = [batch_videos, batch_domains, batch_durations, batch_no_prompt, batch_prompt1, batch_prompt2, batch_prompt3]
945
+ for inp in queue_inputs:
946
+ inp.change(
947
+ fn=calculate_queue_size,
948
+ inputs=queue_inputs,
949
+ outputs=queue_info
950
  )
951
 
952
+ # Connect batch processing
953
+ batch_start_btn.click(
954
+ fn=run_batch_tests,
955
+ inputs=[
956
+ batch_videos,
957
+ batch_domains,
958
+ batch_durations,
959
+ batch_num_clips,
960
+ batch_ref_image,
961
+ batch_no_prompt,
962
+ batch_prompt1,
963
+ batch_prompt2,
964
+ batch_prompt3,
965
+ ],
966
+ outputs=[
967
+ batch_status,
968
+ batch_results_table,
969
+ batch_log,
970
+ batch_json,
971
+ csv_download,
972
+ json_download,
973
+ zip_download,
974
+ ],
975
+ show_progress="full"
976
  )
977
 
978
+ batch_cancel_btn.click(
979
+ fn=cancel_batch,
980
+ inputs=[],
981
+ outputs=[batch_status]
 
 
 
 
982
  )
983
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
984
  gr.Markdown("""
985
  ---
986
  **ShortSmith v2** | Powered by Qwen2-VL, InsightFace, and Librosa |
987
  [GitHub](https://github.com) | Built with Gradio
988
  """)
989
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
990
  # Launch the app
991
  if __name__ == "__main__":
992
  demo.queue()