RafaG committed on
Commit
1934649
·
verified ·
1 Parent(s): 9b22111

Upload 2 files

Browse files
Files changed (2) hide show
  1. main_improved.py +31 -1
  2. prompt.txt +1 -1
main_improved.py CHANGED
@@ -121,6 +121,7 @@ def main():
121
  parser.add_argument("--face-model", choices=["insightface", "mediapipe"], default="insightface", help="Face detection model")
122
  parser.add_argument("--face-mode", choices=["auto", "1", "2"], default="auto", help="Face tracking mode: auto, 1, 2")
123
  parser.add_argument("--subtitle-config", help="Path to subtitle configuration JSON file")
 
124
  parser.add_argument("--face-detect-interval", type=str, default="0.17,1.0", help="Face detection interval in seconds. Single value or 'interval_1face,interval_2face'")
125
  parser.add_argument("--face-filter-threshold", type=float, default=0.35, help="Relative area threshold to ignore background faces (default: 0.35)")
126
  parser.add_argument("--face-two-threshold", type=float, default=0.60, help="Relative area threshold to trigger 2-face mode (default: 0.60)")
@@ -487,6 +488,34 @@ def main():
487
 
488
  save_json.save_viral_segments(viral_segments, project_folder=project_folder)
489
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
490
  # 4. Cut Segments
491
  # Se workflow for 3, pulamos corte
492
  if workflow_choice == "3":
@@ -547,7 +576,8 @@ def main():
547
  active_speaker_motion_deadzone=args.active_speaker_motion_threshold,
548
  active_speaker_motion_sensitivity=args.active_speaker_motion_sensitivity,
549
  active_speaker_decay=args.active_speaker_decay,
550
- segments_data=viral_segments.get("segments", []) if viral_segments else None
 
551
  )
552
 
553
 
 
121
  parser.add_argument("--face-model", choices=["insightface", "mediapipe"], default="insightface", help="Face detection model")
122
  parser.add_argument("--face-mode", choices=["auto", "1", "2"], default="auto", help="Face tracking mode: auto, 1, 2")
123
  parser.add_argument("--subtitle-config", help="Path to subtitle configuration JSON file")
124
+ parser.add_argument("--no-face-mode", choices=["padding", "zoom"], default="padding", help="Method to handle segments with no face detected: 'padding' (9:16 frame with black bars) or 'zoom' (Center Crop Zoom)")
125
  parser.add_argument("--face-detect-interval", type=str, default="0.17,1.0", help="Face detection interval in seconds. Single value or 'interval_1face,interval_2face'")
126
  parser.add_argument("--face-filter-threshold", type=float, default=0.35, help="Relative area threshold to ignore background faces (default: 0.35)")
127
  parser.add_argument("--face-two-threshold", type=float, default=0.60, help="Relative area threshold to trigger 2-face mode (default: 0.60)")
 
488
 
489
  save_json.save_viral_segments(viral_segments, project_folder=project_folder)
490
 
491
+ # 3.5. Fix Raw Segments (missing timestamps)
492
+ if workflow_choice != "3" and viral_segments and "segments" in viral_segments:
493
+ segs = viral_segments.get("segments", [])
494
+ if segs and len(segs) > 0:
495
+ # Check first segment for duration 0 but having start_time_ref or just check duration
496
+ first = segs[0]
497
+ # If duration is effectively 0 and we have a ref tag (or even if we don't, we can't cut a 0s video)
498
+ # We assume if duration is 0, it is raw.
499
+ if first.get("duration", 0) == 0:
500
+ print(i18n("Detected raw AI segments without timestamps (Duration 0). Running alignment..."))
501
+ try:
502
+ # Load transcript
503
+ transcript = create_viral_segments.load_transcript(project_folder)
504
+ # Process (Align)
505
+ # Use None for output_count to keep all found segments
506
+ viral_segments = create_viral_segments.process_segments(
507
+ segs,
508
+ transcript,
509
+ args.min_duration,
510
+ args.max_duration,
511
+ output_count=None
512
+ )
513
+ save_json.save_viral_segments(viral_segments, project_folder=project_folder)
514
+ print(i18n("Segments aligned and saved."))
515
+ except Exception as e:
516
+ print(i18n("Failed to align raw segments: {}").format(e))
517
+ # If alignment fails, it might crash later, but we tried.
518
+
519
  # 4. Cut Segments
520
  # Se workflow for 3, pulamos corte
521
  if workflow_choice == "3":
 
576
  active_speaker_motion_deadzone=args.active_speaker_motion_threshold,
577
  active_speaker_motion_sensitivity=args.active_speaker_motion_sensitivity,
578
  active_speaker_decay=args.active_speaker_decay,
579
+ segments_data=viral_segments.get("segments", []) if viral_segments else None,
580
+ no_face_mode=args.no_face_mode
581
  )
582
 
583
 
prompt.txt CHANGED
@@ -28,7 +28,7 @@ The transcript below is a continuous text stream with embedded **Time Tags** lik
28
 
29
  4. **DURATION MATH:**
30
  - Use the `(XXs)` tags to estimate duration.
31
- - Target: {min_duration}s to {max_duration}s.
32
 
33
  ### YOUR TASK:
34
  Analyze the transcript below. Find {amount} potential viral segments.
 
28
 
29
  4. **DURATION MATH:**
30
  - Use the `(XXs)` tags to estimate duration.
31
+ - CONSTRAINT: Segment MUST be between {min_duration}s and {max_duration}s.
32
 
33
  ### YOUR TASK:
34
  Analyze the transcript below. Find {amount} potential viral segments.