Spaces:
Sleeping
Sleeping
Upload 2 files
Browse files
- main_improved.py +31 -1
- prompt.txt +1 -1
main_improved.py
CHANGED
|
@@ -121,6 +121,7 @@ def main():
|
|
| 121 |
parser.add_argument("--face-model", choices=["insightface", "mediapipe"], default="insightface", help="Face detection model")
|
| 122 |
parser.add_argument("--face-mode", choices=["auto", "1", "2"], default="auto", help="Face tracking mode: auto, 1, 2")
|
| 123 |
parser.add_argument("--subtitle-config", help="Path to subtitle configuration JSON file")
|
|
|
|
| 124 |
parser.add_argument("--face-detect-interval", type=str, default="0.17,1.0", help="Face detection interval in seconds. Single value or 'interval_1face,interval_2face'")
|
| 125 |
parser.add_argument("--face-filter-threshold", type=float, default=0.35, help="Relative area threshold to ignore background faces (default: 0.35)")
|
| 126 |
parser.add_argument("--face-two-threshold", type=float, default=0.60, help="Relative area threshold to trigger 2-face mode (default: 0.60)")
|
|
@@ -487,6 +488,34 @@ def main():
|
|
| 487 |
|
| 488 |
save_json.save_viral_segments(viral_segments, project_folder=project_folder)
|
| 489 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 490 |
# 4. Cut Segments
|
| 491 |
# Se workflow for 3, pulamos corte
|
| 492 |
if workflow_choice == "3":
|
|
@@ -547,7 +576,8 @@ def main():
|
|
| 547 |
active_speaker_motion_deadzone=args.active_speaker_motion_threshold,
|
| 548 |
active_speaker_motion_sensitivity=args.active_speaker_motion_sensitivity,
|
| 549 |
active_speaker_decay=args.active_speaker_decay,
|
| 550 |
-
segments_data=viral_segments.get("segments", []) if viral_segments else None
|
|
|
|
| 551 |
)
|
| 552 |
|
| 553 |
|
|
|
|
| 121 |
parser.add_argument("--face-model", choices=["insightface", "mediapipe"], default="insightface", help="Face detection model")
|
| 122 |
parser.add_argument("--face-mode", choices=["auto", "1", "2"], default="auto", help="Face tracking mode: auto, 1, 2")
|
| 123 |
parser.add_argument("--subtitle-config", help="Path to subtitle configuration JSON file")
|
| 124 |
+
parser.add_argument("--no-face-mode", choices=["padding", "zoom"], default="padding", help="Method to handle segments with no face detected: 'padding' (9:16 frame with black bars) or 'zoom' (Center Crop Zoom)")
|
| 125 |
parser.add_argument("--face-detect-interval", type=str, default="0.17,1.0", help="Face detection interval in seconds. Single value or 'interval_1face,interval_2face'")
|
| 126 |
parser.add_argument("--face-filter-threshold", type=float, default=0.35, help="Relative area threshold to ignore background faces (default: 0.35)")
|
| 127 |
parser.add_argument("--face-two-threshold", type=float, default=0.60, help="Relative area threshold to trigger 2-face mode (default: 0.60)")
|
|
|
|
| 488 |
|
| 489 |
save_json.save_viral_segments(viral_segments, project_folder=project_folder)
|
| 490 |
|
| 491 |
+
# 3.5. Fix Raw Segments (missing timestamps)
|
| 492 |
+
if workflow_choice != "3" and viral_segments and "segments" in viral_segments:
|
| 493 |
+
segs = viral_segments.get("segments", [])
|
| 494 |
+
if segs and len(segs) > 0:
|
| 495 |
+
# Check whether the first segment has a duration of 0 (with or without a start_time_ref tag)
|
| 496 |
+
first = segs[0]
|
| 497 |
+
# If duration is effectively 0 and we have a ref tag (or even if we don't, we can't cut a 0s video)
|
| 498 |
+
# We assume if duration is 0, it is raw.
|
| 499 |
+
if first.get("duration", 0) == 0:
|
| 500 |
+
print(i18n("Detected raw AI segments without timestamps (Duration 0). Running alignment..."))
|
| 501 |
+
try:
|
| 502 |
+
# Load transcript
|
| 503 |
+
transcript = create_viral_segments.load_transcript(project_folder)
|
| 504 |
+
# Process (Align)
|
| 505 |
+
# Use None for output_count to keep all found segments
|
| 506 |
+
viral_segments = create_viral_segments.process_segments(
|
| 507 |
+
segs,
|
| 508 |
+
transcript,
|
| 509 |
+
args.min_duration,
|
| 510 |
+
args.max_duration,
|
| 511 |
+
output_count=None
|
| 512 |
+
)
|
| 513 |
+
save_json.save_viral_segments(viral_segments, project_folder=project_folder)
|
| 514 |
+
print(i18n("Segments aligned and saved."))
|
| 515 |
+
except Exception as e:
|
| 516 |
+
print(i18n("Failed to align raw segments: {}").format(e))
|
| 517 |
+
# If alignment fails, it might crash later, but we tried.
|
| 518 |
+
|
| 519 |
# 4. Cut Segments
|
| 520 |
# Se workflow for 3, pulamos corte
|
| 521 |
if workflow_choice == "3":
|
|
|
|
| 576 |
active_speaker_motion_deadzone=args.active_speaker_motion_threshold,
|
| 577 |
active_speaker_motion_sensitivity=args.active_speaker_motion_sensitivity,
|
| 578 |
active_speaker_decay=args.active_speaker_decay,
|
| 579 |
+
segments_data=viral_segments.get("segments", []) if viral_segments else None,
|
| 580 |
+
no_face_mode=args.no_face_mode
|
| 581 |
)
|
| 582 |
|
| 583 |
|
prompt.txt
CHANGED
|
@@ -28,7 +28,7 @@ The transcript below is a continuous text stream with embedded **Time Tags** lik
|
|
| 28 |
|
| 29 |
4. **DURATION MATH:**
|
| 30 |
- Use the `(XXs)` tags to estimate duration.
|
| 31 |
-
-
|
| 32 |
|
| 33 |
### YOUR TASK:
|
| 34 |
Analyze the transcript below. Find {amount} potential viral segments.
|
|
|
|
| 28 |
|
| 29 |
4. **DURATION MATH:**
|
| 30 |
- Use the `(XXs)` tags to estimate duration.
|
| 31 |
+
- CONSTRAINT: Segment MUST be between {min_duration}s and {max_duration}s.
|
| 32 |
|
| 33 |
### YOUR TASK:
|
| 34 |
Analyze the transcript below. Find {amount} potential viral segments.
|