Spaces:
Paused
Paused
Update pipeline/orchestrator.py
Browse files- pipeline/orchestrator.py +150 -3
pipeline/orchestrator.py
CHANGED
|
@@ -40,6 +40,7 @@ from models.body_recognizer import BodyRecognizer
|
|
| 40 |
from models.motion_detector import MotionDetector
|
| 41 |
from scoring.hype_scorer import HypeScorer, SegmentScore
|
| 42 |
from scoring.domain_presets import get_domain_preset, Domain
|
|
|
|
| 43 |
|
| 44 |
logger = get_logger("pipeline.orchestrator")
|
| 45 |
|
|
@@ -56,6 +57,7 @@ class PipelineStage(Enum):
|
|
| 56 |
DETECTING_PERSON = "detecting_person"
|
| 57 |
ANALYZING_MOTION = "analyzing_motion"
|
| 58 |
SCORING = "scoring"
|
|
|
|
| 59 |
EXTRACTING_CLIPS = "extracting_clips"
|
| 60 |
FINALIZING = "finalizing"
|
| 61 |
COMPLETE = "complete"
|
|
@@ -123,10 +125,11 @@ class PipelineOrchestrator:
|
|
| 123 |
PipelineStage.EXTRACTING_AUDIO: 0.05,
|
| 124 |
PipelineStage.ANALYZING_AUDIO: 0.10,
|
| 125 |
PipelineStage.SAMPLING_FRAMES: 0.10,
|
| 126 |
-
PipelineStage.ANALYZING_VISUAL: 0.
|
| 127 |
PipelineStage.DETECTING_PERSON: 0.10,
|
| 128 |
PipelineStage.ANALYZING_MOTION: 0.05,
|
| 129 |
PipelineStage.SCORING: 0.05,
|
|
|
|
| 130 |
PipelineStage.EXTRACTING_CLIPS: 0.10,
|
| 131 |
PipelineStage.FINALIZING: 0.05,
|
| 132 |
}
|
|
@@ -161,6 +164,7 @@ class PipelineOrchestrator:
|
|
| 161 |
self._motion_detector: Optional[MotionDetector] = None
|
| 162 |
self._clip_extractor: Optional[ClipExtractor] = None
|
| 163 |
self._hype_scorer: Optional[HypeScorer] = None
|
|
|
|
| 164 |
|
| 165 |
logger.info("PipelineOrchestrator initialized")
|
| 166 |
|
|
@@ -359,13 +363,25 @@ class PipelineOrchestrator:
|
|
| 359 |
)
|
| 360 |
self._update_progress(PipelineStage.SCORING, 1.0, f"Scored {len(segment_scores)} segments")
|
| 361 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 362 |
# Clip extraction
|
| 363 |
self._update_progress(PipelineStage.EXTRACTING_CLIPS, 0.0, "Extracting clips...")
|
| 364 |
-
candidates = self._scores_to_candidates(segment_scores, clip_duration)
|
| 365 |
clips = self._clip_extractor.extract_clips(
|
| 366 |
video_path,
|
| 367 |
self._temp_dir / "clips",
|
| 368 |
-
|
| 369 |
num_clips=num_clips,
|
| 370 |
)
|
| 371 |
self._update_progress(PipelineStage.EXTRACTING_CLIPS, 1.0, f"Extracted {len(clips)} clips")
|
|
@@ -464,6 +480,9 @@ class PipelineOrchestrator:
|
|
| 464 |
preset = get_domain_preset(domain, person_filter_enabled=person_filter)
|
| 465 |
self._hype_scorer = HypeScorer(preset=preset)
|
| 466 |
|
|
|
|
|
|
|
|
|
|
| 467 |
logger.info("Components initialized")
|
| 468 |
|
| 469 |
def _compute_segment_scores(
|
|
@@ -589,6 +608,134 @@ class PipelineOrchestrator:
|
|
| 589 |
|
| 590 |
return [motion_map.get(f.action_detected, 0.4) for f in visual_features]
|
| 591 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 592 |
def cleanup(self) -> None:
|
| 593 |
"""Clean up temporary files and unload models."""
|
| 594 |
if self._temp_dir:
|
|
|
|
| 40 |
from models.motion_detector import MotionDetector
|
| 41 |
from scoring.hype_scorer import HypeScorer, SegmentScore
|
| 42 |
from scoring.domain_presets import get_domain_preset, Domain
|
| 43 |
+
from scoring.viral_hooks import ViralHookDetector, HookSignal
|
| 44 |
|
| 45 |
logger = get_logger("pipeline.orchestrator")
|
| 46 |
|
|
|
|
| 57 |
DETECTING_PERSON = "detecting_person"
|
| 58 |
ANALYZING_MOTION = "analyzing_motion"
|
| 59 |
SCORING = "scoring"
|
| 60 |
+
OPTIMIZING_HOOKS = "optimizing_hooks"
|
| 61 |
EXTRACTING_CLIPS = "extracting_clips"
|
| 62 |
FINALIZING = "finalizing"
|
| 63 |
COMPLETE = "complete"
|
|
|
|
| 125 |
PipelineStage.EXTRACTING_AUDIO: 0.05,
|
| 126 |
PipelineStage.ANALYZING_AUDIO: 0.10,
|
| 127 |
PipelineStage.SAMPLING_FRAMES: 0.10,
|
| 128 |
+
PipelineStage.ANALYZING_VISUAL: 0.25,
|
| 129 |
PipelineStage.DETECTING_PERSON: 0.10,
|
| 130 |
PipelineStage.ANALYZING_MOTION: 0.05,
|
| 131 |
PipelineStage.SCORING: 0.05,
|
| 132 |
+
PipelineStage.OPTIMIZING_HOOKS: 0.05,
|
| 133 |
PipelineStage.EXTRACTING_CLIPS: 0.10,
|
| 134 |
PipelineStage.FINALIZING: 0.05,
|
| 135 |
}
|
|
|
|
| 164 |
self._motion_detector: Optional[MotionDetector] = None
|
| 165 |
self._clip_extractor: Optional[ClipExtractor] = None
|
| 166 |
self._hype_scorer: Optional[HypeScorer] = None
|
| 167 |
+
self._hook_detector: Optional[ViralHookDetector] = None
|
| 168 |
|
| 169 |
logger.info("PipelineOrchestrator initialized")
|
| 170 |
|
|
|
|
| 363 |
)
|
| 364 |
self._update_progress(PipelineStage.SCORING, 1.0, f"Scored {len(segment_scores)} segments")
|
| 365 |
|
| 366 |
+
# Viral hook optimization - find best starting points
|
| 367 |
+
self._update_progress(PipelineStage.OPTIMIZING_HOOKS, 0.0, "Finding viral hooks...")
|
| 368 |
+
candidates = self._scores_to_candidates(segment_scores, clip_duration)
|
| 369 |
+
|
| 370 |
+
# Detect hooks and optimize clip start times
|
| 371 |
+
hooks = self._detect_viral_hooks(
|
| 372 |
+
frames, audio_features, visual_features, motion_scores
|
| 373 |
+
)
|
| 374 |
+
optimized_candidates = self._optimize_clip_starts(
|
| 375 |
+
candidates, hooks, num_clips
|
| 376 |
+
)
|
| 377 |
+
self._update_progress(PipelineStage.OPTIMIZING_HOOKS, 1.0, f"Optimized {len(optimized_candidates)} clip hooks")
|
| 378 |
+
|
| 379 |
# Clip extraction
|
| 380 |
self._update_progress(PipelineStage.EXTRACTING_CLIPS, 0.0, "Extracting clips...")
|
|
|
|
| 381 |
clips = self._clip_extractor.extract_clips(
|
| 382 |
video_path,
|
| 383 |
self._temp_dir / "clips",
|
| 384 |
+
optimized_candidates,
|
| 385 |
num_clips=num_clips,
|
| 386 |
)
|
| 387 |
self._update_progress(PipelineStage.EXTRACTING_CLIPS, 1.0, f"Extracted {len(clips)} clips")
|
|
|
|
| 480 |
preset = get_domain_preset(domain, person_filter_enabled=person_filter)
|
| 481 |
self._hype_scorer = HypeScorer(preset=preset)
|
| 482 |
|
| 483 |
+
# Viral hook detector
|
| 484 |
+
self._hook_detector = ViralHookDetector(domain=domain)
|
| 485 |
+
|
| 486 |
logger.info("Components initialized")
|
| 487 |
|
| 488 |
def _compute_segment_scores(
|
|
|
|
| 608 |
|
| 609 |
return [motion_map.get(f.action_detected, 0.4) for f in visual_features]
|
| 610 |
|
| 611 |
+
def _detect_viral_hooks(
|
| 612 |
+
self,
|
| 613 |
+
frames: List[SampledFrame],
|
| 614 |
+
audio_features: List[AudioFeatures],
|
| 615 |
+
visual_features: List[VisualFeatures],
|
| 616 |
+
motion_scores: List[float],
|
| 617 |
+
) -> List[HookSignal]:
|
| 618 |
+
"""
|
| 619 |
+
Detect viral hook moments from all available signals.
|
| 620 |
+
|
| 621 |
+
Args:
|
| 622 |
+
frames: Sampled frames with timestamps
|
| 623 |
+
audio_features: Audio analysis results
|
| 624 |
+
visual_features: Visual analysis results
|
| 625 |
+
motion_scores: Motion intensity scores
|
| 626 |
+
|
| 627 |
+
Returns:
|
| 628 |
+
List of detected hook signals
|
| 629 |
+
"""
|
| 630 |
+
if not self._hook_detector:
|
| 631 |
+
return []
|
| 632 |
+
|
| 633 |
+
# Prepare timestamps
|
| 634 |
+
frame_timestamps = [f.timestamp for f in frames]
|
| 635 |
+
|
| 636 |
+
# Prepare audio signals
|
| 637 |
+
audio_timestamps = [af.timestamp for af in audio_features] if audio_features else []
|
| 638 |
+
audio_energy = [af.rms_energy for af in audio_features] if audio_features else None
|
| 639 |
+
audio_flux = [af.spectral_flux for af in audio_features] if audio_features else None
|
| 640 |
+
audio_centroid = [af.spectral_centroid for af in audio_features] if audio_features else None
|
| 641 |
+
|
| 642 |
+
# Prepare visual signals
|
| 643 |
+
visual_scores = [vf.hype_score for vf in visual_features] if visual_features else None
|
| 644 |
+
emotions = [vf.emotion for vf in visual_features] if visual_features else None
|
| 645 |
+
actions = [vf.action_detected for vf in visual_features] if visual_features else None
|
| 646 |
+
|
| 647 |
+
# Use audio timestamps if available (finer granularity), else frame timestamps
|
| 648 |
+
timestamps = audio_timestamps if audio_timestamps else frame_timestamps
|
| 649 |
+
|
| 650 |
+
# Interpolate visual/motion to audio timeline if needed
|
| 651 |
+
if audio_timestamps and visual_scores and len(visual_scores) != len(audio_timestamps):
|
| 652 |
+
visual_scores = self._interpolate_scores(frame_timestamps, visual_scores, audio_timestamps)
|
| 653 |
+
motion_scores = self._interpolate_scores(frame_timestamps, motion_scores, audio_timestamps) if motion_scores else None
|
| 654 |
+
# For emotions/actions, we'll use nearest neighbor (keep original)
|
| 655 |
+
emotions = None # Can't interpolate strings
|
| 656 |
+
actions = None
|
| 657 |
+
|
| 658 |
+
# Detect hooks
|
| 659 |
+
hooks = self._hook_detector.detect_hooks(
|
| 660 |
+
timestamps=timestamps,
|
| 661 |
+
audio_energy=audio_energy,
|
| 662 |
+
audio_flux=audio_flux,
|
| 663 |
+
audio_centroid=audio_centroid,
|
| 664 |
+
visual_scores=visual_scores,
|
| 665 |
+
motion_scores=motion_scores,
|
| 666 |
+
emotions=emotions,
|
| 667 |
+
actions=actions,
|
| 668 |
+
)
|
| 669 |
+
|
| 670 |
+
logger.info(f"Detected {len(hooks)} potential viral hook moments")
|
| 671 |
+
return hooks
|
| 672 |
+
|
| 673 |
+
def _optimize_clip_starts(
    self,
    candidates: List[ClipCandidate],
    hooks: List[HookSignal],
    num_clips: int,
) -> List[ClipCandidate]:
    """
    Optimize clip start times to align with viral hooks.

    Args:
        candidates: Original clip candidates.
        hooks: Detected hook signals.
        num_clips: Number of clips to extract.

    Returns:
        Optimized clip candidates with adjusted start times, re-sorted by
        their (possibly hook-boosted) hype score. Falls back to the input
        candidates unchanged when no hooks or detector are available.
    """
    detector = self._hook_detector
    if not hooks or not detector:
        logger.info("No hooks detected, using original clip timings")
        return candidates

    # Consider twice as many candidates as requested so a hook-boosted
    # lower-ranked clip can overtake an unboosted higher-ranked one.
    pool = candidates[:num_clips * 2]
    reranked: List[ClipCandidate] = []

    for original in pool:
        # Find the best hook-aligned start time for this candidate.
        new_start, matched_hook = detector.find_best_clip_start(
            clip_start=original.start_time,
            clip_end=original.end_time,
            hooks=hooks,
            allow_adjustment=3.0,  # allow shifting up to 3 seconds earlier
        )
        duration = original.end_time - original.start_time

        # Boost the hype score by up to 30% when a hook was matched.
        boost = 1.0
        if matched_hook:
            potential = detector.score_clip_hook_potential(
                new_start, duration, hooks
            )
            boost = 1.0 + potential * 0.3

        reranked.append(
            ClipCandidate(
                start_time=new_start,
                end_time=new_start + duration,
                hype_score=original.hype_score * boost,
                visual_score=original.visual_score,
                audio_score=original.audio_score,
                motion_score=original.motion_score,
                person_score=original.person_score,
            )
        )

        if matched_hook:
            logger.debug(
                f"Clip {original.start_time:.1f}s -> {new_start:.1f}s "
                f"(hook: {matched_hook.hook_type.value}, boost: {boost:.2f}x)"
            )

    # Re-sort so the boosted scores determine which clips get extracted.
    reranked.sort(key=lambda clip: clip.hype_score, reverse=True)

    logger.info(f"Optimized {len(reranked)} candidates with viral hooks")
    return reranked
|
| 738 |
+
|
| 739 |
def cleanup(self) -> None:
|
| 740 |
"""Clean up temporary files and unload models."""
|
| 741 |
if self._temp_dir:
|