""" ShortSmith v2 - Viral Hooks Module Optimizes clip start points for maximum viral potential. The first 1-3 seconds determine if viewers keep watching. Research-backed viral triggers by content type: - Sports: Peak action moments, crowd eruptions, commentator hype - Music: Beat drops, chorus hits, dance peaks - Gaming: Clutch plays, reactions, unexpected moments - Vlogs: Emotional peaks, reveals, punch lines - Podcasts: Hot takes, laughs, controversial statements Each domain has specific "hook triggers" that maximize retention. """ from dataclasses import dataclass, field from typing import List, Dict, Optional, Tuple from enum import Enum import numpy as np from utils.logger import get_logger logger = get_logger("scoring.viral_hooks") class HookType(Enum): """Types of viral hook moments.""" # Universal hooks PEAK_ENERGY = "peak_energy" # Maximum audio/visual energy SUDDEN_CHANGE = "sudden_change" # Dramatic shift in content EMOTIONAL_PEAK = "emotional_peak" # High emotion moment # Sports-specific GOAL_MOMENT = "goal_moment" # Scoring play CROWD_ERUPTION = "crowd_eruption" # Crowd going wild COMMENTATOR_HYPE = "commentator_hype" # Excited commentary REPLAY_WORTHY = "replay_worthy" # Highlight reel moment # Music-specific BEAT_DROP = "beat_drop" # Bass drop / beat switch CHORUS_HIT = "chorus_hit" # Chorus start DANCE_PEAK = "dance_peak" # Peak choreography VISUAL_CLIMAX = "visual_climax" # Visual spectacle # Gaming-specific CLUTCH_PLAY = "clutch_play" # Skill moment ELIMINATION = "elimination" # Kill/win moment RAGE_REACTION = "rage_reaction" # Streamer reaction UNEXPECTED = "unexpected" # Plot twist / surprise # Vlog-specific REVEAL = "reveal" # Surprise reveal PUNCHLINE = "punchline" # Joke landing EMOTIONAL_MOMENT = "emotional_moment" # Tears/joy/shock CONFRONTATION = "confrontation" # Drama/tension # Podcast-specific HOT_TAKE = "hot_take" # Controversial opinion BIG_LAUGH = "big_laugh" # Group laughter REVELATION = "revelation" # Surprising info HEATED_DEBATE = "heated_debate" # Argument/passion @dataclass class HookSignal: """A detected hook signal at a specific timestamp.""" timestamp: float hook_type: HookType confidence: float # 0-1, how confident we are this is a hook intensity: float # 0-1, how strong the hook is description: str # Human readable description @property def score(self) -> float: """Combined hook score.""" return self.confidence * self.intensity @dataclass class ViralHookConfig: """Configuration for viral hook detection per domain.""" domain: str # Which hook types to look for (in priority order) priority_hooks: List[HookType] = field(default_factory=list) # Audio thresholds audio_spike_threshold: float = 0.7 # RMS energy spike to detect audio_spike_window: float = 0.5 # Seconds to detect spike crowd_noise_threshold: float = 0.6 # Spectral centroid for crowd speech_energy_threshold: float = 0.8 # For commentator/speaker hype # Visual thresholds motion_spike_threshold: float = 0.7 # Sudden motion increase scene_change_weight: float = 0.3 # Weight for scene transitions emotion_threshold: float = 0.7 # For detected emotions # Timing preferences ideal_hook_window: Tuple[float, float] = (0.0, 2.0) # Seconds from clip start max_hook_search_window: float = 5.0 # How far to search for hook # Hook scoring weights hook_type_weights: Dict[HookType, float] = field(default_factory=dict) # Minimum score to consider a valid hook min_hook_score: float = 0.5 # Domain-specific viral hook configurations VIRAL_HOOK_CONFIGS: Dict[str, ViralHookConfig] = { "sports": ViralHookConfig( domain="sports", priority_hooks=[ HookType.GOAL_MOMENT, HookType.CROWD_ERUPTION, HookType.COMMENTATOR_HYPE, HookType.REPLAY_WORTHY, HookType.PEAK_ENERGY, ], audio_spike_threshold=0.75, # Sports has loud moments crowd_noise_threshold=0.65, # Crowd detection speech_energy_threshold=0.85, # Commentator excitement motion_spike_threshold=0.7, # Action detection ideal_hook_window=(0.0, 1.5), # Sports hooks need to be immediate hook_type_weights={ HookType.GOAL_MOMENT: 1.0, HookType.CROWD_ERUPTION: 0.95, HookType.COMMENTATOR_HYPE: 0.9, HookType.REPLAY_WORTHY: 0.85, HookType.PEAK_ENERGY: 0.8, HookType.SUDDEN_CHANGE: 0.6, }, min_hook_score=0.6, ), "music": ViralHookConfig( domain="music", priority_hooks=[ HookType.BEAT_DROP, HookType.CHORUS_HIT, HookType.DANCE_PEAK, HookType.VISUAL_CLIMAX, HookType.PEAK_ENERGY, ], audio_spike_threshold=0.8, # Beat drops are loud audio_spike_window=0.3, # Quick detection for beats motion_spike_threshold=0.65, # Dance moves ideal_hook_window=(0.0, 2.0), # Can build slightly hook_type_weights={ HookType.BEAT_DROP: 1.0, HookType.CHORUS_HIT: 0.95, HookType.DANCE_PEAK: 0.85, HookType.VISUAL_CLIMAX: 0.8, HookType.PEAK_ENERGY: 0.75, HookType.SUDDEN_CHANGE: 0.7, }, min_hook_score=0.55, ), "gaming": ViralHookConfig( domain="gaming", priority_hooks=[ HookType.CLUTCH_PLAY, HookType.ELIMINATION, HookType.RAGE_REACTION, HookType.UNEXPECTED, HookType.PEAK_ENERGY, ], audio_spike_threshold=0.7, # Streamer reactions speech_energy_threshold=0.75, # Voice reactions motion_spike_threshold=0.6, # Gameplay action ideal_hook_window=(0.0, 2.5), # Gaming can have slight buildup hook_type_weights={ HookType.CLUTCH_PLAY: 1.0, HookType.ELIMINATION: 0.95, HookType.RAGE_REACTION: 0.9, HookType.UNEXPECTED: 0.85, HookType.PEAK_ENERGY: 0.75, HookType.EMOTIONAL_PEAK: 0.7, }, min_hook_score=0.5, ), "vlogs": ViralHookConfig( domain="vlogs", priority_hooks=[ HookType.REVEAL, HookType.PUNCHLINE, HookType.EMOTIONAL_MOMENT, HookType.CONFRONTATION, HookType.EMOTIONAL_PEAK, ], audio_spike_threshold=0.65, # Reactions less loud speech_energy_threshold=0.7, # Speaking emphasis emotion_threshold=0.65, # Facial emotions ideal_hook_window=(0.0, 3.0), # Vlogs can have more buildup hook_type_weights={ HookType.REVEAL: 1.0, HookType.PUNCHLINE: 0.95, HookType.EMOTIONAL_MOMENT: 0.9, HookType.CONFRONTATION: 0.85, HookType.EMOTIONAL_PEAK: 0.8, HookType.SUDDEN_CHANGE: 0.7, }, min_hook_score=0.45, ), "podcasts": ViralHookConfig( domain="podcasts", priority_hooks=[ HookType.HOT_TAKE, HookType.BIG_LAUGH, HookType.REVELATION, HookType.HEATED_DEBATE, HookType.EMOTIONAL_PEAK, ], audio_spike_threshold=0.6, # Speech-based speech_energy_threshold=0.8, # Emphasis detection crowd_noise_threshold=0.7, # Group laughter ideal_hook_window=(0.0, 2.0), # Podcasts need quick hooks hook_type_weights={ HookType.HOT_TAKE: 1.0, HookType.BIG_LAUGH: 0.95, HookType.REVELATION: 0.9, HookType.HEATED_DEBATE: 0.85, HookType.EMOTIONAL_PEAK: 0.75, HookType.SUDDEN_CHANGE: 0.6, }, min_hook_score=0.5, ), "general": ViralHookConfig( domain="general", priority_hooks=[ HookType.PEAK_ENERGY, HookType.SUDDEN_CHANGE, HookType.EMOTIONAL_PEAK, ], audio_spike_threshold=0.7, motion_spike_threshold=0.65, ideal_hook_window=(0.0, 2.5), hook_type_weights={ HookType.PEAK_ENERGY: 1.0, HookType.SUDDEN_CHANGE: 0.9, HookType.EMOTIONAL_PEAK: 0.85, }, min_hook_score=0.5, ), } class ViralHookDetector: """ Detects viral hook moments in video segments. Analyzes audio, visual, and motion signals to find the best starting point for maximum viewer retention. """ def __init__(self, domain: str = "general"): """ Initialize hook detector. Args: domain: Content domain for hook detection """ self.domain = domain self.config = VIRAL_HOOK_CONFIGS.get(domain, VIRAL_HOOK_CONFIGS["general"]) logger.info(f"ViralHookDetector initialized for domain: {domain}") def detect_hooks( self, timestamps: List[float], audio_energy: Optional[List[float]] = None, audio_flux: Optional[List[float]] = None, audio_centroid: Optional[List[float]] = None, visual_scores: Optional[List[float]] = None, motion_scores: Optional[List[float]] = None, emotions: Optional[List[str]] = None, actions: Optional[List[str]] = None, ) -> List[HookSignal]: """ Detect hook moments from multi-modal signals. Args: timestamps: Time points for each data sample audio_energy: RMS energy values (0-1) audio_flux: Spectral flux values (0-1) audio_centroid: Spectral centroid values (0-1) visual_scores: Visual hype scores (0-1) motion_scores: Motion intensity scores (0-1) emotions: Detected emotions per timestamp actions: Detected actions per timestamp Returns: List of detected HookSignals sorted by score """ hooks = [] # Detect audio-based hooks if audio_energy is not None: hooks.extend(self._detect_audio_spikes(timestamps, audio_energy, audio_flux)) # Detect crowd/laughter from spectral centroid if audio_centroid is not None: hooks.extend(self._detect_crowd_moments(timestamps, audio_centroid, audio_energy)) # Detect motion-based hooks if motion_scores is not None: hooks.extend(self._detect_motion_peaks(timestamps, motion_scores)) # Detect visual peaks if visual_scores is not None: hooks.extend(self._detect_visual_peaks(timestamps, visual_scores)) # Detect emotion-based hooks if emotions is not None: hooks.extend(self._detect_emotion_hooks(timestamps, emotions)) # Detect action-based hooks if actions is not None: hooks.extend(self._detect_action_hooks(timestamps, actions)) # Sort by score descending hooks.sort(key=lambda h: h.score, reverse=True) # Filter by minimum score hooks = [h for h in hooks if h.score >= self.config.min_hook_score] logger.info(f"Detected {len(hooks)} potential hook moments") return hooks def _detect_audio_spikes( self, timestamps: List[float], energy: List[float], flux: Optional[List[float]] = None, ) -> List[HookSignal]: """Detect sudden audio energy spikes (beat drops, reactions, etc.)""" hooks = [] if len(energy) < 3: return hooks energy_arr = np.array(energy) threshold = self.config.audio_spike_threshold # Calculate rolling mean and detect spikes window = max(3, int(len(energy) * 0.1)) rolling_mean = np.convolve(energy_arr, np.ones(window)/window, mode='same') for i in range(1, len(energy) - 1): # Spike detection: current value significantly above local average if energy[i] > threshold and energy[i] > rolling_mean[i] * 1.3: # Check if it's a peak (higher than neighbors) if energy[i] >= energy[i-1] and energy[i] >= energy[i+1]: # Determine hook type based on domain if self.domain == "music": hook_type = HookType.BEAT_DROP elif self.domain == "sports": hook_type = HookType.COMMENTATOR_HYPE elif self.domain == "gaming": hook_type = HookType.RAGE_REACTION else: hook_type = HookType.PEAK_ENERGY intensity = min(1.0, energy[i]) confidence = min(1.0, (energy[i] - rolling_mean[i]) / 0.3) hooks.append(HookSignal( timestamp=timestamps[i], hook_type=hook_type, confidence=confidence, intensity=intensity, description=f"Audio spike at {timestamps[i]:.1f}s (energy: {energy[i]:.2f})" )) return hooks def _detect_crowd_moments( self, timestamps: List[float], centroid: List[float], energy: Optional[List[float]] = None, ) -> List[HookSignal]: """Detect crowd noise / group reactions from spectral characteristics.""" hooks = [] threshold = self.config.crowd_noise_threshold for i, (ts, cent) in enumerate(zip(timestamps, centroid)): # High centroid + high energy = crowd/cheering energy_val = energy[i] if energy else 0.5 if cent > threshold and energy_val > 0.5: if self.domain == "sports": hook_type = HookType.CROWD_ERUPTION elif self.domain == "podcasts": hook_type = HookType.BIG_LAUGH else: hook_type = HookType.PEAK_ENERGY intensity = min(1.0, cent * energy_val * 1.5) confidence = min(1.0, cent) hooks.append(HookSignal( timestamp=ts, hook_type=hook_type, confidence=confidence, intensity=intensity, description=f"Crowd/group moment at {ts:.1f}s" )) return hooks def _detect_motion_peaks( self, timestamps: List[float], motion: List[float], ) -> List[HookSignal]: """Detect peak motion moments (action, dance, etc.)""" hooks = [] threshold = self.config.motion_spike_threshold motion_arr = np.array(motion) # Find local maxima above threshold for i in range(1, len(motion) - 1): if motion[i] > threshold: if motion[i] >= motion[i-1] and motion[i] >= motion[i+1]: if self.domain == "music": hook_type = HookType.DANCE_PEAK elif self.domain == "sports": hook_type = HookType.REPLAY_WORTHY elif self.domain == "gaming": hook_type = HookType.CLUTCH_PLAY else: hook_type = HookType.PEAK_ENERGY hooks.append(HookSignal( timestamp=timestamps[i], hook_type=hook_type, confidence=min(1.0, motion[i]), intensity=motion[i], description=f"High motion at {timestamps[i]:.1f}s" )) return hooks def _detect_visual_peaks( self, timestamps: List[float], visual: List[float], ) -> List[HookSignal]: """Detect visual hype peaks.""" hooks = [] # Find timestamps with high visual scores threshold = 0.7 for i, (ts, score) in enumerate(zip(timestamps, visual)): if score > threshold: hooks.append(HookSignal( timestamp=ts, hook_type=HookType.VISUAL_CLIMAX if self.domain == "music" else HookType.PEAK_ENERGY, confidence=score, intensity=score, description=f"Visual peak at {ts:.1f}s (score: {score:.2f})" )) return hooks def _detect_emotion_hooks( self, timestamps: List[float], emotions: List[str], ) -> List[HookSignal]: """Detect emotion-based hook moments.""" hooks = [] # High-engagement emotions hook_emotions = { "excitement": (HookType.EMOTIONAL_PEAK, 0.9), "joy": (HookType.EMOTIONAL_MOMENT, 0.85), "surprise": (HookType.REVEAL if self.domain == "vlogs" else HookType.UNEXPECTED, 0.9), "tension": (HookType.CONFRONTATION if self.domain == "vlogs" else HookType.EMOTIONAL_PEAK, 0.8), "anger": (HookType.HEATED_DEBATE if self.domain == "podcasts" else HookType.RAGE_REACTION, 0.85), } for ts, emotion in zip(timestamps, emotions): emotion_lower = emotion.lower() if emotion_lower in hook_emotions: hook_type, intensity = hook_emotions[emotion_lower] hooks.append(HookSignal( timestamp=ts, hook_type=hook_type, confidence=0.8, intensity=intensity, description=f"Emotion '{emotion}' at {ts:.1f}s" )) return hooks def _detect_action_hooks( self, timestamps: List[float], actions: List[str], ) -> List[HookSignal]: """Detect action-based hook moments.""" hooks = [] # High-engagement actions by domain hook_actions = { "sports": { "celebration": (HookType.GOAL_MOMENT, 1.0), "action": (HookType.REPLAY_WORTHY, 0.85), "reaction": (HookType.CROWD_ERUPTION, 0.8), }, "music": { "performance": (HookType.VISUAL_CLIMAX, 0.9), "action": (HookType.DANCE_PEAK, 0.85), }, "gaming": { "action": (HookType.CLUTCH_PLAY, 0.9), "reaction": (HookType.RAGE_REACTION, 0.85), "celebration": (HookType.ELIMINATION, 0.9), }, "vlogs": { "reaction": (HookType.REVEAL, 0.9), "celebration": (HookType.EMOTIONAL_MOMENT, 0.85), }, "podcasts": { "reaction": (HookType.BIG_LAUGH, 0.85), "speech": (HookType.HOT_TAKE, 0.8), }, } domain_actions = hook_actions.get(self.domain, {}) for ts, action in zip(timestamps, actions): action_lower = action.lower() if action_lower in domain_actions: hook_type, intensity = domain_actions[action_lower] hooks.append(HookSignal( timestamp=ts, hook_type=hook_type, confidence=0.85, intensity=intensity, description=f"Action '{action}' at {ts:.1f}s" )) return hooks def find_best_clip_start( self, clip_start: float, clip_end: float, hooks: List[HookSignal], allow_adjustment: float = 3.0, ) -> Tuple[float, Optional[HookSignal]]: """ Find the best starting point for a clip based on detected hooks. Args: clip_start: Original clip start time clip_end: Original clip end time hooks: Detected hook signals allow_adjustment: Max seconds to adjust start backwards Returns: Tuple of (adjusted_start_time, best_hook_signal) """ # Find hooks within the ideal window from clip start search_start = max(0, clip_start - allow_adjustment) search_end = clip_start + self.config.max_hook_search_window # Filter hooks in search range candidate_hooks = [ h for h in hooks if search_start <= h.timestamp <= search_end ] if not candidate_hooks: logger.debug(f"No hooks found for clip at {clip_start:.1f}s") return clip_start, None # Score each hook based on: # 1. Hook quality (score) # 2. Position preference (earlier in ideal window = better) # 3. Hook type priority for domain best_hook = None best_score = 0 for hook in candidate_hooks: # Base score from hook quality score = hook.score # Apply hook type weight type_weight = self.config.hook_type_weights.get(hook.hook_type, 0.5) score *= type_weight # Prefer hooks that land in ideal window ideal_start, ideal_end = self.config.ideal_hook_window time_from_original = hook.timestamp - clip_start if ideal_start <= time_from_original <= ideal_end: # Perfect position score *= 1.2 elif time_from_original < ideal_start: # Hook is before clip start - we'd need to adjust adjustment_needed = clip_start - hook.timestamp if adjustment_needed <= allow_adjustment: # Penalize based on adjustment needed score *= (1.0 - adjustment_needed / allow_adjustment * 0.3) else: score *= 0.3 # Heavy penalty else: # Hook is after ideal window score *= 0.8 if score > best_score: best_score = score best_hook = hook if best_hook: # Adjust start to put hook in ideal position ideal_position = self.config.ideal_hook_window[0] + 0.5 # Aim for middle of ideal window adjusted_start = best_hook.timestamp - ideal_position # Don't go before search_start or make clip too short adjusted_start = max(search_start, adjusted_start) adjusted_start = min(adjusted_start, clip_end - 5.0) # Keep at least 5s logger.info( f"Adjusted clip start: {clip_start:.1f}s -> {adjusted_start:.1f}s " f"(hook: {best_hook.hook_type.value} at {best_hook.timestamp:.1f}s)" ) return adjusted_start, best_hook return clip_start, None def score_clip_hook_potential( self, clip_start: float, clip_duration: float, hooks: List[HookSignal], ) -> float: """ Score a clip's viral potential based on hook placement. Args: clip_start: Clip start time clip_duration: Clip duration hooks: All detected hooks Returns: Hook potential score (0-1) """ clip_end = clip_start + clip_duration # Find hooks in the first few seconds of clip hook_window = self.config.ideal_hook_window[1] early_hooks = [ h for h in hooks if clip_start <= h.timestamp <= clip_start + hook_window ] if not early_hooks: return 0.3 # Base score for clips without clear hooks # Score based on best hook in opening best_hook = max(early_hooks, key=lambda h: h.score) # Apply type weight type_weight = self.config.hook_type_weights.get(best_hook.hook_type, 0.5) return min(1.0, best_hook.score * type_weight * 1.2) def get_viral_hook_config(domain: str) -> ViralHookConfig: """Get viral hook configuration for a domain.""" return VIRAL_HOOK_CONFIGS.get(domain, VIRAL_HOOK_CONFIGS["general"]) def get_viral_hook_detector(domain: str) -> ViralHookDetector: """Get a viral hook detector for a domain.""" return ViralHookDetector(domain) # Export public interface __all__ = [ "HookType", "HookSignal", "ViralHookConfig", "ViralHookDetector", "VIRAL_HOOK_CONFIGS", "get_viral_hook_config", "get_viral_hook_detector", ]