# dev_caio / scoring / viral_hooks.py
# Chaitanya-aitf - "Create viral_hooks.py" - fa7dc30 (verified)
"""
ShortSmith v2 - Viral Hooks Module
Optimizes clip start points for maximum viral potential.
The first 1-3 seconds determine if viewers keep watching.
Research-backed viral triggers by content type:
- Sports: Peak action moments, crowd eruptions, commentator hype
- Music: Beat drops, chorus hits, dance peaks
- Gaming: Clutch plays, reactions, unexpected moments
- Vlogs: Emotional peaks, reveals, punch lines
- Podcasts: Hot takes, laughs, controversial statements
Each domain has specific "hook triggers" that maximize retention.
"""
from dataclasses import dataclass, field
from typing import List, Dict, Optional, Tuple
from enum import Enum
import numpy as np
from utils.logger import get_logger
logger = get_logger("scoring.viral_hooks")
class HookType(Enum):
    """Enumeration of the hook categories a detector can attach to a signal.

    Members are grouped as universal hooks followed by the hooks specific to
    sports, music, gaming, vlogs and podcasts. Each member's value is its
    lower-case name.
    """

    # --- Universal hooks ---
    # Maximum audio/visual energy
    PEAK_ENERGY = "peak_energy"
    # Dramatic shift in content
    SUDDEN_CHANGE = "sudden_change"
    # High emotion moment
    EMOTIONAL_PEAK = "emotional_peak"

    # --- Sports-specific ---
    # Scoring play
    GOAL_MOMENT = "goal_moment"
    # Crowd going wild
    CROWD_ERUPTION = "crowd_eruption"
    # Excited commentary
    COMMENTATOR_HYPE = "commentator_hype"
    # Highlight reel moment
    REPLAY_WORTHY = "replay_worthy"

    # --- Music-specific ---
    # Bass drop / beat switch
    BEAT_DROP = "beat_drop"
    # Chorus start
    CHORUS_HIT = "chorus_hit"
    # Peak choreography
    DANCE_PEAK = "dance_peak"
    # Visual spectacle
    VISUAL_CLIMAX = "visual_climax"

    # --- Gaming-specific ---
    # Skill moment
    CLUTCH_PLAY = "clutch_play"
    # Kill/win moment
    ELIMINATION = "elimination"
    # Streamer reaction
    RAGE_REACTION = "rage_reaction"
    # Plot twist / surprise
    UNEXPECTED = "unexpected"

    # --- Vlog-specific ---
    # Surprise reveal
    REVEAL = "reveal"
    # Joke landing
    PUNCHLINE = "punchline"
    # Tears/joy/shock
    EMOTIONAL_MOMENT = "emotional_moment"
    # Drama/tension
    CONFRONTATION = "confrontation"

    # --- Podcast-specific ---
    # Controversial opinion
    HOT_TAKE = "hot_take"
    # Group laughter
    BIG_LAUGH = "big_laugh"
    # Surprising info
    REVELATION = "revelation"
    # Argument/passion
    HEATED_DEBATE = "heated_debate"
@dataclass
class HookSignal:
    """One hook candidate found at a single point in time.

    Fields:
        timestamp: Time of the hook.
        hook_type: Category of the hook.
        confidence: 0-1, how sure the detector is that this is a hook.
        intensity: 0-1, how strong the hook is.
        description: Human-readable summary of the detection.
    """

    timestamp: float
    hook_type: HookType
    confidence: float
    intensity: float
    description: str

    @property
    def score(self) -> float:
        """Overall hook score: the product of intensity and confidence."""
        combined = self.intensity * self.confidence
        return combined
@dataclass
class ViralHookConfig:
    """Per-domain tuning knobs for viral hook detection.

    Only ``domain`` is required; every other field has a default. The
    mutable fields (``priority_hooks`` and ``hook_type_weights``) get a fresh
    empty container per instance.
    """

    domain: str

    # Hook types to look for, highest priority first.
    priority_hooks: List[HookType] = field(default_factory=list)

    # --- Audio thresholds ---
    # RMS energy level that counts as a spike.
    audio_spike_threshold: float = 0.7
    # Seconds over which a spike is detected.
    audio_spike_window: float = 0.5
    # Spectral centroid level that indicates crowd noise.
    crowd_noise_threshold: float = 0.6
    # Speech energy level for commentator/speaker hype.
    speech_energy_threshold: float = 0.8

    # --- Visual thresholds ---
    # Sudden motion increase.
    motion_spike_threshold: float = 0.7
    # Weight given to scene transitions.
    scene_change_weight: float = 0.3
    # Threshold for detected emotions.
    emotion_threshold: float = 0.7

    # --- Timing preferences ---
    # (earliest, latest) hook position in seconds, measured from clip start.
    ideal_hook_window: Tuple[float, float] = (0.0, 2.0)
    # How far past the clip start to search for a hook, in seconds.
    max_hook_search_window: float = 5.0

    # Multiplier applied to a hook's score, keyed by hook type.
    hook_type_weights: Dict[HookType, float] = field(default_factory=dict)

    # Hooks scoring below this value are not considered valid.
    min_hook_score: float = 0.5
# Registry of viral hook configurations, keyed by each configuration's own
# ``domain`` name. Entries appear in the order: sports, music, gaming, vlogs,
# podcasts, general.
VIRAL_HOOK_CONFIGS: Dict[str, ViralHookConfig] = {
    cfg.domain: cfg
    for cfg in (
        # Sports: loud moments, crowd and commentator cues, immediate hooks.
        ViralHookConfig(
            domain="sports",
            priority_hooks=[
                HookType.GOAL_MOMENT,
                HookType.CROWD_ERUPTION,
                HookType.COMMENTATOR_HYPE,
                HookType.REPLAY_WORTHY,
                HookType.PEAK_ENERGY,
            ],
            audio_spike_threshold=0.75,
            crowd_noise_threshold=0.65,
            speech_energy_threshold=0.85,
            motion_spike_threshold=0.7,
            ideal_hook_window=(0.0, 1.5),
            hook_type_weights={
                HookType.GOAL_MOMENT: 1.0,
                HookType.CROWD_ERUPTION: 0.95,
                HookType.COMMENTATOR_HYPE: 0.9,
                HookType.REPLAY_WORTHY: 0.85,
                HookType.PEAK_ENERGY: 0.8,
                HookType.SUDDEN_CHANGE: 0.6,
            },
            min_hook_score=0.6,
        ),
        # Music: loud beat drops, short spike window, dance motion.
        ViralHookConfig(
            domain="music",
            priority_hooks=[
                HookType.BEAT_DROP,
                HookType.CHORUS_HIT,
                HookType.DANCE_PEAK,
                HookType.VISUAL_CLIMAX,
                HookType.PEAK_ENERGY,
            ],
            audio_spike_threshold=0.8,
            audio_spike_window=0.3,
            motion_spike_threshold=0.65,
            ideal_hook_window=(0.0, 2.0),
            hook_type_weights={
                HookType.BEAT_DROP: 1.0,
                HookType.CHORUS_HIT: 0.95,
                HookType.DANCE_PEAK: 0.85,
                HookType.VISUAL_CLIMAX: 0.8,
                HookType.PEAK_ENERGY: 0.75,
                HookType.SUDDEN_CHANGE: 0.7,
            },
            min_hook_score=0.55,
        ),
        # Gaming: streamer voice reactions and gameplay action, slight buildup.
        ViralHookConfig(
            domain="gaming",
            priority_hooks=[
                HookType.CLUTCH_PLAY,
                HookType.ELIMINATION,
                HookType.RAGE_REACTION,
                HookType.UNEXPECTED,
                HookType.PEAK_ENERGY,
            ],
            audio_spike_threshold=0.7,
            speech_energy_threshold=0.75,
            motion_spike_threshold=0.6,
            ideal_hook_window=(0.0, 2.5),
            hook_type_weights={
                HookType.CLUTCH_PLAY: 1.0,
                HookType.ELIMINATION: 0.95,
                HookType.RAGE_REACTION: 0.9,
                HookType.UNEXPECTED: 0.85,
                HookType.PEAK_ENERGY: 0.75,
                HookType.EMOTIONAL_PEAK: 0.7,
            },
            min_hook_score=0.5,
        ),
        # Vlogs: quieter reactions, facial emotion, more room for buildup.
        ViralHookConfig(
            domain="vlogs",
            priority_hooks=[
                HookType.REVEAL,
                HookType.PUNCHLINE,
                HookType.EMOTIONAL_MOMENT,
                HookType.CONFRONTATION,
                HookType.EMOTIONAL_PEAK,
            ],
            audio_spike_threshold=0.65,
            speech_energy_threshold=0.7,
            emotion_threshold=0.65,
            ideal_hook_window=(0.0, 3.0),
            hook_type_weights={
                HookType.REVEAL: 1.0,
                HookType.PUNCHLINE: 0.95,
                HookType.EMOTIONAL_MOMENT: 0.9,
                HookType.CONFRONTATION: 0.85,
                HookType.EMOTIONAL_PEAK: 0.8,
                HookType.SUDDEN_CHANGE: 0.7,
            },
            min_hook_score=0.45,
        ),
        # Podcasts: speech-based emphasis and group laughter, quick hooks.
        ViralHookConfig(
            domain="podcasts",
            priority_hooks=[
                HookType.HOT_TAKE,
                HookType.BIG_LAUGH,
                HookType.REVELATION,
                HookType.HEATED_DEBATE,
                HookType.EMOTIONAL_PEAK,
            ],
            audio_spike_threshold=0.6,
            speech_energy_threshold=0.8,
            crowd_noise_threshold=0.7,
            ideal_hook_window=(0.0, 2.0),
            hook_type_weights={
                HookType.HOT_TAKE: 1.0,
                HookType.BIG_LAUGH: 0.95,
                HookType.REVELATION: 0.9,
                HookType.HEATED_DEBATE: 0.85,
                HookType.EMOTIONAL_PEAK: 0.75,
                HookType.SUDDEN_CHANGE: 0.6,
            },
            min_hook_score=0.5,
        ),
        # General: universal hooks only.
        ViralHookConfig(
            domain="general",
            priority_hooks=[
                HookType.PEAK_ENERGY,
                HookType.SUDDEN_CHANGE,
                HookType.EMOTIONAL_PEAK,
            ],
            audio_spike_threshold=0.7,
            motion_spike_threshold=0.65,
            ideal_hook_window=(0.0, 2.5),
            hook_type_weights={
                HookType.PEAK_ENERGY: 1.0,
                HookType.SUDDEN_CHANGE: 0.9,
                HookType.EMOTIONAL_PEAK: 0.85,
            },
            min_hook_score=0.5,
        ),
    )
}
class ViralHookDetector:
    """
    Detects viral hook moments in video segments.

    Analyzes audio, visual, and motion signals to find the best
    starting point for maximum viewer retention.

    Attributes:
        domain: Domain name exactly as given to the constructor. The
            detectors compare it against "sports", "music", "gaming",
            "vlogs" and "podcasts" to pick the hook type for a signal.
        config: ViralHookConfig for the domain; the "general" entry of
            VIRAL_HOOK_CONFIGS is used when the domain has no entry.
    """

    # Visual scores must exceed this value to count as a visual peak.
    _VISUAL_PEAK_THRESHOLD = 0.7
    # Clip length (seconds) that start adjustment tries to preserve.
    _MIN_CLIP_SECONDS = 5.0

    def __init__(self, domain: str = "general"):
        """
        Initialize hook detector.

        Args:
            domain: Content domain for hook detection
        """
        self.domain = domain
        self.config = VIRAL_HOOK_CONFIGS.get(domain, VIRAL_HOOK_CONFIGS["general"])
        logger.info(f"ViralHookDetector initialized for domain: {domain}")

    def detect_hooks(
        self,
        timestamps: List[float],
        audio_energy: Optional[List[float]] = None,
        audio_flux: Optional[List[float]] = None,
        audio_centroid: Optional[List[float]] = None,
        visual_scores: Optional[List[float]] = None,
        motion_scores: Optional[List[float]] = None,
        emotions: Optional[List[str]] = None,
        actions: Optional[List[str]] = None,
    ) -> List[HookSignal]:
        """
        Detect hook moments from multi-modal signals.

        Each signal that is provided is run through its own detector; the
        results are merged, sorted by score and filtered by the configured
        minimum hook score.

        Args:
            timestamps: Time points for each data sample
            audio_energy: RMS energy values (0-1)
            audio_flux: Spectral flux values (0-1)
            audio_centroid: Spectral centroid values (0-1)
            visual_scores: Visual hype scores (0-1)
            motion_scores: Motion intensity scores (0-1)
            emotions: Detected emotions per timestamp
            actions: Detected actions per timestamp

        Returns:
            List of detected HookSignals sorted by score, highest first
        """
        hooks: List[HookSignal] = []

        # Audio energy spikes (beat drops, reactions, ...)
        if audio_energy is not None:
            hooks.extend(self._detect_audio_spikes(timestamps, audio_energy, audio_flux))

        # Crowd noise / laughter from the spectral centroid
        if audio_centroid is not None:
            hooks.extend(self._detect_crowd_moments(timestamps, audio_centroid, audio_energy))

        # Motion peaks
        if motion_scores is not None:
            hooks.extend(self._detect_motion_peaks(timestamps, motion_scores))

        # Visual peaks
        if visual_scores is not None:
            hooks.extend(self._detect_visual_peaks(timestamps, visual_scores))

        # Emotion labels
        if emotions is not None:
            hooks.extend(self._detect_emotion_hooks(timestamps, emotions))

        # Action labels
        if actions is not None:
            hooks.extend(self._detect_action_hooks(timestamps, actions))

        # Best hooks first, then drop everything below the domain minimum.
        hooks.sort(key=lambda h: h.score, reverse=True)
        hooks = [h for h in hooks if h.score >= self.config.min_hook_score]

        logger.info(f"Detected {len(hooks)} potential hook moments")
        return hooks

    def _detect_audio_spikes(
        self,
        timestamps: List[float],
        energy: List[float],
        flux: Optional[List[float]] = None,
    ) -> List[HookSignal]:
        """
        Detect sudden audio energy spikes (beat drops, reactions, etc.)

        A sample is a spike when it exceeds the configured audio spike
        threshold, is more than 1.3x the local rolling mean, and is not
        lower than either neighbour.

        Args:
            timestamps: Time points for each sample
            energy: Energy value per sample
            flux: Accepted for interface compatibility; currently unused

        Returns:
            Hook signals for every spike found (empty for fewer than 3 samples)
        """
        hooks: List[HookSignal] = []
        if len(energy) < 3:
            return hooks

        threshold = self.config.audio_spike_threshold

        # Rolling mean over ~10% of the samples (at least 3).
        # NOTE: mode='same' pads with zeros, so the mean is biased low near
        # both ends of the signal when the window is wider than 3 samples.
        window = max(3, int(len(energy) * 0.1))
        rolling_mean = np.convolve(np.array(energy), np.ones(window) / window, mode='same')

        # The hook type depends only on the domain, so resolve it once.
        hook_type = {
            "music": HookType.BEAT_DROP,
            "sports": HookType.COMMENTATOR_HYPE,
            "gaming": HookType.RAGE_REACTION,
        }.get(self.domain, HookType.PEAK_ENERGY)

        # Interior samples only (both neighbours are needed). Also stop at
        # the end of `timestamps` so a shorter timestamp list cannot raise
        # IndexError, matching the truncating behaviour of the zip-based
        # detectors in this class.
        stop = min(len(energy) - 1, len(timestamps))
        for i in range(1, stop):
            level = energy[i]
            local_mean = rolling_mean[i]

            # Must be significantly above both the threshold and the local average
            if not (level > threshold and level > local_mean * 1.3):
                continue
            # Must be a local peak (not lower than either neighbour)
            if not (level >= energy[i - 1] and level >= energy[i + 1]):
                continue

            ts = timestamps[i]
            hooks.append(HookSignal(
                timestamp=ts,
                hook_type=hook_type,
                confidence=float(min(1.0, (level - local_mean) / 0.3)),
                intensity=min(1.0, level),
                description=f"Audio spike at {ts:.1f}s (energy: {level:.2f})",
            ))
        return hooks

    def _detect_crowd_moments(
        self,
        timestamps: List[float],
        centroid: List[float],
        energy: Optional[List[float]] = None,
    ) -> List[HookSignal]:
        """
        Detect crowd noise / group reactions from spectral characteristics.

        A sample qualifies when its centroid exceeds the configured crowd
        noise threshold and, where an energy value exists for it, that
        energy is above 0.5. Samples without an energy value (no energy
        given, or energy shorter than centroid) are judged on the centroid
        alone and use a neutral energy of 0.5 for the intensity.

        Args:
            timestamps: Time points for each sample
            centroid: Spectral centroid value per sample
            energy: Optional energy value per sample

        Returns:
            Hook signals for every qualifying sample
        """
        hooks: List[HookSignal] = []
        threshold = self.config.crowd_noise_threshold

        # Explicit None/length check: truth-testing a numpy array raises.
        energy_count = 0 if energy is None else len(energy)

        if self.domain == "sports":
            hook_type = HookType.CROWD_ERUPTION
        elif self.domain == "podcasts":
            hook_type = HookType.BIG_LAUGH
        else:
            hook_type = HookType.PEAK_ENERGY

        for i, (ts, cent) in enumerate(zip(timestamps, centroid)):
            if i < energy_count:
                energy_val = energy[i]
                loud_enough = energy_val > 0.5
            else:
                # No energy reading: the neutral 0.5 must not fail the
                # "> 0.5" gate, otherwise centroid-only detection could
                # never produce a hook.
                energy_val = 0.5
                loud_enough = True

            # High centroid + high energy = crowd/cheering
            if not (cent > threshold and loud_enough):
                continue

            hooks.append(HookSignal(
                timestamp=ts,
                hook_type=hook_type,
                confidence=min(1.0, cent),
                intensity=min(1.0, cent * energy_val * 1.5),
                description=f"Crowd/group moment at {ts:.1f}s",
            ))
        return hooks

    def _detect_motion_peaks(
        self,
        timestamps: List[float],
        motion: List[float],
    ) -> List[HookSignal]:
        """
        Detect peak motion moments (action, dance, etc.)

        Reports interior samples that exceed the configured motion spike
        threshold and are not lower than either neighbour.

        Args:
            timestamps: Time points for each sample
            motion: Motion intensity per sample

        Returns:
            Hook signals for every motion peak found
        """
        hooks: List[HookSignal] = []
        threshold = self.config.motion_spike_threshold

        hook_type = {
            "music": HookType.DANCE_PEAK,
            "sports": HookType.REPLAY_WORTHY,
            "gaming": HookType.CLUTCH_PLAY,
        }.get(self.domain, HookType.PEAK_ENERGY)

        # Bounded by `timestamps` as well, so a shorter timestamp list
        # truncates instead of raising IndexError.
        stop = min(len(motion) - 1, len(timestamps))
        for i in range(1, stop):
            level = motion[i]
            if not level > threshold:
                continue
            if not (level >= motion[i - 1] and level >= motion[i + 1]):
                continue

            # Both components are clamped to 1.0 like in the other detectors.
            strength = min(1.0, level)
            hooks.append(HookSignal(
                timestamp=timestamps[i],
                hook_type=hook_type,
                confidence=strength,
                intensity=strength,
                description=f"High motion at {timestamps[i]:.1f}s",
            ))
        return hooks

    def _detect_visual_peaks(
        self,
        timestamps: List[float],
        visual: List[float],
    ) -> List[HookSignal]:
        """
        Detect visual hype peaks.

        Every sample whose visual score exceeds 0.7 becomes a hook; there is
        no local-maximum requirement.

        Args:
            timestamps: Time points for each sample
            visual: Visual hype score per sample

        Returns:
            Hook signals for every sample above the visual threshold
        """
        hooks: List[HookSignal] = []
        threshold = self._VISUAL_PEAK_THRESHOLD
        hook_type = HookType.VISUAL_CLIMAX if self.domain == "music" else HookType.PEAK_ENERGY

        for ts, score in zip(timestamps, visual):
            if not score > threshold:
                continue
            strength = min(1.0, score)
            hooks.append(HookSignal(
                timestamp=ts,
                hook_type=hook_type,
                confidence=strength,
                intensity=strength,
                description=f"Visual peak at {ts:.1f}s (score: {score:.2f})",
            ))
        return hooks

    def _detect_emotion_hooks(
        self,
        timestamps: List[float],
        emotions: List[str],
    ) -> List[HookSignal]:
        """
        Detect emotion-based hook moments.

        Emotion labels are matched case-insensitively against a fixed table
        of high-engagement emotions. Entries that are not strings (e.g. None
        for "nothing detected") are skipped.

        Args:
            timestamps: Time points for each sample
            emotions: Emotion label per sample

        Returns:
            Hook signals (confidence 0.8) for every recognised emotion
        """
        hooks: List[HookSignal] = []
        is_vlog = self.domain == "vlogs"
        is_podcast = self.domain == "podcasts"

        # emotion label -> (hook type, intensity)
        hook_emotions = {
            "excitement": (HookType.EMOTIONAL_PEAK, 0.9),
            "joy": (HookType.EMOTIONAL_MOMENT, 0.85),
            "surprise": (HookType.REVEAL if is_vlog else HookType.UNEXPECTED, 0.9),
            "tension": (HookType.CONFRONTATION if is_vlog else HookType.EMOTIONAL_PEAK, 0.8),
            "anger": (HookType.HEATED_DEBATE if is_podcast else HookType.RAGE_REACTION, 0.85),
        }

        for ts, emotion in zip(timestamps, emotions):
            if not isinstance(emotion, str):
                continue
            match = hook_emotions.get(emotion.lower())
            if match is None:
                continue
            hook_type, intensity = match
            hooks.append(HookSignal(
                timestamp=ts,
                hook_type=hook_type,
                confidence=0.8,
                intensity=intensity,
                description=f"Emotion '{emotion}' at {ts:.1f}s",
            ))
        return hooks

    def _detect_action_hooks(
        self,
        timestamps: List[float],
        actions: List[str],
    ) -> List[HookSignal]:
        """
        Detect action-based hook moments.

        Action labels are matched case-insensitively against the table for
        the current domain; domains without a table yield no hooks. Entries
        that are not strings are skipped.

        Args:
            timestamps: Time points for each sample
            actions: Action label per sample

        Returns:
            Hook signals (confidence 0.85) for every recognised action
        """
        hooks: List[HookSignal] = []

        # domain -> action label -> (hook type, intensity)
        hook_actions = {
            "sports": {
                "celebration": (HookType.GOAL_MOMENT, 1.0),
                "action": (HookType.REPLAY_WORTHY, 0.85),
                "reaction": (HookType.CROWD_ERUPTION, 0.8),
            },
            "music": {
                "performance": (HookType.VISUAL_CLIMAX, 0.9),
                "action": (HookType.DANCE_PEAK, 0.85),
            },
            "gaming": {
                "action": (HookType.CLUTCH_PLAY, 0.9),
                "reaction": (HookType.RAGE_REACTION, 0.85),
                "celebration": (HookType.ELIMINATION, 0.9),
            },
            "vlogs": {
                "reaction": (HookType.REVEAL, 0.9),
                "celebration": (HookType.EMOTIONAL_MOMENT, 0.85),
            },
            "podcasts": {
                "reaction": (HookType.BIG_LAUGH, 0.85),
                "speech": (HookType.HOT_TAKE, 0.8),
            },
        }
        domain_actions = hook_actions.get(self.domain, {})

        for ts, action in zip(timestamps, actions):
            if not isinstance(action, str):
                continue
            match = domain_actions.get(action.lower())
            if match is None:
                continue
            hook_type, intensity = match
            hooks.append(HookSignal(
                timestamp=ts,
                hook_type=hook_type,
                confidence=0.85,
                intensity=intensity,
                description=f"Action '{action}' at {ts:.1f}s",
            ))
        return hooks

    @staticmethod
    def _clamp_clip_start(
        candidate: float,
        earliest: float,
        clip_end: float,
        min_duration: float = 5.0,
    ) -> float:
        """Clamp a candidate start so the clip keeps ``min_duration`` seconds
        where possible, but never begins before ``earliest``."""
        # The lower bound is applied last so it always wins: for clips
        # shorter than `min_duration` the duration cap alone would push the
        # start before `earliest` (possibly below zero).
        return max(earliest, min(candidate, clip_end - min_duration))

    def find_best_clip_start(
        self,
        clip_start: float,
        clip_end: float,
        hooks: List[HookSignal],
        allow_adjustment: float = 3.0,
    ) -> Tuple[float, Optional[HookSignal]]:
        """
        Find the best starting point for a clip based on detected hooks.

        Hooks between ``clip_start - allow_adjustment`` (not below 0) and
        ``clip_start + max_hook_search_window`` are ranked by hook score,
        hook type weight and position relative to the ideal hook window.
        The start is then moved so the winning hook lands 0.5s after the
        beginning of the ideal window.

        Args:
            clip_start: Original clip start time
            clip_end: Original clip end time
            hooks: Detected hook signals
            allow_adjustment: Max seconds to adjust start backwards

        Returns:
            Tuple of (adjusted_start_time, best_hook_signal). When no usable
            hook exists the original clip_start and None are returned.
        """
        search_start = max(0.0, clip_start - allow_adjustment)
        search_end = clip_start + self.config.max_hook_search_window

        candidate_hooks = [
            h for h in hooks
            if search_start <= h.timestamp <= search_end
        ]
        if not candidate_hooks:
            logger.debug(f"No hooks found for clip at {clip_start:.1f}s")
            return clip_start, None

        ideal_start, ideal_end = self.config.ideal_hook_window
        weights = self.config.hook_type_weights

        # Rank each hook by:
        # 1. Hook quality (score)
        # 2. Hook type weight for the domain
        # 3. Position relative to the ideal window
        best_hook = None
        best_score = 0
        for hook in candidate_hooks:
            score = hook.score * weights.get(hook.hook_type, 0.5)

            time_from_original = hook.timestamp - clip_start
            if ideal_start <= time_from_original <= ideal_end:
                # Already inside the ideal window
                score *= 1.2
            elif time_from_original < ideal_start:
                # Hook precedes the ideal window - the start has to move back
                adjustment_needed = clip_start - hook.timestamp
                if adjustment_needed <= allow_adjustment:
                    # Up to 30% penalty, proportional to the adjustment.
                    # Guarded so allow_adjustment == 0 cannot divide by zero.
                    if allow_adjustment > 0:
                        score *= (1.0 - adjustment_needed / allow_adjustment * 0.3)
                else:
                    score *= 0.3  # Heavy penalty
            else:
                # Hook is after the ideal window
                score *= 0.8

            if score > best_score:
                best_score = score
                best_hook = hook

        if best_hook is None:
            return clip_start, None

        # Place the hook 0.5s after the beginning of the ideal window.
        ideal_position = ideal_start + 0.5
        adjusted_start = self._clamp_clip_start(
            best_hook.timestamp - ideal_position,
            search_start,
            clip_end,
            self._MIN_CLIP_SECONDS,
        )

        logger.info(
            f"Adjusted clip start: {clip_start:.1f}s -> {adjusted_start:.1f}s "
            f"(hook: {best_hook.hook_type.value} at {best_hook.timestamp:.1f}s)"
        )
        return adjusted_start, best_hook

    def score_clip_hook_potential(
        self,
        clip_start: float,
        clip_duration: float,
        hooks: List[HookSignal],
    ) -> float:
        """
        Score a clip's viral potential based on hook placement.

        Only hooks inside the clip's opening (the upper bound of the ideal
        hook window, capped at the clip's end) are considered. The best of
        them is scaled by its hook type weight and a 1.2 bonus.

        Args:
            clip_start: Clip start time
            clip_duration: Clip duration
            hooks: All detected hooks

        Returns:
            Hook potential score (0-1); 0.3 when the opening has no hook
        """
        clip_end = clip_start + clip_duration

        # The opening window must not reach past the end of the clip, or
        # hooks outside a very short clip would be credited to it.
        window_end = min(clip_start + self.config.ideal_hook_window[1], clip_end)
        early_hooks = [
            h for h in hooks
            if clip_start <= h.timestamp <= window_end
        ]
        if not early_hooks:
            return 0.3  # Base score for clips without clear hooks

        best_hook = max(early_hooks, key=lambda h: h.score)
        type_weight = self.config.hook_type_weights.get(best_hook.hook_type, 0.5)
        return min(1.0, best_hook.score * type_weight * 1.2)
def get_viral_hook_config(domain: str) -> ViralHookConfig:
    """Return the hook configuration registered for ``domain``.

    Domains that have no entry in ``VIRAL_HOOK_CONFIGS`` resolve to the
    "general" configuration.
    """
    fallback = VIRAL_HOOK_CONFIGS["general"]
    if domain in VIRAL_HOOK_CONFIGS:
        return VIRAL_HOOK_CONFIGS[domain]
    return fallback
def get_viral_hook_detector(domain: str) -> ViralHookDetector:
    """Build and return a ``ViralHookDetector`` for ``domain``."""
    detector = ViralHookDetector(domain)
    return detector
# Names exported by ``from ... import *``: the public interface of this module.
__all__ = [
    # Data types
    "HookType",
    "HookSignal",
    "ViralHookConfig",
    # Detector and per-domain configuration table
    "ViralHookDetector",
    "VIRAL_HOOK_CONFIGS",
    # Factory helpers
    "get_viral_hook_config",
    "get_viral_hook_detector",
]