Spaces:

AI-Talent-Force
/

dev_caio

Paused

App Files Files Community

dev_caio / scoring /viral_hooks.py

Chaitanya-aitf

Create viral_hooks.py

fa7dc30 verified 2 months ago

raw

history blame contribute delete

24.9 kB

	"""
	ShortSmith v2 - Viral Hooks Module

	Optimizes clip start points for maximum viral potential.
	The first 1-3 seconds determine if viewers keep watching.

	Research-backed viral triggers by content type:
	- Sports: Peak action moments, crowd eruptions, commentator hype
	- Music: Beat drops, chorus hits, dance peaks
	- Gaming: Clutch plays, reactions, unexpected moments
	- Vlogs: Emotional peaks, reveals, punch lines
	- Podcasts: Hot takes, laughs, controversial statements

	Each domain has specific "hook triggers" that maximize retention.
	"""

	from dataclasses import dataclass, field
	from typing import List, Dict, Optional, Tuple
	from enum import Enum
	import numpy as np

	from utils.logger import get_logger

	# Module-wide logger obtained from the project's utils.logger helper,
	# registered under the name "scoring.viral_hooks".
	logger = get_logger("scoring.viral_hooks")


	class HookType(Enum):
	    """
	    Closed set of viral hook categories.

	    Members are grouped by the content domain they are mainly used for;
	    the first group applies to every domain. Each member's value is the
	    lowercase string form of its name.
	    """

	    # --- Universal -------------------------------------------------------
	    # Maximum audio/visual energy
	    PEAK_ENERGY = "peak_energy"
	    # Dramatic shift in content
	    SUDDEN_CHANGE = "sudden_change"
	    # High emotion moment
	    EMOTIONAL_PEAK = "emotional_peak"

	    # --- Sports ----------------------------------------------------------
	    # Scoring play
	    GOAL_MOMENT = "goal_moment"
	    # Crowd going wild
	    CROWD_ERUPTION = "crowd_eruption"
	    # Excited commentary
	    COMMENTATOR_HYPE = "commentator_hype"
	    # Highlight reel moment
	    REPLAY_WORTHY = "replay_worthy"

	    # --- Music -----------------------------------------------------------
	    # Bass drop / beat switch
	    BEAT_DROP = "beat_drop"
	    # Chorus start
	    CHORUS_HIT = "chorus_hit"
	    # Peak choreography
	    DANCE_PEAK = "dance_peak"
	    # Visual spectacle
	    VISUAL_CLIMAX = "visual_climax"

	    # --- Gaming ----------------------------------------------------------
	    # Skill moment
	    CLUTCH_PLAY = "clutch_play"
	    # Kill/win moment
	    ELIMINATION = "elimination"
	    # Streamer reaction
	    RAGE_REACTION = "rage_reaction"
	    # Plot twist / surprise
	    UNEXPECTED = "unexpected"

	    # --- Vlogs -----------------------------------------------------------
	    # Surprise reveal
	    REVEAL = "reveal"
	    # Joke landing
	    PUNCHLINE = "punchline"
	    # Tears/joy/shock
	    EMOTIONAL_MOMENT = "emotional_moment"
	    # Drama/tension
	    CONFRONTATION = "confrontation"

	    # --- Podcasts --------------------------------------------------------
	    # Controversial opinion
	    HOT_TAKE = "hot_take"
	    # Group laughter
	    BIG_LAUGH = "big_laugh"
	    # Surprising info
	    REVELATION = "revelation"
	    # Argument/passion
	    HEATED_DEBATE = "heated_debate"


	@dataclass
	class HookSignal:
	    """
	    One detected hook candidate at a single point in time.

	    Attributes are documented inline; ``score`` combines the two
	    numeric qualities into a single ranking value.
	    """

	    # Position of the hook (same time base as the timestamps passed to the detector)
	    timestamp: float
	    # Category of the hook
	    hook_type: HookType
	    # 0-1, how confident we are this is a hook
	    confidence: float
	    # 0-1, how strong the hook is
	    intensity: float
	    # Human readable description
	    description: str

	    @property
	    def score(self) -> float:
	        """Return the combined hook score: intensity weighted by confidence."""
	        combined = self.intensity * self.confidence
	        return combined


	@dataclass
	class ViralHookConfig:
	    """
	    Per-domain tuning knobs for viral hook detection.

	    Only ``domain`` is required; every other field has a default so a
	    domain entry needs to spell out just the values it overrides.
	    """

	    domain: str

	    # Hook types to look for, in priority order
	    priority_hooks: List[HookType] = field(default_factory=list)

	    # --- Audio thresholds ---
	    # RMS energy spike to detect
	    audio_spike_threshold: float = 0.7
	    # Seconds to detect spike
	    audio_spike_window: float = 0.5
	    # Spectral centroid for crowd
	    crowd_noise_threshold: float = 0.6
	    # For commentator/speaker hype
	    speech_energy_threshold: float = 0.8

	    # --- Visual thresholds ---
	    # Sudden motion increase
	    motion_spike_threshold: float = 0.7
	    # Weight for scene transitions
	    scene_change_weight: float = 0.3
	    # For detected emotions
	    emotion_threshold: float = 0.7

	    # --- Timing preferences ---
	    # (min, max) seconds from clip start where a hook should land
	    ideal_hook_window: Tuple[float, float] = (0.0, 2.0)
	    # How far past the clip start to search for a hook, in seconds
	    max_hook_search_window: float = 5.0

	    # --- Scoring ---
	    # Multiplier applied to a hook's score, keyed by hook type
	    hook_type_weights: Dict[HookType, float] = field(default_factory=dict)
	    # Minimum score to consider a valid hook
	    min_hook_score: float = 0.5


	# Registry of hook-detection settings, keyed by content domain name.
	# Unknown domains are expected to fall back to the "general" entry.
	VIRAL_HOOK_CONFIGS: Dict[str, ViralHookConfig] = {
	    "sports": ViralHookConfig(
	        domain="sports",
	        priority_hooks=[
	            HookType.GOAL_MOMENT,
	            HookType.CROWD_ERUPTION,
	            HookType.COMMENTATOR_HYPE,
	            HookType.REPLAY_WORTHY,
	            HookType.PEAK_ENERGY,
	        ],
	        # Sports has loud moments
	        audio_spike_threshold=0.75,
	        # Crowd detection
	        crowd_noise_threshold=0.65,
	        # Commentator excitement
	        speech_energy_threshold=0.85,
	        # Action detection
	        motion_spike_threshold=0.7,
	        # Sports hooks need to be immediate
	        ideal_hook_window=(0.0, 1.5),
	        hook_type_weights={
	            HookType.GOAL_MOMENT: 1.0,
	            HookType.CROWD_ERUPTION: 0.95,
	            HookType.COMMENTATOR_HYPE: 0.9,
	            HookType.REPLAY_WORTHY: 0.85,
	            HookType.PEAK_ENERGY: 0.8,
	            HookType.SUDDEN_CHANGE: 0.6,
	        },
	        min_hook_score=0.6,
	    ),
	    "music": ViralHookConfig(
	        domain="music",
	        priority_hooks=[
	            HookType.BEAT_DROP,
	            HookType.CHORUS_HIT,
	            HookType.DANCE_PEAK,
	            HookType.VISUAL_CLIMAX,
	            HookType.PEAK_ENERGY,
	        ],
	        # Beat drops are loud
	        audio_spike_threshold=0.8,
	        # Quick detection for beats
	        audio_spike_window=0.3,
	        # Dance moves
	        motion_spike_threshold=0.65,
	        # Can build slightly
	        ideal_hook_window=(0.0, 2.0),
	        hook_type_weights={
	            HookType.BEAT_DROP: 1.0,
	            HookType.CHORUS_HIT: 0.95,
	            HookType.DANCE_PEAK: 0.85,
	            HookType.VISUAL_CLIMAX: 0.8,
	            HookType.PEAK_ENERGY: 0.75,
	            HookType.SUDDEN_CHANGE: 0.7,
	        },
	        min_hook_score=0.55,
	    ),
	    "gaming": ViralHookConfig(
	        domain="gaming",
	        priority_hooks=[
	            HookType.CLUTCH_PLAY,
	            HookType.ELIMINATION,
	            HookType.RAGE_REACTION,
	            HookType.UNEXPECTED,
	            HookType.PEAK_ENERGY,
	        ],
	        # Streamer reactions
	        audio_spike_threshold=0.7,
	        # Voice reactions
	        speech_energy_threshold=0.75,
	        # Gameplay action
	        motion_spike_threshold=0.6,
	        # Gaming can have slight buildup
	        ideal_hook_window=(0.0, 2.5),
	        hook_type_weights={
	            HookType.CLUTCH_PLAY: 1.0,
	            HookType.ELIMINATION: 0.95,
	            HookType.RAGE_REACTION: 0.9,
	            HookType.UNEXPECTED: 0.85,
	            HookType.PEAK_ENERGY: 0.75,
	            HookType.EMOTIONAL_PEAK: 0.7,
	        },
	        min_hook_score=0.5,
	    ),
	    "vlogs": ViralHookConfig(
	        domain="vlogs",
	        priority_hooks=[
	            HookType.REVEAL,
	            HookType.PUNCHLINE,
	            HookType.EMOTIONAL_MOMENT,
	            HookType.CONFRONTATION,
	            HookType.EMOTIONAL_PEAK,
	        ],
	        # Reactions less loud
	        audio_spike_threshold=0.65,
	        # Speaking emphasis
	        speech_energy_threshold=0.7,
	        # Facial emotions
	        emotion_threshold=0.65,
	        # Vlogs can have more buildup
	        ideal_hook_window=(0.0, 3.0),
	        hook_type_weights={
	            HookType.REVEAL: 1.0,
	            HookType.PUNCHLINE: 0.95,
	            HookType.EMOTIONAL_MOMENT: 0.9,
	            HookType.CONFRONTATION: 0.85,
	            HookType.EMOTIONAL_PEAK: 0.8,
	            HookType.SUDDEN_CHANGE: 0.7,
	        },
	        min_hook_score=0.45,
	    ),
	    "podcasts": ViralHookConfig(
	        domain="podcasts",
	        priority_hooks=[
	            HookType.HOT_TAKE,
	            HookType.BIG_LAUGH,
	            HookType.REVELATION,
	            HookType.HEATED_DEBATE,
	            HookType.EMOTIONAL_PEAK,
	        ],
	        # Speech-based
	        audio_spike_threshold=0.6,
	        # Emphasis detection
	        speech_energy_threshold=0.8,
	        # Group laughter
	        crowd_noise_threshold=0.7,
	        # Podcasts need quick hooks
	        ideal_hook_window=(0.0, 2.0),
	        hook_type_weights={
	            HookType.HOT_TAKE: 1.0,
	            HookType.BIG_LAUGH: 0.95,
	            HookType.REVELATION: 0.9,
	            HookType.HEATED_DEBATE: 0.85,
	            HookType.EMOTIONAL_PEAK: 0.75,
	            HookType.SUDDEN_CHANGE: 0.6,
	        },
	        min_hook_score=0.5,
	    ),
	    # Fallback settings used for any domain without its own entry.
	    "general": ViralHookConfig(
	        domain="general",
	        priority_hooks=[
	            HookType.PEAK_ENERGY,
	            HookType.SUDDEN_CHANGE,
	            HookType.EMOTIONAL_PEAK,
	        ],
	        audio_spike_threshold=0.7,
	        motion_spike_threshold=0.65,
	        ideal_hook_window=(0.0, 2.5),
	        hook_type_weights={
	            HookType.PEAK_ENERGY: 1.0,
	            HookType.SUDDEN_CHANGE: 0.9,
	            HookType.EMOTIONAL_PEAK: 0.85,
	        },
	        min_hook_score=0.5,
	    ),
	}


	class ViralHookDetector:
	    """
	    Detects viral hook moments in video segments.

	    Analyzes audio, visual, motion, emotion and action signals to find the
	    best starting point for maximum viewer retention. Thresholds, timing
	    windows and hook-type weights are read from the ``ViralHookConfig``
	    registered for the detector's domain in ``VIRAL_HOOK_CONFIGS``
	    (falling back to the "general" entry for unknown domains).
	    """

	    # Fixed cut-off for visual peaks; ViralHookConfig has no field for it.
	    _VISUAL_PEAK_THRESHOLD = 0.7

	    # When the clip start is moved later, keep at least this many seconds.
	    _MIN_CLIP_SECONDS = 5.0

	    # Domain -> hook type reported for each signal family. Domains that are
	    # not listed fall back to HookType.PEAK_ENERGY.
	    _AUDIO_SPIKE_HOOKS = {
	        "music": HookType.BEAT_DROP,
	        "sports": HookType.COMMENTATOR_HYPE,
	        "gaming": HookType.RAGE_REACTION,
	    }
	    _CROWD_HOOKS = {
	        "sports": HookType.CROWD_ERUPTION,
	        "podcasts": HookType.BIG_LAUGH,
	    }
	    _MOTION_HOOKS = {
	        "music": HookType.DANCE_PEAK,
	        "sports": HookType.REPLAY_WORTHY,
	        "gaming": HookType.CLUTCH_PLAY,
	    }

	    def __init__(self, domain: str = "general"):
	        """
	        Initialize hook detector.

	        Args:
	            domain: Content domain for hook detection. Domains without an
	                entry in VIRAL_HOOK_CONFIGS use the "general" configuration.
	        """
	        self.domain = domain
	        self.config = VIRAL_HOOK_CONFIGS.get(domain, VIRAL_HOOK_CONFIGS["general"])
	        logger.info(f"ViralHookDetector initialized for domain: {domain}")

	    def detect_hooks(
	        self,
	        timestamps: List[float],
	        audio_energy: Optional[List[float]] = None,
	        audio_flux: Optional[List[float]] = None,
	        audio_centroid: Optional[List[float]] = None,
	        visual_scores: Optional[List[float]] = None,
	        motion_scores: Optional[List[float]] = None,
	        emotions: Optional[List[str]] = None,
	        actions: Optional[List[str]] = None,
	    ) -> List[HookSignal]:
	        """
	        Detect hook moments from multi-modal signals.

	        Each optional signal is aligned index-by-index with ``timestamps``;
	        signals left as None are skipped.

	        Args:
	            timestamps: Time points for each data sample
	            audio_energy: RMS energy values (0-1)
	            audio_flux: Spectral flux values (0-1)
	            audio_centroid: Spectral centroid values (0-1)
	            visual_scores: Visual hype scores (0-1)
	            motion_scores: Motion intensity scores (0-1)
	            emotions: Detected emotions per timestamp
	            actions: Detected actions per timestamp

	        Returns:
	            Detected HookSignals whose score is at least the domain's
	            ``min_hook_score``, sorted by score (highest first).
	        """
	        hooks: List[HookSignal] = []

	        # Audio energy spikes (beat drops, shouts, ...)
	        if audio_energy is not None:
	            hooks.extend(self._detect_audio_spikes(timestamps, audio_energy, audio_flux))

	        # Crowd noise / laughter from the spectral centroid
	        if audio_centroid is not None:
	            hooks.extend(self._detect_crowd_moments(timestamps, audio_centroid, audio_energy))

	        # Motion peaks
	        if motion_scores is not None:
	            hooks.extend(self._detect_motion_peaks(timestamps, motion_scores))

	        # Visual peaks
	        if visual_scores is not None:
	            hooks.extend(self._detect_visual_peaks(timestamps, visual_scores))

	        # Emotion labels
	        if emotions is not None:
	            hooks.extend(self._detect_emotion_hooks(timestamps, emotions))

	        # Action labels
	        if actions is not None:
	            hooks.extend(self._detect_action_hooks(timestamps, actions))

	        # Drop weak candidates, then rank the rest. list.sort is stable, so
	        # the order matches sorting first and filtering afterwards.
	        min_score = self.config.min_hook_score
	        hooks = [h for h in hooks if h.score >= min_score]
	        hooks.sort(key=lambda h: h.score, reverse=True)

	        logger.info(f"Detected {len(hooks)} potential hook moments")
	        return hooks

	    def _detect_audio_spikes(
	        self,
	        timestamps: List[float],
	        energy: List[float],
	        flux: Optional[List[float]] = None,
	    ) -> List[HookSignal]:
	        """
	        Detect sudden audio energy spikes (beat drops, reactions, etc.)

	        A sample is a spike when it exceeds the configured
	        ``audio_spike_threshold``, is more than 1.3x the rolling mean around
	        it, and is a local maximum. The first and last samples are never
	        reported because they have only one neighbour.

	        Args:
	            timestamps: Time points aligned with ``energy``
	            energy: RMS energy values
	            flux: Accepted for interface compatibility; currently unused

	        Returns:
	            One HookSignal per detected spike, in time order.
	        """
	        hooks: List[HookSignal] = []

	        if len(energy) < 3:
	            return hooks

	        energy_arr = np.asarray(energy, dtype=float)
	        threshold = self.config.audio_spike_threshold

	        # Rolling mean over ~10% of the signal (at least 3 samples).
	        window = max(3, int(len(energy_arr) * 0.1))
	        rolling_mean = np.convolve(energy_arr, np.ones(window) / window, mode="same")

	        hook_type = self._AUDIO_SPIKE_HOOKS.get(self.domain, HookType.PEAK_ENERGY)

	        # Interior samples only, and never past the end of `timestamps`
	        # (a shorter timestamp list would otherwise raise IndexError).
	        stop = min(len(energy_arr) - 1, len(timestamps))
	        for i in range(1, stop):
	            level = float(energy_arr[i])
	            local_mean = float(rolling_mean[i])

	            # Must be loud in absolute terms and relative to its surroundings.
	            if not (level > threshold and level > local_mean * 1.3):
	                continue
	            # Must be a peak (at least as high as both neighbours).
	            if not (level >= energy_arr[i - 1] and level >= energy_arr[i + 1]):
	                continue

	            ts = timestamps[i]
	            # Confidence grows with the margin over the local mean and
	            # saturates once that margin reaches 0.3.
	            confidence = max(0.0, min(1.0, (level - local_mean) / 0.3))

	            hooks.append(HookSignal(
	                timestamp=ts,
	                hook_type=hook_type,
	                confidence=confidence,
	                intensity=min(1.0, level),
	                description=f"Audio spike at {ts:.1f}s (energy: {level:.2f})",
	            ))

	        return hooks

	    def _detect_crowd_moments(
	        self,
	        timestamps: List[float],
	        centroid: List[float],
	        energy: Optional[List[float]] = None,
	    ) -> List[HookSignal]:
	        """
	        Detect crowd noise / group reactions from spectral characteristics.

	        A sample qualifies when its centroid exceeds the configured
	        ``crowd_noise_threshold``. When an energy value is available for the
	        sample it must also exceed 0.5; when it is not, a neutral energy of
	        0.5 is assumed for the intensity and no energy gate is applied.

	        Args:
	            timestamps: Time points aligned with ``centroid``
	            centroid: Spectral centroid values
	            energy: Optional RMS energy values aligned with ``centroid``

	        Returns:
	            One HookSignal per qualifying sample, in time order.
	        """
	        hooks: List[HookSignal] = []

	        threshold = self.config.crowd_noise_threshold
	        hook_type = self._CROWD_HOOKS.get(self.domain, HookType.PEAK_ENERGY)
	        # len() instead of truthiness so numpy arrays are accepted too.
	        energy_len = 0 if energy is None else len(energy)

	        for i, (ts, cent) in enumerate(zip(timestamps, centroid)):
	            if not cent > threshold:
	                continue

	            if i < energy_len:
	                # High centroid + high energy = crowd/cheering
	                energy_val = energy[i]
	                if not energy_val > 0.5:
	                    continue
	            else:
	                # No energy sample: fall back to a neutral value. Gating on
	                # it would reject every sample (0.5 > 0.5 is never true).
	                energy_val = 0.5

	            hooks.append(HookSignal(
	                timestamp=ts,
	                hook_type=hook_type,
	                confidence=min(1.0, cent),
	                intensity=min(1.0, cent * energy_val * 1.5),
	                description=f"Crowd/group moment at {ts:.1f}s",
	            ))

	        return hooks

	    def _detect_motion_peaks(
	        self,
	        timestamps: List[float],
	        motion: List[float],
	    ) -> List[HookSignal]:
	        """
	        Detect peak motion moments (action, dance, etc.)

	        Reports interior samples that exceed the configured
	        ``motion_spike_threshold`` and are local maxima.

	        Args:
	            timestamps: Time points aligned with ``motion``
	            motion: Motion intensity scores

	        Returns:
	            One HookSignal per motion peak, in time order.
	        """
	        hooks: List[HookSignal] = []

	        threshold = self.config.motion_spike_threshold
	        hook_type = self._MOTION_HOOKS.get(self.domain, HookType.PEAK_ENERGY)

	        # Interior samples only, and never past the end of `timestamps`.
	        stop = min(len(motion) - 1, len(timestamps))
	        for i in range(1, stop):
	            level = motion[i]
	            if not level > threshold:
	                continue
	            if not (level >= motion[i - 1] and level >= motion[i + 1]):
	                continue

	            ts = timestamps[i]
	            hooks.append(HookSignal(
	                timestamp=ts,
	                hook_type=hook_type,
	                confidence=min(1.0, level),
	                # Clamped like the other detectors so intensity stays in 0-1.
	                intensity=min(1.0, level),
	                description=f"High motion at {ts:.1f}s",
	            ))

	        return hooks

	    def _detect_visual_peaks(
	        self,
	        timestamps: List[float],
	        visual: List[float],
	    ) -> List[HookSignal]:
	        """
	        Detect visual hype peaks.

	        Every sample whose score exceeds the fixed 0.7 cut-off is reported;
	        the score is used as both confidence and intensity.

	        Args:
	            timestamps: Time points aligned with ``visual``
	            visual: Visual hype scores

	        Returns:
	            One HookSignal per qualifying sample, in time order.
	        """
	        threshold = self._VISUAL_PEAK_THRESHOLD
	        if self.domain == "music":
	            hook_type = HookType.VISUAL_CLIMAX
	        else:
	            hook_type = HookType.PEAK_ENERGY

	        return [
	            HookSignal(
	                timestamp=ts,
	                hook_type=hook_type,
	                confidence=score,
	                intensity=score,
	                description=f"Visual peak at {ts:.1f}s (score: {score:.2f})",
	            )
	            for ts, score in zip(timestamps, visual)
	            if score > threshold
	        ]

	    def _detect_emotion_hooks(
	        self,
	        timestamps: List[float],
	        emotions: List[str],
	    ) -> List[HookSignal]:
	        """
	        Detect emotion-based hook moments.

	        Emotion labels are matched case-insensitively against a fixed table
	        of high-engagement emotions; entries that are not strings (e.g.
	        None for "nothing detected") are skipped.

	        Args:
	            timestamps: Time points aligned with ``emotions``
	            emotions: Detected emotion label per timestamp

	        Returns:
	            One HookSignal (confidence 0.8) per recognised emotion.
	        """
	        hooks: List[HookSignal] = []
	        is_vlog = self.domain == "vlogs"
	        is_podcast = self.domain == "podcasts"

	        # Emotion label -> (hook type, intensity)
	        hook_emotions = {
	            "excitement": (HookType.EMOTIONAL_PEAK, 0.9),
	            "joy": (HookType.EMOTIONAL_MOMENT, 0.85),
	            "surprise": (HookType.REVEAL if is_vlog else HookType.UNEXPECTED, 0.9),
	            "tension": (HookType.CONFRONTATION if is_vlog else HookType.EMOTIONAL_PEAK, 0.8),
	            "anger": (HookType.HEATED_DEBATE if is_podcast else HookType.RAGE_REACTION, 0.85),
	        }

	        for ts, emotion in zip(timestamps, emotions):
	            if not isinstance(emotion, str):
	                continue
	            match = hook_emotions.get(emotion.lower())
	            if match is None:
	                continue
	            hook_type, intensity = match
	            hooks.append(HookSignal(
	                timestamp=ts,
	                hook_type=hook_type,
	                confidence=0.8,
	                intensity=intensity,
	                description=f"Emotion '{emotion}' at {ts:.1f}s",
	            ))

	        return hooks

	    def _detect_action_hooks(
	        self,
	        timestamps: List[float],
	        actions: List[str],
	    ) -> List[HookSignal]:
	        """
	        Detect action-based hook moments.

	        Action labels are matched case-insensitively against the table for
	        the detector's domain. Domains without a table (e.g. "general")
	        produce no action hooks; non-string entries are skipped.

	        Args:
	            timestamps: Time points aligned with ``actions``
	            actions: Detected action label per timestamp

	        Returns:
	            One HookSignal (confidence 0.85) per recognised action.
	        """
	        hooks: List[HookSignal] = []

	        # Domain -> action label -> (hook type, intensity)
	        hook_actions = {
	            "sports": {
	                "celebration": (HookType.GOAL_MOMENT, 1.0),
	                "action": (HookType.REPLAY_WORTHY, 0.85),
	                "reaction": (HookType.CROWD_ERUPTION, 0.8),
	            },
	            "music": {
	                "performance": (HookType.VISUAL_CLIMAX, 0.9),
	                "action": (HookType.DANCE_PEAK, 0.85),
	            },
	            "gaming": {
	                "action": (HookType.CLUTCH_PLAY, 0.9),
	                "reaction": (HookType.RAGE_REACTION, 0.85),
	                "celebration": (HookType.ELIMINATION, 0.9),
	            },
	            "vlogs": {
	                "reaction": (HookType.REVEAL, 0.9),
	                "celebration": (HookType.EMOTIONAL_MOMENT, 0.85),
	            },
	            "podcasts": {
	                "reaction": (HookType.BIG_LAUGH, 0.85),
	                "speech": (HookType.HOT_TAKE, 0.8),
	            },
	        }

	        domain_actions = hook_actions.get(self.domain)
	        if not domain_actions:
	            return hooks

	        for ts, action in zip(timestamps, actions):
	            if not isinstance(action, str):
	                continue
	            match = domain_actions.get(action.lower())
	            if match is None:
	                continue
	            hook_type, intensity = match
	            hooks.append(HookSignal(
	                timestamp=ts,
	                hook_type=hook_type,
	                confidence=0.85,
	                intensity=intensity,
	                description=f"Action '{action}' at {ts:.1f}s",
	            ))

	        return hooks

	    def find_best_clip_start(
	        self,
	        clip_start: float,
	        clip_end: float,
	        hooks: List[HookSignal],
	        allow_adjustment: float = 3.0,
	    ) -> Tuple[float, Optional[HookSignal]]:
	        """
	        Find the best starting point for a clip based on detected hooks.

	        Hooks between ``clip_start - allow_adjustment`` (not below 0) and
	        ``clip_start + max_hook_search_window`` are ranked by their score,
	        the domain's hook-type weight, and where they fall relative to the
	        ideal hook window. The start is then moved so the winning hook lands
	        0.5s after the beginning of that window.

	        Args:
	            clip_start: Original clip start time
	            clip_end: Original clip end time
	            hooks: Detected hook signals
	            allow_adjustment: Max seconds to adjust start backwards

	        Returns:
	            Tuple of (adjusted_start_time, best_hook_signal). When no usable
	            hook exists the original ``clip_start`` and None are returned.
	        """
	        search_start = max(0.0, clip_start - allow_adjustment)
	        search_end = clip_start + self.config.max_hook_search_window

	        candidate_hooks = [
	            h for h in hooks
	            if search_start <= h.timestamp <= search_end
	        ]

	        if not candidate_hooks:
	            logger.debug(f"No hooks found for clip at {clip_start:.1f}s")
	            return clip_start, None

	        ideal_start, ideal_end = self.config.ideal_hook_window
	        weights = self.config.hook_type_weights

	        best_hook: Optional[HookSignal] = None
	        best_score = 0.0

	        for hook in candidate_hooks:
	            # Hook quality scaled by how much this domain values its type
	            # (types without an explicit weight count half).
	            score = hook.score * weights.get(hook.hook_type, 0.5)

	            time_from_original = hook.timestamp - clip_start

	            if ideal_start <= time_from_original <= ideal_end:
	                # Already lands in the ideal window
	                score *= 1.2
	            elif time_from_original < ideal_start:
	                # Hook is before the ideal window - start must move back
	                adjustment_needed = clip_start - hook.timestamp
	                if adjustment_needed <= allow_adjustment:
	                    # Linear penalty: up to -30% at the full allowed shift.
	                    # Guard the division so allow_adjustment == 0 is safe.
	                    if allow_adjustment > 0:
	                        fraction = max(0.0, adjustment_needed) / allow_adjustment
	                    else:
	                        fraction = 0.0
	                    score *= (1.0 - fraction * 0.3)
	                else:
	                    score *= 0.3  # Heavy penalty
	            else:
	                # Hook is after the ideal window
	                score *= 0.8

	            if score > best_score:
	                best_score = score
	                best_hook = hook

	        if best_hook is None:
	            # Every candidate scored 0 or less
	            return clip_start, None

	        # Place the hook 0.5s after the start of the ideal window.
	        ideal_position = ideal_start + 0.5
	        adjusted_start = best_hook.timestamp - ideal_position

	        # Never start so late that less than the minimum length remains.
	        # For clips already shorter than that, the original start is the
	        # latest allowed, so the clip is never shortened further.
	        latest_start = max(clip_start, clip_end - self._MIN_CLIP_SECONDS)
	        adjusted_start = min(adjusted_start, latest_start)
	        # Never start before the search range (and therefore never below 0).
	        adjusted_start = max(search_start, adjusted_start)

	        logger.info(
	            f"Adjusted clip start: {clip_start:.1f}s -> {adjusted_start:.1f}s "
	            f"(hook: {best_hook.hook_type.value} at {best_hook.timestamp:.1f}s)"
	        )

	        return adjusted_start, best_hook

	    def score_clip_hook_potential(
	        self,
	        clip_start: float,
	        clip_duration: float,
	        hooks: List[HookSignal],
	    ) -> float:
	        """
	        Score a clip's viral potential based on hook placement.

	        Only hooks inside the opening of the clip count: from ``clip_start``
	        up to the end of the ideal hook window, but never past the end of
	        the clip itself.

	        Args:
	            clip_start: Clip start time
	            clip_duration: Clip duration
	            hooks: All detected hooks

	        Returns:
	            Hook potential score (0-1); 0.3 when the opening has no hook.
	        """
	        clip_end = clip_start + clip_duration
	        window_end = min(clip_start + self.config.ideal_hook_window[1], clip_end)

	        early_hooks = [
	            h for h in hooks
	            if clip_start <= h.timestamp <= window_end
	        ]

	        if not early_hooks:
	            return 0.3  # Base score for clips without clear hooks

	        # The strongest opening hook decides the score.
	        best_hook = max(early_hooks, key=lambda h: h.score)
	        type_weight = self.config.hook_type_weights.get(best_hook.hook_type, 0.5)

	        return min(1.0, best_hook.score * type_weight * 1.2)


	def get_viral_hook_config(domain: str) -> ViralHookConfig:
	    """
	    Look up the viral hook configuration for a content domain.

	    Args:
	        domain: Content domain name

	    Returns:
	        The domain's ViralHookConfig, or the "general" one when the domain
	        has no entry of its own.
	    """
	    fallback = VIRAL_HOOK_CONFIGS["general"]
	    if domain in VIRAL_HOOK_CONFIGS:
	        return VIRAL_HOOK_CONFIGS[domain]
	    return fallback


	def get_viral_hook_detector(domain: str) -> ViralHookDetector:
	    """
	    Build a new viral hook detector for a content domain.

	    Args:
	        domain: Content domain name, passed straight to ViralHookDetector

	    Returns:
	        A freshly constructed ViralHookDetector.
	    """
	    detector = ViralHookDetector(domain)
	    return detector


	# Public interface: the names exported by `from <module> import *`.
	# Covers the data types, the detector class, the per-domain config
	# registry and the two factory/lookup helpers defined above.
	__all__ = [
	    "HookType",
	    "HookSignal",
	    "ViralHookConfig",
	    "ViralHookDetector",
	    "VIRAL_HOOK_CONFIGS",
	    "get_viral_hook_config",
	    "get_viral_hook_detector",
	]