Chaitanya-aitf committed on
Commit
fa7dc30
·
verified ·
1 Parent(s): c4ee290

Create viral_hooks.py

Browse files
Files changed (1) hide show
  1. scoring/viral_hooks.py +703 -0
scoring/viral_hooks.py ADDED
@@ -0,0 +1,703 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ ShortSmith v2 - Viral Hooks Module
3
+
4
+ Optimizes clip start points for maximum viral potential.
5
+ The first 1-3 seconds determine if viewers keep watching.
6
+
7
+ Research-backed viral triggers by content type:
8
+ - Sports: Peak action moments, crowd eruptions, commentator hype
9
+ - Music: Beat drops, chorus hits, dance peaks
10
+ - Gaming: Clutch plays, reactions, unexpected moments
11
+ - Vlogs: Emotional peaks, reveals, punch lines
12
+ - Podcasts: Hot takes, laughs, controversial statements
13
+
14
+ Each domain has specific "hook triggers" that maximize retention.
15
+ """
16
+
17
+ from dataclasses import dataclass, field
18
+ from typing import List, Dict, Optional, Tuple
19
+ from enum import Enum
20
+ import numpy as np
21
+
22
+ from utils.logger import get_logger
23
+
24
+ logger = get_logger("scoring.viral_hooks")
25
+
26
+
27
class HookType(Enum):
    """Types of viral hook moments.

    Members are grouped into universal hooks plus one group per content
    domain (sports, music, gaming, vlogs, podcasts). Each member's value is
    the lowercase snake_case form of its name.
    """
    # Universal hooks
    PEAK_ENERGY = "peak_energy"           # Maximum audio/visual energy
    SUDDEN_CHANGE = "sudden_change"       # Dramatic shift in content
    EMOTIONAL_PEAK = "emotional_peak"     # High emotion moment

    # Sports-specific
    GOAL_MOMENT = "goal_moment"           # Scoring play
    CROWD_ERUPTION = "crowd_eruption"     # Crowd going wild
    COMMENTATOR_HYPE = "commentator_hype" # Excited commentary
    REPLAY_WORTHY = "replay_worthy"       # Highlight reel moment

    # Music-specific
    BEAT_DROP = "beat_drop"               # Bass drop / beat switch
    CHORUS_HIT = "chorus_hit"             # Chorus start
    DANCE_PEAK = "dance_peak"             # Peak choreography
    VISUAL_CLIMAX = "visual_climax"       # Visual spectacle

    # Gaming-specific
    CLUTCH_PLAY = "clutch_play"           # Skill moment
    ELIMINATION = "elimination"           # Kill/win moment
    RAGE_REACTION = "rage_reaction"       # Streamer reaction
    UNEXPECTED = "unexpected"             # Plot twist / surprise

    # Vlog-specific
    REVEAL = "reveal"                     # Surprise reveal
    PUNCHLINE = "punchline"               # Joke landing
    EMOTIONAL_MOMENT = "emotional_moment" # Tears/joy/shock
    CONFRONTATION = "confrontation"       # Drama/tension

    # Podcast-specific
    HOT_TAKE = "hot_take"                 # Controversial opinion
    BIG_LAUGH = "big_laugh"               # Group laughter
    REVELATION = "revelation"             # Surprising info
    HEATED_DEBATE = "heated_debate"       # Argument/passion
63
+
64
+
65
@dataclass
class HookSignal:
    """One candidate hook moment located at a single point in time.

    Attributes:
        timestamp: Position of the hook in seconds.
        hook_type: Category of hook that was detected.
        confidence: 0-1, how confident we are that this is a hook.
        intensity: 0-1, how strong the hook is.
        description: Human readable description of the detection.
    """
    timestamp: float
    hook_type: HookType
    confidence: float
    intensity: float
    description: str

    @property
    def score(self) -> float:
        """Return the combined hook score: confidence multiplied by intensity."""
        certainty, strength = self.confidence, self.intensity
        return certainty * strength
78
+
79
+
80
@dataclass
class ViralHookConfig:
    """Configuration for viral hook detection per domain.

    Holds the detection thresholds, timing preferences and per-hook-type
    weights for one content domain. Only ``domain`` is required; every other
    field has a default.

    NOTE(review): within this module as shown, only audio_spike_threshold,
    crowd_noise_threshold, motion_spike_threshold, ideal_hook_window,
    max_hook_search_window, hook_type_weights and min_hook_score are read.
    The remaining fields may be consumed elsewhere - confirm before relying
    on them.
    """
    domain: str

    # Which hook types to look for (in priority order)
    priority_hooks: List[HookType] = field(default_factory=list)

    # Audio thresholds
    audio_spike_threshold: float = 0.7    # RMS energy spike to detect
    audio_spike_window: float = 0.5       # Seconds to detect spike
    crowd_noise_threshold: float = 0.6    # Spectral centroid for crowd
    speech_energy_threshold: float = 0.8  # For commentator/speaker hype

    # Visual thresholds
    motion_spike_threshold: float = 0.7   # Sudden motion increase
    scene_change_weight: float = 0.3      # Weight for scene transitions
    emotion_threshold: float = 0.7        # For detected emotions

    # Timing preferences
    ideal_hook_window: Tuple[float, float] = (0.0, 2.0)  # Seconds from clip start
    max_hook_search_window: float = 5.0   # How far to search for hook

    # Hook scoring weights; hook types missing from this mapping are given
    # a weight of 0.5 by the scoring code in this module.
    hook_type_weights: Dict[HookType, float] = field(default_factory=dict)

    # Minimum score to consider a valid hook
    min_hook_score: float = 0.5
108
+
109
+
110
# Domain-specific viral hook configurations.
# Keyed by domain name. Lookups in this module use
# VIRAL_HOOK_CONFIGS.get(domain, VIRAL_HOOK_CONFIGS["general"]), so the
# "general" entry must always exist: it is the fallback for unknown domains.
VIRAL_HOOK_CONFIGS: Dict[str, ViralHookConfig] = {

    "sports": ViralHookConfig(
        domain="sports",
        priority_hooks=[
            HookType.GOAL_MOMENT,
            HookType.CROWD_ERUPTION,
            HookType.COMMENTATOR_HYPE,
            HookType.REPLAY_WORTHY,
            HookType.PEAK_ENERGY,
        ],
        audio_spike_threshold=0.75,       # Sports has loud moments
        crowd_noise_threshold=0.65,       # Crowd detection
        speech_energy_threshold=0.85,     # Commentator excitement
        motion_spike_threshold=0.7,       # Action detection
        ideal_hook_window=(0.0, 1.5),     # Sports hooks need to be immediate
        hook_type_weights={
            HookType.GOAL_MOMENT: 1.0,
            HookType.CROWD_ERUPTION: 0.95,
            HookType.COMMENTATOR_HYPE: 0.9,
            HookType.REPLAY_WORTHY: 0.85,
            HookType.PEAK_ENERGY: 0.8,
            HookType.SUDDEN_CHANGE: 0.6,
        },
        min_hook_score=0.6,
    ),

    "music": ViralHookConfig(
        domain="music",
        priority_hooks=[
            HookType.BEAT_DROP,
            HookType.CHORUS_HIT,
            HookType.DANCE_PEAK,
            HookType.VISUAL_CLIMAX,
            HookType.PEAK_ENERGY,
        ],
        audio_spike_threshold=0.8,        # Beat drops are loud
        audio_spike_window=0.3,           # Quick detection for beats
        motion_spike_threshold=0.65,      # Dance moves
        ideal_hook_window=(0.0, 2.0),     # Can build slightly
        hook_type_weights={
            HookType.BEAT_DROP: 1.0,
            HookType.CHORUS_HIT: 0.95,
            HookType.DANCE_PEAK: 0.85,
            HookType.VISUAL_CLIMAX: 0.8,
            HookType.PEAK_ENERGY: 0.75,
            HookType.SUDDEN_CHANGE: 0.7,
        },
        min_hook_score=0.55,
    ),

    "gaming": ViralHookConfig(
        domain="gaming",
        priority_hooks=[
            HookType.CLUTCH_PLAY,
            HookType.ELIMINATION,
            HookType.RAGE_REACTION,
            HookType.UNEXPECTED,
            HookType.PEAK_ENERGY,
        ],
        audio_spike_threshold=0.7,        # Streamer reactions
        speech_energy_threshold=0.75,     # Voice reactions
        motion_spike_threshold=0.6,       # Gameplay action
        ideal_hook_window=(0.0, 2.5),     # Gaming can have slight buildup
        hook_type_weights={
            HookType.CLUTCH_PLAY: 1.0,
            HookType.ELIMINATION: 0.95,
            HookType.RAGE_REACTION: 0.9,
            HookType.UNEXPECTED: 0.85,
            HookType.PEAK_ENERGY: 0.75,
            HookType.EMOTIONAL_PEAK: 0.7,
        },
        min_hook_score=0.5,
    ),

    "vlogs": ViralHookConfig(
        domain="vlogs",
        priority_hooks=[
            HookType.REVEAL,
            HookType.PUNCHLINE,
            HookType.EMOTIONAL_MOMENT,
            HookType.CONFRONTATION,
            HookType.EMOTIONAL_PEAK,
        ],
        audio_spike_threshold=0.65,       # Reactions less loud
        speech_energy_threshold=0.7,      # Speaking emphasis
        emotion_threshold=0.65,           # Facial emotions
        ideal_hook_window=(0.0, 3.0),     # Vlogs can have more buildup
        hook_type_weights={
            HookType.REVEAL: 1.0,
            HookType.PUNCHLINE: 0.95,
            HookType.EMOTIONAL_MOMENT: 0.9,
            HookType.CONFRONTATION: 0.85,
            HookType.EMOTIONAL_PEAK: 0.8,
            HookType.SUDDEN_CHANGE: 0.7,
        },
        min_hook_score=0.45,
    ),

    "podcasts": ViralHookConfig(
        domain="podcasts",
        priority_hooks=[
            HookType.HOT_TAKE,
            HookType.BIG_LAUGH,
            HookType.REVELATION,
            HookType.HEATED_DEBATE,
            HookType.EMOTIONAL_PEAK,
        ],
        audio_spike_threshold=0.6,        # Speech-based
        speech_energy_threshold=0.8,      # Emphasis detection
        crowd_noise_threshold=0.7,        # Group laughter
        ideal_hook_window=(0.0, 2.0),     # Podcasts need quick hooks
        hook_type_weights={
            HookType.HOT_TAKE: 1.0,
            HookType.BIG_LAUGH: 0.95,
            HookType.REVELATION: 0.9,
            HookType.HEATED_DEBATE: 0.85,
            HookType.EMOTIONAL_PEAK: 0.75,
            HookType.SUDDEN_CHANGE: 0.6,
        },
        min_hook_score=0.5,
    ),

    "general": ViralHookConfig(
        domain="general",
        priority_hooks=[
            HookType.PEAK_ENERGY,
            HookType.SUDDEN_CHANGE,
            HookType.EMOTIONAL_PEAK,
        ],
        audio_spike_threshold=0.7,
        motion_spike_threshold=0.65,
        ideal_hook_window=(0.0, 2.5),
        hook_type_weights={
            HookType.PEAK_ENERGY: 1.0,
            HookType.SUDDEN_CHANGE: 0.9,
            HookType.EMOTIONAL_PEAK: 0.85,
        },
        min_hook_score=0.5,
    ),
}
252
+
253
+
254
class ViralHookDetector:
    """
    Detects viral hook moments in video segments.

    Analyzes audio, visual, and motion signals to find the best
    starting point for maximum viewer retention.

    Thresholds and weights come from VIRAL_HOOK_CONFIGS for the requested
    domain. An unrecognised domain uses the "general" configuration, and the
    per-domain hook-type mappings fall back to generic hook types.
    """

    def __init__(self, domain: str = "general"):
        """
        Initialize hook detector.

        Args:
            domain: Content domain for hook detection
        """
        self.domain = domain
        self.config = VIRAL_HOOK_CONFIGS.get(domain, VIRAL_HOOK_CONFIGS["general"])
        logger.info(f"ViralHookDetector initialized for domain: {domain}")

    def detect_hooks(
        self,
        timestamps: List[float],
        audio_energy: Optional[List[float]] = None,
        audio_flux: Optional[List[float]] = None,
        audio_centroid: Optional[List[float]] = None,
        visual_scores: Optional[List[float]] = None,
        motion_scores: Optional[List[float]] = None,
        emotions: Optional[List[str]] = None,
        actions: Optional[List[str]] = None,
    ) -> List[HookSignal]:
        """
        Detect hook moments from multi-modal signals.

        Each signal list is expected to be aligned index-for-index with
        ``timestamps``. When a signal and ``timestamps`` differ in length,
        only the overlapping prefix is analysed.

        Args:
            timestamps: Time points for each data sample
            audio_energy: RMS energy values (0-1)
            audio_flux: Spectral flux values (0-1)
            audio_centroid: Spectral centroid values (0-1)
            visual_scores: Visual hype scores (0-1)
            motion_scores: Motion intensity scores (0-1)
            emotions: Detected emotions per timestamp
            actions: Detected actions per timestamp

        Returns:
            List of detected HookSignals whose score is at least the
            domain's ``min_hook_score``, sorted by score (highest first)
        """
        hooks: List[HookSignal] = []

        # Detect audio-based hooks
        if audio_energy is not None:
            hooks.extend(self._detect_audio_spikes(timestamps, audio_energy, audio_flux))

        # Detect crowd/laughter from spectral centroid
        if audio_centroid is not None:
            hooks.extend(self._detect_crowd_moments(timestamps, audio_centroid, audio_energy))

        # Detect motion-based hooks
        if motion_scores is not None:
            hooks.extend(self._detect_motion_peaks(timestamps, motion_scores))

        # Detect visual peaks
        if visual_scores is not None:
            hooks.extend(self._detect_visual_peaks(timestamps, visual_scores))

        # Detect emotion-based hooks
        if emotions is not None:
            hooks.extend(self._detect_emotion_hooks(timestamps, emotions))

        # Detect action-based hooks
        if actions is not None:
            hooks.extend(self._detect_action_hooks(timestamps, actions))

        # Drop weak hooks first, then sort the survivors by score descending.
        # The sort is stable, so ties keep their detection order.
        min_score = self.config.min_hook_score
        hooks = [h for h in hooks if h.score >= min_score]
        hooks.sort(key=lambda h: h.score, reverse=True)

        logger.info(f"Detected {len(hooks)} potential hook moments")
        return hooks

    def _detect_audio_spikes(
        self,
        timestamps: List[float],
        energy: List[float],
        flux: Optional[List[float]] = None,
    ) -> List[HookSignal]:
        """Detect sudden audio energy spikes (beat drops, reactions, etc.)

        A sample is reported when it exceeds the domain's
        ``audio_spike_threshold``, is more than 1.3x the local rolling mean,
        and is a local maximum. The first and last samples are never
        reported because they lack a neighbour on one side.

        Args:
            timestamps: Time points aligned with ``energy``
            energy: RMS energy values (0-1)
            flux: Accepted for interface compatibility; currently unused

        Returns:
            Detected audio spike hooks in timestamp order
        """
        hooks: List[HookSignal] = []

        # Only positions that have both a timestamp and an energy sample can
        # be reported; this avoids an IndexError on mismatched inputs.
        n = min(len(timestamps), len(energy))
        if n < 3:
            return hooks

        energy_arr = np.asarray(energy, dtype=float)[:n]
        threshold = self.config.audio_spike_threshold

        # Rolling mean over roughly 10% of the signal (at least 3 samples).
        # mode='same' zero-pads the ends, so divide by the number of real
        # samples under the kernel; otherwise the mean is deflated near the
        # edges and ordinary samples there look like spikes.
        window = max(3, int(n * 0.1))
        kernel = np.ones(window)
        coverage = np.convolve(np.ones(n), kernel, mode='same')
        rolling_mean = np.convolve(energy_arr, kernel, mode='same') / coverage

        # Hook type depends only on the domain, so resolve it once.
        hook_type = {
            "music": HookType.BEAT_DROP,
            "sports": HookType.COMMENTATOR_HYPE,
            "gaming": HookType.RAGE_REACTION,
        }.get(self.domain, HookType.PEAK_ENERGY)

        for i in range(1, n - 1):
            value = energy_arr[i]
            local_mean = rolling_mean[i]

            # Spike detection: current value significantly above local average
            if value <= threshold or value <= local_mean * 1.3:
                continue
            # Must be a peak (at least as high as both neighbours)
            if value < energy_arr[i - 1] or value < energy_arr[i + 1]:
                continue

            ts = timestamps[i]
            hooks.append(HookSignal(
                timestamp=ts,
                hook_type=hook_type,
                # An excess of 0.3 over the local mean counts as full confidence.
                confidence=float(min(1.0, (value - local_mean) / 0.3)),
                intensity=float(min(1.0, value)),
                description=f"Audio spike at {ts:.1f}s (energy: {value:.2f})"
            ))

        return hooks

    def _detect_crowd_moments(
        self,
        timestamps: List[float],
        centroid: List[float],
        energy: Optional[List[float]] = None,
    ) -> List[HookSignal]:
        """Detect crowd noise / group reactions from spectral characteristics.

        With energy data, a sample is reported when its centroid exceeds the
        domain's ``crowd_noise_threshold`` and its energy exceeds 0.5.
        Without energy data (``None`` or empty), the centroid alone decides
        and a neutral energy of 0.5 is used for the intensity.

        Args:
            timestamps: Time points aligned with ``centroid``
            centroid: Spectral centroid values (0-1)
            energy: Optional RMS energy values aligned with ``centroid``

        Returns:
            Detected crowd/group hooks in timestamp order
        """
        hooks: List[HookSignal] = []

        threshold = self.config.crowd_noise_threshold
        has_energy = energy is not None and len(energy) > 0

        if has_energy:
            # zip stops at the shortest input, so a short energy list
            # truncates the analysis instead of raising IndexError.
            samples = zip(timestamps, centroid, energy)
        else:
            samples = ((ts, cent, 0.5) for ts, cent in zip(timestamps, centroid))

        if self.domain == "sports":
            hook_type = HookType.CROWD_ERUPTION
        elif self.domain == "podcasts":
            hook_type = HookType.BIG_LAUGH
        else:
            hook_type = HookType.PEAK_ENERGY

        for ts, cent, energy_val in samples:
            if cent <= threshold:
                continue
            # High centroid + high energy = crowd/cheering. The energy gate
            # only applies to real energy data: the neutral 0.5 placeholder
            # could never pass a strict "> 0.5" test.
            if has_energy and energy_val <= 0.5:
                continue

            hooks.append(HookSignal(
                timestamp=ts,
                hook_type=hook_type,
                confidence=min(1.0, cent),
                intensity=min(1.0, cent * energy_val * 1.5),
                description=f"Crowd/group moment at {ts:.1f}s"
            ))

        return hooks

    def _detect_motion_peaks(
        self,
        timestamps: List[float],
        motion: List[float],
    ) -> List[HookSignal]:
        """Detect peak motion moments (action, dance, etc.)

        Reports local maxima above the domain's ``motion_spike_threshold``.
        The first and last samples are never reported.

        Args:
            timestamps: Time points aligned with ``motion``
            motion: Motion intensity scores (0-1)

        Returns:
            Detected motion hooks in timestamp order
        """
        hooks: List[HookSignal] = []

        threshold = self.config.motion_spike_threshold
        # Restrict to positions that have both a timestamp and a motion value.
        n = min(len(timestamps), len(motion))

        hook_type = {
            "music": HookType.DANCE_PEAK,
            "sports": HookType.REPLAY_WORTHY,
            "gaming": HookType.CLUTCH_PLAY,
        }.get(self.domain, HookType.PEAK_ENERGY)

        # Find local maxima above threshold
        for i in range(1, n - 1):
            value = motion[i]
            if value <= threshold:
                continue
            if value < motion[i - 1] or value < motion[i + 1]:
                continue

            # Clamp both components so the score stays within 0-1.
            level = min(1.0, value)
            hooks.append(HookSignal(
                timestamp=timestamps[i],
                hook_type=hook_type,
                confidence=level,
                intensity=level,
                description=f"High motion at {timestamps[i]:.1f}s"
            ))

        return hooks

    def _detect_visual_peaks(
        self,
        timestamps: List[float],
        visual: List[float],
    ) -> List[HookSignal]:
        """Detect visual hype peaks.

        Every sample whose visual score exceeds a fixed 0.7 threshold is
        reported; no local-maximum test is applied.

        Args:
            timestamps: Time points aligned with ``visual``
            visual: Visual hype scores (0-1)

        Returns:
            Detected visual hooks in timestamp order
        """
        hooks: List[HookSignal] = []

        # Fixed threshold: the domain config has no visual-score setting.
        threshold = 0.7
        hook_type = (
            HookType.VISUAL_CLIMAX if self.domain == "music" else HookType.PEAK_ENERGY
        )

        for ts, score in zip(timestamps, visual):
            if score > threshold:
                # Clamp so the combined score stays within 0-1.
                level = min(1.0, score)
                hooks.append(HookSignal(
                    timestamp=ts,
                    hook_type=hook_type,
                    confidence=level,
                    intensity=level,
                    description=f"Visual peak at {ts:.1f}s (score: {score:.2f})"
                ))

        return hooks

    def _detect_emotion_hooks(
        self,
        timestamps: List[float],
        emotions: List[str],
    ) -> List[HookSignal]:
        """Detect emotion-based hook moments.

        Emotion labels are matched case-insensitively against a fixed set of
        high-engagement emotions. Unknown labels and non-string entries
        (e.g. ``None`` for frames without a detection) are ignored.

        Args:
            timestamps: Time points aligned with ``emotions``
            emotions: Detected emotion label per timestamp

        Returns:
            Detected emotion hooks in timestamp order, each with a fixed
            confidence of 0.8
        """
        hooks: List[HookSignal] = []

        # High-engagement emotions -> (hook type, intensity)
        hook_emotions = {
            "excitement": (HookType.EMOTIONAL_PEAK, 0.9),
            "joy": (HookType.EMOTIONAL_MOMENT, 0.85),
            "surprise": (HookType.REVEAL if self.domain == "vlogs" else HookType.UNEXPECTED, 0.9),
            "tension": (HookType.CONFRONTATION if self.domain == "vlogs" else HookType.EMOTIONAL_PEAK, 0.8),
            "anger": (HookType.HEATED_DEBATE if self.domain == "podcasts" else HookType.RAGE_REACTION, 0.85),
        }

        for ts, emotion in zip(timestamps, emotions):
            if not isinstance(emotion, str):
                continue  # missing detection for this sample
            match = hook_emotions.get(emotion.lower())
            if match is None:
                continue
            hook_type, intensity = match
            hooks.append(HookSignal(
                timestamp=ts,
                hook_type=hook_type,
                confidence=0.8,
                intensity=intensity,
                description=f"Emotion '{emotion}' at {ts:.1f}s"
            ))

        return hooks

    def _detect_action_hooks(
        self,
        timestamps: List[float],
        actions: List[str],
    ) -> List[HookSignal]:
        """Detect action-based hook moments.

        Action labels are matched case-insensitively against the table for
        the detector's domain. Domains without a table (including
        "general") produce no action hooks. Non-string entries are ignored.

        Args:
            timestamps: Time points aligned with ``actions``
            actions: Detected action label per timestamp

        Returns:
            Detected action hooks in timestamp order, each with a fixed
            confidence of 0.85
        """
        hooks: List[HookSignal] = []

        # High-engagement actions by domain -> (hook type, intensity)
        hook_actions = {
            "sports": {
                "celebration": (HookType.GOAL_MOMENT, 1.0),
                "action": (HookType.REPLAY_WORTHY, 0.85),
                "reaction": (HookType.CROWD_ERUPTION, 0.8),
            },
            "music": {
                "performance": (HookType.VISUAL_CLIMAX, 0.9),
                "action": (HookType.DANCE_PEAK, 0.85),
            },
            "gaming": {
                "action": (HookType.CLUTCH_PLAY, 0.9),
                "reaction": (HookType.RAGE_REACTION, 0.85),
                "celebration": (HookType.ELIMINATION, 0.9),
            },
            "vlogs": {
                "reaction": (HookType.REVEAL, 0.9),
                "celebration": (HookType.EMOTIONAL_MOMENT, 0.85),
            },
            "podcasts": {
                "reaction": (HookType.BIG_LAUGH, 0.85),
                "speech": (HookType.HOT_TAKE, 0.8),
            },
        }

        domain_actions = hook_actions.get(self.domain, {})
        if not domain_actions:
            return hooks

        for ts, action in zip(timestamps, actions):
            if not isinstance(action, str):
                continue  # missing detection for this sample
            match = domain_actions.get(action.lower())
            if match is None:
                continue
            hook_type, intensity = match
            hooks.append(HookSignal(
                timestamp=ts,
                hook_type=hook_type,
                confidence=0.85,
                intensity=intensity,
                description=f"Action '{action}' at {ts:.1f}s"
            ))

        return hooks

    def find_best_clip_start(
        self,
        clip_start: float,
        clip_end: float,
        hooks: List[HookSignal],
        allow_adjustment: float = 3.0,
    ) -> Tuple[float, Optional[HookSignal]]:
        """
        Find the best starting point for a clip based on detected hooks.

        Hooks between ``clip_start - allow_adjustment`` (never below 0) and
        ``clip_start + max_hook_search_window`` are ranked by hook score,
        hook-type weight and position relative to the ideal hook window.
        The start is then moved so the winning hook lands 0.5s after the
        beginning of the ideal window.

        Args:
            clip_start: Original clip start time
            clip_end: Original clip end time
            hooks: Detected hook signals
            allow_adjustment: Max seconds to adjust start backwards

        Returns:
            Tuple of (adjusted_start_time, best_hook_signal). When no usable
            hook exists the original ``clip_start`` and ``None`` are returned.
        """
        search_start = max(0, clip_start - allow_adjustment)
        search_end = clip_start + self.config.max_hook_search_window

        # Filter hooks in search range
        candidate_hooks = [
            h for h in hooks
            if search_start <= h.timestamp <= search_end
        ]

        if not candidate_hooks:
            logger.debug(f"No hooks found for clip at {clip_start:.1f}s")
            return clip_start, None

        # Score each hook based on:
        # 1. Hook quality (score)
        # 2. Hook type priority for domain
        # 3. Position preference (inside the ideal window = better)
        ideal_start, ideal_end = self.config.ideal_hook_window
        weights = self.config.hook_type_weights

        best_hook: Optional[HookSignal] = None
        best_score = 0

        for hook in candidate_hooks:
            score = hook.score * weights.get(hook.hook_type, 0.5)
            time_from_original = hook.timestamp - clip_start

            if ideal_start <= time_from_original <= ideal_end:
                # Perfect position
                score *= 1.2
            elif time_from_original < ideal_start:
                # Hook is before the ideal window - we'd need to adjust
                adjustment_needed = clip_start - hook.timestamp
                if adjustment_needed > allow_adjustment:
                    score *= 0.3  # Heavy penalty
                elif allow_adjustment > 0:
                    # Penalize based on adjustment needed (up to -30%)
                    score *= (1.0 - adjustment_needed / allow_adjustment * 0.3)
                # else: no adjustment budget and nothing to shift; leaving the
                # score unchanged avoids dividing by zero.
            else:
                # Hook is after ideal window
                score *= 0.8

            if score > best_score:
                best_score = score
                best_hook = hook

        if best_hook is None:
            return clip_start, None

        # Start the clip so the hook lands 0.5s into the ideal window.
        ideal_position = ideal_start + 0.5
        adjusted_start = best_hook.timestamp - ideal_position

        # Keep at least 5s of clip where possible, but never start before
        # search_start. Applying the lower bound last stops clips shorter
        # than 5s from being pushed to an earlier or negative start.
        adjusted_start = min(adjusted_start, clip_end - 5.0)
        adjusted_start = max(search_start, adjusted_start)

        logger.info(
            f"Adjusted clip start: {clip_start:.1f}s -> {adjusted_start:.1f}s "
            f"(hook: {best_hook.hook_type.value} at {best_hook.timestamp:.1f}s)"
        )

        return adjusted_start, best_hook

    def score_clip_hook_potential(
        self,
        clip_start: float,
        clip_duration: float,
        hooks: List[HookSignal],
    ) -> float:
        """
        Score a clip's viral potential based on hook placement.

        Only hooks inside the opening of the clip count: from ``clip_start``
        to the end of the ideal hook window, capped at the clip's own end.
        The result is the best type-weighted hook score boosted by 1.2 and
        capped at 1.0.

        Args:
            clip_start: Clip start time
            clip_duration: Clip duration
            hooks: All detected hooks

        Returns:
            Hook potential score (0-1); 0.3 when the opening has no hooks
        """
        clip_end = clip_start + clip_duration

        # Opening window, limited so hooks past the end of a short clip are
        # not credited to it.
        window_end = min(clip_start + self.config.ideal_hook_window[1], clip_end)
        early_hooks = [
            h for h in hooks
            if clip_start <= h.timestamp <= window_end
        ]

        if not early_hooks:
            return 0.3  # Base score for clips without clear hooks

        # Rank by type-weighted score, consistent with find_best_clip_start,
        # so a low-priority hook type cannot mask a better weighted hook.
        weights = self.config.hook_type_weights
        best_weighted = max(
            h.score * weights.get(h.hook_type, 0.5) for h in early_hooks
        )

        return float(min(1.0, best_weighted * 1.2))
682
+
683
+
684
def get_viral_hook_config(domain: str) -> ViralHookConfig:
    """Return the hook configuration for ``domain``.

    Domains without an entry in VIRAL_HOOK_CONFIGS get the "general"
    configuration.
    """
    fallback = VIRAL_HOOK_CONFIGS["general"]
    return VIRAL_HOOK_CONFIGS.get(domain, fallback)
687
+
688
+
689
def get_viral_hook_detector(domain: str) -> ViralHookDetector:
    """Build and return a new ViralHookDetector for ``domain``."""
    detector = ViralHookDetector(domain)
    return detector
692
+
693
+
694
# Export public interface: the names made available by
# `from <module> import *`.
__all__ = [
    "HookType",
    "HookSignal",
    "ViralHookConfig",
    "ViralHookDetector",
    "VIRAL_HOOK_CONFIGS",
    "get_viral_hook_config",
    "get_viral_hook_detector",
]