# viral-clip-generator / utils / clip_detector.py
# Uploaded by yasvanthkumar — "Create utils/clip_detector.py" (commit b64ffa7, verified)
import numpy as np
from transformers import pipeline
import re
class ClipDetector:
def __init__(self):
# Load emotion detection model
self.emotion_classifier = pipeline(
"text-classification",
model="j-hartmann/emotion-english-distilroberta-base",
return_all_scores=True
)
def find_engaging_segments(self, video_path, transcript, max_clips=10):
"""Detect engaging segments from video using multiple signals"""
segments = []
for i, segment in enumerate(transcript['segments']):
# Skip very short segments
duration = segment['end'] - segment['start']
if duration < 20 or duration > 90: # 20-90 seconds ideal for shorts
continue
text = segment['text']
# Analyze emotion
emotions = self.emotion_classifier(text[:512])[0]
emotion_scores = {e['label']: e['score'] for e in emotions}
# Calculate engagement score
engagement_score = self._calculate_engagement(
text,
emotion_scores,
duration
)
# Detect hooks
hook = self._detect_hook(text)
# Generate title
title = self._generate_title(text, emotion_scores)
# Determine caption style based on emotion
caption_style = self._determine_caption_style(emotion_scores)
# Suggest sound effects
sound_effects = self._suggest_sound_effects(emotion_scores, text)
segments.append({
'start': segment['start'],
'end': segment['end'],
'start_time': self._format_time(segment['start']),
'end_time': self._format_time(segment['end']),
'duration': self._format_duration(duration),
'text': text,
'emotions': emotion_scores,
'engagement_score': engagement_score,
'hook': hook,
'title': title,
'caption_style': caption_style,
'sound_effects': sound_effects,
'emotion': max(emotion_scores, key=emotion_scores.get)
})
# Sort by engagement score
segments.sort(key=lambda x: x['engagement_score'], reverse=True)
return segments[:max_clips]
def _calculate_engagement(self, text, emotions, duration):
"""Calculate engagement score based on multiple factors"""
score = 0
# Emotional intensity (excitement, surprise, joy)
score += emotions.get('joy', 0) * 30
score += emotions.get('surprise', 0) * 35
score += emotions.get('anger', 0) * 20 # Controversy can be engaging
# Hook words
hook_words = ['wait', 'watch', 'look', 'amazing', 'incredible', 'shocking',
'unbelievable', 'secret', 'reveal', 'must', 'never']
hook_count = sum(1 for word in hook_words if word in text.lower())
score += hook_count * 5
# Question marks (engagement)
score += text.count('?') * 3
# Ideal duration bonus (30-60 seconds)
if 30 <= duration <= 60:
score += 10
# Numbers (concrete, specific)
if re.search(r'\d+', text):
score += 5
return min(score, 100)
def _detect_hook(self, text):
"""Detect or generate a hook for the clip"""
text_lower = text.lower()
# Common hook patterns
if 'wait' in text_lower:
return '"Wait until you see this..."'
elif 'how' in text_lower or 'why' in text_lower:
return '"You need to know this..."'
elif 'never' in text_lower:
return '"This changes everything..."'
elif '?' in text:
sentences = text.split('.')
for s in sentences:
if '?' in s:
return f'"{s.strip()}"'
# Extract first compelling sentence
sentences = text.split('.')
if sentences:
return f'"{sentences[0].strip()}..."'
return '"Watch what happens next..."'
def _generate_title(self, text, emotions):
"""Generate catchy title based on content and emotion"""
top_emotion = max(emotions, key=emotions.get)
emotion_emojis = {
'joy': '🎉',
'surprise': '😱',
'anger': '😤',
'sadness': '😢',
'fear': '😨',
'disgust': '🤢',
'neutral': '🎯'
}
emoji = emotion_emojis.get(top_emotion, '✨')
# Extract key words
words = text.split()[:10]
if 'amazing' in text.lower() or 'incredible' in text.lower():
return f'{emoji} Mind-Blowing Moment'
elif 'fail' in text.lower() or 'wrong' in text.lower():
return f'{emoji} Epic Fail'
elif 'how' in text.lower() or 'why' in text.lower():
return f'{emoji} Life-Changing Tip'
elif '?' in text:
return f'{emoji} The Answer Revealed'
else:
return f'{emoji} Must-Watch Moment'
def _determine_caption_style(self, emotions):
"""Determine caption color/style based on emotion"""
top_emotion = max(emotions, key=emotions.get)
styles = {
'joy': 'Bold Yellow with Pop',
'surprise': 'Red Bold with Shake',
'anger': 'Orange Bold with Pulse',
'sadness': 'Blue Soft',
'fear': 'Purple Bold',
'neutral': 'White Bold with Glow'
}
return styles.get(top_emotion, 'White Bold')
def _suggest_sound_effects(self, emotions, text):
"""Suggest appropriate sound effects"""
effects = []
top_emotion = max(emotions, key=emotions.get)
# Emotion-based effects
if top_emotion == 'surprise':
effects.extend(['Dramatic Boom', 'Record Scratch'])
elif top_emotion == 'joy':
effects.extend(['Success Ding', 'Celebration'])
elif top_emotion == 'anger':
effects.extend(['Dramatic Whoosh', 'Impact'])
# Text-based effects
if any(word in text.lower() for word in ['wait', 'watch', 'look']):
effects.append('Suspense Build')
if '?' in text:
effects.append('Thinking Sound')
if not effects:
effects = ['Whoosh', 'Transition']
return effects[:3] # Max 3 effects
def _format_time(self, seconds):
"""Format seconds to MM:SS"""
mins = int(seconds // 60)
secs = int(seconds % 60)
return f"{mins}:{secs:02d}"
def _format_duration(self, seconds):
"""Format duration"""
return f"0:{int(seconds):02d}"