yasvanthkumar committed on
Commit
b64ffa7
·
verified ·
1 Parent(s): 06deb2d

Create utils/clip_detector.py

Browse files
Files changed (1) hide show
  1. utils/clip_detector.py +200 -0
utils/clip_detector.py ADDED
@@ -0,0 +1,200 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ from transformers import pipeline
3
+ import re
4
+
5
class ClipDetector:
    """Rank transcript segments by how engaging they look as short clips.

    Each candidate segment is run through a text emotion classifier and a
    handful of keyword heuristics, then decorated with a hook line, a title,
    a caption style and suggested sound effects.
    """

    # Whole-word match so that "show", "somehow" or "however" do not count
    # as the question words "how" / "why".
    _QUESTION_WORD_RE = re.compile(r'\b(?:how|why)\b')

    def __init__(self):
        """Load the emotion classification pipeline."""
        # find_engaging_segments indexes the result with [0] and iterates
        # {'label', 'score'} dicts, i.e. it relies on the nested
        # "all scores" return shape requested here.
        self.emotion_classifier = pipeline(
            "text-classification",
            model="j-hartmann/emotion-english-distilroberta-base",
            return_all_scores=True
        )

    def find_engaging_segments(self, video_path, transcript, max_clips=10):
        """Detect engaging segments of a transcript using multiple signals.

        Args:
            video_path: Accepted for interface compatibility; not read here.
            transcript: Mapping with a 'segments' list; every segment needs
                'start', 'end' (numbers, in seconds) and 'text'.
            max_clips: Maximum number of segments to return.

        Returns:
            A list of at most ``max_clips`` dicts, best engagement score
            first, each describing one segment (timing, text, emotions,
            score, hook, title, caption style, sound effects).
        """
        segments = []

        for segment in transcript['segments']:
            # Keep only segments whose length suits a short clip.
            duration = segment['end'] - segment['start']
            if duration < 20 or duration > 90:  # 20-90 seconds ideal for shorts
                continue

            text = segment['text']

            # Only the first 512 characters are classified.
            emotions = self.emotion_classifier(text[:512])[0]
            emotion_scores = {e['label']: e['score'] for e in emotions}

            segments.append({
                'start': segment['start'],
                'end': segment['end'],
                'start_time': self._format_time(segment['start']),
                'end_time': self._format_time(segment['end']),
                'duration': self._format_duration(duration),
                'text': text,
                'emotions': emotion_scores,
                'engagement_score': self._calculate_engagement(
                    text,
                    emotion_scores,
                    duration
                ),
                'hook': self._detect_hook(text),
                'title': self._generate_title(text, emotion_scores),
                'caption_style': self._determine_caption_style(emotion_scores),
                'sound_effects': self._suggest_sound_effects(emotion_scores, text),
                'emotion': self._top_emotion(emotion_scores)
            })

        # Highest engagement first; the sort is stable, so ties keep
        # transcript order.
        segments.sort(key=lambda x: x['engagement_score'], reverse=True)

        return segments[:max_clips]

    @staticmethod
    def _top_emotion(emotions):
        """Return the label with the highest score, or None if there are none."""
        if not emotions:
            return None
        return max(emotions, key=emotions.get)

    def _calculate_engagement(self, text, emotions, duration):
        """Calculate an engagement score (capped at 100) from several factors.

        Args:
            text: Segment text.
            emotions: Mapping of emotion label to score.
            duration: Segment length in seconds.
        """
        score = 0

        # Emotional intensity (excitement, surprise, joy)
        score += emotions.get('joy', 0) * 30
        score += emotions.get('surprise', 0) * 35
        score += emotions.get('anger', 0) * 20  # Controversy can be engaging

        # Hook words: each distinct word found anywhere in the text counts
        # once (substring match, so inflections such as "revealed" count).
        hook_words = ['wait', 'watch', 'look', 'amazing', 'incredible', 'shocking',
                      'unbelievable', 'secret', 'reveal', 'must', 'never']
        text_lower = text.lower()
        hook_count = sum(1 for word in hook_words if word in text_lower)
        score += hook_count * 5

        # Question marks (engagement)
        score += text.count('?') * 3

        # Ideal duration bonus (30-60 seconds)
        if 30 <= duration <= 60:
            score += 10

        # Numbers (concrete, specific)
        if re.search(r'\d+', text):
            score += 5

        return min(score, 100)

    def _detect_hook(self, text):
        """Detect or generate a quoted hook line for the clip.

        Keyword patterns win first; otherwise the first question in the text
        is used, then the first sentence, then a generic default.
        """
        text_lower = text.lower()

        # Common hook patterns
        if 'wait' in text_lower:
            return '"Wait until you see this..."'
        if self._QUESTION_WORD_RE.search(text_lower):
            return '"You need to know this..."'
        if 'never' in text_lower:
            return '"This changes everything..."'

        if '?' in text:
            # Take just the sentence that ends at the first '?': it starts
            # after the last '.' or '!' that precedes the question mark.
            before_mark = text.partition('?')[0]
            start = max(before_mark.rfind('.'), before_mark.rfind('!')) + 1
            question = before_mark[start:].strip()
            if question:
                return f'"{question}?"'

        # Extract first compelling sentence. str.split always yields at
        # least one item, so emptiness has to be checked on the text itself.
        first_sentence = text.split('.')[0].strip()
        if first_sentence:
            return f'"{first_sentence}..."'

        return '"Watch what happens next..."'

    def _generate_title(self, text, emotions):
        """Generate a catchy title: an emotion emoji plus a content-based phrase."""
        emotion_emojis = {
            'joy': '🎉',
            'surprise': '😱',
            'anger': '😤',
            'sadness': '😢',
            'fear': '😨',
            'disgust': '🤢',
            'neutral': '🎯'
        }
        emoji = emotion_emojis.get(self._top_emotion(emotions), '✨')

        text_lower = text.lower()

        if 'amazing' in text_lower or 'incredible' in text_lower:
            return f'{emoji} Mind-Blowing Moment'
        if 'fail' in text_lower or 'wrong' in text_lower:
            return f'{emoji} Epic Fail'
        if self._QUESTION_WORD_RE.search(text_lower):
            return f'{emoji} Life-Changing Tip'
        if '?' in text:
            return f'{emoji} The Answer Revealed'
        return f'{emoji} Must-Watch Moment'

    def _determine_caption_style(self, emotions):
        """Determine caption color/style from the top-scoring emotion."""
        styles = {
            'joy': 'Bold Yellow with Pop',
            'surprise': 'Red Bold with Shake',
            'anger': 'Orange Bold with Pulse',
            'sadness': 'Blue Soft',
            'fear': 'Purple Bold',
            'neutral': 'White Bold with Glow'
        }

        # Labels without an entry (e.g. 'disgust') use the plain default.
        return styles.get(self._top_emotion(emotions), 'White Bold')

    def _suggest_sound_effects(self, emotions, text):
        """Suggest up to three sound effects from the emotion and the text."""
        effects = []
        top_emotion = self._top_emotion(emotions)

        # Emotion-based effects
        if top_emotion == 'surprise':
            effects.extend(['Dramatic Boom', 'Record Scratch'])
        elif top_emotion == 'joy':
            effects.extend(['Success Ding', 'Celebration'])
        elif top_emotion == 'anger':
            effects.extend(['Dramatic Whoosh', 'Impact'])

        # Text-based effects
        text_lower = text.lower()
        if any(word in text_lower for word in ['wait', 'watch', 'look']):
            effects.append('Suspense Build')

        if '?' in text:
            effects.append('Thinking Sound')

        if not effects:
            effects = ['Whoosh', 'Transition']

        return effects[:3]  # Max 3 effects

    def _format_time(self, seconds):
        """Format seconds as M:SS (fractions of a second are dropped)."""
        mins = int(seconds // 60)
        secs = int(seconds % 60)
        return f"{mins}:{secs:02d}"

    def _format_duration(self, seconds):
        """Format a duration as M:SS.

        Accepted segments can run up to 90 seconds, so the minutes field
        must be computed rather than fixed at 0.
        """
        return self._format_time(seconds)