Marek4321 committed on
Commit
6bdfadc
·
verified ·
1 Parent(s): b128937

Upload 13 files

Browse files
__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # StoryLens - Video Ad Narrative Structure Analyzer
app.py ADDED
@@ -0,0 +1,333 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""StoryLens Streamlit entry point.

Collects a video (upload or YouTube URL) plus API credentials, runs the
analysis pipeline (load -> frames -> audio/transcript -> vision -> sync ->
classify -> report) and renders the narrative-structure report.
"""
import streamlit as st
import os
from PIL import Image

from config import INDUSTRIES, CAMPAIGN_GOALS, CATEGORY_COLORS, MAX_VIDEO_LENGTH_SECONDS
from video_loader import VideoLoader
from frame_extractor import FrameExtractor
from audio_extractor import AudioExtractor
from vision_analyzer import VisionAnalyzer
from segment_synchronizer import SegmentSynchronizer
from narrative_classifier import NarrativeClassifier
from report_generator import ReportGenerator

# Page config
st.set_page_config(
    page_title="StoryLens - Ad Narrative Analyzer",
    page_icon="🎬",
    layout="wide"
)

# Initialize session state so result display survives Streamlit reruns.
if 'analysis_result' not in st.session_state:
    st.session_state.analysis_result = None
if 'transcript' not in st.session_state:
    st.session_state.transcript = None

# Sidebar: credentials + campaign context
with st.sidebar:
    st.header("Configuration")

    # API Settings
    with st.expander("API Settings", expanded=True):
        st.subheader("MiniMax (Vision & LLM)")
        api_key = st.text_input(
            "MiniMax API Key",
            type="password",
            value=os.getenv("MINIMAX_API_KEY", ""),
            help="Get your API key from MiniMax platform"
        )
        group_id = st.text_input(
            "MiniMax Group ID",
            value=os.getenv("MINIMAX_GROUP_ID", "")
        )

        if api_key and group_id:
            st.session_state.api_key = api_key
            st.session_state.group_id = group_id
            st.success("MiniMax configured")

        st.divider()

        st.subheader("OpenAI (Whisper)")
        openai_key = st.text_input(
            "OpenAI API Key",
            type="password",
            value=os.getenv("OPENAI_API_KEY", ""),
            help="For audio transcription (Whisper)"
        )

        if openai_key:
            st.session_state.openai_key = openai_key
            st.success("OpenAI configured")

    st.divider()

    # Campaign Settings
    st.subheader("Campaign Settings")

    industry = st.selectbox("Industry", INDUSTRIES)
    campaign_goal = st.selectbox("Campaign Goal", CAMPAIGN_GOALS)

# Main content
st.title("StoryLens")
st.markdown("*Diagnose your video ad's narrative structure in 60 seconds*")

# Video Input
st.header("Video Input")

col1, col2 = st.columns(2)

with col1:
    st.subheader("Upload File")
    uploaded_file = st.file_uploader(
        "Choose video file",
        type=["mp4", "mov", "avi", "webm"],
        help="Max 120 seconds"
    )

with col2:
    st.subheader("YouTube URL")
    youtube_url = st.text_input(
        "Paste URL",
        placeholder="https://youtube.com/watch?v=..."
    )

# Analyze button gating: need a video source and both API credentials.
# 'key' in st.session_state is the documented membership test (hasattr
# happened to work but is not the supported API).
video_source = uploaded_file or youtube_url
minimax_ready = 'api_key' in st.session_state and st.session_state.api_key
openai_ready = 'openai_key' in st.session_state and st.session_state.openai_key
api_ready = minimax_ready and openai_ready

if video_source and api_ready:
    if st.button("Analyze", type="primary", use_container_width=True):

        # Progress container
        progress_container = st.container()

        with progress_container:
            progress_bar = st.progress(0)
            status_text = st.empty()

        try:
            # Initialize pipeline components
            api_key = st.session_state.api_key
            group_id = st.session_state.group_id
            openai_key = st.session_state.openai_key

            video_loader = VideoLoader()
            frame_extractor = FrameExtractor()
            audio_extractor = AudioExtractor(openai_api_key=openai_key)
            vision_analyzer = VisionAnalyzer(api_key, group_id)
            synchronizer = SegmentSynchronizer()
            classifier = NarrativeClassifier(api_key, group_id)
            report_generator = ReportGenerator()

            # Step 1: Load video
            status_text.text("Loading video...")
            progress_bar.progress(10)

            if uploaded_file:
                video_path = video_loader.load_local(uploaded_file)
            else:
                video_path = video_loader.load_youtube(youtube_url)

            if not video_path:
                st.error("Failed to load video")
                st.stop()

            # Check duration before spending API calls on an oversized video.
            duration = video_loader.get_video_duration(video_path)
            if duration > MAX_VIDEO_LENGTH_SECONDS:
                st.error(f"Video too long ({duration:.0f}s). Max allowed: {MAX_VIDEO_LENGTH_SECONDS}s")
                st.stop()

            # Step 2: Extract frames
            status_text.text("Extracting frames...")
            progress_bar.progress(20)

            frames = frame_extractor.extract_frames(video_path)

            # Step 3: Extract & transcribe audio
            status_text.text("Transcribing audio...")
            progress_bar.progress(35)

            audio_path = audio_extractor.extract_audio(video_path)
            transcript = audio_extractor.transcribe(audio_path)

            # Step 4: Analyze frames visually
            status_text.text("Analyzing frames...")
            progress_bar.progress(50)

            frame_descriptions = vision_analyzer.describe_frames_batch(frames)

            # Step 5: Synchronize visuals with speech
            status_text.text("Synchronizing segments...")
            progress_bar.progress(70)

            segments = synchronizer.synchronize(frame_descriptions, transcript)

            # Step 6: Classify narrative
            status_text.text("Classifying narrative structure...")
            progress_bar.progress(85)

            analysis = classifier.classify(segments)

            # Step 7: Generate report
            status_text.text("Generating report...")
            progress_bar.progress(95)

            report = report_generator.generate(analysis, industry, campaign_goal)

            progress_bar.progress(100)
            status_text.text("Analysis complete!")

            # Store result for the display section below (and across reruns).
            st.session_state.analysis_result = report
            st.session_state.transcript = transcript

        except Exception as e:
            # BUGFIX: st.stop() raises a Streamlit control-flow exception
            # (a subclass of Exception), which this broad handler used to
            # swallow — every validation st.stop() was reported as a bogus
            # "Analysis failed" error with a traceback. Re-raise it so
            # Streamlit can halt the script normally.
            if type(e).__name__ == "StopException":
                raise
            st.error(f"Analysis failed: {str(e)}")
            import traceback
            st.code(traceback.format_exc())

elif not api_ready:
    missing = []
    if not minimax_ready:
        missing.append("MiniMax API Key + Group ID")
    if not openai_ready:
        missing.append("OpenAI API Key")
    st.warning(f"Please configure API settings in the sidebar: {', '.join(missing)}")
elif not video_source:
    st.info("Upload a video file or paste a YouTube URL to begin")

# Display results (survives reruns via session state)
if st.session_state.analysis_result:
    result = st.session_state.analysis_result

    st.divider()

    # Summary metrics
    st.header("Analysis Results")

    col1, col2, col3, col4 = st.columns(4)

    with col1:
        story_status = "YES" if result['summary']['has_story'] else "NO"
        st.metric("Story Detected", story_status)

    with col2:
        st.metric("Detected Arc", result['summary']['detected_arc'])

    with col3:
        st.metric("Optimal Arc", result['summary']['optimal_arc_for_goal'])

    with col4:
        st.metric("Potential Uplift", result['summary']['potential_uplift'])

    # Story explanation
    if result['summary']['story_explanation']:
        st.info(f"**Story Analysis:** {result['summary']['story_explanation']}")

    st.divider()

    # Timeline visualization: one row per synchronized segment
    st.subheader("Narrative Timeline")

    for seg in result['segments']:
        col1, col2, col3, col4 = st.columns([1, 1, 2, 3])

        with col1:
            # Frame thumbnail (frame file may have been cleaned up)
            if seg.get('frame_path') and os.path.exists(seg['frame_path']):
                img = Image.open(seg['frame_path'])
                st.image(img, width=120)
            else:
                st.write("[Frame]")

        with col2:
            st.caption(f"**{seg['start']:.1f}s - {seg['end']:.1f}s**")

            # Role badge colored by category
            category = seg.get('role_category', 'OTHER')
            color = CATEGORY_COLORS.get(category, '#9E9E9E')
            role = seg.get('functional_role', 'Unknown')

            st.markdown(
                f'<span style="background-color: {color}; color: white; '
                f'padding: 4px 8px; border-radius: 4px; font-size: 12px;">'
                f'{role}</span>',
                unsafe_allow_html=True
            )

        with col3:
            visual_text = seg.get('visual', 'N/A')
            st.write(f"**Visual:** {visual_text}")

        with col4:
            if seg.get('speech'):
                st.write(f"**Speech:** \"{seg['speech']}\"")
            if seg.get('reasoning'):
                st.caption(f"*{seg['reasoning']}*")

    st.divider()

    # Detected sequence
    if result.get('detected_sequence'):
        st.subheader("Story Arc Flow")
        arc_flow = " -> ".join(result['detected_sequence'])
        st.markdown(f"**{arc_flow}**")

    # Missing elements
    if result.get('missing_elements'):
        st.subheader("Missing Elements")
        for element in result['missing_elements']:
            st.warning(f"- {element}")

    st.divider()

    # Recommendations (sorted by priority upstream)
    st.subheader("Recommendations")

    for rec in result.get('recommendations', []):
        priority = rec.get('priority', 'LOW')
        icon = "[HIGH]" if priority == "HIGH" else "[MEDIUM]" if priority == "MEDIUM" else "[LOW]"

        with st.expander(f"{icon} {rec['action']}", expanded=(priority == "HIGH")):
            col1, col2 = st.columns(2)
            with col1:
                st.metric("Expected Impact", rec.get('expected_impact', 'N/A'))
            with col2:
                st.metric("Priority", priority)
            st.write(f"**Reasoning:** {rec.get('reasoning', '')}")

    # Benchmark info
    with st.expander("Benchmark Details"):
        benchmark = result.get('benchmark', {})
        st.write(f"**Best Arc for {campaign_goal}:** {benchmark.get('best_arc', 'N/A')}")
        st.write(f"**Average Uplift:** +{benchmark.get('uplift_percent', '?')}%")
        st.write(f"**Recommendation:** {benchmark.get('recommendation', 'N/A')}")

# Full Transcript
if 'transcript' in st.session_state and st.session_state.transcript:
    st.divider()
    st.subheader("Full Transcript")

    transcript = st.session_state.transcript

    # Display with timestamps when available
    for seg in transcript:
        start = seg.get('start', 0)
        end = seg.get('end', 0)
        text = seg.get('text', '')

        if text:
            if start > 0 or end > 0:
                st.markdown(f"**[{start:.1f}s - {end:.1f}s]** {text}")
            else:
                st.markdown(text)

    # Also show as plain text block
    with st.expander("Plain Text"):
        full_text = " ".join([seg.get('text', '') for seg in transcript if seg.get('text')])
        st.text_area("Full transcript", full_text, height=150, disabled=True)
audio_extractor.py ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from typing import List, Dict
3
+
4
+ from moviepy.editor import VideoFileClip
5
+ from openai import OpenAI
6
+
7
+
8
class AudioExtractor:
    """Pulls the audio track out of a video and transcribes it with Whisper."""

    def __init__(self, openai_api_key: str = None, **kwargs):
        # A client is only built when a key is supplied; transcribe()
        # degrades gracefully to an empty result without one.
        self.openai_api_key = openai_api_key
        self.client = OpenAI(api_key=openai_api_key) if openai_api_key else None

    def extract_audio(self, video_path: str, output_path: str = None) -> str:
        """
        Extract audio track from video.

        Returns:
            Path to extracted MP3 file (better for Whisper API)
        """
        # Default output path: swap the video extension for .mp3.
        target = output_path if output_path is not None else video_path.rsplit('.', 1)[0] + '.mp3'

        clip = VideoFileClip(video_path)
        clip.audio.write_audiofile(target, codec='mp3', verbose=False, logger=None)
        clip.close()

        return target

    @staticmethod
    def _field(segment, key, default):
        # Whisper SDK versions differ: segments may arrive as dicts or
        # as attribute objects — read the field either way.
        if isinstance(segment, dict):
            return segment.get(key, default)
        return getattr(segment, key, default)

    def transcribe(self, audio_path: str) -> List[Dict]:
        """
        Transcribe audio with timestamps using OpenAI Whisper API.

        Returns:
            List of segments: [
                {"start": 0.0, "end": 3.2, "text": "Tired of everyday exhaustion?"},
                {"start": 3.2, "end": 7.1, "text": "Meet the new SuperVit..."},
                ...
            ]
        """
        if not self.client:
            print("OpenAI API key not configured")
            return []

        try:
            with open(audio_path, "rb") as audio_file:
                # whisper-1 + verbose_json yields per-segment timestamps.
                response = self.client.audio.transcriptions.create(
                    model="whisper-1",
                    file=audio_file,
                    response_format="verbose_json",
                    timestamp_granularities=["segment"]
                )

            results = []

            if getattr(response, 'segments', None):
                # Normal case: segment list with timestamps.
                for segment in response.segments:
                    results.append({
                        "start": self._field(segment, 'start', 0),
                        "end": self._field(segment, 'end', 0),
                        "text": self._field(segment, 'text', '').strip()
                    })
            elif getattr(response, 'text', None):
                # Fallback: whole transcript, no timing information.
                results.append({
                    "start": 0.0,
                    "end": 0.0,
                    "text": response.text.strip()
                })

            return results

        except Exception as e:
            print(f"Transcription error: {e}")
            return []
benchmarks.py ADDED
@@ -0,0 +1,151 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Dict
2
+
3
+ BENCHMARKS = {
4
+ "Apparel & Accessories": {
5
+ "retention": {
6
+ "best_arc": "Hook-Feature-Benefit-Action",
7
+ "best_arc_short": "HFBA",
8
+ "uplift_percent": 5.8,
9
+ "recommendation": "Start with strong hook, quickly show product features and benefits"
10
+ },
11
+ "ctr": {
12
+ "best_arc": "AIDA",
13
+ "best_arc_short": "AIDA",
14
+ "uplift_percent": 8.9,
15
+ "recommendation": "Build desire through aspirational content before call-to-action"
16
+ },
17
+ "cvr": {
18
+ "best_arc": "Social-Proof-Action",
19
+ "best_arc_short": "SPA",
20
+ "uplift_percent": 4.6,
21
+ "recommendation": "Lead with testimonials and reviews to build trust"
22
+ }
23
+ },
24
+ "Beauty": {
25
+ "retention": {
26
+ "best_arc": "Hook-Problem-Demo-Solution",
27
+ "best_arc_short": "HPDS",
28
+ "uplift_percent": 4.9,
29
+ "recommendation": "Hook attention, show problem, demonstrate product solving it"
30
+ },
31
+ "ctr": {
32
+ "best_arc": "Hook-Feature-Benefit-Action",
33
+ "best_arc_short": "HFBA",
34
+ "uplift_percent": 2.8,
35
+ "recommendation": "Focus on product features and benefits after initial hook"
36
+ },
37
+ "cvr": {
38
+ "best_arc": "Social-Proof-Action",
39
+ "best_arc_short": "SPA",
40
+ "uplift_percent": 3.7,
41
+ "recommendation": "Beauty buyers respond well to testimonials and reviews"
42
+ }
43
+ },
44
+ "Food": {
45
+ "retention": {
46
+ "best_arc": "Problem-Agitate-Solution",
47
+ "best_arc_short": "PAS",
48
+ "uplift_percent": 6.3,
49
+ "recommendation": "Amplify the problem/need before showing solution"
50
+ },
51
+ "ctr": {
52
+ "best_arc": "AIDA",
53
+ "best_arc_short": "AIDA",
54
+ "uplift_percent": 4.8,
55
+ "recommendation": "Build appetite and desire psychologically"
56
+ },
57
+ "cvr": {
58
+ "best_arc": "Problem-Agitate-Solution",
59
+ "best_arc_short": "PAS",
60
+ "uplift_percent": 8.5,
61
+ "recommendation": "Strong problem-solution narrative drives food conversions"
62
+ }
63
+ },
64
+ "Beverages": {
65
+ "retention": {
66
+ "best_arc": "Hook-Problem-Solution",
67
+ "best_arc_short": "HPS",
68
+ "uplift_percent": 4.1,
69
+ "recommendation": "Quick hook into problem-solution flow"
70
+ },
71
+ "ctr": {
72
+ "best_arc": "Feature-Benefit-Action",
73
+ "best_arc_short": "FBA",
74
+ "uplift_percent": 3.9,
75
+ "recommendation": "Direct product focus works for beverages"
76
+ },
77
+ "cvr": {
78
+ "best_arc": "Feature-Benefit-Action",
79
+ "best_arc_short": "FBA",
80
+ "uplift_percent": 5.1,
81
+ "recommendation": "Detailed feature explanation drives beverage conversions"
82
+ }
83
+ },
84
+ "Other": {
85
+ "retention": {
86
+ "best_arc": "Hook-Feature-Benefit-Action",
87
+ "best_arc_short": "HFBA",
88
+ "uplift_percent": 5.0,
89
+ "recommendation": "General best practice: hook + features + benefits"
90
+ },
91
+ "ctr": {
92
+ "best_arc": "AIDA",
93
+ "best_arc_short": "AIDA",
94
+ "uplift_percent": 5.0,
95
+ "recommendation": "Classic AIDA funnel works across categories"
96
+ },
97
+ "cvr": {
98
+ "best_arc": "Social-Proof-Action",
99
+ "best_arc_short": "SPA",
100
+ "uplift_percent": 4.0,
101
+ "recommendation": "Social proof generally effective for conversions"
102
+ }
103
+ }
104
+ }
105
+
106
+ MISSING_ELEMENT_IMPACT = {
107
+ "Hook": {
108
+ "impact": "+5-8% retention in first 2 seconds",
109
+ "suggestion": "Add attention-grabbing opening (question, surprising visual, bold statement)"
110
+ },
111
+ "Problem Setup": {
112
+ "impact": "+4-6% retention",
113
+ "suggestion": "Establish relatable pain point before showing product"
114
+ },
115
+ "Social Proof": {
116
+ "impact": "+3-5% CVR",
117
+ "suggestion": "Add testimonial, review, or crowd validation before CTA"
118
+ },
119
+ "Urgency Trigger": {
120
+ "impact": "+2-4% CVR",
121
+ "suggestion": "Add time-limited element (limited time offer, countdown)"
122
+ },
123
+ "Call-to-Action": {
124
+ "impact": "Critical for conversions",
125
+ "suggestion": "Add clear CTA (Shop Now, Learn More, Get Started)"
126
+ },
127
+ "Outcome": {
128
+ "impact": "+3-5% retention and CVR",
129
+ "suggestion": "Show transformation or result after using product"
130
+ }
131
+ }
132
+
133
+ GOAL_MAPPING = {
134
+ "Retention (Dwell Rate)": "retention",
135
+ "Click-Through (CTR)": "ctr",
136
+ "Conversions (CVR)": "cvr"
137
+ }
138
+
139
+
140
def get_benchmark(industry: str, goal: str) -> Dict:
    """Look up benchmark stats for an (industry, goal) pair.

    Unknown industries fall back to the "Other" bucket; unknown goals
    fall back to "retention". Returns {} if nothing matches.
    """
    industry_table = BENCHMARKS.get(industry, BENCHMARKS["Other"])
    goal_key = GOAL_MAPPING.get(goal, "retention")
    return industry_table.get(goal_key, {})
144
+
145
+
146
def get_missing_element_recommendation(element: str) -> Dict:
    """Return impact/suggestion guidance for a missing narrative element.

    Unknown elements get a generic templated recommendation.
    """
    fallback = {
        "impact": "May improve ad performance",
        "suggestion": f"Consider adding {element} to strengthen narrative"
    }
    return MISSING_ELEMENT_IMPACT.get(element, fallback)
config.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Central configuration for StoryLens: API credentials, models, limits, UI."""
import os

# --- MiniMax API (vision + LLM) ---
MINIMAX_API_KEY = os.getenv("MINIMAX_API_KEY", "")
MINIMAX_GROUP_ID = os.getenv("MINIMAX_GROUP_ID", "")
MINIMAX_BASE_URL = "https://api.minimaxi.chat/v1"

# --- OpenAI API (Whisper transcription) ---
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "")

# --- Model identifiers ---
MINIMAX_MODEL_VISION = "MiniMax-Text-01"
MINIMAX_MODEL_LLM = "MiniMax-Text-01"

# --- Video processing limits ---
MAX_VIDEO_LENGTH_SECONDS = 120
FRAME_INTERVAL_SECONDS = 2
SUPPORTED_VIDEO_FORMATS = [".mp4", ".avi", ".mov", ".webm"]

# --- UI choices ---
INDUSTRIES = ["Apparel & Accessories", "Beauty", "Food", "Beverages", "Other"]
CAMPAIGN_GOALS = ["Retention (Dwell Rate)", "Click-Through (CTR)", "Conversions (CVR)"]

# Badge colors keyed by functional-role category (hex, used in the timeline UI).
CATEGORY_COLORS = {
    "OPENING": "#4CAF50",     # Green
    "PROBLEM": "#FF5722",     # Deep Orange
    "PRODUCT": "#2196F3",     # Blue
    "PERSUASIVE": "#9C27B0",  # Purple
    "CLOSURE": "#FFC107",     # Amber
    "OTHER": "#9E9E9E"        # Grey
}
frame_extractor.py ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import subprocess
2
+ import os
3
+ import tempfile
4
+ from typing import List, Dict
5
+
6
+
7
class FrameExtractor:
    """Extracts evenly spaced frames from a video using FFmpeg."""

    def __init__(self, output_dir: str = None):
        # Default to a fresh temp dir so concurrent extractions don't collide.
        self.output_dir = output_dir or tempfile.mkdtemp()
        os.makedirs(self.output_dir, exist_ok=True)

    def extract_frames(self, video_path: str, interval_seconds: float = 2.0) -> List[Dict]:
        """
        Extract frames at regular intervals using FFmpeg.

        Args:
            video_path: Path to video file
            interval_seconds: Extract one frame every N seconds

        Returns:
            List of dicts with timestamp and frame path:
            [
                {"timestamp": 0.0, "path": "/tmp/frame_001.jpg"},
                {"timestamp": 2.0, "path": "/tmp/frame_002.jpg"},
                ...
            ]

        Raises:
            subprocess.CalledProcessError: if FFmpeg exits non-zero.
        """
        # BUGFIX: remove frames left over from a previous extraction into the
        # same directory — the listing below would otherwise pick them up and
        # assign them wrong timestamps.
        for stale in os.listdir(self.output_dir):
            if stale.startswith('frame_'):
                os.remove(os.path.join(self.output_dir, stale))

        fps = 1 / interval_seconds
        output_pattern = os.path.join(self.output_dir, "frame_%03d.jpg")

        # BUGFIX: '-y' (overwrite) must precede the output file; FFmpeg
        # rejects trailing options placed after the last output.
        cmd = [
            'ffmpeg', '-y',
            '-i', video_path,
            '-vf', f'fps={fps}',
            '-q:v', '2',  # High JPEG quality
            output_pattern,
        ]

        subprocess.run(cmd, capture_output=True, check=True)

        # Build result list; frame N corresponds to timestamp N * interval.
        frame_files = sorted(f for f in os.listdir(self.output_dir) if f.startswith('frame_'))
        return [
            {"timestamp": i * interval_seconds, "path": os.path.join(self.output_dir, name)}
            for i, name in enumerate(frame_files)
        ]
narrative_classifier.py ADDED
@@ -0,0 +1,166 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ import json
3
+ from typing import List, Dict
4
+
5
+ from taxonomy import STORY_ARCS, STORY_DEFINITION, get_taxonomy_formatted
6
+
7
+
8
class NarrativeClassifier:
    """Classifies ad segments into functional roles and detects the overall
    story arc by prompting the MiniMax chat-completion API."""

    # BUGFIX support: bound every network call so a stalled API cannot hang
    # the caller forever.
    REQUEST_TIMEOUT_SECONDS = 120

    def __init__(self, api_key: str, group_id: str):
        self.api_key = api_key
        self.group_id = group_id
        self.base_url = "https://api.minimaxi.chat/v1"

    def _build_prompt(self, segments: List[Dict]) -> str:
        """Build the classification prompt from synchronized segments."""

        # Format segments
        segments_text = ""
        for seg in segments:
            segments_text += f"\n[{seg['start']:.1f}s - {seg['end']:.1f}s]"
            segments_text += f"\nVisual: {seg['visual']}"
            if seg['speech']:
                segments_text += f"\nSpeech: \"{seg['speech']}\""
            segments_text += "\n"

        # Format story arcs
        arcs_text = ""
        for arc_name, arc_info in STORY_ARCS.items():
            arcs_text += f"\n- {arc_name}: {' -> '.join(arc_info['sequence'])}"

        prompt = f"""You are an expert in advertising narrative structure analysis.

Analyze this video advertisement segment by segment.

## SEGMENTS TO ANALYZE:
{segments_text}

## FUNCTIONAL ROLE TAXONOMY:
{get_taxonomy_formatted()}

## KNOWN STORY ARCS:
{arcs_text}

## STORY DEFINITION:
{STORY_DEFINITION}

## YOUR TASK:

1. For each segment, determine the PRIMARY functional role from the taxonomy
2. Determine if this ad contains a STORY (YES/NO)
3. Identify which STORY ARC best matches (or "Custom" if none match)
4. List any MISSING elements that could strengthen the ad

## RESPONSE FORMAT (use exactly this JSON structure):

```json
{{
  "segments": [
    {{
      "timestamp": "0.0-2.0s",
      "functional_role": "Hook",
      "role_category": "OPENING",
      "reasoning": "Opens with provocative question to grab attention"
    }}
  ],
  "has_story": true,
  "story_explanation": "Brief explanation of why story is present/absent",
  "story_arc": "Problem-Solution-Outcome",
  "detected_sequence": ["Hook", "Problem Setup", "Solution Reveal", "Call-to-Action"],
  "missing_elements": ["Social Proof", "Outcome"]
}}
```

Respond ONLY with valid JSON, no other text."""

        return prompt

    def classify(self, segments: List[Dict]) -> Dict:
        """
        Classify each segment and detect the overall story arc.

        Mutates each segment in place (functional_role, role_category,
        reasoning) and returns:
            {
                "segments": [...],
                "has_story": True/False,
                "story_explanation": "...",
                "story_arc": "...",
                "detected_sequence": [...],
                "missing_elements": [...],
                "raw_response": "..."
            }
        Any API or parse failure routes to a neutral fallback result.
        """
        url = f"{self.base_url}/text/chatcompletion_v2"

        headers = {
            'Authorization': f'Bearer {self.api_key}',
            'Content-Type': 'application/json'
        }

        payload = {
            "model": "MiniMax-Text-01",
            "messages": [
                {"role": "user", "content": self._build_prompt(segments)}
            ],
            "temperature": 0.3  # Lower temperature for more consistent classification
        }

        # BUGFIX: the original call had no timeout, so a stalled connection
        # blocked forever.
        response = requests.post(url, headers=headers, json=payload,
                                 timeout=self.REQUEST_TIMEOUT_SECONDS)

        if response.status_code != 200:
            print(f"Classification API error: {response.text}")
            return self._fallback_result(segments)

        # BUGFIX: guard against unexpected payload shapes (missing 'choices',
        # non-JSON body) instead of raising KeyError/IndexError/ValueError.
        try:
            raw_response = response.json()['choices'][0]['message']['content']
        except (ValueError, KeyError, IndexError, TypeError) as e:
            print(f"Unexpected API response shape: {e}")
            return self._fallback_result(segments)

        # Parse JSON from response
        try:
            # Extract JSON from response (may be wrapped in markdown code block)
            json_str = raw_response
            if "```json" in json_str:
                json_str = json_str.split("```json")[1].split("```")[0]
            elif "```" in json_str:
                json_str = json_str.split("```")[1].split("```")[0]

            parsed = json.loads(json_str.strip())

            # Merge the per-segment analysis back into the original segments.
            for i, seg_analysis in enumerate(parsed.get('segments', [])):
                if i < len(segments):
                    segments[i]['functional_role'] = seg_analysis.get('functional_role', 'Unknown')
                    segments[i]['role_category'] = seg_analysis.get('role_category', 'OTHER')
                    segments[i]['reasoning'] = seg_analysis.get('reasoning', '')

            return {
                "segments": segments,
                "has_story": parsed.get('has_story', False),
                "story_explanation": parsed.get('story_explanation', ''),
                "story_arc": parsed.get('story_arc', 'Unknown'),
                "detected_sequence": parsed.get('detected_sequence', []),
                "missing_elements": parsed.get('missing_elements', []),
                "raw_response": raw_response
            }

        except (json.JSONDecodeError, AttributeError, TypeError) as e:
            # BUGFIX: a non-dict JSON top level raised AttributeError on
            # .get and escaped the original JSONDecodeError-only handler;
            # route all parse failures to the fallback.
            print(f"JSON parse error: {e}")
            print(f"Raw response: {raw_response}")
            return self._fallback_result(segments, raw_response)

    def _fallback_result(self, segments: List[Dict], raw_response: str = "") -> Dict:
        """Return a neutral result (and tag segments) when classification fails."""
        for seg in segments:
            seg['functional_role'] = 'Unknown'
            seg['role_category'] = 'OTHER'
            seg['reasoning'] = 'Classification failed'

        return {
            "segments": segments,
            "has_story": False,
            "story_explanation": "Unable to determine",
            "story_arc": "Unknown",
            "detected_sequence": [],
            "missing_elements": [],
            "raw_response": raw_response
        }
report_generator.py ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Dict
2
+
3
+ from benchmarks import get_benchmark, get_missing_element_recommendation, GOAL_MAPPING
4
+
5
+
6
class ReportGenerator:
    """Turns classifier output plus benchmark data into an actionable report."""

    def generate(
        self,
        analysis: Dict,
        industry: str,
        campaign_goal: str
    ) -> Dict:
        """
        Generate actionable report with recommendations.

        Args:
            analysis: Output from NarrativeClassifier
            industry: Selected industry
            campaign_goal: Selected campaign goal

        Returns:
            Complete report with summary, segments, and recommendations
        """
        benchmark = get_benchmark(industry, campaign_goal)
        goal_key = GOAL_MAPPING.get(campaign_goal, "retention")

        detected_arc = analysis.get('story_arc', 'Unknown')
        optimal_arc = benchmark.get('best_arc', 'Unknown')
        arc_matches = self._arcs_match(detected_arc, optimal_arc)

        recommendations = []

        # 1. Arc-mismatch advice first: it carries the biggest expected impact.
        if optimal_arc != 'Unknown' and not arc_matches:
            recommendations.append({
                "priority": "HIGH",
                "type": "arc_mismatch",
                "action": f"Consider restructuring to {optimal_arc} arc",
                "expected_impact": f"+{benchmark.get('uplift_percent', '?')}% {goal_key}",
                "reasoning": benchmark.get('recommendation', '')
            })

        # 2. One entry per missing narrative element.
        missing = analysis.get('missing_elements', [])
        for element in missing:
            guidance = get_missing_element_recommendation(element)
            recommendations.append({
                "priority": "MEDIUM" if element in ('Hook', 'Call-to-Action') else "LOW",
                "type": "missing_element",
                "action": f"Add {element}",
                "expected_impact": guidance.get('impact', ''),
                "reasoning": guidance.get('suggestion', '')
            })

        # 3. Nudge toward narrative if no story was detected at all.
        if not analysis.get('has_story', False):
            recommendations.append({
                "priority": "MEDIUM",
                "type": "no_story",
                "action": "Consider adding narrative elements",
                "expected_impact": "+5-10% retention",
                "reasoning": "Ads with stories show 5-10% better retention than feature-focused ads"
            })

        # Stable sort HIGH -> MEDIUM -> LOW (unknown priorities last).
        rank = {"HIGH": 0, "MEDIUM": 1, "LOW": 2}
        recommendations.sort(key=lambda rec: rank.get(rec['priority'], 3))

        return {
            "summary": {
                "has_story": analysis.get('has_story', False),
                "story_explanation": analysis.get('story_explanation', ''),
                "detected_arc": detected_arc,
                "optimal_arc_for_goal": optimal_arc,
                "arc_matches_optimal": arc_matches,
                "potential_uplift": f"+{benchmark.get('uplift_percent', '?')}%"
            },
            "segments": analysis.get('segments', []),
            "detected_sequence": analysis.get('detected_sequence', []),
            "missing_elements": missing,
            "recommendations": recommendations,
            "benchmark": benchmark
        }

    def _arcs_match(self, current: str, optimal: str) -> bool:
        """Compare arc names, ignoring case, hyphens and spaces."""
        def canon(name: str) -> str:
            return name.lower().replace('-', '').replace(' ', '')

        return current == optimal or canon(current) == canon(optimal)
requirements.txt ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # StoryLens - Requirements
2
+
3
+ # Web framework
4
+ streamlit>=1.28.0
5
+
6
+ # Video processing
7
+ yt-dlp>=2023.10.13
8
+ moviepy>=1.0.3
9
+ ffmpeg-python>=0.2.0
10
+
11
+ # Image processing
12
+ Pillow>=10.0.0
13
+
14
+ # HTTP requests
15
+ requests>=2.31.0
16
+
17
+ # Environment variables
18
+ python-dotenv>=1.0.0
19
+
20
+ # Speech-to-Text (OpenAI Whisper API)
21
+ openai>=1.0.0
segment_synchronizer.py ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import List, Dict
2
+
3
+
4
class SegmentSynchronizer:
    """Merges per-frame visual descriptions with time-stamped speech segments
    into a single timeline of unified segments."""

    def synchronize(
        self,
        frames: List[Dict],      # [{"timestamp": 0.0, "path": "...", "description": "..."}]
        transcript: List[Dict],  # [{"start": 0.0, "end": 3.2, "text": "..."}]
        default_last_duration: float = 2.0,
    ) -> List[Dict]:
        """
        Create unified segments with visual + speech.

        Args:
            frames: Frame records ordered by timestamp; each needs
                'timestamp', 'path' and 'description' keys.
            transcript: Speech segments with 'start', 'end' and 'text' keys.
                May be empty or None (e.g. when speech-to-text is disabled).
            default_last_duration: Window length in seconds assumed for the
                final frame, which has no successor to mark its end.

        Returns:
            List of synchronized segments:
            [
                {
                    "start": 0.0,
                    "end": 2.0,
                    "frame_path": "/tmp/frame_001.jpg",
                    "visual": "Woman looking frustrated in kitchen",
                    "speech": "Tired of everyday exhaustion?"
                },
                ...
            ]
            'speech' is None when no transcript text overlaps the window.
        """
        transcript = transcript or []  # tolerate None / missing transcript
        segments = []

        for i, frame in enumerate(frames):
            start_time = frame['timestamp']

            # A segment ends where the next frame begins; the last frame
            # gets a fixed-length window since there is no successor.
            if i < len(frames) - 1:
                end_time = frames[i + 1]['timestamp']
            else:
                end_time = start_time + default_last_duration

            # Collect every speech segment overlapping [start_time, end_time).
            overlapping = [
                t['text'] for t in transcript
                if t['end'] > start_time and t['start'] < end_time
            ]
            speech_text = " ".join(overlapping).strip()

            segments.append({
                "start": start_time,
                "end": end_time,
                "frame_path": frame['path'],
                "visual": frame['description'],
                "speech": speech_text if speech_text else None,
            })

        return segments
taxonomy.py ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Taxonomy of functional roles a video-ad segment can play, grouped by
# narrative phase. Keys are phase/category names; values map a role name to
# its one-line definition. Rendered into LLM prompts by
# get_taxonomy_formatted() and reverse-looked-up by get_role_category().
FUNCTIONAL_ROLES = {
    "OPENING": {
        "Hook": "Grabs viewers' attention or interest; appears in first few seconds",
        "Establish Context": "Sets up the status quo—who, where, or when—before story progression"
    },
    "PROBLEM": {
        "Problem Setup": "Presents a problem, need, or pain point to resolve for the first time",
        "Problem Agitation": "Amplifies the problem to make it relatable or severe"
    },
    "PRODUCT": {
        "Feature Explanation": "Explains product features and why it delivers benefits; goes beyond just showing",
        "Product Highlight": "Presents key product attributes or benefits (surface-level showcasing)",
        "Demonstration": "Shows the product being used or tested to accomplish a task",
        "Comparison": "Contrasts product with competitors or previous states",
        "Social Proof": "Shows reviews or testimonials from other people",
        "Solution Reveal": "Presents product as solution to a problem"
    },
    "PERSUASIVE": {
        "Emotional Appeal": "Uses emotions to connect with and engage the audience",
        "Humor": "Uses comedic elements to entertain and make message relatable",
        "Aspirational Vision": "Depicts an ideal lifestyle or future enabled by the product",
        "Promotion": "Communicates offer mechanics: discount, bundle, code, pricing terms",
        "Urgency Trigger": "Adds time pressure to accelerate action",
        "Scarcity Trigger": "Highlights limited availability to create FOMO"
    },
    "CLOSURE": {
        "Call-to-Action": "Cues to act; drives immediate action",
        "Outcome": "Shows post-intervention payoff or transformation",
        "Branding Moment": "Displays brand identity (logo, tagline, slogans)",
        "Insight/Philosophy": "Expresses brand philosophy; leads viewers to discover something new"
    },
    "OTHER": {
        "Visual Filler": "Provides transitional pacing without narrative contribution"
    }
}

# Known narrative arc templates. Each arc lists the ordered "sequence" of
# functional roles (names taken from FUNCTIONAL_ROLES) that defines it, an
# optional short "abbreviation", and a human-readable "description".
STORY_ARCS = {
    "Problem-Solution-Outcome": {
        "sequence": ["Problem Setup", "Solution Reveal", "Outcome"],
        "description": "Introduces a problem, offers a solution, and shows the outcome"
    },
    "Hook-Feature-Benefit-Action": {
        "sequence": ["Hook", "Feature Explanation", "Product Highlight", "Call-to-Action"],
        "abbreviation": "HFBA",
        "description": "Grabs attention, explains features, highlights benefits, drives action"
    },
    "AIDA": {
        "sequence": ["Hook", "Feature Explanation", "Aspirational Vision", "Call-to-Action"],
        "description": "Attention-Interest-Desire-Action classic marketing funnel"
    },
    "Social-Proof-Action": {
        "sequence": ["Social Proof", "Call-to-Action"],
        "abbreviation": "SPA",
        "description": "Shows testimonials/reviews then drives action"
    },
    "Problem-Agitate-Solution": {
        "sequence": ["Problem Setup", "Problem Agitation", "Solution Reveal"],
        "abbreviation": "PAS",
        "description": "Presents problem, amplifies pain, offers solution"
    },
    "Before-After-Bridge": {
        "sequence": ["Establish Context", "Outcome", "Solution Reveal"],
        "abbreviation": "BAB",
        "description": "Shows current situation, desired outcome, product as bridge"
    },
    "Hook-Problem-Solution": {
        "sequence": ["Hook", "Problem Setup", "Solution Reveal"],
        "abbreviation": "HPS",
        "description": "Grabs attention, presents problem, offers solution"
    },
    "Feature-Benefit-Action": {
        "sequence": ["Feature Explanation", "Product Highlight", "Call-to-Action"],
        "abbreviation": "FBA",
        "description": "Direct product-focused approach"
    }
}

# Working definition of "story" plus observable presence/absence signals.
# Embedded verbatim in classification prompts, so the text must not change
# casually — prompt wording affects model output.
STORY_DEFINITION = """
A story is an account of an event or a sequence of connected events
that leads to a transition from an initial state to a later stage or outcome.

Signals of STORY PRESENT:
- Dialogues between characters
- Sharing of personal experiences
- Inclusion of challenges/conflicts/problem solutions
- Character transformation or journey

Signals of STORY ABSENT:
- Announcer/narrator voiceover only
- Promotional language dominance
- Heavy focus on product features without context
- Visual mashups without narrative connection
"""
94
+
95
+
96
def get_taxonomy_formatted() -> str:
    """Render the functional-role taxonomy as a markdown-style string for prompts."""
    blocks = []
    for category, roles in FUNCTIONAL_ROLES.items():
        entries = [f"- {role}: {description}" for role, description in roles.items()]
        blocks.append("\n".join([f"\n**{category}**", *entries]))
    return "\n".join(blocks)
104
+
105
+
106
def get_role_category(role_name: str) -> str:
    """Return the taxonomy category containing *role_name*; 'OTHER' if unknown."""
    return next(
        (category for category, roles in FUNCTIONAL_ROLES.items() if role_name in roles),
        "OTHER",
    )
video_loader.py ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import tempfile
2
+ import os
3
+ import subprocess
4
+ import json
5
+ import shutil
6
+ from typing import Optional
7
+
8
+ import yt_dlp
9
+
10
+
11
class VideoLoader:
    """Fetches videos (YouTube download or local upload) into a private temp dir."""

    def __init__(self):
        # Per-instance scratch directory; removed by cleanup().
        self.temp_dir = tempfile.mkdtemp()

    def load_youtube(self, url: str) -> Optional[str]:
        """
        Download YouTube video.

        Args:
            url: YouTube URL

        Returns:
            Path to downloaded video file, or None if failed
        """
        output_path = os.path.join(self.temp_dir, "video.mp4")

        ydl_opts = {
            'format': 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/bestvideo+bestaudio/best',
            'outtmpl': output_path,
            'merge_output_format': 'mp4',
            'quiet': True,
            'no_warnings': True,
            'postprocessors': [{
                'key': 'FFmpegVideoConvertor',
                # NOTE: 'preferedformat' (sic) is the option name yt-dlp expects.
                'preferedformat': 'mp4',
            }],
        }

        try:
            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                ydl.download([url])
            return output_path
        except Exception as e:
            # Best-effort: surface failure to the caller as None, matching the
            # app's "show an error and stop" handling for missing videos.
            print(f"Error downloading YouTube video: {e}")
            return None

    def load_local(self, uploaded_file) -> Optional[str]:
        """
        Save uploaded file to temp directory.

        Args:
            uploaded_file: Streamlit UploadedFile object (needs .name and .read())

        Returns:
            Path to saved file
        """
        # basename() guards against path traversal via a crafted upload name
        # (e.g. "../../etc/x"); the file always lands inside temp_dir.
        output_path = os.path.join(self.temp_dir, os.path.basename(uploaded_file.name))

        with open(output_path, "wb") as f:
            f.write(uploaded_file.read())

        return output_path

    def get_video_duration(self, video_path: str) -> float:
        """Get video duration in seconds using ffprobe.

        Raises:
            RuntimeError: if ffprobe fails or its output lacks a duration.
        """
        cmd = [
            'ffprobe', '-v', 'quiet', '-print_format', 'json',
            '-show_format', video_path
        ]

        result = subprocess.run(cmd, capture_output=True, text=True)
        # Without this check a failed probe surfaces as an opaque JSON error
        # on empty stdout; fail loudly with the actual ffprobe diagnostics.
        if result.returncode != 0:
            raise RuntimeError(
                f"ffprobe failed for {video_path}: {result.stderr.strip()}"
            )

        try:
            data = json.loads(result.stdout)
            return float(data['format']['duration'])
        except (json.JSONDecodeError, KeyError, TypeError, ValueError) as e:
            raise RuntimeError(f"Could not read duration of {video_path}") from e

    def cleanup(self):
        """Remove temp files (safe to call more than once)."""
        if os.path.exists(self.temp_dir):
            shutil.rmtree(self.temp_dir)
vision_analyzer.py ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import base64
2
+ import requests
3
+ from typing import List, Dict
4
+
5
+
6
class VisionAnalyzer:
    """Generates one-sentence descriptions of video frames via the MiniMax chat API."""

    def __init__(self, api_key: str, group_id: str):
        self.api_key = api_key
        self.group_id = group_id  # kept for API parity; not used by this endpoint
        self.base_url = "https://api.minimaxi.chat/v1"

        # Prompt text is part of the model contract; keep wording stable.
        self.prompt = """Describe this video frame in one concise sentence. Focus on:
- Who/what is shown (people, products, text overlays)
- Setting/environment
- Actions or emotions displayed
- Any visible brand elements or text

Be factual and specific. Do not interpret or add assumptions."""

    def _encode_image(self, image_path: str) -> str:
        """Return the file's bytes base64-encoded as an ASCII string."""
        with open(image_path, "rb") as f:
            return base64.b64encode(f.read()).decode('utf-8')

    def describe_frame(self, image_path: str, timestamp: float) -> str:
        """
        Generate description of a single frame.

        Args:
            image_path: Path to a JPEG frame on disk.
            timestamp: Frame position in seconds (used only in fallback text).

        Returns:
            Description string, e.g., "Woman looking frustrated in messy kitchen",
            or a "[Frame at Ns - description unavailable]" placeholder on any failure.
        """
        url = f"{self.base_url}/text/chatcompletion_v2"

        headers = {
            'Authorization': f'Bearer {self.api_key}',
            'Content-Type': 'application/json'
        }

        image_data = self._encode_image(image_path)

        payload = {
            "model": "MiniMax-Text-01",
            "messages": [
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": self.prompt},
                        {
                            "type": "image_url",
                            "image_url": {"url": f"data:image/jpeg;base64,{image_data}"}
                        }
                    ]
                }
            ]
        }

        fallback = f"[Frame at {timestamp}s - description unavailable]"

        try:
            # Timeout keeps one stuck request from hanging the whole analysis;
            # connection errors degrade to the per-frame fallback instead of
            # crashing the batch.
            response = requests.post(url, headers=headers, json=payload, timeout=60)
        except requests.exceptions.RequestException as e:
            print(f"Vision API error: {e}")
            return fallback

        if response.status_code != 200:
            print(f"Vision API error: {response.text}")
            return fallback

        result = response.json()

        try:
            return result['choices'][0]['message']['content']
        except (KeyError, IndexError):
            return fallback

    def describe_frames_batch(self, frames: List[Dict]) -> List[Dict]:
        """
        Describe all frames sequentially.

        Args:
            frames: [{"timestamp": 0.0, "path": "/tmp/frame_001.jpg"}, ...]

        Returns:
            [{"timestamp": 0.0, "path": "...", "description": "Woman looking..."}, ...]
            Frames whose API call failed carry the fallback description.
        """
        results = []

        for frame in frames:
            description = self.describe_frame(frame['path'], frame['timestamp'])

            results.append({
                "timestamp": frame['timestamp'],
                "path": frame['path'],
                "description": description
            })

        return results