ADXabhi committed on
Commit
a3655da
·
verified ·
1 Parent(s): 623f8c3

Upload 5 files

Browse files
Files changed (2) hide show
  1. app.py +75 -244
  2. client.html +6 -16
app.py CHANGED
@@ -71,269 +71,119 @@ def parse_cloudinary_url(url: str) -> dict:
71
  def get_face_info_url(video_id: str, time_sec: float) -> str:
72
  """
73
  Build URL to fetch face data for a specific frame.
74
- Uses g_faces (plural) to trigger detection of ALL faces.
75
- fl_getinfo returns face coordinates relative to the INPUT image,
76
- so we get positions in original frame space.
77
  """
78
- return f"{CLOUDINARY_BASE}/so_{time_sec},f_jpg/c_thumb,g_faces,w_450/fl_getinfo/{video_id}.jpg"
79
 
80
 
81
  async def fetch_face_data(client: httpx.AsyncClient, video_id: str, time_sec: float) -> dict:
82
  """
83
  Fetch face detection data for a specific timestamp.
84
- Returns face count, bounding boxes [x, y, w, h], and image dimensions.
85
  """
86
  url = get_face_info_url(video_id, time_sec)
87
  try:
88
  response = await client.get(url, timeout=10.0)
89
  if response.status_code == 200:
90
  data = response.json()
91
- # Get image dimensions from the input info (original frame before cropping)
92
- input_info = data.get("input", {})
93
- img_width = input_info.get("width", 1920)
94
- img_height = input_info.get("height", 1080)
95
-
96
- # Cloudinary returns face data in 'landmarks' with g_face/g_faces
97
- # landmarks[0] is the array of face coordinate arrays
98
- # Each face is [x, y, width, height] relative to the INPUT image
99
- faces_raw = []
100
  landmarks = data.get("landmarks", [[]])
101
- if landmarks and landmarks[0]:
102
- faces_raw = landmarks[0]
103
-
104
- # Also check 'faces' key as fallback
105
- if not faces_raw:
106
- faces_raw = data.get("faces", [])
107
-
108
- face_count = len(faces_raw) if faces_raw else 0
109
-
110
- # Debug: log first few frames
111
- if time_sec <= 2.0 or face_count >= 2:
112
- print(f" [face_data] t={time_sec}s: {face_count} faces, img={img_width}x{img_height}, raw={faces_raw[:3] if faces_raw else '[]'}")
113
-
114
- # Parse face bounding boxes
115
- faces = []
116
- for face in faces_raw:
117
- if isinstance(face, (list, tuple)) and len(face) >= 4:
118
- faces.append({
119
- "x": face[0],
120
- "y": face[1],
121
- "w": face[2],
122
- "h": face[3],
123
- "center_x": face[0] + face[2] / 2
124
- })
125
- elif isinstance(face, dict):
126
- fx = face.get("x", 0)
127
- fy = face.get("y", 0)
128
- fw = face.get("width", face.get("w", 0))
129
- fh = face.get("height", face.get("h", 0))
130
- faces.append({
131
- "x": fx, "y": fy, "w": fw, "h": fh,
132
- "center_x": fx + fw / 2
133
- })
134
-
135
  return {
136
  "time": time_sec,
137
  "face_count": face_count,
138
- "faces": faces,
139
- "img_width": img_width,
140
- "img_height": img_height
141
  }
142
  except Exception as e:
143
  print(f"Error fetching face data at {time_sec}s: {e}")
144
 
145
- return {"time": time_sec, "face_count": 0, "faces": [], "img_width": 1920, "img_height": 1080}
146
-
147
-
148
- # ------------------------------------------
149
- # LAYOUT MODES
150
- # ------------------------------------------
151
- LAYOUT_LETTERBOX = "LETTERBOX" # 0 faces - full frame with blurred bars
152
- LAYOUT_SINGLE_TRACK = "SINGLE_TRACK" # 1 face - track and crop on face
153
- LAYOUT_SPLIT_SCREEN = "SPLIT_SCREEN" # 2 faces far apart - top/bottom split
154
- LAYOUT_DUAL_TRACK = "DUAL_TRACK" # 2 faces close together - crop around both
155
- LAYOUT_GROUP_SHOT = "GROUP_SHOT" # 3+ faces - crop to fit group
156
-
157
- # Threshold: if two faces' centers are more than 40% of frame width apart, split screen
158
- FACE_DISTANCE_THRESHOLD = 0.40
159
- # Minimum segment duration in seconds (segments shorter than this get merged)
160
- MIN_SEGMENT_DURATION = 1.5
161
-
162
-
163
- def classify_frame(frame: dict) -> str:
164
- """
165
- Classify a frame into one of the 5 layout modes based on face data.
166
- """
167
- face_count = frame["face_count"]
168
- faces = frame["faces"]
169
- img_width = frame["img_width"]
170
-
171
- if face_count == 0:
172
- return LAYOUT_LETTERBOX
173
-
174
- if face_count == 1:
175
- return LAYOUT_SINGLE_TRACK
176
-
177
- if face_count == 2:
178
- # Check distance between the two faces
179
- if len(faces) >= 2 and img_width > 0:
180
- distance = abs(faces[0]["center_x"] - faces[1]["center_x"])
181
- relative_distance = distance / img_width
182
- if relative_distance > FACE_DISTANCE_THRESHOLD:
183
- return LAYOUT_SPLIT_SCREEN
184
- else:
185
- return LAYOUT_DUAL_TRACK
186
- # Fallback if face position data is missing
187
- return LAYOUT_SPLIT_SCREEN
188
-
189
- # 3+ faces
190
- return LAYOUT_GROUP_SHOT
191
 
192
 
193
- def build_layout_segments(frame_data: List[dict]) -> List[dict]:
194
  """
195
- Build layout segments from classified frame data.
196
- 1. Classify each frame
197
- 2. Group consecutive frames with same mode into segments
198
- 3. Smooth: merge segments shorter than MIN_SEGMENT_DURATION into neighbors
199
  """
200
- if not frame_data:
201
- return []
 
202
 
203
- # Step 1: Classify all frames
204
  for frame in frame_data:
205
- frame["layout"] = classify_frame(frame)
206
-
207
- # Step 2: Group consecutive frames with same layout into segments
208
- raw_segments = []
209
- current_mode = frame_data[0]["layout"]
210
- segment_start = frame_data[0]["time"]
211
-
212
- for i in range(1, len(frame_data)):
213
- if frame_data[i]["layout"] != current_mode:
214
- raw_segments.append({
215
- "start": segment_start,
216
- "end": frame_data[i]["time"],
217
- "mode": current_mode
218
- })
219
- current_mode = frame_data[i]["layout"]
220
- segment_start = frame_data[i]["time"]
221
-
222
- # Close final segment
223
- raw_segments.append({
224
- "start": segment_start,
225
- "end": frame_data[-1]["time"],
226
- "mode": current_mode
227
- })
228
-
229
- # Step 3: Smooth - merge short segments into their neighbors
230
- if len(raw_segments) <= 1:
231
- return raw_segments
232
-
233
- smoothed = [raw_segments[0]]
234
- for seg in raw_segments[1:]:
235
- seg_duration = seg["end"] - seg["start"]
236
- if seg_duration < MIN_SEGMENT_DURATION:
237
- # Merge into previous segment (extend previous)
238
- smoothed[-1]["end"] = seg["end"]
239
  else:
240
- # Check if previous segment is too short, merge it into this one
241
- prev_duration = smoothed[-1]["end"] - smoothed[-1]["start"]
242
- if prev_duration < MIN_SEGMENT_DURATION and len(smoothed) > 1:
243
- # Extend the one before that
244
- smoothed[-2]["end"] = smoothed[-1]["end"]
245
- smoothed[-1] = seg
246
- else:
247
- smoothed.append(seg)
248
 
249
- # Final pass: merge any remaining tiny segments
250
- final = [smoothed[0]]
251
- for seg in smoothed[1:]:
252
- if seg["end"] - seg["start"] < MIN_SEGMENT_DURATION:
253
- final[-1]["end"] = seg["end"]
254
- else:
255
- final.append(seg)
256
 
257
- return final
258
 
259
 
260
- def build_final_url(video_id: str, start_time: float, end_time: float, segments: List[dict]) -> str:
261
  """
262
- Build the final Cloudinary URL with timed overlay layers for each layout segment.
263
-
264
- Base: Full 9:16 video with g_auto:face (handles SINGLE_TRACK natively)
265
 
266
- Overlay layers are added for other modes:
267
- - LETTERBOX: Full-cover layer with c_pad,b_blurred:400:15
268
- - SPLIT_SCREEN: Two layers (g_west top, g_east bottom)
269
- - DUAL_TRACK: Full-cover layer with g_auto:faces
270
- - GROUP_SHOT: Full-cover layer with g_auto:faces
271
 
272
  Important:
273
- - Layers shorter than 1 second are skipped
274
  - eo_X in fl_layer_apply makes layers DISAPPEAR completely (not freeze)
275
- - SINGLE_TRACK segments need no layers (handled by base)
276
  """
277
- # Base transformation: 9:16 vertical with single-face tracking
 
 
278
  base = f"so_{start_time},eo_{end_time}/w_1080,h_1920,c_fill,g_auto:face"
279
 
280
- # Build layers for each non-SINGLE_TRACK segment
281
  layers = []
282
- for segment in segments:
283
  seg_start = segment["start"]
284
  seg_end = segment["end"]
285
  seg_duration = seg_end - seg_start
286
- mode = segment["mode"]
287
-
288
- # SINGLE_TRACK is handled by the base transformation - no layer needed
289
- if mode == LAYOUT_SINGLE_TRACK:
290
- continue
291
 
292
  # Skip segments shorter than 1 second
293
  if seg_duration < 1:
294
  continue
295
 
296
  # Calculate offsets in OUTPUT video timeline
297
- layer_start_offset = seg_start - start_time
298
- layer_end_offset = seg_end - start_time
299
 
300
- if mode == LAYOUT_LETTERBOX:
301
- # Full-cover letterbox: pad to 9:16 with blurred background
302
- letterbox_layer = (
303
- f"l_video:{video_id},"
304
- f"so_{seg_start},eo_{seg_end},du_{seg_duration},"
305
- f"w_1080,h_1920,c_pad,b_blurred:400:15,ac_none/"
306
- f"fl_layer_apply,g_center,so_{layer_start_offset},eo_{layer_end_offset}"
307
- )
308
- layers.append(letterbox_layer)
309
 
310
- elif mode == LAYOUT_SPLIT_SCREEN:
311
- # Top layer - left side of original (g_west)
312
- top_layer = (
313
- f"l_video:{video_id},"
314
- f"so_{seg_start},eo_{seg_end},du_{seg_duration},"
315
- f"w_1080,h_960,c_fill,g_west,ac_none/"
316
- f"fl_layer_apply,g_north,so_{layer_start_offset},eo_{layer_end_offset}"
317
- )
318
- # Bottom layer - right side of original (g_east)
319
- bottom_layer = (
320
- f"l_video:{video_id},"
321
- f"so_{seg_start},eo_{seg_end},du_{seg_duration},"
322
- f"w_1080,h_960,c_fill,g_east,ac_none/"
323
- f"fl_layer_apply,g_south,so_{layer_start_offset},eo_{layer_end_offset}"
324
- )
325
- layers.append(top_layer)
326
- layers.append(bottom_layer)
327
 
328
- elif mode in (LAYOUT_DUAL_TRACK, LAYOUT_GROUP_SHOT):
329
- # Full-cover layer with multi-face tracking (g_auto:faces)
330
- faces_layer = (
331
- f"l_video:{video_id},"
332
- f"so_{seg_start},eo_{seg_end},du_{seg_duration},"
333
- f"w_1080,h_1920,c_fill,g_auto:faces,ac_none/"
334
- f"fl_layer_apply,g_center,so_{layer_start_offset},eo_{layer_end_offset}"
335
- )
336
- layers.append(faces_layer)
337
 
338
  # Combine all parts
339
  if layers:
@@ -359,9 +209,8 @@ async def process_video_async(job_id: str, video_url: str):
359
  Main video processing logic:
360
  1. Parse URL to get video_id and time range
361
  2. Fetch face data for each frame (500ms intervals)
362
- 3. Classify frames into layout modes
363
- 4. Build layout segments with smoothing
364
- 5. Build final URL with timed layers
365
  """
366
  print(f"[{job_id}] Starting job: {video_url}")
367
  JOBS[job_id]["status"] = "processing"
@@ -404,22 +253,14 @@ async def process_video_async(job_id: str, video_url: str):
404
  progress_pct = min(100, int((i + batch_size) / total_frames * 100))
405
  JOBS[job_id]["progress"] = f"Analyzing frames... {progress_pct}%"
406
 
407
- # 3. Classify frames & build layout segments with smoothing
408
- JOBS[job_id]["progress"] = "Classifying layout segments..."
409
- layout_segments = build_layout_segments(frame_data)
410
-
411
- # Log segment breakdown
412
- mode_counts = {}
413
- for seg in layout_segments:
414
- mode = seg["mode"]
415
- mode_counts[mode] = mode_counts.get(mode, 0) + 1
416
- print(f"[{job_id}] Layout segments: {mode_counts}")
417
- for seg in layout_segments:
418
- print(f" {seg['mode']}: {seg['start']}s - {seg['end']}s ({seg['end'] - seg['start']:.1f}s)")
419
 
420
  # 4. Build final URL
421
  JOBS[job_id]["progress"] = "Building final video URL..."
422
- final_url = build_final_url(video_id, start_time, end_time, layout_segments)
423
 
424
  # 5. Complete
425
  JOBS[job_id]["status"] = "completed"
@@ -429,7 +270,7 @@ async def process_video_async(job_id: str, video_url: str):
429
  "video_id": video_id,
430
  "start_time": start_time,
431
  "end_time": end_time,
432
- "layout_segments": layout_segments,
433
  "total_frames_analyzed": total_frames
434
  }
435
  print(f"[{job_id}] Completed: {final_url}")
@@ -670,7 +511,7 @@ def serve_client():
670
  <strong>How it works:</strong><br>
671
  1. Paste your Cloudinary video URL with <code>so_X,du_Y</code> (start time, duration)<br>
672
  2. We analyze each frame for faces (every 500ms)<br>
673
- 3. Smart layout: 👤 Face Track · 🎭 Split Screen · 👥 Dual Track · 👨‍👩‍👧 Group · 📺 Letterbox<br>
674
  4. Get your final 9:16 video URL!
675
  </div>
676
 
@@ -781,28 +622,18 @@ def serve_client():
781
 
782
  function showResults(result) {
783
  const box = document.getElementById('resultBox');
784
- const segments = result.layout_segments || [];
785
-
786
- const modeIcons = {
787
- 'SINGLE_TRACK': '👤 Face Track',
788
- 'SPLIT_SCREEN': '🎭 Split Screen',
789
- 'DUAL_TRACK': '👥 Dual Track',
790
- 'GROUP_SHOT': '👨‍👩‍👧 Group Shot',
791
- 'LETTERBOX': '📺 Letterbox'
792
- };
793
 
794
  let segmentsHtml = '';
795
  if (segments.length > 0) {
796
  segmentsHtml = `
797
  <div class="segments-info">
798
- <strong>🎬 Layout segments:</strong><br>
799
- ${segments.map((s, i) => {
800
- const icon = modeIcons[s.mode] || s.mode;
801
- const dur = (s.end - s.start).toFixed(1);
802
- return `${icon}: ${s.start}s - ${s.end}s (${dur}s)`;
803
- }).join('<br>')}
804
  </div>
805
  `;
 
 
806
  }
807
 
808
  box.innerHTML = `
 
71
  def get_face_info_url(video_id: str, time_sec: float) -> str:
72
  """
73
  Build URL to fetch face data for a specific frame.
74
+ Returns JSON with landmarks when fetched.
 
 
75
  """
76
+ return f"{CLOUDINARY_BASE}/so_{time_sec},f_jpg/c_thumb,g_face,w_450/fl_getinfo/{video_id}.jpg"
77
 
78
 
79
  async def fetch_face_data(client: httpx.AsyncClient, video_id: str, time_sec: float) -> dict:
80
  """
81
  Fetch face detection data for a specific timestamp.
82
+ Returns the number of faces and their positions.
83
  """
84
  url = get_face_info_url(video_id, time_sec)
85
  try:
86
  response = await client.get(url, timeout=10.0)
87
  if response.status_code == 200:
88
  data = response.json()
 
 
 
 
 
 
 
 
 
89
  landmarks = data.get("landmarks", [[]])
90
+ # landmarks[0] is array of face objects
91
+ face_count = len(landmarks[0]) if landmarks and landmarks[0] else 0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
92
  return {
93
  "time": time_sec,
94
  "face_count": face_count,
95
+ "landmarks": landmarks[0] if landmarks else []
 
 
96
  }
97
  except Exception as e:
98
  print(f"Error fetching face data at {time_sec}s: {e}")
99
 
100
+ return {"time": time_sec, "face_count": 0, "landmarks": []}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
101
 
102
 
103
+ def find_multi_face_segments(frame_data: List[dict]) -> List[dict]:
104
  """
105
+ Analyze frame data to find segments where 2+ faces are detected.
106
+ Returns list of segments with start/end times.
 
 
107
  """
108
+ segments = []
109
+ in_multi_face = False
110
+ segment_start = None
111
 
 
112
  for frame in frame_data:
113
+ if frame["face_count"] >= 2:
114
+ if not in_multi_face:
115
+ # Start new segment
116
+ in_multi_face = True
117
+ segment_start = frame["time"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
118
  else:
119
+ if in_multi_face:
120
+ # End segment
121
+ in_multi_face = False
122
+ segments.append({
123
+ "start": segment_start,
124
+ "end": frame["time"]
125
+ })
 
126
 
127
+ # Close any open segment
128
+ if in_multi_face and segment_start is not None:
129
+ segments.append({
130
+ "start": segment_start,
131
+ "end": frame_data[-1]["time"] if frame_data else segment_start
132
+ })
 
133
 
134
+ return segments
135
 
136
 
137
+ def build_final_url(video_id: str, start_time: float, end_time: float, multi_face_segments: List[dict]) -> str:
138
  """
139
+ Build the final Cloudinary URL with layers for multi-face segments.
 
 
140
 
141
+ Base: Full 9:16 video with g_auto:face
142
+ Layers: Split-screen overlays during multi-face segments
 
 
 
143
 
144
  Important:
145
+ - Layers shorter than 1 second are ignored
146
  - eo_X in fl_layer_apply makes layers DISAPPEAR completely (not freeze)
 
147
  """
148
+ duration = end_time - start_time
149
+
150
+ # Base transformation: 9:16 vertical with face tracking
151
  base = f"so_{start_time},eo_{end_time}/w_1080,h_1920,c_fill,g_auto:face"
152
 
153
+ # Build layers for each multi-face segment
154
  layers = []
155
+ for segment in multi_face_segments:
156
  seg_start = segment["start"]
157
  seg_end = segment["end"]
158
  seg_duration = seg_end - seg_start
 
 
 
 
 
159
 
160
  # Skip segments shorter than 1 second
161
  if seg_duration < 1:
162
  continue
163
 
164
  # Calculate offsets in OUTPUT video timeline
165
+ layer_start_offset = seg_start - start_time # When layer appears (so_X in fl_layer_apply)
166
+ layer_end_offset = seg_end - start_time # When layer disappears (eo_X in fl_layer_apply)
167
 
168
+ # Top layer - left side of original (g_west)
169
+ # du_X sets the layer video duration, eo_X in fl_layer_apply makes it VANISH at that time
170
+ top_layer = (
171
+ f"l_video:{video_id},"
172
+ f"so_{seg_start},eo_{seg_end},du_{seg_duration},"
173
+ f"w_1080,h_960,c_fill,g_west,ac_none/"
174
+ f"fl_layer_apply,g_north,so_{layer_start_offset},eo_{layer_end_offset}"
175
+ )
 
176
 
177
+ # Bottom layer - right side of original (g_east)
178
+ bottom_layer = (
179
+ f"l_video:{video_id},"
180
+ f"so_{seg_start},eo_{seg_end},du_{seg_duration},"
181
+ f"w_1080,h_960,c_fill,g_east,ac_none/"
182
+ f"fl_layer_apply,g_south,so_{layer_start_offset},eo_{layer_end_offset}"
183
+ )
 
 
 
 
 
 
 
 
 
 
184
 
185
+ layers.append(top_layer)
186
+ layers.append(bottom_layer)
 
 
 
 
 
 
 
187
 
188
  # Combine all parts
189
  if layers:
 
209
  Main video processing logic:
210
  1. Parse URL to get video_id and time range
211
  2. Fetch face data for each frame (500ms intervals)
212
+ 3. Find multi-face segments
213
+ 4. Build final URL with layers
 
214
  """
215
  print(f"[{job_id}] Starting job: {video_url}")
216
  JOBS[job_id]["status"] = "processing"
 
253
  progress_pct = min(100, int((i + batch_size) / total_frames * 100))
254
  JOBS[job_id]["progress"] = f"Analyzing frames... {progress_pct}%"
255
 
256
+ # 3. Find multi-face segments
257
+ JOBS[job_id]["progress"] = "Detecting multi-face segments..."
258
+ multi_face_segments = find_multi_face_segments(frame_data)
259
+ print(f"[{job_id}] Found {len(multi_face_segments)} multi-face segments")
 
 
 
 
 
 
 
 
260
 
261
  # 4. Build final URL
262
  JOBS[job_id]["progress"] = "Building final video URL..."
263
+ final_url = build_final_url(video_id, start_time, end_time, multi_face_segments)
264
 
265
  # 5. Complete
266
  JOBS[job_id]["status"] = "completed"
 
270
  "video_id": video_id,
271
  "start_time": start_time,
272
  "end_time": end_time,
273
+ "multi_face_segments": multi_face_segments,
274
  "total_frames_analyzed": total_frames
275
  }
276
  print(f"[{job_id}] Completed: {final_url}")
 
511
  <strong>How it works:</strong><br>
512
  1. Paste your Cloudinary video URL with <code>so_X,du_Y</code> (start time, duration)<br>
513
  2. We analyze each frame for faces (every 500ms)<br>
514
+ 3. When 2+ faces detected split-screen layout<br>
515
  4. Get your final 9:16 video URL!
516
  </div>
517
 
 
622
 
623
  function showResults(result) {
624
  const box = document.getElementById('resultBox');
625
+ const segments = result.multi_face_segments || [];
 
 
 
 
 
 
 
 
626
 
627
  let segmentsHtml = '';
628
  if (segments.length > 0) {
629
  segmentsHtml = `
630
  <div class="segments-info">
631
+ <strong>🎭 Multi-face segments found:</strong><br>
632
+ ${segments.map((s, i) => `Segment ${i+1}: ${s.start}s - ${s.end}s`).join('<br>')}
 
 
 
 
633
  </div>
634
  `;
635
+ } else {
636
+ segmentsHtml = `<div class="segments-info">No multi-face segments detected (single speaker throughout)</div>`;
637
  }
638
 
639
  box.innerHTML = `
client.html CHANGED
@@ -237,7 +237,7 @@
237
  <strong>How it works:</strong><br>
238
  1. Paste your Cloudinary video URL with <code>so_X,du_Y</code> (start time, duration)<br>
239
  2. We analyze each frame for faces (every 500ms)<br>
240
- 3. Smart layout: 👤 Face Track · 🎭 Split Screen · 👥 Dual Track · 👨‍👩‍👧 Group · 📺 Letterbox<br>
241
  4. Get your final 9:16 video URL!
242
  </div>
243
 
@@ -359,28 +359,18 @@
359
 
360
  function showResults(result) {
361
  const box = document.getElementById('resultBox');
362
- const segments = result.layout_segments || [];
363
-
364
- const modeIcons = {
365
- 'SINGLE_TRACK': '👤 Face Track',
366
- 'SPLIT_SCREEN': '🎭 Split Screen',
367
- 'DUAL_TRACK': '👥 Dual Track',
368
- 'GROUP_SHOT': '👨‍👩‍👧 Group Shot',
369
- 'LETTERBOX': '📺 Letterbox'
370
- };
371
 
372
  let segmentsHtml = '';
373
  if (segments.length > 0) {
374
  segmentsHtml = `
375
  <div class="segments-info">
376
- <strong>🎬 Layout segments:</strong><br>
377
- ${segments.map((s, i) => {
378
- const icon = modeIcons[s.mode] || s.mode;
379
- const dur = (s.end - s.start).toFixed(1);
380
- return `${icon}: ${s.start}s - ${s.end}s (${dur}s)`;
381
- }).join('<br>')}
382
  </div>
383
  `;
 
 
384
  }
385
 
386
  box.innerHTML = `
 
237
  <strong>How it works:</strong><br>
238
  1. Paste your Cloudinary video URL with <code>so_X,du_Y</code> (start time, duration)<br>
239
  2. We analyze each frame for faces (every 500ms)<br>
240
+ 3. When 2+ faces detected split-screen layout<br>
241
  4. Get your final 9:16 video URL!
242
  </div>
243
 
 
359
 
360
  function showResults(result) {
361
  const box = document.getElementById('resultBox');
362
+ const segments = result.multi_face_segments || [];
 
 
 
 
 
 
 
 
363
 
364
  let segmentsHtml = '';
365
  if (segments.length > 0) {
366
  segmentsHtml = `
367
  <div class="segments-info">
368
+ <strong>🎭 Multi-face segments found:</strong><br>
369
+ ${segments.map((s, i) => `Segment ${i + 1}: ${s.start}s - ${s.end}s`).join('<br>')}
 
 
 
 
370
  </div>
371
  `;
372
+ } else {
373
+ segmentsHtml = `<div class="segments-info">No multi-face segments detected (single speaker throughout)</div>`;
374
  }
375
 
376
  box.innerHTML = `