# magic_cut / app.py
# ADXabhi's picture
# Upload app.py
# 3600cce verified
import os
import re
import uuid
import time
import asyncio
from typing import Dict, List, Optional
from fastapi import FastAPI, BackgroundTasks, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import HTMLResponse
from pydantic import BaseModel
import httpx
# ------------------------------------------
# CONFIGURATION
# ------------------------------------------
def _fetch_cloud_name():
    """
    Fetch the cloud name from the remote config endpoint.

    Requests https://media.toolxp.org/config, decodes the JSON body and
    returns its "cloud_name" value. Up to three attempts are made; an attempt
    that raises is logged, and an attempt that yields an empty name is simply
    retried.

    Raises:
        RuntimeError: if no attempt produced a non-empty cloud name.
    """
    import json
    import ssl
    import urllib.request

    max_attempts = 3
    tls_context = ssl.create_default_context()
    request = urllib.request.Request(
        "https://media.toolxp.org/config",
        headers={"User-Agent": "Mozilla/5.0"},
    )

    attempt = 0
    while attempt < max_attempts:
        attempt += 1
        try:
            with urllib.request.urlopen(request, timeout=10, context=tls_context) as response:
                payload = json.loads(response.read().decode())
            cloud_name = payload["cloud_name"]
            if cloud_name:
                print(f"[config] cloud_name={cloud_name}")
                return cloud_name
        except Exception as error:
            # Best-effort: any failure (network, JSON, missing key) just costs one attempt.
            print(f"[config] attempt {attempt} failed: {error}")

    raise RuntimeError("[config] FATAL: could not fetch cloud_name after 3 attempts")
# Resolved once at import time; _fetch_cloud_name() raises RuntimeError after
# three failed attempts, which aborts module import.
# NOTE(review): CLOUD_NAME is not referenced anywhere else in the visible file.
CLOUD_NAME = _fetch_cloud_name()
# Media proxy hides Cloudinary origin from end-users.
# Route: media.toolxp.org → res.cloudinary.com/doxoms9hd (via Cloudflare Worker)
# Plain string literal: the URL contains no placeholders, so no f-string is needed.
CLOUDINARY_BASE = "https://media.toolxp.org/video/upload"
# ------------------------------------------
# IN-MEMORY JOB STORE
# ------------------------------------------
# Maps job_id (UUID string) -> job record holding "status", "progress",
# "result", "error" and "created_at" (created in submit_job, updated by the
# background worker). Held in process memory only; nothing in this file
# removes entries.
JOBS: Dict[str, dict] = {}
# ------------------------------------------
# APP SETUP
# ------------------------------------------
# FastAPI application instance that all route decorators below attach to.
app = FastAPI()
# NOTE(review): every origin, method and header is allowed while
# allow_credentials=True — confirm credentialed cross-origin requests are
# actually required; FastAPI's CORS docs advise against combining wildcards
# with credentials.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
class VideoRequest(BaseModel):
    """Request body accepted by POST /jobs."""
    # Video URL to process; it is handed to parse_cloudinary_url(), which reads
    # the video ID and the so_X / du_Y transformation parameters from it.
    video_url: str
# ------------------------------------------
# URL PARSING HELPERS
# ------------------------------------------
def parse_cloudinary_url(url: str) -> dict:
    """
    Parse a Cloudinary URL to extract video_id, start_time, and duration.

    Expected format: https://res.cloudinary.com/.../so_55,du_30/.../video_id.mp4
    or: https://res.cloudinary.com/.../so_55,du_30/fl_getinfo/video_id.jpg

    Args:
        url: The URL to parse. Surrounding whitespace and any query string or
            fragment are ignored.

    Returns:
        A dict with:
            "video_id":   last path segment without its extension, or None
                          when the URL does not end in .mp4/.jpg/.webm/.mov.
            "start_time": value of the so_X parameter as a float (0 if absent).
            "duration":   value of the du_X parameter as a float (30 if absent).
            "end_time":   start_time + duration.
    """
    # Drop whitespace, query string and fragment so the "$"-anchored extension
    # match still works for URLs such as ".../clip.mp4?_a=1".
    path = re.split(r'[?#]', url.strip(), maxsplit=1)[0]

    # Extract video ID (last segment before extension)
    video_id_match = re.search(r'/([^/]+)\.(mp4|jpg|webm|mov)$', path)
    video_id = video_id_match.group(1) if video_id_match else None

    # Look for so_/du_ only in the part BEFORE the final segment, and only at
    # the start of a comma- or slash-separated token. Otherwise a video ID like
    # "calypso_2" would be misread as the start offset so_2.
    transformations = path.rpartition('/')[0]

    # Extract start offset (so_X)
    start_match = re.search(r'(?:^|[/,])so_(\d+(?:\.\d+)?)', transformations)
    start_time = float(start_match.group(1)) if start_match else 0

    # Extract duration (du_X)
    duration_match = re.search(r'(?:^|[/,])du_(\d+(?:\.\d+)?)', transformations)
    duration = float(duration_match.group(1)) if duration_match else 30

    return {
        "video_id": video_id,
        "start_time": start_time,
        "duration": duration,
        "end_time": start_time + duration,
    }
def get_face_info_url(video_id: str, time_sec: float) -> str:
    """
    Build the URL used to request face data for a single frame.

    The URL seeks to `time_sec` (so_), renders the frame as a JPEG, applies a
    face-gravity thumbnail crop and adds fl_getinfo. Fetching it is expected
    to return JSON that includes landmarks.
    """
    transformation_chain = "/".join((
        f"so_{time_sec},f_jpg",
        "c_thumb,g_face,w_450",
        "fl_getinfo",
    ))
    return f"{CLOUDINARY_BASE}/{transformation_chain}/{video_id}.jpg"
async def fetch_face_data(client: httpx.AsyncClient, video_id: str, time_sec: float) -> dict:
    """
    Fetch face detection data for a specific timestamp.

    Args:
        client: Shared HTTP client used to issue the GET request.
        video_id: ID of the video whose frame is inspected.
        time_sec: Timestamp (seconds) of the frame.

    Returns:
        A dict with "time", "face_count", "landmarks" (list of face objects),
        "source_w" and "source_h". On any failure (network error, non-200
        status, unexpected payload) a neutral record is returned instead:
        zero faces, empty landmarks and 1920x1080 source dimensions. This
        function never raises.
    """
    fallback = {"time": time_sec, "face_count": 0, "landmarks": [], "source_w": 1920, "source_h": 1080}
    url = get_face_info_url(video_id, time_sec)
    try:
        response = await client.get(url, timeout=10.0)
        if response.status_code != 200:
            # Previously dropped silently; log it so missing frames are explainable.
            print(f"Face data request at {time_sec}s returned HTTP {response.status_code}")
            return fallback

        data = response.json()
        # "or" also covers keys that are present but null, so downstream code
        # always receives a list it can iterate.
        landmarks = data.get("landmarks") or [[]]
        # landmarks[0] is array of face objects
        faces = landmarks[0] or []
        input_info = data.get("input") or {}
        return {
            "time": time_sec,
            "face_count": len(faces),
            "landmarks": faces,
            "source_w": input_info.get("width", 1920),
            "source_h": input_info.get("height", 1080),
        }
    except Exception as e:
        # Deliberately broad: one bad frame must not abort the whole analysis.
        print(f"Error fetching face data at {time_sec}s: {e}")
    return fallback
def find_multi_face_segments(frame_data: List[dict]) -> List[dict]:
    """
    Locate the time ranges in which at least two real faces are visible.

    Each frame's landmarks are reduced to face centers and passed through the
    ghost-face filter before the faces are counted, so spurious detections
    never open a segment. Consecutive frames with 2+ remaining faces form one
    segment; it ends at the first frame with fewer faces, or at the last frame
    if it is still open.

    Returns:
        A list of dicts with "start", "end", "top_face" and "bottom_face",
        where the two crop boxes come from compute_face_crops() applied to the
        raw landmarks collected during the segment.
    """
    # Source dimensions are taken from the first frame's record only.
    if frame_data:
        first_frame = frame_data[0]
        source_w = first_frame.get("source_w", 1920)
        source_h = first_frame.get("source_h", 1080)
    else:
        source_w, source_h = 1920, 1080

    def make_segment(start, end, collected_landmarks):
        # Turn the landmarks gathered over a segment into one segment record.
        top_crop, bottom_crop = compute_face_crops(collected_landmarks, source_w, source_h)
        return {
            "start": start,
            "end": end,
            "top_face": top_crop,
            "bottom_face": bottom_crop
        }

    found = []
    active = False
    active_start = None
    active_landmarks = []

    for frame in frame_data:
        # Face centers for this frame, ghosts removed before counting.
        centers = [
            center
            for center in map(_extract_face_center, frame.get("landmarks", []))
            if center
        ]
        has_multiple_faces = len(_filter_ghost_faces(centers)) >= 2

        if has_multiple_faces:
            if not active:
                # Open a new segment at this frame.
                active, active_start, active_landmarks = True, frame["time"], []
            active_landmarks.append(frame["landmarks"])
            continue

        if active:
            # This frame no longer qualifies: close the segment at its timestamp.
            active = False
            found.append(make_segment(active_start, frame["time"], active_landmarks))

    # A segment still open after the last frame ends at that frame's time.
    if active and active_start is not None:
        final_end = frame_data[-1]["time"] if frame_data else active_start
        found.append(make_segment(active_start, final_end, active_landmarks))

    return found
def _extract_face_center(face: dict) -> dict:
"""
Extract the geometric center (cx, cy_eyes) of a face from Cloudinary landmarks.
Also computes 'span' — the diagonal of the landmark bounding box — used to
detect and reject ghost/fake face detections.
cy_eyes = eye-level Y, which is the most reliable vertical anchor.
Works for both frontal and profile views.
"""
pts = [v for v in face.values() if isinstance(v, dict) and 'x' in v and 'y' in v]
if not pts:
return None
xs = [p['x'] for p in pts]
ys = [p['y'] for p in pts]
cx = sum(xs) / len(xs)
# Use the topmost Y coordinate as the eye-level reference
# (eyes are always the highest landmarks returned)
cy_eyes = min(ys)
# Landmark bounding box diagonal — measures "face size on screen"
# Real faces: 80-300px diagonal. Ghost faces (hands, objects): 10-40px.
span_x = max(xs) - min(xs)
span_y = max(ys) - min(ys)
span = (span_x ** 2 + span_y ** 2) ** 0.5
return {'cx': cx, 'cy_eyes': cy_eyes, 'span': span}
def _filter_ghost_faces(processed_faces: list) -> list:
"""
Filter out ghost/fake face detections from a single frame.
Ghost faces are typically:
- Hands, fingers, or objects misidentified as faces
- Very small landmark span compared to real faces in the same frame
- Landmark span < 40% of the largest face → rejected
- Absolute minimum span of 30px (any face smaller than this is too tiny to be real)
"""
if len(processed_faces) < 2:
return processed_faces
# Find the largest face in this frame
max_span = max(f['span'] for f in processed_faces)
# Reject faces whose span is less than 40% of the largest face
# Also reject faces with absolute span < 30px (too small to be a real face)
MIN_RELATIVE_SPAN = 0.40
MIN_ABSOLUTE_SPAN = 30.0
filtered = [
f for f in processed_faces
if f['span'] >= max_span * MIN_RELATIVE_SPAN and f['span'] >= MIN_ABSOLUTE_SPAN
]
return filtered if filtered else processed_faces[:1] # Always keep at least the biggest face
def compute_face_crops(segment_faces_data: List[List[dict]], source_w: int, source_h: int) -> tuple[dict, dict]:
    """
    Compute one crop box per speaker for a multi-face segment.

    Crop SIZE is derived from the source video dimensions; landmarks are used
    ONLY to position the box on each face.

    Steps:
      1. For every frame, extract face centers and drop ghost detections.
      2. Sort the remaining faces by horizontal position; in frames with 2+
         faces, the leftmost and rightmost become the left/right samples.
      3. Average each side's samples over the segment (fallback: 25% / 75%
         of the width at 40% of the height when a side has no samples).
      4. Start from a crop width of 50% of the source width.
      5. Anti-overlap: shrink the width to 92% of the horizontal gap between
         the two faces when that is smaller (but only if it exceeds 200px).
      6. Derive the height from the 1080:960 (9:8) aspect ratio, limited to
         the source height.
      7. Center each box on its face horizontally, put eye level 35% below
         the box top, and clamp the box to the source bounds.

    Returns:
        (left_crop, right_crop), each a dict with "x", "y", "w", "h".
    """
    TARGET_ASPECT = 1080 / 960  # 9:8 = 1.125
    BASE_CROP_RATIO = 0.50      # base crop width as a fraction of source width
    FACE_VERTICAL_POS = 0.35    # eye level as a fraction of crop height from the top

    # --- Collect per-frame left/right face samples ---
    left_samples, right_samples = [], []
    for frame_faces in segment_faces_data:
        centers = [
            center
            for center in (_extract_face_center(face) for face in frame_faces)
            if center
        ]
        ordered = sorted(_filter_ghost_faces(centers), key=lambda c: c['cx'])
        if len(ordered) < 2:
            continue
        left_samples.append(ordered[0])
        right_samples.append(ordered[-1])

    def mean_position(samples, default_x, default_y):
        # Temporal smoothing: average the samples, or fall back to the defaults.
        if not samples:
            return default_x, default_y
        count = len(samples)
        return (
            sum(s['cx'] for s in samples) / count,
            sum(s['cy_eyes'] for s in samples) / count,
        )

    left_cx, left_cy = mean_position(left_samples, source_w * 0.25, source_h * 0.40)
    right_cx, right_cy = mean_position(right_samples, source_w * 0.75, source_h * 0.40)

    # --- Crop size: proportional base, then anti-overlap limit ---
    crop_w = int(source_w * BASE_CROP_RATIO)
    overlap_limit = int(abs(right_cx - left_cx) * 0.92)  # leave an 8% gap
    if 200 < overlap_limit < crop_w:
        crop_w = overlap_limit

    # --- Height from the 9:8 aspect ratio, limited to the source height ---
    crop_h = int(crop_w / TARGET_ASPECT)
    if crop_h > source_h:
        crop_h = source_h
        crop_w = int(crop_h * TARGET_ASPECT)

    def place(face_cx, eye_y):
        # Center on the face, set eye level, then clamp inside the source frame.
        left = int(face_cx - crop_w / 2)
        top = int(eye_y - crop_h * FACE_VERTICAL_POS)
        left = max(0, min(left, source_w - crop_w))
        top = max(0, min(top, source_h - crop_h))
        return {"x": left, "y": top, "w": crop_w, "h": crop_h}

    return place(left_cx, left_cy), place(right_cx, right_cy)
def build_final_url(video_id: str, start_time: float, end_time: float, multi_face_segments: List[dict]) -> str:
    """
    Build the final Cloudinary URL with layers for multi-face segments.

    Base: the clip from start_time to end_time as a 1080x1920 (9:16) video
    filled with g_auto:face.
    Layers: for each multi-face segment of at least 1 second, two video
    overlays of the same asset — the "top_face" crop placed north and the
    "bottom_face" crop placed south, each scaled to 1080x960.

    Args:
        video_id: ID of the source video (also used as the overlay source).
        start_time: Clip start in source-video seconds.
        end_time: Clip end in source-video seconds.
        multi_face_segments: Segment dicts with "start", "end" and optionally
            "top_face" / "bottom_face" crop boxes ("x", "y", "w", "h").

    Returns:
        The complete .mp4 delivery URL.
    """
    # Used when a segment carries no pre-computed bounding box.
    default_box = {"x": 0, "y": 0, "w": 300, "h": 300}

    def face_layer(box, gravity, seg_start, seg_end, seg_duration, out_start, out_end):
        # One overlay: crop the face box, scale it to 1080x960, then apply it
        # at the given gravity for the segment's span of the OUTPUT timeline.
        return (
            f"l_video:{video_id},"
            f"so_{seg_start},eo_{seg_end},du_{seg_duration},ac_none/"
            f"c_crop,w_{box['w']},h_{box['h']},x_{box['x']},y_{box['y']}/"
            "c_fill,w_1080,h_960/"
            f"fl_layer_apply,g_{gravity},so_{out_start},eo_{out_end}"
        )

    # Base transformation: 9:16 vertical with face tracking fallback
    parts = [f"so_{start_time},eo_{end_time}/w_1080,h_1920,c_fill,g_auto:face"]

    for segment in multi_face_segments:
        seg_start = round(segment["start"], 2)
        seg_end = round(segment["end"], 2)
        seg_duration = round(seg_end - seg_start, 2)

        # Skip segments shorter than 1 second
        if seg_duration < 1:
            continue

        # Offsets in the OUTPUT video timeline (which starts at start_time)
        out_start = round(seg_start - start_time, 2)
        out_end = round(seg_end - start_time, 2)

        # Top layer = left speaker, bottom layer = right speaker
        top_box = segment.get("top_face", default_box)
        bottom_box = segment.get("bottom_face", default_box)
        parts.append(face_layer(top_box, "north", seg_start, seg_end, seg_duration, out_start, out_end))
        parts.append(face_layer(bottom_box, "south", seg_start, seg_end, seg_duration, out_start, out_end))

    transformations = "/".join(parts)
    return f"{CLOUDINARY_BASE}/{transformations}/{video_id}.mp4"
# ------------------------------------------
# BACKGROUND WORKER
# ------------------------------------------
def process_video_sync(job_id: str, video_url: str):
    """
    Blocking entry point for a job: runs process_video_async() to completion
    on a fresh event loop via asyncio.run().
    """
    pipeline = process_video_async(job_id, video_url)
    asyncio.run(pipeline)
async def process_video_async(job_id: str, video_url: str):
    """
    Main video processing logic:
    1. Parse URL to get video_id and time range
    2. Fetch face data for each frame (500ms intervals, batches of 10)
    3. Find multi-face segments
    4. Build final URL with layers

    Progress and outcome are written to JOBS[job_id]: "status" moves to
    "processing" and then to "completed" (with "result" filled in) or "failed"
    (with "error" set to the exception message). Errors raised during
    processing are recorded on the job rather than propagated.

    Args:
        job_id: Key of an existing entry in JOBS.
        video_url: URL to analyze.
    """
    SAMPLE_INTERVAL = 0.5  # seconds between analyzed frames
    BATCH_SIZE = 10        # concurrent requests per batch

    job = JOBS[job_id]
    print(f"[{job_id}] Starting job: {video_url}")
    job["status"] = "processing"
    job["progress"] = "Parsing video URL..."
    try:
        # 1. Parse URL
        parsed = parse_cloudinary_url(video_url)
        video_id = parsed["video_id"]
        start_time = parsed["start_time"]
        end_time = parsed["end_time"]
        duration = parsed["duration"]
        if not video_id:
            raise ValueError("Could not extract video ID from URL")
        job["progress"] = f"Analyzing {duration}s of video..."
        print(f"[{job_id}] Video: {video_id}, Range: {start_time}s - {end_time}s")

        # 2. Sample timestamps every 500ms, start and end inclusive.
        # Each timestamp is computed from its index instead of repeatedly
        # adding 0.5, so float error cannot accumulate and drop the last frame.
        sample_count = int((end_time - start_time) / SAMPLE_INTERVAL + 1e-9) + 1
        frame_times = [
            round(start_time + index * SAMPLE_INTERVAL, 1)
            for index in range(sample_count)
        ]
        total_frames = len(frame_times)
        job["progress"] = f"Fetching face data for {total_frames} frames..."

        frame_data = []
        async with httpx.AsyncClient() as client:
            # Process in batches of 10 to avoid overwhelming the API
            for offset in range(0, total_frames, BATCH_SIZE):
                batch = frame_times[offset:offset + BATCH_SIZE]
                results = await asyncio.gather(
                    *(fetch_face_data(client, video_id, moment) for moment in batch)
                )
                frame_data.extend(results)
                progress_pct = min(100, int((offset + BATCH_SIZE) / total_frames * 100))
                job["progress"] = f"Analyzing frames... {progress_pct}%"

        # 3. Find multi-face segments
        job["progress"] = "Detecting multi-face segments..."
        multi_face_segments = find_multi_face_segments(frame_data)
        print(f"[{job_id}] Found {len(multi_face_segments)} multi-face segments")

        # 4. Build final URL
        job["progress"] = "Building final video URL..."
        final_url = build_final_url(video_id, start_time, end_time, multi_face_segments)

        # 5. Complete
        job["status"] = "completed"
        job["progress"] = "Done"
        job["result"] = {
            "video_url": final_url,
            "video_id": video_id,
            "start_time": start_time,
            "end_time": end_time,
            "multi_face_segments": multi_face_segments,
            "total_frames_analyzed": total_frames
        }
        print(f"[{job_id}] Completed: {final_url}")
    except Exception as e:
        # Top-level boundary of the background job: record the failure on the
        # job so the polling client can display it.
        print(f"[{job_id}] FAILED: {str(e)}")
        job["status"] = "failed"
        job["error"] = str(e)
        job["progress"] = "Failed"
# ------------------------------------------
# API ENDPOINTS
# ------------------------------------------
@app.post("/jobs")
def submit_job(req: VideoRequest, background_tasks: BackgroundTasks):
    """
    Register a new job and schedule its processing.

    A fresh UUID is generated, a "queued" record is stored in JOBS, and
    process_video_sync is added as a background task for the submitted URL.

    Returns:
        {"job_id": <uuid string>, "status": "queued"}
    """
    new_id = str(uuid.uuid4())
    record = dict(
        status="queued",
        progress="Waiting in queue...",
        result=None,
        error=None,
        created_at=time.time(),
    )
    JOBS[new_id] = record
    background_tasks.add_task(process_video_sync, new_id, req.video_url)
    return {"job_id": new_id, "status": "queued"}
@app.get("/jobs/{job_id}")
def get_job_status(job_id: str):
    """
    Return the stored record for `job_id`.

    Raises:
        HTTPException: 404 "Job not found" when JOBS has no (non-empty) record
            for the ID.
    """
    record = JOBS.get(job_id)
    if record:
        return record
    raise HTTPException(status_code=404, detail="Job not found")
@app.get("/")
def home():
    """Root endpoint: return a static status message and version string."""
    banner = {"message": "Magic Cut API is Running", "version": "1.0"}
    return banner
@app.get("/client", response_class=HTMLResponse)
def serve_client():
    """
    Serve the embedded HTML client.

    Returns a single self-contained page (inline CSS and JavaScript). Its
    script POSTs the entered URL to /jobs on the same origin, polls
    /jobs/{job_id} every 2 seconds, and renders the resulting video URL,
    segment list and a video player once the job completes.
    """
    # NOTE(review): showResults() in the script below interpolates
    # result.video_url into innerHTML and into an inline onclick handler
    # without escaping — confirm that value can never contain quotes or markup.
    html_content = """
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Magic Cut - Video Face Splitter</title>
<style>
:root {
--primary: #a855f7;
--bg: #0f0f1a;
--surface: #1a1a2e;
--text: #f3f4f6;
}
body {
font-family: 'Inter', system-ui, sans-serif;
background: var(--bg);
color: var(--text);
display: flex;
justify-content: center;
align-items: center;
min-height: 100vh;
margin: 0;
padding: 1rem;
}
.container {
background: var(--surface);
padding: 2rem;
border-radius: 16px;
width: 100%;
max-width: 600px;
box-shadow: 0 20px 40px rgba(0,0,0,0.4);
border: 1px solid #2a2a4a;
}
h2 {
margin-top: 0;
text-align: center;
background: linear-gradient(135deg, #a855f7, #ec4899);
-webkit-background-clip: text;
-webkit-text-fill-color: transparent;
font-size: 1.8rem;
}
h4 {
margin: 0;
color: #9ca3af;
text-align: center;
font-weight: 400;
margin-bottom: 1.5rem;
}
.form-group {
margin-bottom: 1.5rem;
}
label {
display: block;
margin-bottom: 0.5rem;
font-size: 0.9rem;
color: #d1d5db;
}
input, textarea {
width: 100%;
padding: 0.75rem;
background: #0f0f1a;
border: 1px solid #374151;
border-radius: 8px;
color: white;
box-sizing: border-box;
font-family: inherit;
}
input:focus, textarea:focus {
outline: 2px solid var(--primary);
border-color: transparent;
}
button {
width: 100%;
padding: 0.875rem;
background: linear-gradient(135deg, #a855f7, #ec4899);
color: white;
border: none;
border-radius: 8px;
font-weight: 700;
cursor: pointer;
transition: all 0.2s;
font-size: 1rem;
}
button:hover {
transform: translateY(-2px);
box-shadow: 0 10px 20px rgba(168, 85, 247, 0.3);
}
button:disabled {
opacity: 0.5;
cursor: not-allowed;
transform: none;
box-shadow: none;
}
#statusBox {
margin-top: 2rem;
display: none;
background: #0f0f1a;
padding: 1.5rem;
border-radius: 12px;
border: 1px solid #374151;
}
.status-badge {
display: inline-block;
padding: 6px 14px;
border-radius: 99px;
font-size: 0.8rem;
font-weight: 600;
margin-bottom: 1rem;
}
.status-badge.queued { background: #f59e0b; color: black; }
.status-badge.processing { background: #3b82f6; color: white; }
.status-badge.completed { background: #10b981; color: black; }
.status-badge.failed { background: #ef4444; color: white; }
#progressText {
color: #d1d5db;
margin-bottom: 1rem;
font-size: 0.95rem;
}
.result-box {
background: #1a1a2e;
padding: 1rem;
border-radius: 8px;
margin-top: 1rem;
}
.result-url {
word-break: break-all;
font-size: 0.85rem;
color: var(--primary);
margin-bottom: 0.5rem;
}
.copy-btn {
background: #374151;
border: none;
color: white;
padding: 8px 16px;
border-radius: 6px;
cursor: pointer;
font-size: 0.85rem;
width: auto;
margin-top: 0.5rem;
}
.copy-btn:hover {
background: #4b5563;
transform: none;
box-shadow: none;
}
.spinner {
border: 4px solid #374151;
border-top: 4px solid var(--primary);
border-radius: 50%;
width: 30px;
height: 30px;
animation: spin 1s linear infinite;
margin: 0 auto 1rem auto;
display: none;
}
@keyframes spin {
0% { transform: rotate(0deg); }
100% { transform: rotate(360deg); }
}
.info-box {
background: rgba(168, 85, 247, 0.1);
border: 1px solid rgba(168, 85, 247, 0.3);
border-radius: 8px;
padding: 1rem;
margin-bottom: 1.5rem;
font-size: 0.85rem;
color: #d1d5db;
}
.segments-info {
margin-top: 1rem;
font-size: 0.85rem;
color: #9ca3af;
}
video {
width: 100%;
max-height: 400px;
border-radius: 8px;
margin-top: 1rem;
}
</style>
</head>
<body>
<div class="container">
<h2>✂️ Magic Cut</h2>
<h4>Transform 16:9 videos into vertical shorts with face tracking</h4>
<div class="info-box">
<strong>How it works:</strong><br>
1. Paste your Cloudinary video URL with <code>so_X,du_Y</code> (start time, duration)<br>
2. We analyze each frame for faces (every 500ms)<br>
3. When 2+ faces detected → split-screen layout<br>
4. Get your final 9:16 video URL!
</div>
<div class="form-group">
<label>Cloudinary Video URL</label>
<textarea id="videoUrl" rows="3" placeholder="https://res.cloudinary.com/doxoms9hd/video/upload/so_55,du_30/fl_getinfo/video_id.jpg"></textarea>
<small style="color: #6b7280; display: block; margin-top: 4px;">
Format: so_X,du_Y (start at X seconds, duration Y seconds)
</small>
</div>
<button id="processBtn" onclick="submitJob()">🎬 Process Video</button>
<div id="statusBox">
<div id="spinner" class="spinner"></div>
<span id="statusBadge" class="status-badge">Waiting</span>
<div id="progressText">Initializing...</div>
<div id="resultBox"></div>
</div>
</div>
<script>
const API_BASE = window.location.origin;
let pollInterval = null;
async function submitJob() {
const videoUrl = document.getElementById('videoUrl').value.trim();
const btn = document.getElementById('processBtn');
const statusBox = document.getElementById('statusBox');
if (!videoUrl) {
alert("Please enter a video URL");
return;
}
btn.disabled = true;
statusBox.style.display = 'block';
document.getElementById('resultBox').innerHTML = '';
updateStatus("queued", "Submitting job...");
try {
const response = await fetch(`${API_BASE}/jobs`, {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ video_url: videoUrl })
});
const data = await response.json();
if (data.job_id) {
console.log("Job Submitted:", data.job_id);
startPolling(data.job_id);
} else {
updateStatus("failed", "Failed to get Job ID");
btn.disabled = false;
}
} catch (error) {
console.error(error);
updateStatus("failed", "Connection Error. Check URL.");
btn.disabled = false;
}
}
function startPolling(jobId) {
if (pollInterval) clearInterval(pollInterval);
pollInterval = setInterval(async () => {
try {
const res = await fetch(`${API_BASE}/jobs/${jobId}`);
const job = await res.json();
updateStatus(job.status, job.progress);
if (job.status === 'completed') {
clearInterval(pollInterval);
showResults(job.result);
document.getElementById('processBtn').disabled = false;
}
if (job.status === 'failed') {
clearInterval(pollInterval);
document.getElementById('progressText').innerText = "Error: " + job.error;
document.getElementById('processBtn').disabled = false;
}
} catch (e) {
console.error("Polling error", e);
}
}, 2000);
}
function updateStatus(status, message) {
const badge = document.getElementById('statusBadge');
const spinner = document.getElementById('spinner');
const text = document.getElementById('progressText');
badge.className = `status-badge ${status}`;
badge.innerText = status.toUpperCase();
text.innerText = message || "Processing...";
if (status === 'processing' || status === 'queued') {
spinner.style.display = 'block';
} else {
spinner.style.display = 'none';
}
}
function showResults(result) {
const box = document.getElementById('resultBox');
const segments = result.multi_face_segments || [];
let segmentsHtml = '';
if (segments.length > 0) {
segmentsHtml = `
<div class="segments-info">
<strong>🎭 Multi-face segments found:</strong><br>
${segments.map((s, i) => `Segment ${i+1}: ${s.start}s - ${s.end}s`).join('<br>')}
</div>
`;
} else {
segmentsHtml = `<div class="segments-info">No multi-face segments detected (single speaker throughout)</div>`;
}
box.innerHTML = `
<div class="result-box">
<div style="margin-bottom: 0.5rem; color: #10b981; font-weight: 600;">✅ Video Ready!</div>
<div class="result-url">${result.video_url}</div>
<button class="copy-btn" onclick="navigator.clipboard.writeText('${result.video_url}').then(() => this.innerText = 'Copied!')">
📋 Copy URL
</button>
${segmentsHtml}
<div class="segments-info">
<strong>📊 Stats:</strong> ${result.total_frames_analyzed} frames analyzed
</div>
<video controls src="${result.video_url}"></video>
</div>
`;
}
</script>
</body>
</html>
"""
    return html_content