| import os |
| import re |
| import uuid |
| import time |
| import asyncio |
| from typing import Dict, List, Optional |
| from fastapi import FastAPI, BackgroundTasks, HTTPException |
| from fastapi.middleware.cors import CORSMiddleware |
| from fastapi.responses import HTMLResponse |
| from pydantic import BaseModel |
| import httpx |
|
|
| |
| |
| |
def _fetch_cloud_name():
    """Fetch the ``cloud_name`` value from the remote config endpoint.

    Performs up to three HTTPS GET attempts against
    ``https://media.toolxp.org/config`` (10 second timeout each) and reads
    the ``"cloud_name"`` key from the JSON response.

    Returns:
        The first non-empty ``cloud_name`` value received.

    Raises:
        RuntimeError: if none of the three attempts yields a non-empty
            value. The last underlying failure is attached as ``__cause__``.
    """
    import json as _j
    import ssl as _ssl
    import urllib.request as _ur

    ctx = _ssl.create_default_context()
    req = _ur.Request(
        "https://media.toolxp.org/config",
        headers={"User-Agent": "Mozilla/5.0"},
    )

    last_error = None
    for _i in range(3):
        try:
            with _ur.urlopen(req, timeout=10, context=ctx) as r:
                name = _j.loads(r.read().decode())["cloud_name"]
            if name:
                print(f"[config] cloud_name={name}")
                return name
            # An empty value is a failed attempt too; report it instead of
            # silently looping.
            last_error = ValueError("empty cloud_name in config response")
            print(f"[config] attempt {_i+1} failed: {last_error}")
        except Exception as _e:
            # Best-effort retry: any failure (network, TLS, bad JSON, missing
            # key) is logged and the next attempt is made.
            last_error = _e
            print(f"[config] attempt {_i+1} failed: {_e}")

    raise RuntimeError(
        "[config] FATAL: could not fetch cloud_name after 3 attempts"
    ) from last_error
# Resolved once at import time; _fetch_cloud_name() raises RuntimeError when
# the config endpoint cannot be read, so the module fails to import then.
# NOTE(review): CLOUD_NAME is not referenced anywhere else in the visible
# file - confirm whether CLOUDINARY_BASE was meant to include it.
CLOUD_NAME = _fetch_cloud_name()

# Base URL that every generated analysis / delivery URL is built on.
CLOUDINARY_BASE = "https://media.toolxp.org/video/upload"

# In-memory job store keyed by job id (a UUID string). Entries are never
# removed and are lost when the process restarts.
JOBS: Dict[str, dict] = {}

app = FastAPI()

# The API uses no cookies or auth headers, so credentialed CORS is not needed.
# Wildcard origins together with allow_credentials=True is an invalid
# combination per the FastAPI CORS documentation, hence credentials are off.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)
|
|
|
|
class VideoRequest(BaseModel):
    """Request body accepted by ``POST /jobs``."""

    # URL of the source video; the background job parses it with
    # parse_cloudinary_url() to get the video id and time range.
    video_url: str
|
|
|
|
| |
| |
| |
def parse_cloudinary_url(url: str) -> dict:
    """Extract the video id, start offset and duration from a Cloudinary URL.

    Expected shapes::

        https://res.cloudinary.com/.../so_55,du_30/.../video_id.mp4
        https://res.cloudinary.com/.../so_55,du_30/fl_getinfo/video_id.jpg

    Args:
        url: Delivery URL. A trailing query string or fragment is ignored.

    Returns:
        A dict with keys ``video_id`` (``None`` when the URL does not end in
        ``<name>.mp4|jpg|webm|mov``), ``start_time`` (``so_`` value, default
        0), ``duration`` (``du_`` value, default 30) and ``end_time``
        (``start_time + duration``).
    """
    # Strip "#fragment" and "?query" so the extension anchor below still
    # matches URLs that carry cache-busting parameters.
    path = url.strip().split("#", 1)[0].split("?", 1)[0]

    video_id_match = re.search(r'/([^/]+)\.(mp4|jpg|webm|mov)$', path)
    video_id = video_id_match.group(1) if video_id_match else None

    # Look for so_/du_ only in the part before the file name, and only at the
    # start of a "/"- or ","-separated token, so that a file such as
    # "promo_so_9.mp4" is not mistaken for a start offset.
    params_part = path[:video_id_match.start()] if video_id_match else path

    start_match = re.search(r'(?:^|[/,])so_(\d+(?:\.\d+)?)', params_part)
    start_time = float(start_match.group(1)) if start_match else 0

    duration_match = re.search(r'(?:^|[/,])du_(\d+(?:\.\d+)?)', params_part)
    duration = float(duration_match.group(1)) if duration_match else 30

    return {
        "video_id": video_id,
        "start_time": start_time,
        "duration": duration,
        "end_time": start_time + duration,
    }
|
|
|
|
def get_face_info_url(video_id: str, time_sec: float) -> str:
    """Return the face-info request URL for one frame of ``video_id``.

    The URL seeks to ``time_sec`` (``so_``), asks for a face-centred JPEG
    thumbnail and appends ``fl_getinfo``; callers in this file read the
    response as JSON containing landmarks.
    """
    url_parts = (
        CLOUDINARY_BASE,
        f"so_{time_sec},f_jpg",
        "c_thumb,g_face,w_450",
        "fl_getinfo",
        f"{video_id}.jpg",
    )
    return "/".join(url_parts)
|
|
|
|
async def fetch_face_data(client: httpx.AsyncClient, video_id: str, time_sec: float) -> dict:
    """Fetch face detection data for one timestamp of a video.

    Args:
        client: Shared HTTP client used for the request.
        video_id: Id of the video to inspect.
        time_sec: Frame position in seconds.

    Returns:
        A dict with ``time``, ``face_count``, ``landmarks`` (always a list)
        and ``source_w`` / ``source_h``. On any failure - non-200 status,
        network error, unparsable body - the result reports zero faces and
        1920x1080 dimensions instead of raising.
    """
    url = get_face_info_url(video_id, time_sec)
    try:
        response = await client.get(url, timeout=10.0)
        if response.status_code == 200:
            data = response.json()
            # "or" also covers keys that are present but null, so None never
            # leaks into the landmarks list consumed downstream.
            landmarks = data.get("landmarks") or [[]]
            input_info = data.get("input") or {}
            faces = landmarks[0] or []
            return {
                "time": time_sec,
                "face_count": len(faces),
                "landmarks": faces,
                "source_w": input_info.get("width", 1920),
                "source_h": input_info.get("height", 1080),
            }
        print(f"Error fetching face data at {time_sec}s: HTTP {response.status_code}")
    except Exception as e:
        # Best effort: one bad frame must not abort the whole analysis.
        print(f"Error fetching face data at {time_sec}s: {e}")

    return {"time": time_sec, "face_count": 0, "landmarks": [], "source_w": 1920, "source_h": 1080}
|
|
|
|
def find_multi_face_segments(frame_data: List[dict]) -> List[dict]:
    """Group consecutive frames that show two or more real faces.

    Each frame's landmarks are reduced to face centres and passed through the
    ghost-face filter before counting, so filtered-out detections cannot open
    a segment.

    Args:
        frame_data: Per-frame dicts as produced by ``fetch_face_data``, in
            time order.

    Returns:
        A list of dicts with ``start``, ``end``, ``top_face`` and
        ``bottom_face``; the two face entries are the crops returned by
        ``compute_face_crops`` for the frames of that segment.
    """
    # Source dimensions are taken from the first frame only.
    if frame_data:
        first_frame = frame_data[0]
        source_w = first_frame.get("source_w", 1920)
        source_h = first_frame.get("source_h", 1080)
    else:
        source_w, source_h = 1920, 1080

    def make_segment(start, end, faces):
        # compute_face_crops returns (left, right); left goes on top.
        top, bottom = compute_face_crops(faces, source_w, source_h)
        return {
            "start": start,
            "end": end,
            "top_face": top,
            "bottom_face": bottom,
        }

    segments = []
    segment_open = False
    open_start = None
    open_faces = []

    for frame in frame_data:
        centers = []
        for face in frame.get("landmarks", []):
            center = _extract_face_center(face)
            if center:
                centers.append(center)

        multi_face = len(_filter_ghost_faces(centers)) >= 2

        if multi_face:
            if not segment_open:
                segment_open = True
                open_start = frame["time"]
                open_faces = []
            open_faces.append(frame["landmarks"])
        elif segment_open:
            # First frame without two faces ends the running segment.
            segment_open = False
            segments.append(make_segment(open_start, frame["time"], open_faces))

    # A segment still open after the last frame ends at that frame's time.
    if segment_open and open_start is not None:
        final_end = frame_data[-1]["time"] if frame_data else open_start
        segments.append(make_segment(open_start, final_end, open_faces))

    return segments
|
|
|
|
| def _extract_face_center(face: dict) -> dict: |
| """ |
| Extract the geometric center (cx, cy_eyes) of a face from Cloudinary landmarks. |
| Also computes 'span' — the diagonal of the landmark bounding box — used to |
| detect and reject ghost/fake face detections. |
| |
| cy_eyes = eye-level Y, which is the most reliable vertical anchor. |
| Works for both frontal and profile views. |
| """ |
| pts = [v for v in face.values() if isinstance(v, dict) and 'x' in v and 'y' in v] |
| if not pts: |
| return None |
| |
| xs = [p['x'] for p in pts] |
| ys = [p['y'] for p in pts] |
| |
| cx = sum(xs) / len(xs) |
| |
| |
| |
| cy_eyes = min(ys) |
| |
| |
| |
| span_x = max(xs) - min(xs) |
| span_y = max(ys) - min(ys) |
| span = (span_x ** 2 + span_y ** 2) ** 0.5 |
| |
| return {'cx': cx, 'cy_eyes': cy_eyes, 'span': span} |
|
|
|
|
| def _filter_ghost_faces(processed_faces: list) -> list: |
| """ |
| Filter out ghost/fake face detections from a single frame. |
| |
| Ghost faces are typically: |
| - Hands, fingers, or objects misidentified as faces |
| - Very small landmark span compared to real faces in the same frame |
| - Landmark span < 40% of the largest face → rejected |
| - Absolute minimum span of 30px (any face smaller than this is too tiny to be real) |
| """ |
| if len(processed_faces) < 2: |
| return processed_faces |
| |
| |
| max_span = max(f['span'] for f in processed_faces) |
| |
| |
| |
| MIN_RELATIVE_SPAN = 0.40 |
| MIN_ABSOLUTE_SPAN = 30.0 |
| |
| filtered = [ |
| f for f in processed_faces |
| if f['span'] >= max_span * MIN_RELATIVE_SPAN and f['span'] >= MIN_ABSOLUTE_SPAN |
| ] |
| |
| return filtered if filtered else processed_faces[:1] |
|
|
|
|
def compute_face_crops(segment_faces_data: List[List[dict]], source_w: int, source_h: int) -> tuple[dict, dict]:
    """Compute one crop rectangle per speaker for a multi-face segment.

    Crop SIZE is derived from the source dimensions; landmarks only decide
    where the crop is placed.

    Steps:
        1. Per frame, reduce landmarks to face centres and drop ghost faces.
        2. In frames that still have 2+ faces, take the left-most and
           right-most face (by ``cx``) as the two speakers.
        3. Average each speaker's centre over all such frames; with no usable
           frame, fall back to 25% / 75% of the width at 40% of the height.
        4. Start from a crop width of 50% of the source width; shrink it to
           92% of the horizontal gap between the speakers when that is
           narrower, but only if the result stays above 200.
        5. Derive the height from the 1080:960 aspect ratio and cap it at the
           source height (recomputing the width in that case).
        6. Centre each crop horizontally on its speaker, put the eye line 35%
           below the crop's top edge, and clamp to the source frame.

    Args:
        segment_faces_data: One landmark list per frame of the segment.
        source_w: Source video width.
        source_h: Source video height.

    Returns:
        ``(left_crop, right_crop)``, each a dict with ``x``, ``y``, ``w``,
        ``h``.
    """
    TARGET_ASPECT = 1080 / 960
    BASE_CROP_RATIO = 0.50
    FACE_VERTICAL_POS = 0.35

    left_track = []
    right_track = []

    for landmarks_in_frame in segment_faces_data:
        candidates = []
        for landmark_set in landmarks_in_frame:
            summary = _extract_face_center(landmark_set)
            if summary:
                candidates.append(summary)

        ordered = sorted(_filter_ghost_faces(candidates), key=lambda f: f['cx'])
        if len(ordered) < 2:
            continue
        left_track.append(ordered[0])
        right_track.append(ordered[-1])

    def mean_position(track, default_x, default_y):
        # Temporal smoothing: average of the speaker's centre over the segment.
        if not track:
            return default_x, default_y
        count = len(track)
        mean_x = sum(item['cx'] for item in track) / count
        mean_y = sum(item['cy_eyes'] for item in track) / count
        return mean_x, mean_y

    fallback_y = source_h * 0.40
    left_cx, left_cy = mean_position(left_track, source_w * 0.25, fallback_y)
    right_cx, right_cy = mean_position(right_track, source_w * 0.75, source_h * 0.40)

    crop_w = int(source_w * BASE_CROP_RATIO)

    # Anti-overlap: never wider than 92% of the distance between the faces,
    # unless that would make the crop unusably small.
    gap_limited_w = int(abs(right_cx - left_cx) * 0.92)
    if 200 < gap_limited_w < crop_w:
        crop_w = gap_limited_w

    crop_h = int(crop_w / TARGET_ASPECT)
    if crop_h > source_h:
        crop_h = source_h
        crop_w = int(crop_h * TARGET_ASPECT)

    max_x = source_w - crop_w
    max_y = source_h - crop_h

    def place(center_x, eye_y):
        left_edge = int(center_x - crop_w / 2)
        top_edge = int(eye_y - crop_h * FACE_VERTICAL_POS)
        return {
            "x": max(0, min(left_edge, max_x)),
            "y": max(0, min(top_edge, max_y)),
            "w": crop_w,
            "h": crop_h,
        }

    return place(left_cx, left_cy), place(right_cx, right_cy)
|
|
|
|
|
|
def build_final_url(video_id: str, start_time: float, end_time: float, multi_face_segments: List[dict]) -> str:
    """Assemble the final delivery URL, with split-screen overlay layers.

    The base transformation trims the video to ``start_time``-``end_time``
    and fills a 1080x1920 frame with face-aware gravity. For every segment of
    at least one second, two overlay layers of the same video are added: the
    ``top_face`` crop scaled to 1080x960 at the top and the ``bottom_face``
    crop at the bottom, each shown only during the segment.

    Args:
        video_id: Id of the video.
        start_time: Clip start in source seconds.
        end_time: Clip end in source seconds.
        multi_face_segments: Segments from ``find_multi_face_segments``.

    Returns:
        The complete ``.mp4`` URL.
    """
    default_crop = {"x": 0, "y": 0, "w": 300, "h": 300}

    components = [f"so_{start_time},eo_{end_time}/w_1080,h_1920,c_fill,g_auto:face"]

    for segment in multi_face_segments:
        seg_start = round(segment["start"], 2)
        seg_end = round(segment["end"], 2)
        seg_duration = round(seg_end - seg_start, 2)

        # Segments shorter than one second are not worth a layout switch.
        if seg_duration < 1:
            continue

        # Layer timing is relative to the start of the trimmed clip.
        show_from = round(seg_start - start_time, 2)
        show_until = round(seg_end - start_time, 2)

        crops = (
            (segment.get("top_face", default_crop), "g_north"),
            (segment.get("bottom_face", default_crop), "g_south"),
        )
        for crop, gravity in crops:
            components.append(
                f"l_video:{video_id},"
                f"so_{seg_start},eo_{seg_end},du_{seg_duration},ac_none/"
                f"c_crop,w_{crop['w']},h_{crop['h']},x_{crop['x']},y_{crop['y']}/"
                f"c_fill,w_1080,h_960/"
                f"fl_layer_apply,{gravity},so_{show_from},eo_{show_until}"
            )

    transformations = "/".join(components)
    return f"{CLOUDINARY_BASE}/{transformations}/{video_id}.mp4"
|
|
|
|
| |
| |
| |
def process_video_sync(job_id: str, video_url: str):
    """Run ``process_video_async`` to completion on a fresh event loop.

    Blocking entry point used when the job is scheduled as a background task.
    """
    job_coroutine = process_video_async(job_id, video_url)
    asyncio.run(job_coroutine)
|
|
|
|
async def process_video_async(job_id: str, video_url: str):
    """Analyse a video and store the resulting URL in ``JOBS[job_id]``.

    Steps:
        1. Parse the URL for the video id and time range.
        2. Fetch face data every 0.5 s across the range, 10 requests at a time.
        3. Detect multi-face segments.
        4. Build the final URL with overlay layers.

    Progress text and status are written to the job record throughout. Any
    error inside the pipeline is caught and recorded as ``status="failed"``
    with the message in ``error``; nothing is raised to the caller.

    Args:
        job_id: Key of an existing entry in ``JOBS``.
        video_url: Source URL submitted by the client.
    """
    print(f"[{job_id}] Starting job: {video_url}")
    job = JOBS[job_id]
    job["status"] = "processing"
    job["progress"] = "Parsing video URL..."

    try:
        parsed = parse_cloudinary_url(video_url)
        video_id = parsed["video_id"]
        start_time = parsed["start_time"]
        end_time = parsed["end_time"]
        duration = parsed["duration"]

        if not video_id:
            raise ValueError("Could not extract video ID from URL")

        job["progress"] = f"Analyzing {duration}s of video..."
        print(f"[{job_id}] Video: {video_id}, Range: {start_time}s - {end_time}s")

        # Derive sample times by index rather than accumulating "t += 0.5":
        # repeated float addition can drift past end_time and drop the last
        # frame. The epsilon absorbs representation error in the division.
        step = 0.5
        frame_count = max(0, int(duration / step + 1e-9)) + 1
        frame_times = [round(start_time + k * step, 1) for k in range(frame_count)]

        total_frames = len(frame_times)
        job["progress"] = f"Fetching face data for {total_frames} frames..."

        frame_data = []
        async with httpx.AsyncClient() as client:
            # Fetch in batches so at most batch_size requests are in flight.
            batch_size = 10
            for i in range(0, total_frames, batch_size):
                batch = frame_times[i:i + batch_size]
                tasks = [fetch_face_data(client, video_id, ts) for ts in batch]
                results = await asyncio.gather(*tasks)
                frame_data.extend(results)

                progress_pct = min(100, int((i + batch_size) / total_frames * 100))
                job["progress"] = f"Analyzing frames... {progress_pct}%"

        job["progress"] = "Detecting multi-face segments..."
        multi_face_segments = find_multi_face_segments(frame_data)
        print(f"[{job_id}] Found {len(multi_face_segments)} multi-face segments")

        job["progress"] = "Building final video URL..."
        final_url = build_final_url(video_id, start_time, end_time, multi_face_segments)

        job["status"] = "completed"
        job["progress"] = "Done"
        job["result"] = {
            "video_url": final_url,
            "video_id": video_id,
            "start_time": start_time,
            "end_time": end_time,
            "multi_face_segments": multi_face_segments,
            "total_frames_analyzed": total_frames,
        }
        print(f"[{job_id}] Completed: {final_url}")

    except Exception as e:
        # Top-level boundary of the background job: record the failure on the
        # job so the polling client can display it.
        print(f"[{job_id}] FAILED: {str(e)}")
        job["status"] = "failed"
        job["error"] = str(e)
        job["progress"] = "Failed"
|
|
|
|
| |
| |
| |
|
|
@app.post("/jobs")
def submit_job(req: VideoRequest, background_tasks: BackgroundTasks):
    """Register a new job, schedule its processing and return its id.

    The job record is created in ``JOBS`` with status ``queued`` before the
    background task is added, so the id can be polled immediately.
    """
    job_id = str(uuid.uuid4())

    new_job = {
        "status": "queued",
        "progress": "Waiting in queue...",
        "result": None,
        "error": None,
        "created_at": time.time(),
    }
    JOBS[job_id] = new_job

    background_tasks.add_task(process_video_sync, job_id, req.video_url)

    response = {"job_id": job_id, "status": "queued"}
    return response
|
|
|
|
@app.get("/jobs/{job_id}")
def get_job_status(job_id: str):
    """Return the stored record for ``job_id``, or respond 404 if unknown."""
    record = JOBS.get(job_id)
    if record:
        return record
    raise HTTPException(status_code=404, detail="Job not found")
|
|
|
|
@app.get("/")
def home():
    """Return a small JSON payload showing that the API is up."""
    banner = {
        "message": "Magic Cut API is Running",
        "version": "1.0",
    }
    return banner
|
|
|
|
@app.get("/client", response_class=HTMLResponse)
def serve_client():
    """Serve the embedded single-page HTML client.

    The page posts the entered URL to ``/jobs``, polls ``/jobs/{id}`` every
    two seconds and renders the result. All server-provided values (result
    URL, segment times, error text) are inserted through DOM text/property
    APIs rather than string-built HTML, so they cannot inject markup or
    script. Polling stops when the server answers 404 for the job.

    Returns:
        The HTML document as a string.
    """
    html_content = """
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Magic Cut - Video Face Splitter</title>
    <style>
        :root {
            --primary: #a855f7;
            --bg: #0f0f1a;
            --surface: #1a1a2e;
            --text: #f3f4f6;
        }
        body {
            font-family: 'Inter', system-ui, sans-serif;
            background: var(--bg);
            color: var(--text);
            display: flex;
            justify-content: center;
            align-items: center;
            min-height: 100vh;
            margin: 0;
            padding: 1rem;
        }
        .container {
            background: var(--surface);
            padding: 2rem;
            border-radius: 16px;
            width: 100%;
            max-width: 600px;
            box-shadow: 0 20px 40px rgba(0,0,0,0.4);
            border: 1px solid #2a2a4a;
        }
        h2 {
            margin-top: 0;
            text-align: center;
            background: linear-gradient(135deg, #a855f7, #ec4899);
            -webkit-background-clip: text;
            -webkit-text-fill-color: transparent;
            font-size: 1.8rem;
        }
        h4 {
            margin: 0;
            color: #9ca3af;
            text-align: center;
            font-weight: 400;
            margin-bottom: 1.5rem;
        }
        .form-group {
            margin-bottom: 1.5rem;
        }
        label {
            display: block;
            margin-bottom: 0.5rem;
            font-size: 0.9rem;
            color: #d1d5db;
        }
        input, textarea {
            width: 100%;
            padding: 0.75rem;
            background: #0f0f1a;
            border: 1px solid #374151;
            border-radius: 8px;
            color: white;
            box-sizing: border-box;
            font-family: inherit;
        }
        input:focus, textarea:focus {
            outline: 2px solid var(--primary);
            border-color: transparent;
        }
        button {
            width: 100%;
            padding: 0.875rem;
            background: linear-gradient(135deg, #a855f7, #ec4899);
            color: white;
            border: none;
            border-radius: 8px;
            font-weight: 700;
            cursor: pointer;
            transition: all 0.2s;
            font-size: 1rem;
        }
        button:hover {
            transform: translateY(-2px);
            box-shadow: 0 10px 20px rgba(168, 85, 247, 0.3);
        }
        button:disabled {
            opacity: 0.5;
            cursor: not-allowed;
            transform: none;
            box-shadow: none;
        }
        #statusBox {
            margin-top: 2rem;
            display: none;
            background: #0f0f1a;
            padding: 1.5rem;
            border-radius: 12px;
            border: 1px solid #374151;
        }
        .status-badge {
            display: inline-block;
            padding: 6px 14px;
            border-radius: 99px;
            font-size: 0.8rem;
            font-weight: 600;
            margin-bottom: 1rem;
        }
        .status-badge.queued { background: #f59e0b; color: black; }
        .status-badge.processing { background: #3b82f6; color: white; }
        .status-badge.completed { background: #10b981; color: black; }
        .status-badge.failed { background: #ef4444; color: white; }
        #progressText {
            color: #d1d5db;
            margin-bottom: 1rem;
            font-size: 0.95rem;
        }
        .result-box {
            background: #1a1a2e;
            padding: 1rem;
            border-radius: 8px;
            margin-top: 1rem;
        }
        .result-url {
            word-break: break-all;
            font-size: 0.85rem;
            color: var(--primary);
            margin-bottom: 0.5rem;
        }
        .copy-btn {
            background: #374151;
            border: none;
            color: white;
            padding: 8px 16px;
            border-radius: 6px;
            cursor: pointer;
            font-size: 0.85rem;
            width: auto;
            margin-top: 0.5rem;
        }
        .copy-btn:hover {
            background: #4b5563;
            transform: none;
            box-shadow: none;
        }
        .spinner {
            border: 4px solid #374151;
            border-top: 4px solid var(--primary);
            border-radius: 50%;
            width: 30px;
            height: 30px;
            animation: spin 1s linear infinite;
            margin: 0 auto 1rem auto;
            display: none;
        }
        @keyframes spin {
            0% { transform: rotate(0deg); }
            100% { transform: rotate(360deg); }
        }
        .info-box {
            background: rgba(168, 85, 247, 0.1);
            border: 1px solid rgba(168, 85, 247, 0.3);
            border-radius: 8px;
            padding: 1rem;
            margin-bottom: 1.5rem;
            font-size: 0.85rem;
            color: #d1d5db;
        }
        .segments-info {
            margin-top: 1rem;
            font-size: 0.85rem;
            color: #9ca3af;
        }
        video {
            width: 100%;
            max-height: 400px;
            border-radius: 8px;
            margin-top: 1rem;
        }
    </style>
</head>
<body>
    <div class="container">
        <h2>✂️ Magic Cut</h2>
        <h4>Transform 16:9 videos into vertical shorts with face tracking</h4>

        <div class="info-box">
            <strong>How it works:</strong><br>
            1. Paste your Cloudinary video URL with <code>so_X,du_Y</code> (start time, duration)<br>
            2. We analyze each frame for faces (every 500ms)<br>
            3. When 2+ faces detected → split-screen layout<br>
            4. Get your final 9:16 video URL!
        </div>

        <div class="form-group">
            <label>Cloudinary Video URL</label>
            <textarea id="videoUrl" rows="3" placeholder="https://res.cloudinary.com/doxoms9hd/video/upload/so_55,du_30/fl_getinfo/video_id.jpg"></textarea>
            <small style="color: #6b7280; display: block; margin-top: 4px;">
                Format: so_X,du_Y (start at X seconds, duration Y seconds)
            </small>
        </div>

        <button id="processBtn" onclick="submitJob()">🎬 Process Video</button>

        <div id="statusBox">
            <div id="spinner" class="spinner"></div>
            <span id="statusBadge" class="status-badge">Waiting</span>
            <div id="progressText">Initializing...</div>
            <div id="resultBox"></div>
        </div>
    </div>

    <script>
        const API_BASE = window.location.origin;
        let pollInterval = null;

        async function submitJob() {
            const videoUrl = document.getElementById('videoUrl').value.trim();
            const btn = document.getElementById('processBtn');
            const statusBox = document.getElementById('statusBox');

            if (!videoUrl) {
                alert("Please enter a video URL");
                return;
            }

            btn.disabled = true;
            statusBox.style.display = 'block';
            document.getElementById('resultBox').innerHTML = '';
            updateStatus("queued", "Submitting job...");

            try {
                const response = await fetch(`${API_BASE}/jobs`, {
                    method: 'POST',
                    headers: { 'Content-Type': 'application/json' },
                    body: JSON.stringify({ video_url: videoUrl })
                });

                const data = await response.json();

                if (data.job_id) {
                    console.log("Job Submitted:", data.job_id);
                    startPolling(data.job_id);
                } else {
                    updateStatus("failed", "Failed to get Job ID");
                    btn.disabled = false;
                }

            } catch (error) {
                console.error(error);
                updateStatus("failed", "Connection Error. Check URL.");
                btn.disabled = false;
            }
        }

        function startPolling(jobId) {
            if (pollInterval) clearInterval(pollInterval);

            pollInterval = setInterval(async () => {
                try {
                    const res = await fetch(`${API_BASE}/jobs/${encodeURIComponent(jobId)}`);

                    // The job store is in memory; a 404 means the job is gone
                    // for good, so stop polling instead of looping forever.
                    if (res.status === 404) {
                        clearInterval(pollInterval);
                        updateStatus("failed", "Job not found");
                        document.getElementById('processBtn').disabled = false;
                        return;
                    }
                    if (!res.ok) {
                        throw new Error("Unexpected status " + res.status);
                    }

                    const job = await res.json();

                    updateStatus(job.status, job.progress);

                    if (job.status === 'completed') {
                        clearInterval(pollInterval);
                        showResults(job.result);
                        document.getElementById('processBtn').disabled = false;
                    }

                    if (job.status === 'failed') {
                        clearInterval(pollInterval);
                        document.getElementById('progressText').innerText = "Error: " + job.error;
                        document.getElementById('processBtn').disabled = false;
                    }

                } catch (e) {
                    console.error("Polling error", e);
                }
            }, 2000);
        }

        function updateStatus(status, message) {
            const badge = document.getElementById('statusBadge');
            const spinner = document.getElementById('spinner');
            const text = document.getElementById('progressText');

            badge.className = `status-badge ${status}`;
            badge.innerText = status.toUpperCase();
            text.innerText = message || "Processing...";

            if (status === 'processing' || status === 'queued') {
                spinner.style.display = 'block';
            } else {
                spinner.style.display = 'none';
            }
        }

        function showResults(result) {
            const box = document.getElementById('resultBox');
            const segments = result.multi_face_segments || [];

            // Build the result with DOM nodes; server-provided values are set
            // as text or properties and never parsed as HTML or script.
            box.innerHTML = '';

            const wrapper = document.createElement('div');
            wrapper.className = 'result-box';

            const heading = document.createElement('div');
            heading.style.cssText = 'margin-bottom: 0.5rem; color: #10b981; font-weight: 600;';
            heading.textContent = '✅ Video Ready!';
            wrapper.appendChild(heading);

            const urlBox = document.createElement('div');
            urlBox.className = 'result-url';
            urlBox.textContent = result.video_url;
            wrapper.appendChild(urlBox);

            const copyBtn = document.createElement('button');
            copyBtn.className = 'copy-btn';
            copyBtn.textContent = '📋 Copy URL';
            copyBtn.addEventListener('click', () => {
                navigator.clipboard.writeText(result.video_url).then(() => {
                    copyBtn.innerText = 'Copied!';
                });
            });
            wrapper.appendChild(copyBtn);

            const segmentsInfo = document.createElement('div');
            segmentsInfo.className = 'segments-info';
            if (segments.length > 0) {
                const title = document.createElement('strong');
                title.textContent = '🎭 Multi-face segments found:';
                segmentsInfo.appendChild(title);
                segments.forEach((s, i) => {
                    segmentsInfo.appendChild(document.createElement('br'));
                    segmentsInfo.appendChild(
                        document.createTextNode(`Segment ${i + 1}: ${s.start}s - ${s.end}s`)
                    );
                });
            } else {
                segmentsInfo.textContent = 'No multi-face segments detected (single speaker throughout)';
            }
            wrapper.appendChild(segmentsInfo);

            const stats = document.createElement('div');
            stats.className = 'segments-info';
            const statsLabel = document.createElement('strong');
            statsLabel.textContent = '📊 Stats:';
            stats.appendChild(statsLabel);
            stats.appendChild(
                document.createTextNode(` ${result.total_frames_analyzed} frames analyzed`)
            );
            wrapper.appendChild(stats);

            const video = document.createElement('video');
            video.controls = true;
            video.src = result.video_url;
            wrapper.appendChild(video);

            box.appendChild(wrapper);
        }
    </script>
</body>
</html>
"""
    return html_content
|
|