Spaces:

WeReCooking
/

Face-ReAging-CPU

Paused

App Files Files Community

Nekochu committed on Mar 23

Commit

f2a1251

1 Parent(s): 844e775

unified view: single input for image or video

Browse files

Files changed (1) hide show

app.py +181 -298

app.py CHANGED Viewed

@@ -2,7 +2,7 @@
 Face Re-Aging with ONNX (CPU)
 Based on Disney's FRAN (Face Re-Aging Network) architecture.
 Model: face_reaging.onnx from VisoMaster-Fusion.
-Supports single image and video re-aging.
 """
 import os
@@ -47,18 +47,16 @@ sess = ort.InferenceSession(
 print("Model loaded.")
 # ---------------------------------------------------------------------------
-# OpenCV DNN face detection (no extra dependencies)
 # ---------------------------------------------------------------------------
 _face_cascade = cv2.CascadeClassifier(
     cv2.data.haarcascades + "haarcascade_frontalface_default.xml"
 )
 _dnn_model_path = os.path.join(os.path.dirname(__file__), "face_detection_yunet_2023mar.onnx")
 YUNET_URL = "https://github.com/opencv/opencv_zoo/raw/main/models/face_detection_yunet/face_detection_yunet_2023mar.onnx"
 def _ensure_yunet():
-    """Download YuNet face detector if not present."""
     global _dnn_model_path
     if not os.path.exists(_dnn_model_path):
         print("Downloading YuNet face detector...")
@@ -76,26 +74,13 @@ def _ensure_yunet():
 def detect_face_box(image_rgb: np.ndarray):
-    """
-    Detect the largest face bounding box.
-    Returns (x1, y1, x2, y2) in pixel coords or None.
-    """
     h, w = image_rgb.shape[:2]
-    # Try YuNet first (more accurate)
     try:
         yunet_path = _ensure_yunet()
         detector = cv2.FaceDetectorYN.create(yunet_path, "", (w, h), 0.5, 0.3, 5000)
         _, faces = detector.detect(image_rgb)
         if faces is not None and len(faces) > 0:
-            best_idx = 0
-            best_area = 0
-            for i, face in enumerate(faces):
-                fw, fh = face[2], face[3]
-                area = fw * fh
-                if area > best_area:
-                    best_area = area
-                    best_idx = i
             f = faces[best_idx]
             x1, y1 = int(f[0]), int(f[1])
             x2, y2 = int(f[0] + f[2]), int(f[1] + f[3])
@@ -103,172 +88,104 @@ def detect_face_box(image_rgb: np.ndarray):
     except Exception as e:
         print(f"YuNet failed, falling back to Haar: {e}")
-    # Fallback: Haar cascade
     gray = cv2.cvtColor(image_rgb, cv2.COLOR_RGB2GRAY)
     faces = _face_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=5, minSize=(60, 60))
     if len(faces) == 0:
         return None
     best_idx = np.argmax([fw * fh for (_, _, fw, fh) in faces])
     x, y, fw, fh = faces[best_idx]
     return (x, y, x + fw, y + fh)
 # ---------------------------------------------------------------------------
-# Face cropping with margin
 # ---------------------------------------------------------------------------
-def crop_face_region(image_rgb: np.ndarray, box):
-    """
-    Crop a square region around the detected face with generous margins.
-    Returns: cropped image, (l_x, l_y, r_x, r_y) paste-back coords.
-    """
     h, w = image_rgb.shape[:2]
     x1, y1, x2, y2 = box
-    face_w = x2 - x1
-    face_h = y2 - y1
     margin_top = int(face_h * 0.63 * 0.85)
     margin_bot = int(face_h * 0.37 * 0.85)
     margin_x = int(face_w * 0.85 / 2)
     margin_top += 2 * margin_x - margin_top - margin_bot
-    l_y = max(y1 - margin_top, 0)
-    r_y = min(y2 + margin_bot, h)
-    l_x = max(x1 - margin_x, 0)
-    r_x = min(x2 + margin_x, w)
-    cropped = image_rgb[l_y:r_y, l_x:r_x, :]
-    return cropped, (l_x, l_y, r_x, r_y)
-# ---------------------------------------------------------------------------
-# Blending mask (soft feathered edges)
-# ---------------------------------------------------------------------------
 def create_blend_mask(crop_h, crop_w, feather=0.15):
-    """Create a soft feathered blending mask."""
     mask = np.ones((crop_h, crop_w), dtype=np.float32)
-    border_y = max(int(crop_h * feather), 1)
-    border_x = max(int(crop_w * feather), 1)
-    for i in range(border_y):
-        alpha = i / border_y
-        mask[i, :] *= alpha
-        mask[crop_h - 1 - i, :] *= alpha
-    for j in range(border_x):
-        alpha = j / border_x
-        mask[:, j] *= alpha
-        mask[:, crop_w - 1 - j] *= alpha
     return mask[:, :, np.newaxis]
-# ---------------------------------------------------------------------------
-# Core inference on a single frame (numpy RGB in, numpy RGB out)
-# ---------------------------------------------------------------------------
-def reage_frame(image_rgb: np.ndarray, source_age: int, target_age: int) -> np.ndarray:
-    """
-    Re-age the face in a numpy RGB image.
-    Returns the re-aged image (same size), or original if no face found.
-    """
     box = detect_face_box(image_rgb)
     if box is None:
-        return image_rgb  # no face, return unchanged
     cropped, (l_x, l_y, r_x, r_y) = crop_face_region(image_rgb, box)
     crop_h, crop_w = cropped.shape[:2]
     cropped_resized = cv2.resize(cropped, (512, 512), interpolation=cv2.INTER_LINEAR)
-    img_tensor = cropped_resized.astype(np.float32) / 255.0
-    img_tensor = np.transpose(img_tensor, (2, 0, 1))
-    src_age_ch = np.full((1, 512, 512), source_age / 100.0, dtype=np.float32)
-    tgt_age_ch = np.full((1, 512, 512), target_age / 100.0, dtype=np.float32)
-    input_tensor = np.concatenate([img_tensor, src_age_ch, tgt_age_ch], axis=0)
-    input_tensor = input_tensor[np.newaxis, ...]
-    delta = sess.run(None, {"input": input_tensor})[0]
-    aged = img_tensor + delta[0]
-    aged = np.clip(aged, 0.0, 1.0)
-    aged_hwc = np.transpose(aged, (1, 2, 0))
-    aged_hwc = (aged_hwc * 255).astype(np.uint8)
     aged_resized = cv2.resize(aged_hwc, (crop_w, crop_h), interpolation=cv2.INTER_LINEAR)
     result = image_rgb.copy()
-    blend_mask = create_blend_mask(crop_h, crop_w, feather=0.12)
     region = result[l_y:r_y, l_x:r_x].astype(np.float32)
-    aged_f = aged_resized.astype(np.float32)
-    blended = region * (1 - blend_mask) + aged_f * blend_mask
     result[l_y:r_y, l_x:r_x] = blended.astype(np.uint8)
     return result
-# ---------------------------------------------------------------------------
-# Image re-aging (wraps reage_frame for Gradio)
-# ---------------------------------------------------------------------------
-def reage_face(image_pil: Image.Image, source_age: int, target_age: int):
-    """Re-age the face in the given PIL image."""
-    t0 = time.time()
-    image_rgb = np.array(image_pil.convert("RGB"))
-    box = detect_face_box(image_rgb)
-    if box is None:
-        raise gr.Error("No face detected in the image. Please upload a clear photo with a visible face.")
-    result = reage_frame(image_rgb, source_age, target_age)
-    elapsed = time.time() - t0
-    info = f"Done in {elapsed:.2f}s | Source age: {source_age} | Target age: {target_age}"
-    return Image.fromarray(result), info
 # ---------------------------------------------------------------------------
 # ffmpeg helpers
 # ---------------------------------------------------------------------------
 def _find_ffmpeg():
-    """Return ffmpeg path."""
     path = shutil.which("ffmpeg")
     if path:
         return path
-    # HF Spaces usually have it
     for p in ["/usr/bin/ffmpeg", "/usr/local/bin/ffmpeg"]:
         if os.path.isfile(p):
             return p
-    raise gr.Error("ffmpeg not found. Video processing requires ffmpeg.")
-def _get_video_info(video_path: str):
-    """Get fps and frame count using ffprobe."""
     ffprobe = shutil.which("ffprobe") or shutil.which("ffprobe", path="/usr/bin:/usr/local/bin")
     if not ffprobe:
-        # Fallback: use OpenCV just to read metadata
         cap = cv2.VideoCapture(video_path)
         fps = cap.get(cv2.CAP_PROP_FPS) or 25.0
         count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
         cap.release()
         return fps, count
     try:
         r = subprocess.run(
             [ffprobe, "-v", "quiet", "-print_format", "json",
              "-show_streams", "-select_streams", "v:0", video_path],
             capture_output=True, text=True, timeout=30,
         )
-        import json
-        info = json.loads(r.stdout)
-        stream = info["streams"][0]
-        # fps
-        fps_str = stream.get("r_frame_rate", "25/1")
-        num, den = fps_str.split("/")
         fps = float(num) / float(den)
-        # frame count
         nb = stream.get("nb_frames")
-        if nb and nb != "N/A":
-            count = int(nb)
-        else:
-            dur = float(stream.get("duration", 0))
-            count = int(dur * fps)
         return fps, count
     except Exception:
         cap = cv2.VideoCapture(video_path)
@@ -278,216 +195,182 @@ def _get_video_info(video_path: str):
         return fps, count
-def _extract_frames(video_path: str, out_dir: str):
-    """Extract frames from video using ffmpeg."""
     ffmpeg = _find_ffmpeg()
-    out_pattern = os.path.join(out_dir, "frame_%06d.png")
-    cmd = [ffmpeg, "-i", video_path, "-vsync", "0", out_pattern, "-y"]
     r = subprocess.run(cmd, capture_output=True, text=True, timeout=300)
     if r.returncode != 0:
-        raise gr.Error(f"ffmpeg frame extraction failed: {r.stderr[-500:]}")
-def _assemble_video(frames_dir: str, output_path: str, fps: float, audio_source: str = None):
-    """Reassemble frames into MP4 using ffmpeg."""
     ffmpeg = _find_ffmpeg()
-    in_pattern = os.path.join(frames_dir, "frame_%06d.png")
-    cmd = [
-        ffmpeg, "-y",
-        "-framerate", str(fps),
-        "-i", in_pattern,
-    ]
-    # Try to copy audio from original
     if audio_source:
         cmd += ["-i", audio_source, "-map", "0:v", "-map", "1:a?", "-shortest"]
-    cmd += [
-        "-c:v", "libx264",
-        "-pix_fmt", "yuv420p",
-        "-preset", "fast",
-        "-crf", "20",
-        "-movflags", "+faststart",
-        output_path,
-    ]
     r = subprocess.run(cmd, capture_output=True, text=True, timeout=600)
     if r.returncode != 0:
-        raise gr.Error(f"ffmpeg assembly failed: {r.stderr[-500:]}")
 # ---------------------------------------------------------------------------
-# Video re-aging
 # ---------------------------------------------------------------------------
-def reage_video(video_path: str, source_age: int, target_age: int, progress=gr.Progress()):
-    """Re-age faces in every frame of a video."""
-    if video_path is None:
-        raise gr.Error("Please upload a video.")
-    t0 = time.time()
-    # Get video info
-    fps, total_frames = _get_video_info(video_path)
-    duration = total_frames / max(fps, 1)
-    if duration > MAX_VIDEO_SECONDS:
-        raise gr.Error(
-            f"Video is {duration:.1f}s long. Maximum allowed is {MAX_VIDEO_SECONDS}s. "
-            f"Please trim your video first."
-        )
-    if total_frames > MAX_FRAMES:
-        raise gr.Error(
-            f"Video has {total_frames} frames. Maximum allowed is {MAX_FRAMES}. "
-            f"Please use a shorter video."
-        )
-    # Create temp dirs
-    tmp_root = tempfile.mkdtemp(prefix="reage_")
-    frames_in = os.path.join(tmp_root, "in")
-    frames_out = os.path.join(tmp_root, "out")
-    os.makedirs(frames_in, exist_ok=True)
-    os.makedirs(frames_out, exist_ok=True)
-    try:
-        # Extract frames
-        progress(0, desc="Extracting frames...")
-        _extract_frames(video_path, frames_in)
-        # Get frame list
-        frame_files = sorted(glob_mod.glob(os.path.join(frames_in, "frame_*.png")))
-        n_frames = len(frame_files)
-        if n_frames == 0:
-            raise gr.Error("No frames extracted from video. Is the file a valid video?")
-        # Re-check limit after extraction
-        if n_frames > MAX_FRAMES:
-            raise gr.Error(f"Video has {n_frames} frames (max {MAX_FRAMES}). Please use a shorter video.")
-        faces_found = 0
-        faces_missed = 0
-        # Process each frame
-        for idx, fpath in enumerate(frame_files):
-            progress((idx + 1) / n_frames, desc=f"Re-aging frame {idx + 1}/{n_frames}...")
-            # Read frame (BGR -> RGB)
-            frame_bgr = cv2.imread(fpath)
-            if frame_bgr is None:
-                continue
-            frame_rgb = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB)
-            # Detect and re-age
-            box = detect_face_box(frame_rgb)
-            if box is not None:
-                result_rgb = reage_frame(frame_rgb, source_age, target_age)
-                faces_found += 1
-            else:
-                result_rgb = frame_rgb
-                faces_missed += 1
-            # Save (RGB -> BGR)
-            fname = os.path.basename(fpath)
-            out_path = os.path.join(frames_out, fname)
-            result_bgr = cv2.cvtColor(result_rgb, cv2.COLOR_RGB2BGR)
-            cv2.imwrite(out_path, result_bgr)
-        # Assemble video
-        progress(1.0, desc="Assembling video...")
-        output_path = os.path.join(tmp_root, "output.mp4")
-        _assemble_video(frames_out, output_path, fps, audio_source=video_path)
         elapsed = time.time() - t0
-        speed = n_frames / max(elapsed, 0.01)
-        info = (
-            f"Done in {elapsed:.1f}s | {n_frames} frames at {speed:.1f} fps | "
-            f"Faces found: {faces_found}, skipped: {faces_missed} | "
-            f"Source age: {source_age} -> Target age: {target_age}"
-        )
-        return output_path, info
-    except gr.Error:
-        raise
-    except Exception as e:
-        raise gr.Error(f"Video processing failed: {str(e)}")
 # ---------------------------------------------------------------------------
-# Gradio UI
 # ---------------------------------------------------------------------------
-def process_image(image, source_age, target_age):
-    if image is None:
-        raise gr.Error("Please upload an image.")
-    return reage_face(image, int(source_age), int(target_age))
-def process_video(video, source_age, target_age, progress=gr.Progress()):
-    if video is None:
-        raise gr.Error("Please upload a video.")
-    return reage_video(video, int(source_age), int(target_age), progress)
 with gr.Blocks(title="Face Re-Aging (CPU)") as demo:
     gr.Markdown(
         "# Face Re-Aging (CPU)\n"
-        "Age or de-age faces using Disney FRAN-style model. "
-        "Works on both **images** and **videos**."
     )
-    with gr.Tabs():
-        # ---- Image Tab ----
-        with gr.TabItem("Image"):
-            with gr.Row():
-                with gr.Column():
-                    img_input = gr.Image(type="pil", label="Input Image")
-                    img_src_age = gr.Slider(
-                        minimum=5, maximum=95, value=25, step=1,
-                        label="Source Age (current age)",
-                    )
-                    img_tgt_age = gr.Slider(
-                        minimum=5, maximum=95, value=65, step=1,
-                        label="Target Age (desired age)",
-                    )
-                    img_btn = gr.Button("Re-Age Face", variant="primary")
-                with gr.Column():
-                    img_output = gr.Image(type="pil", label="Re-Aged Result")
-                    img_info = gr.Textbox(label="Info", interactive=False)
-            img_btn.click(
-                fn=process_image,
-                inputs=[img_input, img_src_age, img_tgt_age],
-                outputs=[img_output, img_info],
-            )
-        # ---- Video Tab ----
-        with gr.TabItem("Video"):
-            gr.Markdown(
-                f"Upload a video (max **{MAX_VIDEO_SECONDS}s** / **{MAX_FRAMES} frames**). "
-                f"Each frame is processed individually on CPU, so expect ~0.5-2 fps."
             )
-            with gr.Row():
-                with gr.Column():
-                    vid_input = gr.Video(label="Input Video")
-                    vid_src_age = gr.Slider(
-                        minimum=5, maximum=95, value=25, step=1,
-                        label="Source Age (current age)",
-                    )
-                    vid_tgt_age = gr.Slider(
-                        minimum=5, maximum=95, value=65, step=1,
-                        label="Target Age (desired age)",
-                    )
-                    vid_btn = gr.Button("Re-Age Video", variant="primary")
-                with gr.Column():
-                    vid_output = gr.Video(label="Re-Aged Video")
-                    vid_info = gr.Textbox(label="Info", interactive=False)
-            vid_btn.click(
-                fn=process_video,
-                inputs=[vid_input, vid_src_age, vid_tgt_age],
-                outputs=[vid_output, vid_info],
             )
     gr.Markdown(
         "**Model:** `face_reaging.onnx` (118 MB) from "

 Face Re-Aging with ONNX (CPU)
 Based on Disney's FRAN (Face Re-Aging Network) architecture.
 Model: face_reaging.onnx from VisoMaster-Fusion.
+Supports image and video re-aging in a single unified view.
 """
 import os
 print("Model loaded.")
 # ---------------------------------------------------------------------------
+# Face detection
 # ---------------------------------------------------------------------------
 _face_cascade = cv2.CascadeClassifier(
     cv2.data.haarcascades + "haarcascade_frontalface_default.xml"
 )
 _dnn_model_path = os.path.join(os.path.dirname(__file__), "face_detection_yunet_2023mar.onnx")
 YUNET_URL = "https://github.com/opencv/opencv_zoo/raw/main/models/face_detection_yunet/face_detection_yunet_2023mar.onnx"
 def _ensure_yunet():
     global _dnn_model_path
     if not os.path.exists(_dnn_model_path):
         print("Downloading YuNet face detector...")
 def detect_face_box(image_rgb: np.ndarray):
     h, w = image_rgb.shape[:2]
     try:
         yunet_path = _ensure_yunet()
         detector = cv2.FaceDetectorYN.create(yunet_path, "", (w, h), 0.5, 0.3, 5000)
         _, faces = detector.detect(image_rgb)
         if faces is not None and len(faces) > 0:
+            best_idx = int(np.argmax([f[2] * f[3] for f in faces]))
             f = faces[best_idx]
             x1, y1 = int(f[0]), int(f[1])
             x2, y2 = int(f[0] + f[2]), int(f[1] + f[3])
     except Exception as e:
         print(f"YuNet failed, falling back to Haar: {e}")
     gray = cv2.cvtColor(image_rgb, cv2.COLOR_RGB2GRAY)
     faces = _face_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=5, minSize=(60, 60))
     if len(faces) == 0:
         return None
     best_idx = np.argmax([fw * fh for (_, _, fw, fh) in faces])
     x, y, fw, fh = faces[best_idx]
     return (x, y, x + fw, y + fh)
 # ---------------------------------------------------------------------------
+# Core inference
 # ---------------------------------------------------------------------------
def crop_face_region(image_rgb, box):
    """Crop a padded region around a detected face box.

    The horizontal margin is 42.5% of the face width on each side. The
    vertical margins add up to the same total (``2 * margin_x``): the bottom
    margin is ``0.37 * 0.85`` of the face height and the top takes the rest.

    Args:
        image_rgb: Image array indexed as ``[row, col, channel]``.
        box: ``(x1, y1, x2, y2)`` face box in pixel coordinates.

    Returns:
        ``(crop, (l_x, l_y, r_x, r_y))`` where ``crop`` is a view into
        ``image_rgb`` and the tuple holds the clamped paste-back coordinates.
    """
    h, w = image_rgb.shape[:2]
    x1, y1, x2, y2 = box
    face_w, face_h = x2 - x1, y2 - y1

    margin_x = int(face_w * 0.85 / 2)
    margin_bot = int(face_h * 0.37 * 0.85)
    # Total vertical padding equals total horizontal padding. Clamp at zero
    # so a tall, narrow box never yields a negative top margin, which would
    # move the crop's upper edge *inside* the face box.
    margin_top = max(2 * margin_x - margin_bot, 0)

    l_y, r_y = max(y1 - margin_top, 0), min(y2 + margin_bot, h)
    l_x, r_x = max(x1 - margin_x, 0), min(x2 + margin_x, w)
    return image_rgb[l_y:r_y, l_x:r_x, :], (l_x, l_y, r_x, r_y)
 def create_blend_mask(crop_h, crop_w, feather=0.15):
     """Build a float32 blending mask that fades linearly to zero at the edges.

     The fade band is ``feather`` times the crop size along each axis (at
     least one pixel). The outermost row/column gets weight 0 and the
     interior keeps weight 1. The result has shape ``(crop_h, crop_w, 1)``.
     """
     weights = np.ones((crop_h, crop_w), dtype=np.float32)
     band_rows = max(int(crop_h * feather), 1)
     band_cols = max(int(crop_w * feather), 1)

     # Vertical fade: the same factor goes on the matching top and bottom row.
     for step in range(band_rows):
         fade = step / band_rows
         for row in (step, crop_h - 1 - step):
             weights[row, :] *= fade

     # Horizontal fade: the same factor on the matching left and right column.
     for step in range(band_cols):
         fade = step / band_cols
         for col in (step, crop_w - 1 - step):
             weights[:, col] *= fade

     return weights[:, :, np.newaxis]
def reage_frame(image_rgb, source_age, target_age):
    """Re-age the detected face in one RGB frame.

    The face crop is resized to 512x512, scaled to [0, 1], and stacked with
    two constant channels (``source_age / 100`` and ``target_age / 100``).
    The ONNX session output is added to the input crop as a delta, and the
    result is feather-blended back into a copy of the frame.

    Args:
        image_rgb: H x W x 3 RGB array (assumed uint8 - pixels are divided
            by 255 before inference).
        source_age: Current age of the subject.
        target_age: Desired age.

    Returns:
        A new array of the same size with the face re-aged, or ``image_rgb``
        itself when no face is found or the crop is empty.
    """
    box = detect_face_box(image_rgb)
    if box is None:
        return image_rgb
    cropped, (l_x, l_y, r_x, r_y) = crop_face_region(image_rgb, box)
    crop_h, crop_w = cropped.shape[:2]
    if crop_h == 0 or crop_w == 0:
        # A box lying outside the frame clamps to an empty slice, and
        # cv2.resize raises on empty input; treat it like "no face".
        return image_rgb

    cropped_resized = cv2.resize(cropped, (512, 512), interpolation=cv2.INTER_LINEAR)
    img_t = cropped_resized.astype(np.float32) / 255.0
    img_t = np.transpose(img_t, (2, 0, 1))  # HWC -> CHW
    src_ch = np.full((1, 512, 512), source_age / 100.0, dtype=np.float32)
    tgt_ch = np.full((1, 512, 512), target_age / 100.0, dtype=np.float32)
    inp = np.concatenate([img_t, src_ch, tgt_ch], axis=0)[np.newaxis, ...]

    delta = sess.run(None, {"input": inp})[0]
    aged = np.clip(img_t + delta[0], 0.0, 1.0)
    # Round rather than truncate so the conversion has no downward bias.
    aged_hwc = np.rint(np.transpose(aged, (1, 2, 0)) * 255).astype(np.uint8)
    aged_resized = cv2.resize(aged_hwc, (crop_w, crop_h), interpolation=cv2.INTER_LINEAR)

    result = image_rgb.copy()
    mask = create_blend_mask(crop_h, crop_w, feather=0.12)
    region = result[l_y:r_y, l_x:r_x].astype(np.float32)
    blended = region * (1 - mask) + aged_resized.astype(np.float32) * mask
    # float32 blending can land a hair below an integer (e.g. 199.99998);
    # plain astype() would truncate that to 199 and speckle the feather band.
    result[l_y:r_y, l_x:r_x] = np.clip(np.rint(blended), 0, 255).astype(np.uint8)
    return result
 # ---------------------------------------------------------------------------
 # ffmpeg helpers
 # ---------------------------------------------------------------------------
 def _find_ffmpeg():
     """Return the path to an ffmpeg executable.

     Looks on PATH first, then in two conventional install locations.
     Raises ``gr.Error`` when none of them exists.
     """
     located = shutil.which("ffmpeg")
     if not located:
         fallbacks = ("/usr/bin/ffmpeg", "/usr/local/bin/ffmpeg")
         located = next((c for c in fallbacks if os.path.isfile(c)), None)
     if located is None:
         raise gr.Error("ffmpeg not found.")
     return located
+def _get_video_info(video_path):
     ffprobe = shutil.which("ffprobe") or shutil.which("ffprobe", path="/usr/bin:/usr/local/bin")
     if not ffprobe:
         cap = cv2.VideoCapture(video_path)
         fps = cap.get(cv2.CAP_PROP_FPS) or 25.0
         count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
         cap.release()
         return fps, count
     try:
+        import json
         r = subprocess.run(
             [ffprobe, "-v", "quiet", "-print_format", "json",
              "-show_streams", "-select_streams", "v:0", video_path],
             capture_output=True, text=True, timeout=30,
         )
+        stream = json.loads(r.stdout)["streams"][0]
+        num, den = stream.get("r_frame_rate", "25/1").split("/")
         fps = float(num) / float(den)
         nb = stream.get("nb_frames")
+        count = int(nb) if nb and nb != "N/A" else int(float(stream.get("duration", 0)) * fps)
         return fps, count
     except Exception:
         cap = cv2.VideoCapture(video_path)
         return fps, count
def _extract_frames(video_path, out_dir):
    """Dump the frames of ``video_path`` into ``out_dir`` as PNG files.

    Frames are written as ``frame_%06d.png`` with ``-vsync 0``.

    Raises:
        gr.Error: If ffmpeg is missing or exits with a non-zero status; the
            message carries the last 500 characters of ffmpeg's stderr.
    """
    ffmpeg = _find_ffmpeg()
    out_pattern = os.path.join(out_dir, "frame_%06d.png")
    # Global options such as -y must come before the files they affect;
    # placed after the output path ffmpeg reports it as a trailing option
    # and ignores it.
    cmd = [ffmpeg, "-y", "-i", video_path, "-vsync", "0", out_pattern]
    r = subprocess.run(cmd, capture_output=True, text=True, timeout=300)
    if r.returncode != 0:
        raise gr.Error(f"Frame extraction failed: {r.stderr[-500:]}")
def _assemble_video(frames_dir, output_path, fps, audio_source=None):
    """Encode ``frame_%06d.png`` files from ``frames_dir`` into an H.264 MP4.

    Args:
        frames_dir: Directory holding the numbered PNG frames.
        output_path: Destination video file.
        fps: Frame rate passed to ffmpeg's ``-framerate``.
        audio_source: Optional media file given as a second input. Its audio
            stream is mapped if present (``1:a?``), and ``-shortest`` ends
            the output with the shorter stream.

    Raises:
        gr.Error: If ffmpeg is missing or exits with a non-zero status.
    """
    ffmpeg = _find_ffmpeg()
    cmd = [ffmpeg, "-y", "-framerate", str(fps), "-i", os.path.join(frames_dir, "frame_%06d.png")]
    if audio_source:
        cmd += ["-i", audio_source, "-map", "0:v", "-map", "1:a?", "-shortest"]
    cmd += [
        # libx264 with yuv420p rejects odd widths/heights; pad up to the next
        # even size (at most one pixel per axis) so such clips still encode.
        "-vf", "pad=ceil(iw/2)*2:ceil(ih/2)*2",
        "-c:v", "libx264", "-pix_fmt", "yuv420p", "-preset", "fast", "-crf", "20",
        "-movflags", "+faststart", output_path,
    ]
    r = subprocess.run(cmd, capture_output=True, text=True, timeout=600)
    if r.returncode != 0:
        raise gr.Error(f"Video assembly failed: {r.stderr[-500:]}")
 # ---------------------------------------------------------------------------
+# Unified process function
 # ---------------------------------------------------------------------------
# File extensions (lower-case, with dot) routed to the video pipeline.
VIDEO_EXTS = {".mp4", ".avi", ".mov", ".mkv", ".webm", ".flv", ".wmv", ".m4v"}


def _reage_still(image_rgb, source_age, target_age, t0, no_face_msg):
    """Re-age one RGB array and build the ``(image, None, info)`` result."""
    if detect_face_box(image_rgb) is None:
        raise gr.Error(no_face_msg)
    result = reage_frame(image_rgb, source_age, target_age)
    elapsed = time.time() - t0
    info = f"Done in {elapsed:.2f}s | {source_age} -> {target_age} years"
    return Image.fromarray(result), None, info


def _reage_video(file_path, source_age, target_age, t0, progress):
    """Re-age every frame of a video and build the ``(None, path, info)`` result."""
    fps, total_frames = _get_video_info(file_path)
    duration = total_frames / max(fps, 1)
    if duration > MAX_VIDEO_SECONDS:
        raise gr.Error(f"Video is {duration:.1f}s (max {MAX_VIDEO_SECONDS}s). Please trim it.")
    if total_frames > MAX_FRAMES:
        raise gr.Error(f"Video has {total_frames} frames (max {MAX_FRAMES}).")

    tmp_root = tempfile.mkdtemp(prefix="reage_")
    frames_in = os.path.join(tmp_root, "in")
    frames_out = os.path.join(tmp_root, "out")
    os.makedirs(frames_in, exist_ok=True)
    os.makedirs(frames_out, exist_ok=True)
    succeeded = False
    try:
        progress(0, desc="Extracting frames...")
        _extract_frames(file_path, frames_in)
        frame_files = sorted(glob_mod.glob(os.path.join(frames_in, "frame_*.png")))
        n_frames = len(frame_files)
        if n_frames == 0:
            raise gr.Error("No frames extracted. Is this a valid video?")
        # Metadata can under-report, so re-check against the real count.
        if n_frames > MAX_FRAMES:
            raise gr.Error(f"{n_frames} frames (max {MAX_FRAMES}).")

        faces_found, faces_missed = 0, 0
        for idx, fpath in enumerate(frame_files):
            progress((idx + 1) / n_frames, desc=f"Re-aging frame {idx + 1}/{n_frames}...")
            out_path = os.path.join(frames_out, os.path.basename(fpath))
            frame_bgr = cv2.imread(fpath)
            if frame_bgr is None:
                # Pass the file through untouched: a gap in the numbered
                # sequence would make ffmpeg stop reading at that point and
                # truncate the output video.
                shutil.copyfile(fpath, out_path)
                faces_missed += 1
                continue
            frame_rgb = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB)
            if detect_face_box(frame_rgb) is not None:
                result_rgb = reage_frame(frame_rgb, source_age, target_age)
                faces_found += 1
            else:
                result_rgb = frame_rgb
                faces_missed += 1
            cv2.imwrite(out_path, cv2.cvtColor(result_rgb, cv2.COLOR_RGB2BGR))

        progress(1.0, desc="Assembling video...")
        output_path = os.path.join(tmp_root, "output.mp4")
        _assemble_video(frames_out, output_path, fps, audio_source=file_path)
        elapsed = time.time() - t0
        speed = n_frames / max(elapsed, 0.01)
        info = (f"Done in {elapsed:.1f}s | {n_frames} frames at {speed:.1f} fps | "
                f"Faces: {faces_found} found, {faces_missed} skipped | "
                f"{source_age} -> {target_age} years")
        succeeded = True
        return None, output_path, info
    except gr.Error:
        raise
    except Exception as e:
        raise gr.Error(f"Video processing failed: {e}") from e
    finally:
        # The PNG frame dumps are only intermediates; drop them either way.
        # Keep tmp_root on success because it holds the returned output.mp4.
        shutil.rmtree(frames_in, ignore_errors=True)
        shutil.rmtree(frames_out, ignore_errors=True)
        if not succeeded:
            shutil.rmtree(tmp_root, ignore_errors=True)


def process(input_file, source_age, target_age, progress=gr.Progress()):
    """Re-age an uploaded image or video.

    Args:
        input_file: A PIL image, a file path string, or a file-like wrapper
            exposing the path as ``.name``.
        source_age: Current age of the subject (coerced with ``int``).
        target_age: Desired age (coerced with ``int``).
        progress: Gradio progress callback, used on the video path.

    Returns:
        ``(image, None, info)`` for still images or ``(None, video_path,
        info)`` for videos, matching the image / video / info outputs.

    Raises:
        gr.Error: On missing input, unreadable files, no detected face in a
            still image, over-limit videos, or any video-pipeline failure.
    """
    if input_file is None:
        raise gr.Error("Please upload an image or video.")
    t0 = time.time()
    source_age, target_age = int(source_age), int(target_age)

    # A PIL image is used directly, without touching the filesystem.
    if isinstance(input_file, Image.Image):
        return _reage_still(
            np.array(input_file.convert("RGB")), source_age, target_age, t0,
            "No face detected. Please upload a clear photo with a visible face.",
        )

    # Otherwise resolve a filesystem path. Upload wrappers carry it in
    # ``.name``; str() on such an object would give a repr, not a path.
    if isinstance(input_file, str):
        file_path = input_file
    else:
        file_path = getattr(input_file, "name", None) or str(input_file)

    ext = os.path.splitext(file_path)[1].lower()
    if ext in VIDEO_EXTS:
        return _reage_video(file_path, source_age, target_age, t0, progress)

    image_bgr = cv2.imread(file_path)
    if image_bgr is None:
        raise gr.Error("Could not read the file. Please upload a valid image or video.")
    image_rgb = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)
    return _reage_still(image_rgb, source_age, target_age, t0, "No face detected.")
 # ---------------------------------------------------------------------------
+# Gradio UI - Single unified view
 # ---------------------------------------------------------------------------
 with gr.Blocks(title="Face Re-Aging (CPU)") as demo:
     gr.Markdown(
         "# Face Re-Aging (CPU)\n"
+        "Upload an **image or video** to age or de-age faces. "
+        f"Videos: max {MAX_VIDEO_SECONDS}s, ~0.5-2 fps on CPU."
     )
+    with gr.Row():
+        with gr.Column():
+            file_input = gr.File(
+                label="Drop Image or Video Here",
+                file_types=["image", "video"],
             )
+            # Also accept pasted/webcam images
+            img_input = gr.Image(
+                type="pil", label="Or paste/capture an image",
+                visible=True,
             )
+            src_age = gr.Slider(minimum=5, maximum=95, value=25, step=1,
+                                label="Source Age (current)")
+            tgt_age = gr.Slider(minimum=5, maximum=95, value=65, step=1,
+                                label="Target Age (desired)")
+            btn = gr.Button("Re-Age", variant="primary", size="lg")
+        with gr.Column():
+            img_output = gr.Image(type="pil", label="Result (Image)")
+            vid_output = gr.Video(label="Result (Video)")
+            info_box = gr.Textbox(label="Info", interactive=False)
+    def on_submit_file(file_obj, source_age, target_age, progress=gr.Progress()):
+        if file_obj is None:
+            raise gr.Error("Please upload a file.")
+        return process(file_obj, source_age, target_age, progress)
+    def on_submit_image(image, source_age, target_age, progress=gr.Progress()):
+        if image is None:
+            raise gr.Error("Please provide an image.")
+        return process(image, source_age, target_age, progress)
+    btn.click(
+        fn=on_submit_file,
+        inputs=[file_input, src_age, tgt_age],
+        outputs=[img_output, vid_output, info_box],
+    )
+    # Also trigger on image input (for paste/webcam)
+    img_input.change(
+        fn=on_submit_image,
+        inputs=[img_input, src_age, tgt_age],
+        outputs=[img_output, vid_output, info_box],
+    )
     gr.Markdown(
         "**Model:** `face_reaging.onnx` (118 MB) from "