Spaces:

WeReCooking
/

Face-ReAging-CPU

Paused

App Files Files Community

Nekochu committed on Mar 22

Commit

844e775

1 Parent(s): 2a828f1

Add video re-aging support with ffmpeg

Browse files

Files changed (2) hide show

app.py +295 -68
packages.txt +1 -0

app.py CHANGED Viewed

@@ -2,10 +2,16 @@
 Face Re-Aging with ONNX (CPU)
 Based on Disney's FRAN (Face Re-Aging Network) architecture.
 Model: face_reaging.onnx from VisoMaster-Fusion.
 """
 import os
 import time
 import cv2
 import numpy as np
 import onnxruntime as ort
@@ -14,11 +20,16 @@ from PIL import Image
 from huggingface_hub import hf_hub_download
 # ---------------------------------------------------------------------------
-# Model loading
 # ---------------------------------------------------------------------------
 MODEL_PATH = "face_reaging.onnx"
 REPO_ID = "Luminia/Face-ReAging-CPU"
 def get_model_path():
     if os.path.exists(MODEL_PATH):
         return MODEL_PATH
@@ -38,16 +49,14 @@ print("Model loaded.")
 # ---------------------------------------------------------------------------
 # OpenCV DNN face detection (no extra dependencies)
 # ---------------------------------------------------------------------------
-# Use OpenCV's built-in Haar cascade as primary, with DNN SSD as fallback
 _face_cascade = cv2.CascadeClassifier(
     cv2.data.haarcascades + "haarcascade_frontalface_default.xml"
 )
-# Try to use the more accurate DNN face detector if available
-_dnn_net = None
 _dnn_model_path = os.path.join(os.path.dirname(__file__), "face_detection_yunet_2023mar.onnx")
 YUNET_URL = "https://github.com/opencv/opencv_zoo/raw/main/models/face_detection_yunet/face_detection_yunet_2023mar.onnx"
 def _ensure_yunet():
     """Download YuNet face detector if not present."""
     global _dnn_model_path
@@ -79,7 +88,6 @@ def detect_face_box(image_rgb: np.ndarray):
         detector = cv2.FaceDetectorYN.create(yunet_path, "", (w, h), 0.5, 0.3, 5000)
         _, faces = detector.detect(image_rgb)
         if faces is not None and len(faces) > 0:
-            # Pick largest face by area
             best_idx = 0
             best_area = 0
             for i, face in enumerate(faces):
@@ -101,7 +109,6 @@ def detect_face_box(image_rgb: np.ndarray):
     if len(faces) == 0:
         return None
-    # Pick largest
     best_idx = np.argmax([fw * fh for (_, _, fw, fh) in faces])
     x, y, fw, fh = faces[best_idx]
     return (x, y, x + fw, y + fh)
@@ -111,8 +118,7 @@ def detect_face_box(image_rgb: np.ndarray):
 # ---------------------------------------------------------------------------
 def crop_face_region(image_rgb: np.ndarray, box):
     """
-    Crop a square region around the detected face with generous margins
-    (similar to FRAN's approach: forehead gets more margin).
     Returns: cropped image, (l_x, l_y, r_x, r_y) paste-back coords.
     """
     h, w = image_rgb.shape[:2]
@@ -121,12 +127,9 @@ def crop_face_region(image_rgb: np.ndarray, box):
     face_w = x2 - x1
     face_h = y2 - y1
-    # Margins: top is larger (forehead), bottom smaller
     margin_top = int(face_h * 0.63 * 0.85)
     margin_bot = int(face_h * 0.37 * 0.85)
     margin_x = int(face_w * 0.85 / 2)
-    # Adjust top margin to keep square
     margin_top += 2 * margin_x - margin_top - margin_bot
     l_y = max(y1 - margin_top, 0)
@@ -141,10 +144,7 @@ def crop_face_region(image_rgb: np.ndarray, box):
 # Blending mask (soft feathered edges)
 # ---------------------------------------------------------------------------
 def create_blend_mask(crop_h, crop_w, feather=0.15):
-    """
-    Create a soft feathered blending mask to avoid hard edges
-    when pasting the re-aged face back.
-    """
     mask = np.ones((crop_h, crop_w), dtype=np.float32)
     border_y = max(int(crop_h * feather), 1)
     border_x = max(int(crop_w * feather), 1)
@@ -159,63 +159,44 @@ def create_blend_mask(crop_h, crop_w, feather=0.15):
         mask[:, j] *= alpha
         mask[:, crop_w - 1 - j] *= alpha
-    return mask[:, :, np.newaxis]  # (H, W, 1)
 # ---------------------------------------------------------------------------
-# Core inference
 # ---------------------------------------------------------------------------
-def reage_face(
-    image_pil: Image.Image,
-    source_age: int,
-    target_age: int,
-):
     """
-    Re-age the face in the given PIL image.
     """
-    t0 = time.time()
-    image_rgb = np.array(image_pil.convert("RGB"))
-    h_orig, w_orig = image_rgb.shape[:2]
-    # Detect face
     box = detect_face_box(image_rgb)
     if box is None:
-        raise gr.Error("No face detected in the image. Please upload a clear photo with a visible face.")
-    # Crop face region
     cropped, (l_x, l_y, r_x, r_y) = crop_face_region(image_rgb, box)
     crop_h, crop_w = cropped.shape[:2]
-    # Resize to 512x512 for the model
     cropped_resized = cv2.resize(cropped, (512, 512), interpolation=cv2.INTER_LINEAR)
-    # Normalize to [0, 1] float32, CHW
     img_tensor = cropped_resized.astype(np.float32) / 255.0
-    img_tensor = np.transpose(img_tensor, (2, 0, 1))  # (3, 512, 512)
-    # Create age channels
     src_age_ch = np.full((1, 512, 512), source_age / 100.0, dtype=np.float32)
     tgt_age_ch = np.full((1, 512, 512), target_age / 100.0, dtype=np.float32)
-    # Stack: (5, 512, 512) -> (1, 5, 512, 512)
     input_tensor = np.concatenate([img_tensor, src_age_ch, tgt_age_ch], axis=0)
     input_tensor = input_tensor[np.newaxis, ...]
-    # Run inference
-    delta = sess.run(None, {"input": input_tensor})[0]  # (1, 3, 512, 512)
-    # Apply delta to the cropped image
-    aged = img_tensor + delta[0]  # (3, 512, 512)
     aged = np.clip(aged, 0.0, 1.0)
-    # Convert back to HWC uint8
-    aged_hwc = np.transpose(aged, (1, 2, 0))  # (512, 512, 3)
     aged_hwc = (aged_hwc * 255).astype(np.uint8)
-    # Resize back to original crop size
     aged_resized = cv2.resize(aged_hwc, (crop_w, crop_h), interpolation=cv2.INTER_LINEAR)
-    # Blend back into original image
     result = image_rgb.copy()
     blend_mask = create_blend_mask(crop_h, crop_w, feather=0.12)
     region = result[l_y:r_y, l_x:r_x].astype(np.float32)
@@ -223,45 +204,291 @@ def reage_face(
     blended = region * (1 - blend_mask) + aged_f * blend_mask
     result[l_y:r_y, l_x:r_x] = blended.astype(np.uint8)
     elapsed = time.time() - t0
     info = f"Done in {elapsed:.2f}s | Source age: {source_age} | Target age: {target_age}"
     return Image.fromarray(result), info
 # ---------------------------------------------------------------------------
 # Gradio UI
 # ---------------------------------------------------------------------------
-def process(image, source_age, target_age):
     if image is None:
         raise gr.Error("Please upload an image.")
     return reage_face(image, int(source_age), int(target_age))
-with gr.Blocks(title="Face Re-Aging (CPU)") as demo:
-    gr.Markdown("# Face Re-Aging (CPU)\nAge or de-age faces using Disney FRAN-style model. Upload a photo, set source & target age.")
-    with gr.Row():
-        with gr.Column():
-            input_image = gr.Image(type="pil", label="Input Image")
-            source_age = gr.Slider(
-                minimum=5, maximum=95, value=25, step=1,
-                label="Source Age (current age of the person)",
-            )
-            target_age = gr.Slider(
-                minimum=5, maximum=95, value=65, step=1,
-                label="Target Age (desired age)",
-            )
-            run_btn = gr.Button("Re-Age Face", variant="primary")
-        with gr.Column():
-            output_image = gr.Image(type="pil", label="Re-Aged Result")
-            info_text = gr.Textbox(label="Info", interactive=False)
-    run_btn.click(
-        fn=process,
-        inputs=[input_image, source_age, target_age],
-        outputs=[output_image, info_text],
     )
     gr.Markdown(
         "**Model:** `face_reaging.onnx` (118 MB) from "
         "[VisoMaster-Fusion](https://github.com/VisoMasterFusion/VisoMaster-Fusion) | "

 Face Re-Aging with ONNX (CPU)
 Based on Disney's FRAN (Face Re-Aging Network) architecture.
 Model: face_reaging.onnx from VisoMaster-Fusion.
+Supports single image and video re-aging.
 """
 import os
+import shutil
+import subprocess
+import tempfile
 import time
+import glob as glob_mod
 import cv2
 import numpy as np
 import onnxruntime as ort
 from huggingface_hub import hf_hub_download
 # ---------------------------------------------------------------------------
+# Constants
 # ---------------------------------------------------------------------------
+MAX_VIDEO_SECONDS = 30
+MAX_FRAMES = 900
 MODEL_PATH = "face_reaging.onnx"
 REPO_ID = "Luminia/Face-ReAging-CPU"
+# ---------------------------------------------------------------------------
+# Model loading
+# ---------------------------------------------------------------------------
 def get_model_path():
     if os.path.exists(MODEL_PATH):
         return MODEL_PATH
 # ---------------------------------------------------------------------------
 # OpenCV DNN face detection (no extra dependencies)
 # ---------------------------------------------------------------------------
 _face_cascade = cv2.CascadeClassifier(
     cv2.data.haarcascades + "haarcascade_frontalface_default.xml"
 )
 _dnn_model_path = os.path.join(os.path.dirname(__file__), "face_detection_yunet_2023mar.onnx")
 YUNET_URL = "https://github.com/opencv/opencv_zoo/raw/main/models/face_detection_yunet/face_detection_yunet_2023mar.onnx"
 def _ensure_yunet():
     """Download YuNet face detector if not present."""
     global _dnn_model_path
         detector = cv2.FaceDetectorYN.create(yunet_path, "", (w, h), 0.5, 0.3, 5000)
         _, faces = detector.detect(image_rgb)
         if faces is not None and len(faces) > 0:
             best_idx = 0
             best_area = 0
             for i, face in enumerate(faces):
     if len(faces) == 0:
         return None
     best_idx = np.argmax([fw * fh for (_, _, fw, fh) in faces])
     x, y, fw, fh = faces[best_idx]
     return (x, y, x + fw, y + fh)
 # ---------------------------------------------------------------------------
 def crop_face_region(image_rgb: np.ndarray, box):
     """
+    Crop a square region around the detected face with generous margins.
     Returns: cropped image, (l_x, l_y, r_x, r_y) paste-back coords.
     """
     h, w = image_rgb.shape[:2]
     face_w = x2 - x1
     face_h = y2 - y1
     margin_top = int(face_h * 0.63 * 0.85)
     margin_bot = int(face_h * 0.37 * 0.85)
     margin_x = int(face_w * 0.85 / 2)
     margin_top += 2 * margin_x - margin_top - margin_bot
     l_y = max(y1 - margin_top, 0)
 # Blending mask (soft feathered edges)
 # ---------------------------------------------------------------------------
 def create_blend_mask(crop_h, crop_w, feather=0.15):
+    """Create a soft feathered blending mask."""
     mask = np.ones((crop_h, crop_w), dtype=np.float32)
     border_y = max(int(crop_h * feather), 1)
     border_x = max(int(crop_w * feather), 1)
         mask[:, j] *= alpha
         mask[:, crop_w - 1 - j] *= alpha
+    return mask[:, :, np.newaxis]
 # ---------------------------------------------------------------------------
+# Core inference on a single frame (numpy RGB in, numpy RGB out)
 # ---------------------------------------------------------------------------
+def reage_frame(image_rgb: np.ndarray, source_age: int, target_age: int) -> np.ndarray:
     """
+    Re-age the face in a numpy RGB image.
+    Returns the re-aged image (same size), or original if no face found.
     """
     box = detect_face_box(image_rgb)
     if box is None:
+        return image_rgb  # no face, return unchanged
     cropped, (l_x, l_y, r_x, r_y) = crop_face_region(image_rgb, box)
     crop_h, crop_w = cropped.shape[:2]
     cropped_resized = cv2.resize(cropped, (512, 512), interpolation=cv2.INTER_LINEAR)
     img_tensor = cropped_resized.astype(np.float32) / 255.0
+    img_tensor = np.transpose(img_tensor, (2, 0, 1))
     src_age_ch = np.full((1, 512, 512), source_age / 100.0, dtype=np.float32)
     tgt_age_ch = np.full((1, 512, 512), target_age / 100.0, dtype=np.float32)
     input_tensor = np.concatenate([img_tensor, src_age_ch, tgt_age_ch], axis=0)
     input_tensor = input_tensor[np.newaxis, ...]
+    delta = sess.run(None, {"input": input_tensor})[0]
+    aged = img_tensor + delta[0]
     aged = np.clip(aged, 0.0, 1.0)
+    aged_hwc = np.transpose(aged, (1, 2, 0))
     aged_hwc = (aged_hwc * 255).astype(np.uint8)
     aged_resized = cv2.resize(aged_hwc, (crop_w, crop_h), interpolation=cv2.INTER_LINEAR)
     result = image_rgb.copy()
     blend_mask = create_blend_mask(crop_h, crop_w, feather=0.12)
     region = result[l_y:r_y, l_x:r_x].astype(np.float32)
     blended = region * (1 - blend_mask) + aged_f * blend_mask
     result[l_y:r_y, l_x:r_x] = blended.astype(np.uint8)
+    return result
+# ---------------------------------------------------------------------------
+# Image re-aging (wraps reage_frame for Gradio)
+# ---------------------------------------------------------------------------
def reage_face(image_pil: Image.Image, source_age: int, target_age: int):
    """Re-age the face in the given PIL image.

    Args:
        image_pil: Input image; converted to RGB before processing.
        source_age: Age passed to the model as the subject's current age.
        target_age: Age passed to the model as the desired age.

    Returns:
        A ``(PIL.Image, str)`` pair: the re-aged image and a one-line
        summary with elapsed time and the two ages.

    Raises:
        gr.Error: if no face is detected in the image.
    """
    t0 = time.time()
    image_rgb = np.array(image_pil.convert("RGB"))

    # reage_frame() runs face detection itself and hands back the very same
    # array object when it finds no face. Checking identity avoids running
    # the detector a second time just to decide whether to raise.
    result = reage_frame(image_rgb, source_age, target_age)
    if result is image_rgb:
        raise gr.Error("No face detected in the image. Please upload a clear photo with a visible face.")

    elapsed = time.time() - t0
    info = f"Done in {elapsed:.2f}s | Source age: {source_age} | Target age: {target_age}"
    return Image.fromarray(result), info
+# ---------------------------------------------------------------------------
+# ffmpeg helpers
+# ---------------------------------------------------------------------------
def _find_ffmpeg():
    """Locate the ffmpeg executable.

    PATH is searched first; if that fails, two fixed install locations
    are probed in order.

    Returns:
        Path to the ffmpeg binary.

    Raises:
        gr.Error: if ffmpeg cannot be found anywhere.
    """
    located = shutil.which("ffmpeg")
    if not located:
        # PATH lookup failed: fall back to the usual system locations.
        fallbacks = ("/usr/bin/ffmpeg", "/usr/local/bin/ffmpeg")
        located = next((c for c in fallbacks if os.path.isfile(c)), None)
    if located:
        return located
    raise gr.Error("ffmpeg not found. Video processing requires ffmpeg.")
def _read_video_info_opencv(video_path: str):
    """Read (fps, frame_count) from the video's metadata via OpenCV."""
    cap = cv2.VideoCapture(video_path)
    try:
        # OpenCV reports 0 when the rate is unknown; assume 25 fps then.
        fps = cap.get(cv2.CAP_PROP_FPS) or 25.0
        count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    finally:
        cap.release()
    return fps, count


def _get_video_info(video_path: str):
    """Get fps and frame count, preferring ffprobe over OpenCV.

    Args:
        video_path: Path to the video file.

    Returns:
        ``(fps, frame_count)``. The frame count comes from ``nb_frames``
        when ffprobe reports it, otherwise from ``duration * fps``; if the
        stream carries neither, OpenCV's frame count is used so the
        caller's length limits are not silently bypassed by a zero.

    Any ffprobe failure (missing binary, timeout, unparsable output,
    non-positive frame rate) falls back to OpenCV metadata.
    """
    import json

    ffprobe = shutil.which("ffprobe") or shutil.which("ffprobe", path="/usr/bin:/usr/local/bin")
    if not ffprobe:
        return _read_video_info_opencv(video_path)

    try:
        r = subprocess.run(
            [ffprobe, "-v", "quiet", "-print_format", "json",
             "-show_streams", "-select_streams", "v:0", video_path],
            capture_output=True, text=True, timeout=30,
        )
        stream = json.loads(r.stdout)["streams"][0]

        # r_frame_rate is a "num/den" rational string.
        num, den = stream.get("r_frame_rate", "25/1").split("/")
        fps = float(num) / float(den)
        if not fps > 0:
            # A zero/negative/NaN rate would break the duration check and
            # the later "-framerate" argument; use the fallback instead.
            raise ValueError(f"invalid frame rate from ffprobe: {num}/{den}")

        nb = stream.get("nb_frames")
        if nb and nb != "N/A":
            count = int(nb)
        else:
            count = int(float(stream.get("duration", 0)) * fps)

        if count <= 0:
            # Stream-level metadata gave no usable length; ask OpenCV for
            # the frame count but keep ffprobe's frame rate.
            _, count = _read_video_info_opencv(video_path)
        return fps, count
    except Exception:
        # Deliberate best-effort: any probing problem degrades to OpenCV.
        return _read_video_info_opencv(video_path)
def _extract_frames(video_path: str, out_dir: str):
    """Extract every frame of a video to numbered PNG files using ffmpeg.

    Args:
        video_path: Path to the input video.
        out_dir: Existing directory that receives ``frame_%06d.png`` files.

    Raises:
        gr.Error: if ffmpeg is missing or exits with a non-zero status
            (the last 500 characters of stderr are included).
    """
    ffmpeg = _find_ffmpeg()
    out_pattern = os.path.join(out_dir, "frame_%06d.png")
    # Global options such as -y go first, matching _assemble_video; options
    # placed after the output file are not tied to any file.
    # "-vsync 0" passes frames through without duplicating or dropping.
    cmd = [ffmpeg, "-y", "-i", video_path, "-vsync", "0", out_pattern]
    # errors="replace": ffmpeg echoes file metadata to stderr, which is not
    # guaranteed to be valid UTF-8; strict decoding would raise here.
    r = subprocess.run(
        cmd, capture_output=True, text=True, errors="replace", timeout=300,
    )
    if r.returncode != 0:
        raise gr.Error(f"ffmpeg frame extraction failed: {r.stderr[-500:]}")
def _assemble_video(frames_dir: str, output_path: str, fps: float, audio_source: str = None):
    """Reassemble numbered PNG frames into an H.264 MP4 using ffmpeg.

    Args:
        frames_dir: Directory containing ``frame_%06d.png`` files.
        output_path: Destination MP4 path (overwritten if present).
        fps: Frame rate used to interpret the image sequence.
        audio_source: Optional media file whose audio track, if it has
            one, is muxed into the result.

    Raises:
        gr.Error: if ffmpeg is missing or exits with a non-zero status
            (the last 500 characters of stderr are included).
    """
    ffmpeg = _find_ffmpeg()
    in_pattern = os.path.join(frames_dir, "frame_%06d.png")
    cmd = [
        ffmpeg, "-y",
        "-framerate", str(fps),
        "-i", in_pattern,
    ]
    if audio_source:
        # "1:a?" makes the audio mapping optional, so silent sources still
        # work; -shortest stops at the end of the shorter stream.
        cmd += ["-i", audio_source, "-map", "0:v", "-map", "1:a?", "-shortest"]
    cmd += [
        # libx264 with yuv420p rejects odd widths/heights; pad up to the
        # next even size (a no-op for already-even frames).
        "-vf", "pad=ceil(iw/2)*2:ceil(ih/2)*2",
        "-c:v", "libx264",
        "-pix_fmt", "yuv420p",
        "-preset", "fast",
        "-crf", "20",
        "-movflags", "+faststart",
        output_path,
    ]
    # errors="replace": ffmpeg's stderr may contain non-UTF-8 metadata
    # bytes; strict decoding would raise instead of reporting the failure.
    r = subprocess.run(
        cmd, capture_output=True, text=True, errors="replace", timeout=600,
    )
    if r.returncode != 0:
        raise gr.Error(f"ffmpeg assembly failed: {r.stderr[-500:]}")
+# ---------------------------------------------------------------------------
+# Video re-aging
+# ---------------------------------------------------------------------------
def reage_video(video_path: str, source_age: int, target_age: int, progress=gr.Progress()):
    """Re-age faces in every frame of a video.

    The video is split into PNG frames with ffmpeg, each frame is passed
    through ``reage_frame``, and the results are re-encoded to MP4 with
    the original file offered as the audio source.

    Args:
        video_path: Path to the uploaded video.
        source_age: Age passed to the model as the subject's current age.
        target_age: Age passed to the model as the desired age.
        progress: Gradio progress callback.

    Returns:
        ``(output_path, info)``: path of the assembled MP4 and a one-line
        summary (timing, frame counts, ages).

    Raises:
        gr.Error: for a missing upload, a video exceeding
            ``MAX_VIDEO_SECONDS`` / ``MAX_FRAMES``, or any processing failure.
    """
    if video_path is None:
        raise gr.Error("Please upload a video.")

    t0 = time.time()

    fps, total_frames = _get_video_info(video_path)
    duration = total_frames / max(fps, 1)

    if duration > MAX_VIDEO_SECONDS:
        raise gr.Error(
            f"Video is {duration:.1f}s long. Maximum allowed is {MAX_VIDEO_SECONDS}s. "
            f"Please trim your video first."
        )
    if total_frames > MAX_FRAMES:
        raise gr.Error(
            f"Video has {total_frames} frames. Maximum allowed is {MAX_FRAMES}. "
            f"Please use a shorter video."
        )

    tmp_root = tempfile.mkdtemp(prefix="reage_")
    frames_in = os.path.join(tmp_root, "in")
    frames_out = os.path.join(tmp_root, "out")
    succeeded = False

    try:
        os.makedirs(frames_in, exist_ok=True)
        os.makedirs(frames_out, exist_ok=True)

        progress(0, desc="Extracting frames...")
        _extract_frames(video_path, frames_in)

        frame_files = sorted(glob_mod.glob(os.path.join(frames_in, "frame_*.png")))
        n_frames = len(frame_files)

        if n_frames == 0:
            raise gr.Error("No frames extracted from video. Is the file a valid video?")

        # Metadata can under-report; enforce the limit on the real count.
        if n_frames > MAX_FRAMES:
            raise gr.Error(f"Video has {n_frames} frames (max {MAX_FRAMES}). Please use a shorter video.")

        faces_found = 0
        faces_missed = 0

        for idx, fpath in enumerate(frame_files):
            progress((idx + 1) / n_frames, desc=f"Re-aging frame {idx + 1}/{n_frames}...")
            out_path = os.path.join(frames_out, os.path.basename(fpath))

            frame_bgr = cv2.imread(fpath)
            if frame_bgr is None:
                # Keep the numbered sequence contiguous: ffmpeg stops
                # reading at the first missing index, which would cut the
                # output short. Pass the file through untouched instead.
                shutil.copyfile(fpath, out_path)
                faces_missed += 1
                continue

            frame_rgb = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB)

            # reage_frame() detects the face itself and returns the same
            # array object when none is found, so an identity check gives
            # the hit/miss count without running detection twice per frame.
            result_rgb = reage_frame(frame_rgb, source_age, target_age)
            if result_rgb is frame_rgb:
                faces_missed += 1
            else:
                faces_found += 1

            cv2.imwrite(out_path, cv2.cvtColor(result_rgb, cv2.COLOR_RGB2BGR))

        progress(1.0, desc="Assembling video...")
        output_path = os.path.join(tmp_root, "output.mp4")
        _assemble_video(frames_out, output_path, fps, audio_source=video_path)

        elapsed = time.time() - t0
        speed = n_frames / max(elapsed, 0.01)
        info = (
            f"Done in {elapsed:.1f}s | {n_frames} frames at {speed:.1f} fps | "
            f"Faces found: {faces_found}, skipped: {faces_missed} | "
            f"Source age: {source_age} -> Target age: {target_age}"
        )
        succeeded = True
        return output_path, info

    except gr.Error:
        raise
    except Exception as e:
        raise gr.Error(f"Video processing failed: {str(e)}") from e
    finally:
        if succeeded:
            # The MP4 lives in tmp_root and must outlive this call; only
            # the intermediate frame dumps are removed.
            shutil.rmtree(frames_in, ignore_errors=True)
            shutil.rmtree(frames_out, ignore_errors=True)
        else:
            shutil.rmtree(tmp_root, ignore_errors=True)
 # ---------------------------------------------------------------------------
 # Gradio UI
 # ---------------------------------------------------------------------------
def process_image(image, source_age, target_age):
    """Gradio callback for the Image tab.

    Rejects an empty upload, then forwards the image to ``reage_face``
    with both ages cast to ``int``.
    """
    if image is not None:
        src = int(source_age)
        tgt = int(target_age)
        return reage_face(image, src, tgt)
    raise gr.Error("Please upload an image.")
def process_video(video, source_age, target_age, progress=gr.Progress()):
    """Gradio callback for the Video tab.

    Rejects an empty upload, then forwards the video to ``reage_video``
    with both ages cast to ``int`` and the progress tracker passed along.
    """
    if video is not None:
        src = int(source_age)
        tgt = int(target_age)
        return reage_video(video, src, tgt, progress)
    raise gr.Error("Please upload a video.")
+with gr.Blocks(title="Face Re-Aging (CPU)") as demo:
+    gr.Markdown(
+        "# Face Re-Aging (CPU)\n"
+        "Age or de-age faces using Disney FRAN-style model. "
+        "Works on both **images** and **videos**."
     )
+    with gr.Tabs():
+        # ---- Image Tab ----
+        with gr.TabItem("Image"):
+            with gr.Row():
+                with gr.Column():
+                    img_input = gr.Image(type="pil", label="Input Image")
+                    img_src_age = gr.Slider(
+                        minimum=5, maximum=95, value=25, step=1,
+                        label="Source Age (current age)",
+                    )
+                    img_tgt_age = gr.Slider(
+                        minimum=5, maximum=95, value=65, step=1,
+                        label="Target Age (desired age)",
+                    )
+                    img_btn = gr.Button("Re-Age Face", variant="primary")
+                with gr.Column():
+                    img_output = gr.Image(type="pil", label="Re-Aged Result")
+                    img_info = gr.Textbox(label="Info", interactive=False)
+            img_btn.click(
+                fn=process_image,
+                inputs=[img_input, img_src_age, img_tgt_age],
+                outputs=[img_output, img_info],
+            )
+        # ---- Video Tab ----
+        with gr.TabItem("Video"):
+            gr.Markdown(
+                f"Upload a video (max **{MAX_VIDEO_SECONDS}s** / **{MAX_FRAMES} frames**). "
+                f"Each frame is processed individually on CPU, so expect ~0.5-2 fps."
+            )
+            with gr.Row():
+                with gr.Column():
+                    vid_input = gr.Video(label="Input Video")
+                    vid_src_age = gr.Slider(
+                        minimum=5, maximum=95, value=25, step=1,
+                        label="Source Age (current age)",
+                    )
+                    vid_tgt_age = gr.Slider(
+                        minimum=5, maximum=95, value=65, step=1,
+                        label="Target Age (desired age)",
+                    )
+                    vid_btn = gr.Button("Re-Age Video", variant="primary")
+                with gr.Column():
+                    vid_output = gr.Video(label="Re-Aged Video")
+                    vid_info = gr.Textbox(label="Info", interactive=False)
+            vid_btn.click(
+                fn=process_video,
+                inputs=[vid_input, vid_src_age, vid_tgt_age],
+                outputs=[vid_output, vid_info],
+            )
     gr.Markdown(
         "**Model:** `face_reaging.onnx` (118 MB) from "
         "[VisoMaster-Fusion](https://github.com/VisoMasterFusion/VisoMaster-Fusion) | "

packages.txt ADDED Viewed

	@@ -0,0 +1 @@


1	+ ffmpeg