Kikut committed on
Commit
fab3293
·
verified ·
1 Parent(s): 167b17d

Deploy IG test profile studio

Browse files
Files changed (3) hide show
  1. README.md +14 -20
  2. app.py +181 -504
  3. requirements.txt +4 -3
README.md CHANGED
@@ -1,33 +1,27 @@
1
  ---
2
- title: Live Upper-Body Swap (FaceFusion)
3
  colorFrom: blue
4
  colorTo: pink
5
- sdk: docker
6
- app_port: 7860
 
 
7
  pinned: false
8
  ---
9
 
10
- # Live Upper-Body Swap (FaceFusion)
11
 
12
- This Space runs FaceFusion in `webcam` mode for realistic live face swap.
13
 
14
- ## How it works
15
 
16
  1. Open the Space in browser.
17
- 2. In FaceFusion UI, set `Source` image to a clear front-facing portrait.
18
- 3. Start webcam target and run with:
19
- - `processors`: `face_swapper`, `face_enhancer`, `expression_restorer`
20
- - `face masker`: `occlusion + region`
21
- 4. Tune blend until movement and identity look natural.
22
-
23
- ## Why this is more realistic than overlay
24
-
25
- - true face swap model (not static image paste)
26
- - temporal consistency from webcam layout
27
- - enhancer and expression restorer for live motion quality
28
 
29
  ## Notes
30
 
31
- - Best result comes from good lighting and neutral background.
32
- - This is designed for consent-based identity effects only.
33
- - A stronger GPU (for example `a10g-large`) is recommended.
 
1
  ---
2
+ title: Live Upper-Body Swap
3
  colorFrom: blue
4
  colorTo: pink
5
+ sdk: gradio
6
+ sdk_version: 5.44.1
7
+ app_file: app.py
8
+ python_version: "3.10"
9
  pinned: false
10
  ---
11
 
12
+ # Live Upper-Body Swap
13
 
14
+ This Space performs real-time face swap from browser webcam frames.
15
 
16
+ ## Workflow
17
 
18
  1. Open the Space in browser.
19
+ 2. Upload source portrait (or use the default one).
20
+ 3. Start webcam stream.
21
+ 4. Adjust swap strength and max faces.
 
 
 
 
 
 
 
 
22
 
23
  ## Notes
24
 
25
+ - Uses `inswapper_128` through `insightface`.
26
+ - Works with browser webcam input on Hugging Face Spaces.
27
+ - Keep strong front lighting for better realism.
app.py CHANGED
@@ -7,555 +7,233 @@ from typing import Any
7
  import cv2
8
  import gradio as gr
9
  import numpy as np
 
 
 
10
 
11
- try:
12
- import mediapipe as mp # type: ignore
13
- except Exception:
14
- mp = None
15
-
16
-
17
- FACE_CASCADE = cv2.CascadeClassifier(
18
- cv2.data.haarcascades + "haarcascade_frontalface_default.xml"
19
- )
20
- UPPER_BODY_CASCADE = cv2.CascadeClassifier(
21
- cv2.data.haarcascades + "haarcascade_upperbody.xml"
22
- )
23
  APP_DIR = Path(__file__).resolve().parent
24
- DEFAULT_AVATAR_PATH = APP_DIR / "assets" / "default_persona.jpg"
25
- DEFAULT_AVATAR_VALUE = str(DEFAULT_AVATAR_PATH) if DEFAULT_AVATAR_PATH.exists() else None
26
-
27
-
28
- def _init_selfie_segmenter() -> tuple[Any | None, str]:
29
- if mp is None:
30
- return None, "mediapipe import failed"
31
- try:
32
- solutions = getattr(mp, "solutions", None)
33
- if solutions is None:
34
- from mediapipe.python import solutions as mp_solutions # type: ignore
35
 
36
- solutions = mp_solutions
37
- segmenter = solutions.selfie_segmentation.SelfieSegmentation(model_selection=0)
38
- return segmenter, ""
39
- except Exception as exc:
40
- return None, f"{type(exc).__name__}: {exc}"
41
 
 
 
42
 
43
- SELFIE_SEGMENTER, SELFIE_SEGMENTER_ERROR = _init_selfie_segmenter()
44
  LAST_FRAME_TS = 0.0
45
  EMA_FPS = 0.0
46
- AVATAR_CACHE_KEY = ""
47
- AVATAR_CACHE_CROP: np.ndarray | None = None
48
- AVATAR_CACHE_MASK: np.ndarray | None = None
49
- TRACK_BOX: tuple[int, int, int, int] | None = None
50
- TRACK_TEMPLATE: np.ndarray | None = None
51
- TRACK_CONFIDENCE = 0.0
52
- FRAME_INDEX = 0
53
-
54
-
55
- def _prepare_bgr(frame: np.ndarray) -> np.ndarray:
56
- if frame.dtype != np.uint8:
57
- frame = np.clip(frame, 0, 255).astype(np.uint8)
58
- if frame.ndim == 2:
59
- return cv2.cvtColor(frame, cv2.COLOR_GRAY2BGR)
60
- if frame.shape[2] == 4:
61
- return cv2.cvtColor(frame, cv2.COLOR_RGBA2BGR)
62
- return cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
63
-
64
-
65
- def _resize_for_speed(frame_bgr: np.ndarray, max_side: int) -> tuple[np.ndarray, tuple[int, int]]:
66
- height, width = frame_bgr.shape[:2]
67
- if max(height, width) <= max_side:
68
- return frame_bgr, (width, height)
69
- ratio = max_side / float(max(height, width))
70
- resized = cv2.resize(
71
- frame_bgr,
72
- (int(width * ratio), int(height * ratio)),
73
- interpolation=cv2.INTER_AREA,
74
- )
75
- return resized, (width, height)
76
-
77
-
78
- def _detect_faces(frame_bgr: np.ndarray) -> np.ndarray:
79
- gray = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2GRAY)
80
- return FACE_CASCADE.detectMultiScale(
81
- gray,
82
- scaleFactor=1.12,
83
- minNeighbors=5,
84
- minSize=(60, 60),
85
- )
86
 
87
 
88
- def _detect_upper_body(frame_bgr: np.ndarray) -> np.ndarray:
89
- if UPPER_BODY_CASCADE.empty():
90
- return np.array([])
91
- gray = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2GRAY)
92
- return UPPER_BODY_CASCADE.detectMultiScale(
93
- gray,
94
- scaleFactor=1.08,
95
- minNeighbors=4,
96
- minSize=(80, 80),
97
- )
98
 
99
 
100
- def _clamp_box(
101
- box: tuple[int, int, int, int],
102
- frame_shape: tuple[int, int, int],
103
- ) -> tuple[int, int, int, int]:
104
- x, y, w, h = box
105
- img_h, img_w = frame_shape[:2]
106
- x = max(0, min(x, img_w - 2))
107
- y = max(0, min(y, img_h - 2))
108
- w = max(2, min(w, img_w - x))
109
- h = max(2, min(h, img_h - y))
110
- return x, y, w, h
111
-
112
-
113
- def _box_iou(a: tuple[int, int, int, int], b: tuple[int, int, int, int]) -> float:
114
- ax, ay, aw, ah = a
115
- bx, by, bw, bh = b
116
- ax2, ay2 = ax + aw, ay + ah
117
- bx2, by2 = bx + bw, by + bh
118
- ix1, iy1 = max(ax, bx), max(ay, by)
119
- ix2, iy2 = min(ax2, bx2), min(ay2, by2)
120
- iw, ih = max(0, ix2 - ix1), max(0, iy2 - iy1)
121
- inter = iw * ih
122
- if inter <= 0:
123
- return 0.0
124
- union = aw * ah + bw * bh - inter
125
- return inter / max(union, 1)
126
-
127
-
128
- def _patch_from_box(
129
- frame_bgr: np.ndarray,
130
- box: tuple[int, int, int, int],
131
- ) -> np.ndarray | None:
132
- x, y, w, h = _clamp_box(box, frame_bgr.shape)
133
- patch = frame_bgr[y : y + h, x : x + w]
134
- if patch.size == 0:
135
- return None
136
- return cv2.cvtColor(patch, cv2.COLOR_BGR2GRAY)
137
-
138
-
139
- def _set_tracker(
140
- frame_bgr: np.ndarray,
141
- box: tuple[int, int, int, int],
142
- ) -> None:
143
- global TRACK_BOX, TRACK_TEMPLATE, TRACK_CONFIDENCE
144
- patch = _patch_from_box(frame_bgr, box)
145
- if patch is None or patch.shape[0] < 12 or patch.shape[1] < 12:
146
- return
147
- TRACK_BOX = _clamp_box(box, frame_bgr.shape)
148
- TRACK_TEMPLATE = patch
149
- TRACK_CONFIDENCE = 1.0
150
-
151
-
152
- def _update_tracker(frame_bgr: np.ndarray) -> tuple[tuple[int, int, int, int] | None, float]:
153
- global TRACK_BOX, TRACK_TEMPLATE, TRACK_CONFIDENCE
154
- if TRACK_BOX is None or TRACK_TEMPLATE is None:
155
- return None, 0.0
156
-
157
- x, y, w, h = TRACK_BOX
158
- gray = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2GRAY)
159
- margin = int(max(w, h) * 0.55)
160
- sx0 = max(0, x - margin)
161
- sy0 = max(0, y - margin)
162
- sx1 = min(gray.shape[1], x + w + margin)
163
- sy1 = min(gray.shape[0], y + h + margin)
164
-
165
- search = gray[sy0:sy1, sx0:sx1]
166
- tmpl = TRACK_TEMPLATE
167
- if (
168
- search.shape[0] < tmpl.shape[0]
169
- or search.shape[1] < tmpl.shape[1]
170
- or tmpl.shape[0] < 10
171
- or tmpl.shape[1] < 10
172
- ):
173
- TRACK_CONFIDENCE *= 0.75
174
- return None, 0.0
175
-
176
- match = cv2.matchTemplate(search, tmpl, cv2.TM_CCOEFF_NORMED)
177
- _, max_val, _, max_loc = cv2.minMaxLoc(match)
178
- if max_val < 0.35:
179
- TRACK_CONFIDENCE *= 0.7
180
- return None, float(max_val)
181
-
182
- nx = sx0 + int(max_loc[0])
183
- ny = sy0 + int(max_loc[1])
184
- new_box = _clamp_box((nx, ny, w, h), frame_bgr.shape)
185
- new_patch = _patch_from_box(frame_bgr, new_box)
186
- if new_patch is not None and new_patch.shape == tmpl.shape:
187
- TRACK_TEMPLATE = cv2.addWeighted(tmpl, 0.82, new_patch, 0.18, 0)
188
- TRACK_BOX = new_box
189
- TRACK_CONFIDENCE = 0.8 * TRACK_CONFIDENCE + 0.2 * float(max_val)
190
- return new_box, float(max_val)
191
-
192
-
193
- def _body_box_from_face(face_box: tuple[int, int, int, int]) -> tuple[int, int, int, int]:
194
- x, y, fw, fh = face_box
195
- body_w = int(fw * 2.9)
196
- body_h = int(fh * 5.1)
197
- cx = x + fw // 2
198
- cy = y + int(fh * 2.35)
199
- return cx - body_w // 2, cy - body_h // 2, body_w, body_h
200
-
201
-
202
- def _body_box_from_upper(upper_box: tuple[int, int, int, int]) -> tuple[int, int, int, int]:
203
- x, y, uw, uh = upper_box
204
- body_w = int(uw * 1.35)
205
- body_h = int(uh * 2.45)
206
- cx = x + uw // 2
207
- cy = y + int(uh * 1.15)
208
- return cx - body_w // 2, cy - body_h // 2, body_w, body_h
209
-
210
-
211
- def _detect_body_box(frame_bgr: np.ndarray) -> tuple[tuple[int, int, int, int] | None, str]:
212
- faces = _detect_faces(frame_bgr)
213
- if len(faces) > 0:
214
- x, y, w, h = max(faces, key=lambda item: item[2] * item[3])
215
- return _clamp_box(_body_box_from_face((x, y, w, h)), frame_bgr.shape), "face"
216
-
217
- uppers = _detect_upper_body(frame_bgr)
218
- if len(uppers) > 0:
219
- x, y, w, h = max(uppers, key=lambda item: item[2] * item[3])
220
- return _clamp_box(_body_box_from_upper((x, y, w, h)), frame_bgr.shape), "upper-body"
221
-
222
- return None, ""
223
-
224
-
225
- def _resolve_fallback_box(frame_bgr: np.ndarray) -> tuple[tuple[int, int, int, int], str]:
226
- global FRAME_INDEX
227
- FRAME_INDEX += 1
228
-
229
- should_detect = FRAME_INDEX % 3 == 0 or TRACK_BOX is None
230
- detected_box: tuple[int, int, int, int] | None = None
231
- detected_source = ""
232
- if should_detect:
233
- detected_box, detected_source = _detect_body_box(frame_bgr)
234
-
235
- tracked_box, track_score = _update_tracker(frame_bgr)
236
-
237
- if detected_box is not None and tracked_box is not None:
238
- if _box_iou(detected_box, tracked_box) >= 0.1:
239
- ax, ay, aw, ah = detected_box
240
- bx, by, bw, bh = tracked_box
241
- blended = (
242
- int(0.6 * ax + 0.4 * bx),
243
- int(0.6 * ay + 0.4 * by),
244
- int(0.6 * aw + 0.4 * bw),
245
- int(0.6 * ah + 0.4 * bh),
246
- )
247
- final_box = _clamp_box(blended, frame_bgr.shape)
248
- else:
249
- final_box = detected_box
250
- _set_tracker(frame_bgr, final_box)
251
- return final_box, f"fallback detect+track ({detected_source})"
252
-
253
- if detected_box is not None:
254
- _set_tracker(frame_bgr, detected_box)
255
- return detected_box, f"fallback detect ({detected_source})"
256
-
257
- if tracked_box is not None:
258
- return tracked_box, f"fallback track ({track_score:.2f})"
259
-
260
- cx, cy, bw, bh = _fallback_body_box(frame_bgr)
261
- x = cx - bw // 2
262
- y = cy - bh // 2
263
- box = _clamp_box((x, y, bw, bh), frame_bgr.shape)
264
- _set_tracker(frame_bgr, box)
265
- return box, "fallback static"
266
-
267
-
268
- def _segment_person_mask(frame_bgr: np.ndarray, threshold: float) -> tuple[np.ndarray | None, np.ndarray | None]:
269
- if SELFIE_SEGMENTER is None:
270
- return None, None
271
- rgb = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB)
272
- result = SELFIE_SEGMENTER.process(rgb)
273
- if result.segmentation_mask is None:
274
- return None, None
275
- raw_mask = np.clip(result.segmentation_mask.astype(np.float32), 0.0, 1.0)
276
- binary = raw_mask > threshold
277
- return raw_mask, binary
278
-
279
-
280
- def _avatar_key(avatar_bgr: np.ndarray) -> str:
281
- thumb = cv2.resize(avatar_bgr, (32, 32), interpolation=cv2.INTER_AREA)
282
  return (
283
- f"{avatar_bgr.shape[0]}x{avatar_bgr.shape[1]}"
284
  f"-{int(thumb.mean())}"
285
  f"-{int(thumb[::4, ::4].sum())}"
286
  )
287
 
288
 
289
- def _extract_avatar_person(avatar_bgr: np.ndarray) -> tuple[np.ndarray, np.ndarray, str]:
290
- raw_mask, binary = _segment_person_mask(avatar_bgr, threshold=0.22)
291
- if raw_mask is None or binary is None or int(binary.sum()) < 2000:
292
- # Heuristic fallback: separate subject from corner background color.
293
- h, w = avatar_bgr.shape[:2]
294
- patch = max(10, min(h, w) // 18)
295
- corners = [
296
- avatar_bgr[:patch, :patch],
297
- avatar_bgr[:patch, -patch:],
298
- avatar_bgr[-patch:, :patch],
299
- avatar_bgr[-patch:, -patch:],
300
- ]
301
- bg_color = np.mean(np.concatenate([c.reshape(-1, 3) for c in corners], axis=0), axis=0)
302
- dist = np.linalg.norm(avatar_bgr.astype(np.float32) - bg_color[None, None, :], axis=2)
303
- heuristic = (dist > 26.0).astype(np.float32)
304
- if float(heuristic.mean()) < 0.03:
305
- full_mask = np.ones(avatar_bgr.shape[:2], dtype=np.float32)
306
- return avatar_bgr, full_mask, "Avatar segmentation fallback."
307
- heuristic = cv2.GaussianBlur(heuristic, (0, 0), 2.8)
308
- heuristic = np.clip(heuristic, 0.0, 1.0)
309
- return avatar_bgr, heuristic, "Avatar segmentation fallback."
310
-
311
- ys, xs = np.where(binary)
312
- x0, x1 = int(xs.min()), int(xs.max())
313
- y0, y1 = int(ys.min()), int(ys.max())
314
-
315
- crop = avatar_bgr[y0 : y1 + 1, x0 : x1 + 1]
316
- crop_mask = raw_mask[y0 : y1 + 1, x0 : x1 + 1]
317
- crop_mask = cv2.GaussianBlur(crop_mask, (0, 0), 1.8)
318
- crop_mask = np.clip(crop_mask, 0.0, 1.0)
319
- return crop, crop_mask, ""
320
-
321
-
322
- def _get_avatar_assets(avatar_image: np.ndarray | None) -> tuple[np.ndarray | None, np.ndarray | None, str]:
323
- global AVATAR_CACHE_KEY, AVATAR_CACHE_CROP, AVATAR_CACHE_MASK
324
-
325
- if avatar_image is None:
326
- return None, None, "Upload persona image."
327
- avatar_bgr = _prepare_bgr(avatar_image)
328
- key = _avatar_key(avatar_bgr)
329
-
330
- if (
331
- key == AVATAR_CACHE_KEY
332
- and AVATAR_CACHE_CROP is not None
333
- and AVATAR_CACHE_MASK is not None
334
- ):
335
- return AVATAR_CACHE_CROP, AVATAR_CACHE_MASK, ""
336
-
337
- crop, mask, msg = _extract_avatar_person(avatar_bgr)
338
- AVATAR_CACHE_KEY = key
339
- AVATAR_CACHE_CROP = crop
340
- AVATAR_CACHE_MASK = mask
341
- return crop, mask, msg
342
-
343
-
344
- def _place_with_alpha(
345
- canvas_shape: tuple[int, int, int],
346
- src_img: np.ndarray,
347
- src_alpha: np.ndarray,
348
- center_x: int,
349
- center_y: int,
350
- target_w: int,
351
- target_h: int,
352
- ) -> tuple[np.ndarray, np.ndarray]:
353
- canvas_img = np.zeros(canvas_shape, dtype=np.uint8)
354
- canvas_alpha = np.zeros(canvas_shape[:2], dtype=np.float32)
355
-
356
- if target_w < 4 or target_h < 4:
357
- return canvas_img, canvas_alpha
358
-
359
- resized_img = cv2.resize(src_img, (target_w, target_h), interpolation=cv2.INTER_LINEAR)
360
- resized_alpha = cv2.resize(src_alpha, (target_w, target_h), interpolation=cv2.INTER_LINEAR)
361
-
362
- x0 = center_x - target_w // 2
363
- y0 = center_y - target_h // 2
364
- x1 = x0 + target_w
365
- y1 = y0 + target_h
366
-
367
- dst_x0 = max(0, x0)
368
- dst_y0 = max(0, y0)
369
- dst_x1 = min(canvas_shape[1], x1)
370
- dst_y1 = min(canvas_shape[0], y1)
371
- if dst_x0 >= dst_x1 or dst_y0 >= dst_y1:
372
- return canvas_img, canvas_alpha
373
-
374
- src_x0 = dst_x0 - x0
375
- src_y0 = dst_y0 - y0
376
- src_x1 = src_x0 + (dst_x1 - dst_x0)
377
- src_y1 = src_y0 + (dst_y1 - dst_y0)
378
-
379
- canvas_img[dst_y0:dst_y1, dst_x0:dst_x1] = resized_img[src_y0:src_y1, src_x0:src_x1]
380
- canvas_alpha[dst_y0:dst_y1, dst_x0:dst_x1] = resized_alpha[src_y0:src_y1, src_x0:src_x1]
381
- return canvas_img, np.clip(canvas_alpha, 0.0, 1.0)
382
-
383
-
384
- def _fallback_body_box(frame_bgr: np.ndarray) -> tuple[int, int, int, int]:
385
- h, w = frame_bgr.shape[:2]
386
- faces = _detect_faces(frame_bgr)
387
- if len(faces) > 0:
388
- x, y, fw, fh = max(faces, key=lambda item: item[2] * item[3])
389
- body_w = int(fw * 2.8)
390
- body_h = int(fh * 5.0)
391
- center_x = x + fw // 2
392
- center_y = y + int(fh * 2.3)
393
- return center_x, center_y, body_w, body_h
394
-
395
- return w // 2, int(h * 0.54), int(w * 0.56), int(h * 0.86)
396
-
397
-
398
- def _full_body_replace(
399
- frame_bgr: np.ndarray,
400
- avatar_image: np.ndarray | None,
401
- replace_strength: float,
402
- mask_threshold: float,
403
- edge_softness: float,
404
- ) -> tuple[np.ndarray, str]:
405
- avatar_crop, avatar_mask, avatar_msg = _get_avatar_assets(avatar_image)
406
- if avatar_crop is None or avatar_mask is None:
407
- return frame_bgr, "Upload persona image to start replacement."
408
-
409
- raw_mask, binary = _segment_person_mask(frame_bgr, threshold=mask_threshold)
410
- use_fallback_box = raw_mask is None or binary is None or int(binary.sum()) < 2500
411
-
412
- if use_fallback_box:
413
- box, source = _resolve_fallback_box(frame_bgr)
414
- bx, by, person_w, person_h = box
415
- center_x = bx + person_w // 2
416
- center_y = by + person_h // 2
417
- status = f"Tracking fallback active ({source})."
418
- else:
419
- ys, xs = np.where(binary)
420
- x0, x1 = int(xs.min()), int(xs.max())
421
- y0, y1 = int(ys.min()), int(ys.max())
422
- person_w = x1 - x0 + 1
423
- person_h = y1 - y0 + 1
424
- center_x = x0 + person_w // 2
425
- center_y = y0 + int(person_h * 0.52)
426
- status = "Full body replace active."
427
-
428
- avatar_h, avatar_w = avatar_crop.shape[:2]
429
- scale = 1.08 + 0.34 * replace_strength
430
- target_h = int(person_h * scale)
431
- target_w = int(target_h * (avatar_w / max(1, avatar_h)))
432
- target_w = max(target_w, int(person_w * 0.98))
433
-
434
- avatar_layer, avatar_alpha = _place_with_alpha(
435
- canvas_shape=frame_bgr.shape,
436
- src_img=avatar_crop,
437
- src_alpha=avatar_mask,
438
- center_x=center_x,
439
- center_y=center_y,
440
- target_w=target_w,
441
- target_h=target_h,
442
- )
443
 
444
- if use_fallback_box:
445
- fallback_mask = np.zeros(frame_bgr.shape[:2], dtype=np.float32)
446
- axes = (max(24, int(person_w * 0.58)), max(24, int(person_h * 0.62)))
447
- cv2.ellipse(fallback_mask, (center_x, center_y), axes, 0, 0, 360, 1.0, -1)
448
- target_alpha = cv2.GaussianBlur(fallback_mask, (0, 0), 12.0)
449
- # Make replacement visually obvious in fallback mode.
450
- avatar_alpha = np.clip(0.35 + 0.65 * avatar_alpha, 0.0, 1.0)
451
- else:
452
- target_alpha = cv2.GaussianBlur(raw_mask, (0, 0), 1.5 + 18.0 * edge_softness)
453
-
454
- alpha = np.clip(target_alpha * avatar_alpha * replace_strength, 0.0, 1.0)
455
- mixed = (
456
- avatar_layer.astype(np.float32) * alpha[:, :, None]
457
- + frame_bgr.astype(np.float32) * (1.0 - alpha[:, :, None])
458
- )
459
 
460
- notes: list[str] = [status]
461
- if avatar_msg:
462
- notes.append(avatar_msg)
463
- if SELFIE_SEGMENTER is None:
464
- notes.append("Live segmentation unavailable on server.")
465
- return np.clip(mixed, 0, 255).astype(np.uint8), " ".join(notes)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
466
 
467
 
468
  def transform_live(
469
  frame: np.ndarray,
470
- avatar_image: np.ndarray | None,
471
  mirror: bool,
472
- replace_strength: float,
473
- mask_threshold: float,
474
- edge_softness: float,
475
  ) -> tuple[np.ndarray, str]:
476
  global LAST_FRAME_TS, EMA_FPS
477
 
478
  if frame is None:
479
  return frame, "Waiting for webcam frame."
480
 
481
- started = time.perf_counter()
482
- original_bgr = _prepare_bgr(frame)
483
- resized_bgr, original_size = _resize_for_speed(original_bgr, max_side=640)
484
-
485
- transformed, status_text = _full_body_replace(
486
- resized_bgr,
487
- avatar_image=avatar_image,
488
- replace_strength=replace_strength,
489
- mask_threshold=mask_threshold,
490
- edge_softness=edge_softness,
491
- )
492
 
493
- if mirror:
494
- transformed = cv2.flip(transformed, 1)
 
495
 
496
- if transformed.shape[1] != original_size[0] or transformed.shape[0] != original_size[1]:
497
- transformed = cv2.resize(
498
- transformed,
499
- original_size,
500
- interpolation=cv2.INTER_LINEAR,
 
 
 
 
 
 
 
501
  )
502
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
503
  now = time.perf_counter()
504
- frame_latency_ms = (now - started) * 1000.0
505
- proc_fps = 1000.0 / max(frame_latency_ms, 1e-4)
506
  if LAST_FRAME_TS > 0.0:
507
- instant_fps = 1.0 / max(now - LAST_FRAME_TS, 1e-4)
508
- EMA_FPS = instant_fps if EMA_FPS <= 0.0 else (0.2 * instant_fps + 0.8 * EMA_FPS)
509
  LAST_FRAME_TS = now
510
 
511
- status_text = (
512
- f"{status_text} | proc {proc_fps:.1f} fps ({frame_latency_ms:.0f} ms) "
513
- f"| stream ~{EMA_FPS:.1f} fps"
514
  )
515
- return cv2.cvtColor(transformed, cv2.COLOR_BGR2RGB), status_text
516
 
517
 
518
- with gr.Blocks(title="Live Full Body Replace") as demo:
519
  gr.Markdown(
520
  """
521
- # Live Full Body Replace
522
- Only one workflow is enabled:
523
- 1. Upload persona image.
524
- 2. Start webcam stream.
525
- 3. Output panel shows live replacement.
526
  """
527
  )
528
 
529
  with gr.Row():
530
- replace_strength = gr.Slider(
531
- label="Replace strength",
532
- minimum=0.35,
533
  maximum=1.0,
534
- value=0.92,
535
  step=0.01,
536
  )
537
- mask_threshold = gr.Slider(
538
- label="Body mask threshold",
539
- minimum=0.1,
540
- maximum=0.8,
541
- value=0.28,
542
- step=0.02,
543
  )
544
- edge_softness = gr.Slider(
545
- label="Edge softness",
546
- minimum=0.0,
547
- maximum=1.0,
548
- value=0.35,
549
- step=0.02,
550
  )
551
  mirror = gr.Checkbox(label="Mirror output", value=True)
552
 
553
- avatar_upload = gr.Image(
554
- label="Persona image (preloaded; replace if you want another look)",
555
  type="numpy",
556
- value=DEFAULT_AVATAR_VALUE,
557
  )
558
 
 
 
 
559
  with gr.Row():
560
  live_input = gr.Image(
561
  label="Webcam input",
@@ -564,25 +242,24 @@ Only one workflow is enabled:
564
  streaming=True,
565
  )
566
  live_output = gr.Image(
567
- label="Live replaced output",
568
  type="numpy",
569
  )
570
 
571
- live_status = gr.Markdown("Waiting for webcam + persona image.")
 
 
 
 
 
 
572
 
573
  live_input.stream(
574
  fn=transform_live,
575
- inputs=[
576
- live_input,
577
- avatar_upload,
578
- mirror,
579
- replace_strength,
580
- mask_threshold,
581
- edge_softness,
582
- ],
583
- outputs=[live_output, live_status],
584
  time_limit=None,
585
- stream_every=0.05,
586
  concurrency_limit=1,
587
  queue=False,
588
  show_progress="hidden",
 
7
  import cv2
8
  import gradio as gr
9
  import numpy as np
10
+ import onnxruntime as ort
11
+ from insightface.app import FaceAnalysis
12
+ from insightface.model_zoo import get_model
13
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  APP_DIR = Path(__file__).resolve().parent
15
+ DEFAULT_SOURCE_PATH = APP_DIR / "assets" / "default_persona.jpg"
16
+ DEFAULT_SOURCE_VALUE = str(DEFAULT_SOURCE_PATH) if DEFAULT_SOURCE_PATH.exists() else None
 
 
 
 
 
 
 
 
 
17
 
18
+ FACE_ANALYSER: FaceAnalysis | None = None
19
+ FACE_SWAPPER: Any | None = None
20
+ MODEL_PROVIDERS: list[str] = []
21
+ MODEL_ERROR = ""
 
22
 
23
+ SOURCE_FACE: Any | None = None
24
+ SOURCE_FACE_KEY = ""
25
 
 
26
  LAST_FRAME_TS = 0.0
27
  EMA_FPS = 0.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
 
29
 
30
def _to_bgr(image: np.ndarray) -> np.ndarray:
    """Normalize an incoming frame (gray/RGB/RGBA, any numeric dtype) to uint8 BGR."""
    # OpenCV conversions expect 8-bit data, so clamp and cast first.
    frame = image if image.dtype == np.uint8 else np.clip(image, 0, 255).astype(np.uint8)
    if frame.ndim == 2:
        conversion = cv2.COLOR_GRAY2BGR
    elif frame.shape[2] == 4:
        conversion = cv2.COLOR_RGBA2BGR
    else:
        conversion = cv2.COLOR_RGB2BGR
    return cv2.cvtColor(frame, conversion)
 
 
38
 
39
 
40
def _image_key(image_bgr: np.ndarray) -> str:
    """Cheap fingerprint of an image, used to detect when the cached source changed."""
    # A 32x32 thumbnail keeps the key insensitive to minor noise while staying fast.
    thumb = cv2.resize(image_bgr, (32, 32), interpolation=cv2.INTER_AREA)
    height, width = image_bgr.shape[:2]
    mean_part = int(thumb.mean())
    sum_part = int(thumb[::4, ::4].sum())
    return f"{height}x{width}-{mean_part}-{sum_part}"
47
 
48
 
49
+ def _face_area(face: Any) -> float:
50
+ x0, y0, x1, y1 = face.bbox
51
+ return max(1.0, float(x1 - x0) * float(y1 - y0))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
 
54
def _ensure_models() -> tuple[bool, str]:
    """Lazily initialize the face analyser and the inswapper model.

    Returns (ok, message). A failed init is latched in MODEL_ERROR so every
    subsequent call fails fast with the same message instead of retrying the
    (slow) download/initialization.
    """
    global FACE_ANALYSER, FACE_SWAPPER, MODEL_PROVIDERS, MODEL_ERROR
    if FACE_ANALYSER is not None and FACE_SWAPPER is not None:
        return True, ""
    if MODEL_ERROR:
        return False, MODEL_ERROR

    try:
        # Prefer CUDA when onnxruntime reports it; fall back to CPU-only.
        use_cuda = "CUDAExecutionProvider" in ort.get_available_providers()
        if use_cuda:
            providers = ["CUDAExecutionProvider", "CPUExecutionProvider"]
        else:
            providers = ["CPUExecutionProvider"]

        analyser = FaceAnalysis(name="buffalo_l", providers=providers)
        analyser.prepare(ctx_id=0 if use_cuda else -1, det_size=(640, 640))
        swapper = get_model("inswapper_128.onnx", download=True, download_zip=True, providers=providers)
    except Exception as exc:
        MODEL_ERROR = f"Model init failed: {type(exc).__name__}: {exc}"
        return False, MODEL_ERROR

    # Publish globals only after both models initialized successfully.
    FACE_ANALYSER = analyser
    FACE_SWAPPER = swapper
    MODEL_PROVIDERS = providers
    return True, ""
78
+
79
+
80
def _ensure_source_face(source_image: np.ndarray | None) -> tuple[bool, str]:
    """Detect and cache the identity face from the uploaded source portrait.

    Returns (ok, message). The detected face is cached keyed on an image
    fingerprint, so repeated calls with the same portrait skip detection.
    """
    global SOURCE_FACE, SOURCE_FACE_KEY
    if source_image is None:
        return False, "Upload source portrait first."

    ok, msg = _ensure_models()
    if not ok:
        return False, msg

    source_bgr = _to_bgr(source_image)
    key = _image_key(source_bgr)
    # Reuse the cached face when the portrait has not changed.
    if SOURCE_FACE is not None and key == SOURCE_FACE_KEY:
        return True, ""

    assert FACE_ANALYSER is not None
    faces = FACE_ANALYSER.get(source_bgr)
    if faces:
        SOURCE_FACE = max(faces, key=_face_area)
        SOURCE_FACE_KEY = key
        return True, ""

    SOURCE_FACE = None
    SOURCE_FACE_KEY = ""
    return False, "No face found in source image. Use clear front-facing portrait."
104
+
105
+
106
def warmup(source_image: np.ndarray | None) -> str:
    """Eagerly initialize models and cache the source face; return a UI status line."""
    ok, msg = _ensure_models()
    if ok:
        # Source-face extraction re-checks models internally; order preserves
        # the original error-message priority (model error before portrait error).
        ok, msg = _ensure_source_face(source_image)
    if not ok:
        return msg
    return f"Model ready. Providers: {', '.join(MODEL_PROVIDERS)}"
114
 
115
 
116
def transform_live(
    frame: np.ndarray,
    source_image: np.ndarray | None,
    mirror: bool,
    swap_strength: float,
    max_faces: int,
    process_side: int,
) -> tuple[np.ndarray, str]:
    """Swap the cached source face onto the faces found in one webcam frame.

    Args:
        frame: Webcam frame from Gradio (RGB/RGBA/gray, any numeric dtype).
        source_image: Source portrait; the identity face is cached across frames.
        mirror: Horizontally flip the output (selfie view).
        swap_strength: Blend factor in [0, 1] between swapped and original frame.
        max_faces: Swap at most this many of the largest detected faces.
        process_side: Maximum length of the longer image side during processing.

    Returns:
        Tuple of (RGB output frame, human-readable status string).
    """
    global LAST_FRAME_TS, EMA_FPS

    if frame is None:
        return frame, "Waiting for webcam frame."

    ok, msg = _ensure_models()
    if not ok:
        return frame, msg
    ok, msg = _ensure_source_face(source_image)
    if not ok:
        return frame, msg

    assert FACE_ANALYSER is not None
    assert FACE_SWAPPER is not None
    assert SOURCE_FACE is not None

    # Gradio sliders can deliver floats even with step=1; slicing and size math
    # below require ints (list[:float] raises TypeError).
    max_faces = max(1, int(max_faces))
    process_side = int(process_side)

    started = time.perf_counter()
    frame_bgr = _to_bgr(frame)
    original_h, original_w = frame_bgr.shape[:2]

    # Downscale for detection/swap speed; the result is upscaled afterwards.
    resized = frame_bgr
    ratio = 1.0
    if max(original_h, original_w) > process_side:
        ratio = process_side / float(max(original_h, original_w))
        resized = cv2.resize(
            frame_bgr,
            (int(original_w * ratio), int(original_h * ratio)),
            interpolation=cv2.INTER_AREA,
        )

    target_faces = FACE_ANALYSER.get(resized)
    swapped = resized.copy()
    applied = 0

    if target_faces:
        # Largest faces first so the main subject is always among those swapped.
        ordered_faces = sorted(target_faces, key=_face_area, reverse=True)[:max_faces]
        for target_face in ordered_faces:
            swapped = FACE_SWAPPER.get(swapped, target_face, SOURCE_FACE, paste_back=True)
            applied += 1

    if swap_strength < 0.999:
        # Blend swapped result back toward the original frame.
        swapped = cv2.addWeighted(
            swapped.astype(np.float32),
            float(swap_strength),
            resized.astype(np.float32),
            float(1.0 - swap_strength),
            0.0,
        ).astype(np.uint8)

    if ratio != 1.0:
        swapped = cv2.resize(swapped, (original_w, original_h), interpolation=cv2.INTER_LINEAR)

    if mirror:
        swapped = cv2.flip(swapped, 1)

    now = time.perf_counter()
    frame_ms = (now - started) * 1000.0
    proc_fps = 1000.0 / max(frame_ms, 1e-4)
    if LAST_FRAME_TS > 0.0:
        inst_fps = 1.0 / max(now - LAST_FRAME_TS, 1e-4)
        # Exponential moving average smooths the displayed stream rate.
        EMA_FPS = inst_fps if EMA_FPS <= 0.0 else (0.2 * inst_fps + 0.8 * EMA_FPS)
    LAST_FRAME_TS = now

    status = (
        f"Swapped faces: {applied} | providers: {', '.join(MODEL_PROVIDERS)} "
        f"| proc {proc_fps:.1f} fps ({frame_ms:.0f} ms) | stream ~{EMA_FPS:.1f} fps"
    )
    return cv2.cvtColor(swapped, cv2.COLOR_BGR2RGB), status
 
193
 
194
+ with gr.Blocks(title="Live Upper-Body Swap") as demo:
195
  gr.Markdown(
196
  """
197
+ # Live Upper-Body Swap
198
+ 1. Upload source portrait (or keep preloaded default).
199
+ 2. Click **Warm up model** once.
200
+ 3. Start webcam stream.
 
201
  """
202
  )
203
 
204
  with gr.Row():
205
+ swap_strength = gr.Slider(
206
+ label="Swap strength",
207
+ minimum=0.45,
208
  maximum=1.0,
209
+ value=0.95,
210
  step=0.01,
211
  )
212
+ max_faces = gr.Slider(
213
+ label="Max faces in frame",
214
+ minimum=1,
215
+ maximum=4,
216
+ value=1,
217
+ step=1,
218
  )
219
+ process_side = gr.Slider(
220
+ label="Process size (speed/quality)",
221
+ minimum=384,
222
+ maximum=960,
223
+ value=640,
224
+ step=32,
225
  )
226
  mirror = gr.Checkbox(label="Mirror output", value=True)
227
 
228
+ source_image = gr.Image(
229
+ label="Source portrait",
230
  type="numpy",
231
+ value=DEFAULT_SOURCE_VALUE,
232
  )
233
 
234
+ warmup_button = gr.Button("Warm up model", variant="primary")
235
+ status = gr.Markdown("Idle.")
236
+
237
  with gr.Row():
238
  live_input = gr.Image(
239
  label="Webcam input",
 
242
  streaming=True,
243
  )
244
  live_output = gr.Image(
245
+ label="Live swapped output",
246
  type="numpy",
247
  )
248
 
249
+ warmup_button.click(
250
+ fn=warmup,
251
+ inputs=[source_image],
252
+ outputs=[status],
253
+ queue=False,
254
+ show_progress="hidden",
255
+ )
256
 
257
  live_input.stream(
258
  fn=transform_live,
259
+ inputs=[live_input, source_image, mirror, swap_strength, max_faces, process_side],
260
+ outputs=[live_output, status],
 
 
 
 
 
 
 
261
  time_limit=None,
262
+ stream_every=0.06,
263
  concurrency_limit=1,
264
  queue=False,
265
  show_progress="hidden",
requirements.txt CHANGED
@@ -1,4 +1,5 @@
1
- gradio>=5.0.0
2
- numpy>=1.26.4
3
  opencv-python-headless>=4.10.0.84
4
- mediapipe>=0.10.14
 
 
1
+ gradio==5.44.1
2
+ numpy>=1.26.4,<2.3
3
  opencv-python-headless>=4.10.0.84
4
+ onnxruntime-gpu>=1.17.0
5
+ insightface==0.7.3