# FaceSwap / app.py
# Hugging Face file-viewer header: uploaded by bb676 ("Upload 2 files"), commit 9ac2526 (verified).
import os
import uuid
import time
from dataclasses import dataclass
from typing import Any, Dict, List, Optional, Tuple
import gradio as gr
import numpy as np
from PIL import Image, ImageDraw, ImageFont
# Display name used for the page title and the hero banner.
APP_NAME = "FaceSwap AI"
# Remote Gradio Space used as the cloud backend; override with REMOTE_SPACE_ID.
DEFAULT_REMOTE_SPACE_ID = os.getenv("REMOTE_SPACE_ID", "felixrosberg/face-swap")
# Access token read from HF_TOKEN, falling back to HUGGINGFACEHUB_API_TOKEN (None when neither is set).
HF_TOKEN = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACEHUB_API_TOKEN")
# Working directories, all located next to this file.
OUTPUT_DIR = os.path.join(os.path.dirname(__file__), "outputs")
EXAMPLES_DIR = os.path.join(os.path.dirname(__file__), "assets", "examples")
MODELS_DIR = os.path.join(os.path.dirname(__file__), "models")
@dataclass
class SwapResult:
    """Record describing the outcome of a swap.

    NOTE(review): this class is not referenced anywhere else in the visible
    source; the field meanings below are inferred from their names - confirm
    before relying on them.
    """

    # Presumably the path of the generated image on disk.
    output_path: str
    # Presumably a public URL for the generated image.
    share_url: str
    # Optional error message; defaults to None.
    error: Optional[str] = None
def _ensure_dirs() -> None:
    """Create the outputs, examples and models directories when they are missing."""
    for directory in (OUTPUT_DIR, EXAMPLES_DIR, MODELS_DIR):
        os.makedirs(directory, exist_ok=True)
def _pil_from_any(img: Any) -> Image.Image:
    """Convert a supported image value into an RGB PIL image.

    Accepted inputs:
      - a PIL image (converted to RGB),
      - a NumPy array that is 2-D (grayscale) or 3-D (only the first three
        channels are kept; a single-channel ``(H, W, 1)`` array is treated
        as grayscale),
      - a string path to an existing image file.

    Raises:
        ValueError: if ``img`` is None or is not one of the supported forms.
    """
    if img is None:
        raise ValueError("No image provided.")
    if isinstance(img, Image.Image):
        return img.convert("RGB")
    if isinstance(img, np.ndarray):
        # (H, W, 1) cannot be handed to Image.fromarray directly; drop the
        # trailing axis so it follows the grayscale path.
        if img.ndim == 3 and img.shape[2] == 1:
            img = img[:, :, 0]
        if img.ndim == 2:
            return Image.fromarray(img).convert("RGB")
        if img.ndim == 3:
            return Image.fromarray(img[:, :, :3]).convert("RGB")
    if isinstance(img, str) and os.path.exists(img):
        # convert() returns an independent image, so the file handle can be
        # released as soon as the conversion is done.
        with Image.open(img) as opened:
            return opened.convert("RGB")
    raise ValueError("Unsupported image format.")
def _save_temp_upload(img: Image.Image, prefix: str) -> str:
    """Write ``img`` as a uniquely named PNG in the output directory and return its path."""
    _ensure_dirs()
    filename = f"{prefix}_{uuid.uuid4().hex}.png"
    destination = os.path.join(OUTPUT_DIR, filename)
    img.save(destination, format="PNG")
    return destination
def _detect_faces_haar(pil_img: Image.Image) -> int:
    """
    Count frontal faces with OpenCV's Haar cascade.

    Only used to produce a friendly "No face detected" message; the swap
    itself does not depend on it. Returns 1 when the check cannot run so
    that callers proceed as if a face were present.
    """
    try:
        import cv2  # lazy import

        cascade_file = os.path.join(cv2.data.haarcascades, "haarcascade_frontalface_default.xml")
        detector = cv2.CascadeClassifier(cascade_file)
        bgr = cv2.cvtColor(np.array(pil_img), cv2.COLOR_RGB2BGR)
        grayscale = cv2.cvtColor(bgr, cv2.COLOR_BGR2GRAY)
        detections = detector.detectMultiScale(
            grayscale, scaleFactor=1.1, minNeighbors=5, minSize=(60, 60)
        )
        return int(len(detections))
    except Exception:
        # If OpenCV isn't available for any reason, skip the pre-check.
        return 1
def _onnx_providers() -> List[str]:
    """
    Pick the ONNXRuntime execution providers to request, best effort.

    CUDA is listed first (with CPU as fallback) when onnxruntime reports it
    as available. Setting `FORCE_CPU` to 1/true/yes/y forces CPU only. Any
    failure while probing onnxruntime also results in CPU only.
    """
    flag = os.getenv("FORCE_CPU", "").strip().lower()
    if flag in {"1", "true", "yes", "y"}:
        return ["CPUExecutionProvider"]
    try:
        import onnxruntime as ort  # type: ignore

        if "CUDAExecutionProvider" in set(ort.get_available_providers()):
            return ["CUDAExecutionProvider", "CPUExecutionProvider"]
    except Exception:
        pass
    return ["CPUExecutionProvider"]
def _ensure_inswapper_onnx() -> str:
    """
    Ensure `inswapper_128.onnx` exists locally and return its path.

    Resolution order:
      1. `INSWAPPER_ONNX_PATH` (absolute, or relative to this file) - must exist.
      2. A previously downloaded copy in the models directory.
      3. A download from the Hugging Face Hub, configurable through
         `INSWAPPER_REPO_ID` and `INSWAPPER_FILENAME`.

    Raises:
        FileNotFoundError: `INSWAPPER_ONNX_PATH` is set but does not exist.
        RuntimeError: the model could not be downloaded or copied; the
            original exception is attached as the cause.
    """
    override = os.getenv("INSWAPPER_ONNX_PATH", "").strip()
    if override:
        p = override
        if not os.path.isabs(p):
            p = os.path.join(os.path.dirname(__file__), p)
        if not os.path.exists(p):
            raise FileNotFoundError(f"INSWAPPER_ONNX_PATH not found: {p}")
        return p

    _ensure_dirs()
    local_path = os.path.join(MODELS_DIR, "inswapper_128.onnx")
    if os.path.exists(local_path):
        return local_path

    # Default to a small community HF repo that hosts the file.
    repo_id = os.getenv("INSWAPPER_REPO_ID", "ezioruan/inswapper_128.onnx").strip()
    filename = os.getenv("INSWAPPER_FILENAME", "inswapper_128.onnx").strip()
    try:
        # Imported lazily so module import stays cheap.
        import shutil

        from huggingface_hub import hf_hub_download  # type: ignore

        downloaded = hf_hub_download(
            repo_id=repo_id,
            filename=filename,
            token=HF_TOKEN,
        )
        # Copy to our local models dir so users can find it easily. The copy
        # goes to a scratch name first and is renamed into place, so an
        # interrupted copy never leaves a truncated file that a later run
        # would mistake for a valid model.
        scratch_path = f"{local_path}.{uuid.uuid4().hex}.part"
        try:
            shutil.copyfile(downloaded, scratch_path)
            os.replace(scratch_path, local_path)
        finally:
            if os.path.exists(scratch_path):
                try:
                    os.remove(scratch_path)
                except OSError:
                    pass
        return local_path
    except Exception as e:
        raise RuntimeError(
            "Could not download inswapper ONNX model.\n"
            f"- Tried repo `{repo_id}` file `{filename}`\n"
            f"- You can also set `INSWAPPER_ONNX_PATH` to a local file.\n"
            f"Error: {e}"
        ) from e
# Process-wide cache for the lazily loaded InsightFace analyzer and swapper;
# both stay None until _load_local_faceswap_models() succeeds.
_IFACE_ANALYZER = None
_IFACE_SWAPPER = None
def _load_local_faceswap_models():
    """
    Return the cached ``(analyzer, swapper)`` pair, loading it on first use.

    The analyzer is InsightFace's `buffalo_l` pack and the swapper is the
    inswapper ONNX model. Raises RuntimeError when `insightface` cannot be
    imported.
    """
    global _IFACE_ANALYZER, _IFACE_SWAPPER

    already_loaded = not (_IFACE_ANALYZER is None or _IFACE_SWAPPER is None)
    if already_loaded:
        return _IFACE_ANALYZER, _IFACE_SWAPPER

    try:
        import insightface  # type: ignore
        from insightface.app import FaceAnalysis  # type: ignore
    except Exception as e:
        raise RuntimeError(
            "Missing dependency for local live swap. Install `insightface`.\n"
            f"Error: {e}"
        )

    providers = _onnx_providers()
    # ctx_id -1 selects CPU; 0 is used whenever the first provider is not the CPU one.
    if providers[0] != "CPUExecutionProvider":
        ctx_id = 0
    else:
        ctx_id = -1

    # buffalo_l includes detection + recognition (needed to build embeddings for swapper).
    face_analyzer = FaceAnalysis(name="buffalo_l", providers=providers)
    face_analyzer.prepare(ctx_id=ctx_id, det_size=(640, 640))

    model_path = _ensure_inswapper_onnx()
    face_swapper = insightface.model_zoo.get_model(model_path, providers=providers)

    _IFACE_ANALYZER = face_analyzer
    _IFACE_SWAPPER = face_swapper
    return face_analyzer, face_swapper
def _largest_face(faces: List[Any]) -> Optional[Any]:
    """Return the face with the largest bounding-box area.

    Each face is expected to expose ``bbox`` as an array of
    ``x1, y1, x2, y2``. Faces whose box cannot be read are ignored. The
    first face wins on ties. Returns None for an empty input or when no
    face has a readable box.
    """
    if not faces:
        return None

    def _bbox_area(face: Any) -> int:
        # -1 marks an unreadable box so it can never be selected.
        try:
            left, top, right, bottom = face.bbox.astype(int).tolist()
            return max(0, right - left) * max(0, bottom - top)
        except Exception:
            return -1

    scored = [(_bbox_area(face), face) for face in faces]
    # max() keeps the first of equally large candidates.
    top_area, top_face = max(scored, key=lambda pair: pair[0])
    return top_face if top_area > -1 else None
def _np_rgb_to_bgr(img: np.ndarray) -> np.ndarray:
    """Return a contiguous uint8 BGR copy of an RGB(A) image array.

    Gradio gives RGB; InsightFace expects BGR. Any channels beyond the
    first three are dropped.

    Non-uint8 inputs are normalised:
      - float images whose maximum is <= 1.5, and boolean images, are
        treated as 0..1 and scaled to 0..255;
      - every other dtype is clipped to 0..255 without rescaling, so an
        integer image that merely happens to be very dark is not blown up
        to full brightness.
    Values are rounded to the nearest integer rather than truncated.

    Raises:
        ValueError: if ``img`` is None or is not an (H, W, >=3) array.
    """
    if img is None:
        raise ValueError("No image provided.")
    if img.ndim != 3 or img.shape[2] < 3:
        raise ValueError("Expected a 3-channel color image.")

    rgb = img[:, :, :3]
    if rgb.dtype != np.uint8:
        if rgb.dtype == np.bool_:
            unit_range = True
        elif np.issubdtype(rgb.dtype, np.floating):
            # Gradio can emit float images (0..1 or 0..255); use the peak
            # value to tell the two conventions apart.
            peak = float(np.max(rgb)) if rgb.size else 255.0
            unit_range = peak <= 1.5
        else:
            unit_range = False

        if unit_range:
            scaled = np.clip(rgb.astype(np.float64), 0.0, 1.0) * 255.0
        else:
            scaled = np.clip(rgb, 0.0, 255.0)
        rgb = np.rint(scaled).astype(np.uint8)

    # Reverse the channel axis and copy so the result owns contiguous memory.
    return rgb[:, :, ::-1].copy()
def _np_bgr_to_rgb(img: np.ndarray) -> np.ndarray:
    """Return an RGB copy of a BGR(A) array.

    Arrays that are not (H, W, >=3) are returned unchanged. Raises
    ValueError when ``img`` is None.
    """
    if img is None:
        raise ValueError("No image provided.")
    is_color = img.ndim == 3 and img.shape[2] >= 3
    if not is_color:
        return img
    # Channels 2, 1, 0 in that order: drops any alpha and reverses B/G/R.
    return img[..., 2::-1].copy()
def _watermark(pil_img: Image.Image, text: str = "FaceSwap AI • demo") -> Image.Image:
    """Return an RGB copy of ``pil_img`` with ``text`` stamped in the bottom-right corner.

    The label is drawn in near-white on a semi-transparent dark rounded
    rectangle; font size and padding scale with the shorter image side.
    """
    base = pil_img.copy().convert("RGBA")
    width, height = base.size
    layer = Image.new("RGBA", base.size, (0, 0, 0, 0))
    pen = ImageDraw.Draw(layer)

    # Try a default font; fall back to PIL bitmap font.
    font_size = max(14, int(min(width, height) * 0.03))
    try:
        font = ImageFont.truetype("DejaVuSans.ttf", font_size)
    except Exception:
        font = ImageFont.load_default()

    margin = max(10, int(font_size * 0.6))
    _, _, text_w, text_h = pen.textbbox((0, 0), text, font=font)
    left = width - text_w - margin
    top = height - text_h - margin

    # Semi-transparent background pill
    pill_pad = max(6, int(font_size * 0.5))
    pill_box = (
        left - pill_pad,
        top - pill_pad,
        left + text_w + pill_pad,
        top + text_h + pill_pad,
    )
    pen.rounded_rectangle(
        pill_box,
        radius=max(6, int(font_size * 0.6)),
        fill=(0, 0, 0, 110),
    )
    pen.text((left, top), text, font=font, fill=(255, 255, 255, 220))

    stamped = Image.alpha_composite(base, layer)
    return stamped.convert("RGB")
def _host_base_url() -> str:
    """Return the public base URL of this app without a trailing slash, or "".

    Lookup order:
      1. `SPACE_HOST`, `HOST`, `GRADIO_SERVER_NAME` - the first one holding
         an ``http://`` / ``https://`` URL wins. `SPACE_HOST` is also
         accepted as a bare hostname (the form Hugging Face Spaces sets,
         e.g. ``user-space.hf.space``) and is then prefixed with ``https://``.
         Bare values of the other two are ignored because they are usually
         bind addresses such as ``0.0.0.0``.
      2. `SPACE_ID` (``owner/name``), mapped to ``https://owner-name.hf.space``.
    """
    for key in ("SPACE_HOST", "HOST", "GRADIO_SERVER_NAME"):
        value = (os.getenv(key) or "").strip()
        if not value:
            continue
        if value.startswith(("http://", "https://")):
            return value.rstrip("/")
        if key == "SPACE_HOST":
            return f"https://{value.rstrip('/')}"

    space_id = os.getenv("SPACE_ID")
    if space_id:
        return f"https://{space_id.replace('/', '-')}.hf.space"
    return ""
def _make_share_url(local_file_path: str) -> str:
    """Build a copy/paste-friendly ``<base>/file=<relative path>`` URL for a local file.

    Returns "" when no public base URL can be determined. The path is made
    relative to this file's directory and uses forward slashes.
    """
    # Gradio will serve returned file paths via its /file=... mechanism.
    base = _host_base_url()
    if base:
        # When returning a file to a component, Gradio rewrites it; but a direct
        # "file=" URL is still useful for HF Spaces in many cases.
        app_root = os.path.dirname(__file__)
        relative = os.path.relpath(local_file_path, app_root).replace("\\", "/")
        return f"{base}/file={relative}"
    return ""
def _call_remote_space(
    source_pil: Image.Image,
    target_pil: Image.Image,
    *,
    defense_ratio: int,
    blend_ratio: int,
    options: List[str],
    remote_space_id: str,
) -> Image.Image:
    """
    Call a remote Gradio Space as the "cloud inference" backend.

    Default backend: felixrosberg/face-swap (FaceDancer).

    Both images are written to temporary PNG files so they can be passed
    via `handle_file`; those files are removed again once the call has
    finished (successfully or not) so uploads do not pile up on disk.

    Returns the backend's output converted to an RGB PIL image. Errors
    raised by the client or by the output conversion propagate to the caller.
    """
    from gradio_client import Client, handle_file  # type: ignore

    client = Client(remote_space_id, token=HF_TOKEN)
    temp_paths: List[str] = []
    try:
        # Save uploads to disk so we can pass them via handle_file
        src_path = _save_temp_upload(source_pil, "source")
        temp_paths.append(src_path)
        trg_path = _save_temp_upload(target_pil, "target")
        temp_paths.append(trg_path)
        # FaceDancer Space signature (from its app.py):
        # run_inference(target, source, defense_ratio, merge_ratio, options)
        out = client.predict(
            handle_file(trg_path),
            handle_file(src_path),
            int(defense_ratio),
            int(blend_ratio),
            options,
            api_name="/run_inference",
        )
        return _pil_from_any(out)
    finally:
        # Best-effort cleanup; a failed delete must not mask the real outcome.
        for path in temp_paths:
            try:
                os.remove(path)
            except OSError:
                pass
def _call_custom_endpoint(
    source_pil: Image.Image,
    target_pil: Image.Image,
    *,
    strength: float,
    steps: int,
    guidance: float,
) -> Image.Image:
    """
    Bring-your-own endpoint mode (optional).

    Sends a multipart POST to $HF_INFERENCE_ENDPOINT_URL with:
    - source: image file (PNG)
    - target: image file (PNG)
    - strength: float
    - steps: int
    - guidance: float
    and expects the response body to be image bytes (PNG/JPEG).

    A Bearer token is attached when HF_ENDPOINT_TOKEN (or the global HF
    token) is available. Raises ValueError when the URL is not configured
    and RuntimeError for HTTP status codes >= 400.
    """
    import requests

    endpoint = os.getenv("HF_INFERENCE_ENDPOINT_URL", "").strip()
    if not endpoint:
        raise ValueError("Custom endpoint URL is not set.")

    uploads = {
        "source": ("source.png", _pil_to_png_bytes(source_pil), "image/png"),
        "target": ("target.png", _pil_to_png_bytes(target_pil), "image/png"),
    }
    form_fields = {
        "strength": str(strength),
        "steps": str(int(steps)),
        "guidance": str(guidance),
    }

    bearer = os.getenv("HF_ENDPOINT_TOKEN") or HF_TOKEN
    auth_headers = {"Authorization": f"Bearer {bearer}"} if bearer else {}

    response = requests.post(
        endpoint, files=uploads, data=form_fields, headers=auth_headers, timeout=180
    )
    if response.status_code >= 400:
        raise RuntimeError(f"Endpoint error {response.status_code}: {response.text[:300]}")

    decoded = Image.open(_bytes_io(response.content))
    return decoded.convert("RGB")
def _bytes_io(b: bytes):
    """Wrap raw bytes in an in-memory binary stream positioned at the start."""
    from io import BytesIO

    return BytesIO(b)
def _pil_to_png_bytes(img: Image.Image) -> bytes:
    """Encode ``img`` as PNG and return the encoded bytes."""
    from io import BytesIO

    with BytesIO() as stream:
        img.save(stream, format="PNG")
        return stream.getvalue()
def _download_example_images() -> List[Tuple[str, str]]:
    """
    Fetch a few small example images on first run.

    Returns a list of (source_path, target_path) pairs. Files already on
    disk are not downloaded again. A pair is left out when either of its
    downloads fails.
    """
    import requests

    _ensure_dirs()

    # Public domain / permissive sample images (Wikimedia).
    # We keep them small-ish to stay friendly for Spaces.
    remote_pairs = [
        (
            "https://upload.wikimedia.org/wikipedia/commons/thumb/3/37/Face_of_a_young_woman.jpg/512px-Face_of_a_young_woman.jpg",
            "https://upload.wikimedia.org/wikipedia/commons/thumb/0/0b/Barack_Obama.jpg/512px-Barack_Obama.jpg",
        ),
        (
            "https://upload.wikimedia.org/wikipedia/commons/thumb/5/50/Vd-Orig.png/512px-Vd-Orig.png",
            "https://upload.wikimedia.org/wikipedia/commons/thumb/8/8d/Portrait_Placeholder.png/512px-Portrait_Placeholder.png",
        ),
    ]

    def _fetch_once(url: str, destination: str) -> None:
        # Skip the network entirely when the file is already cached.
        if not os.path.exists(destination):
            response = requests.get(url, timeout=60)
            response.raise_for_status()
            with open(destination, "wb") as handle:
                handle.write(response.content)

    available: List[Tuple[str, str]] = []
    for index, (source_url, target_url) in enumerate(remote_pairs, start=1):
        source_file = os.path.join(EXAMPLES_DIR, f"source_{index}.jpg")
        target_file = os.path.join(EXAMPLES_DIR, f"target_{index}.jpg")
        try:
            _fetch_once(source_url, source_file)
            _fetch_once(target_url, target_file)
        except Exception:
            # If network is restricted, examples will just be absent.
            continue
        available.append((source_file, target_file))
    return available
def swap_faces(
    source_img: Any,
    target_img: Any,
    consent_ok: bool,
    strength: float,
    steps: int,
    guidance: float,
    backend: str,
    history: List[Dict[str, str]],
) -> Tuple[Any, Any, Any, List[Dict[str, str]], str]:
    """Run one photo swap and return the values for the UI outputs.

    Args:
        source_img: Image providing the face/identity.
        target_img: Image the face is swapped into.
        consent_ok: Must be truthy, otherwise nothing is processed.
        strength: 0..1; mapped to a 0..100 blend ratio for the cloud backend.
        steps: Passed through to the custom endpoint only.
        guidance: Passed through to the custom endpoint only.
        backend: "Cloud (FaceDancer Space)" selects the remote Space; any
            other value selects the custom endpoint.
        history: Previous results for this session, newest first.

    Returns:
        ``(before, after, result_path, history, status)``. On any failure
        the first three items are None, ``history`` is returned unchanged
        and ``status`` carries the message. On success the history gains a
        new first entry and is capped at 12 items.
    """
    if not consent_ok:
        return None, None, None, history, "Please confirm you have consent to swap faces."

    try:
        src = _pil_from_any(source_img)
        trg = _pil_from_any(target_img)
    except Exception as e:
        return None, None, None, history, str(e)

    # Pre-check for better errors
    if _detect_faces_haar(src) < 1:
        return None, None, None, history, "No face detected in Source Face."
    if _detect_faces_haar(trg) < 1:
        return None, None, None, history, "No face detected in Target Photo."

    use_cloud = backend == "Cloud (FaceDancer Space)"
    try:
        # perf_counter is monotonic, so the reported duration cannot be
        # skewed by wall-clock adjustments.
        started = time.perf_counter()
        if use_cloud:
            # Map our UX knobs to the backend's available inputs.
            # - strength -> blend_ratio (0..100)
            # - steps/guidance don't exist here; we keep them for BYO endpoint.
            # Round rather than truncate so float noise (e.g. 0.29 * 100 ==
            # 28.999...) cannot shave a point off the requested ratio.
            blend_ratio = int(round(float(np.clip(strength * 100, 0, 100))))
            out = _call_remote_space(
                src,
                trg,
                defense_ratio=100,
                blend_ratio=blend_ratio,
                options=[],
                remote_space_id=DEFAULT_REMOTE_SPACE_ID,
            )
        else:
            out = _call_custom_endpoint(src, trg, strength=strength, steps=steps, guidance=guidance)

        out = _watermark(out)
        _ensure_dirs()
        out_path = os.path.join(OUTPUT_DIR, f"faceswap_{uuid.uuid4().hex}.png")
        out.save(out_path, format="PNG")
        share = _make_share_url(out_path)
        elapsed = time.perf_counter() - started

        entry = {
            "result": out_path,
            "source": _save_temp_upload(src, "src"),
            "target": _save_temp_upload(trg, "trg"),
        }
        # Newest first, capped at 12 entries; tolerate a missing history.
        history = ([entry] + list(history or []))[:12]

        status = f"Done in {elapsed:.1f}s."
        if share:
            status += f" Share link: {share}"
        return trg, out, out_path, history, status
    except Exception as e:
        msg = str(e)
        # Only rewrite the message for the cloud backend: a 404 from a custom
        # endpoint is the user's own configuration problem and the original
        # error text is the useful one there.
        if use_cloud and ("Could not find Space" in msg or "404" in msg):
            msg = (
                "Cloud backend unavailable. Try again, or configure a custom endpoint. "
                "See README for deployment options."
            )
        return None, None, None, history, msg
# Custom stylesheet passed to gr.Blocks: page width, hero/subtitle text and a
# bordered ".fsai-warn" box whose background follows the colour scheme.
CSS = """
.fsai-wrap { max-width: 1200px; margin: 0 auto; }
.fsai-hero { font-size: 28px; font-weight: 700; margin: 8px 0 4px; }
.fsai-sub { opacity: 0.8; margin-top: 0; }
.fsai-warn { border: 1px solid rgba(255,255,255,0.12); border-radius: 12px; padding: 12px 14px; }
@media (prefers-color-scheme: dark) {
.fsai-warn { background: rgba(255,255,255,0.04); }
}
@media (prefers-color-scheme: light) {
.fsai-warn { background: rgba(0,0,0,0.03); }
}
"""
def build_demo() -> gr.Blocks:
    """Assemble the Gradio UI and return the (not yet launched) Blocks app.

    The app has two tabs:
      - "Photo Swap (Cloud)": uploads two photos and calls `swap_faces`,
        showing before/after, a download file and a per-session gallery.
      - "Live Swap (Local ONNX)": locks a source face and swaps it onto the
        largest face of each streamed webcam frame.

    The live handlers report model-loading and analysis failures through
    their status outputs instead of raising, so a missing optional
    dependency shows up as a message rather than an error on every frame.
    """
    _ensure_dirs()
    examples = _download_example_images()
    theme = gr.themes.Soft(primary_hue="violet", neutral_hue="slate")
    with gr.Blocks(theme=theme, css=CSS, title=APP_NAME) as demo:
        gr.HTML(
            f"""
<div class="fsai-wrap">
<div class="fsai-hero">{APP_NAME}</div>
<p class="fsai-sub">Swap faces in photos (cloud) or live webcam (local ONNX). Use only with consent.</p>
</div>
"""
        )
        with gr.Tabs():
            with gr.Tab("Photo Swap (Cloud)"):
                with gr.Accordion("Consent & Safety (required)", open=True):
                    gr.Markdown(
                        """
**Important:** Only upload photos you own or have explicit permission to edit.
- **Consent**: You confirm you have consent from any person depicted.
- **No misuse**: Do not use for harassment, impersonation, fraud, or sexual content.
- **Watermark**: Outputs are watermarked to discourage misuse.
"""
                    )
                    consent = gr.Checkbox(label="I confirm I have consent and will use this responsibly.")
                with gr.Row():
                    with gr.Column(scale=1):
                        source = gr.Image(label="Source Face", type="pil", height=320)
                    with gr.Column(scale=1):
                        target = gr.Image(label="Target Photo", type="pil", height=320)
                with gr.Row():
                    backend = gr.Radio(
                        choices=["Cloud (FaceDancer Space)", "Custom Endpoint (HF Inference Endpoint / your API)"],
                        value="Cloud (FaceDancer Space)",
                        label="Inference backend",
                    )
                with gr.Accordion("Advanced options", open=False):
                    strength = gr.Slider(
                        0.0,
                        1.0,
                        value=0.8,
                        step=0.05,
                        label="Swap strength",
                        info="Higher = stronger identity transfer. (Cloud backend maps this to blend ratio.)",
                    )
                    steps = gr.Slider(
                        10, 60, value=30, step=1, label="Steps", info="Used by Custom Endpoint backends."
                    )
                    guidance = gr.Slider(
                        1.0,
                        10.0,
                        value=4.5,
                        step=0.5,
                        label="Guidance scale",
                        info="Used by Custom Endpoint backends.",
                    )
                swap_btn = gr.Button("Swap Faces", variant="primary", size="lg")
                status = gr.Markdown(value="", elem_classes=["fsai-wrap"])
                with gr.Row():
                    before = gr.Image(label="Before (Target)", type="pil", height=360)
                    after = gr.Image(label="After (Result)", type="pil", height=360)
                with gr.Row():
                    download = gr.File(label="Download result", file_types=[".png"])
                history_state = gr.State([])  # list[dict] with paths
                gallery = gr.Gallery(label="History (this session)", columns=4, height=260, preview=True)

                def _history_to_gallery(items: List[Dict[str, str]]) -> List[str]:
                    # Only show results whose files still exist on disk.
                    return [it["result"] for it in items if "result" in it and os.path.exists(it["result"])]

                def _swap_and_gallery(*args):
                    # Adapt swap_faces' 5-tuple to the 6 UI outputs by
                    # deriving the gallery list from the updated history.
                    b, a, f, hist, msg = swap_faces(*args)
                    return b, a, f, hist, _history_to_gallery(hist), msg

                swap_btn.click(
                    _swap_and_gallery,
                    inputs=[source, target, consent, strength, steps, guidance, backend, history_state],
                    outputs=[before, after, download, history_state, gallery, status],
                )
                if examples:
                    gr.Examples(
                        examples=examples,
                        inputs=[source, target],
                        label="Examples",
                        examples_per_page=4,
                    )
                with gr.Accordion("Setup notes", open=False):
                    gr.Markdown(
                        f"""
**Default cloud backend:** `{DEFAULT_REMOTE_SPACE_ID}` via Gradio Spaces API.
To use a custom backend, set:
- `HF_INFERENCE_ENDPOINT_URL` (your endpoint URL)
- optional `HF_ENDPOINT_TOKEN` (Bearer token)
See `README.md` for a 5-minute deploy guide.
"""
                    )
            with gr.Tab("Live Swap (Local ONNX)"):
                gr.Markdown(
                    """
Upload a **source face** — it **locks automatically** (with consent checked) so the **webcam**
shows the swap **in real time**. You can use **Re-lock** if you change the photo.
This runs locally using **InsightFace + ONNXRuntime** with `inswapper_128.onnx`.
Tip: For best results, use a clear, front-facing source photo and good lighting.
"""
                )
                live_consent = gr.Checkbox(
                    label="I confirm I have consent and will use this responsibly.",
                    value=False,
                )
                with gr.Row():
                    live_source = gr.Image(label="Source Face (identity to use)", type="numpy", height=260)
                    live_source_status = gr.Markdown(value="")
                source_face_state = gr.State(None)  # cached InsightFace Face object

                def _set_live_source(source_np: Any, consent_ok: bool):
                    # Returns (face_or_None, status_message).
                    if not consent_ok:
                        return None, "Please confirm consent to enable live swap."
                    if source_np is None:
                        return None, "Upload a source face image."
                    try:
                        analyzer, _ = _load_local_faceswap_models()
                        src_bgr = _np_rgb_to_bgr(np.array(source_np))
                        faces = analyzer.get(src_bgr)
                    except Exception as e:
                        # Missing dependency, failed model download or an
                        # unusable image: report it instead of raising.
                        return None, f"Could not analyze source face: {e}"
                    src_face = _largest_face(faces)
                    if src_face is None:
                        return None, "No face detected in source image."
                    return src_face, "Source face locked — webcam shows live swap."

                live_set_btn = gr.Button("Re-lock source face", variant="secondary")
                _live_source_inputs = [live_source, live_consent]
                _live_source_outputs = [source_face_state, live_source_status]
                # Re-lock whenever the photo or the consent box changes, or on demand.
                live_source.change(_set_live_source, inputs=_live_source_inputs, outputs=_live_source_outputs)
                live_consent.change(_set_live_source, inputs=_live_source_inputs, outputs=_live_source_outputs)
                live_set_btn.click(_set_live_source, inputs=_live_source_inputs, outputs=_live_source_outputs)
                with gr.Row():
                    webcam = gr.Image(
                        label="Webcam",
                        sources=["webcam"],
                        streaming=True,
                        type="numpy",
                        height=420,
                    )
                    live_out = gr.Image(label="Live swapped output", type="numpy", height=420)
                live_status = gr.Markdown(value="")

                def _live_swap(frame_np: Any, src_face: Any, consent_ok: bool):
                    # Returns (frame_to_display, status_message) for one webcam frame.
                    if not consent_ok:
                        return frame_np, "Consent not confirmed."
                    if frame_np is None:
                        return None, ""
                    if src_face is None:
                        return frame_np, "Lock a source face first."
                    try:
                        analyzer, swapper = _load_local_faceswap_models()
                        frame_bgr = _np_rgb_to_bgr(np.array(frame_np))
                        faces = analyzer.get(frame_bgr)
                    except Exception as e:
                        # Pass the untouched frame through so the preview
                        # keeps running while the problem is displayed.
                        return frame_np, f"Live swap unavailable: {e}"
                    tgt_face = _largest_face(faces)
                    if tgt_face is None:
                        return _np_bgr_to_rgb(frame_bgr), "No face detected in webcam frame."
                    try:
                        swapped_bgr = swapper.get(frame_bgr, tgt_face, src_face, paste_back=True)
                    except Exception as e:
                        return _np_bgr_to_rgb(frame_bgr), f"Swap error: {e}"
                    return _np_bgr_to_rgb(swapped_bgr), ""

                webcam.stream(
                    _live_swap,
                    inputs=[webcam, source_face_state, live_consent],
                    outputs=[live_out, live_status],
                )
    return demo
if __name__ == "__main__":
    # Build the UI and start the Gradio server when run as a script.
    app = build_demo()
    app.launch()