import os, cv2, numpy as np, onnxruntime as ort, gradio as gr
from huggingface_hub import hf_hub_download
# --- MODELS SETUP ---
def load_models():
    """Download and initialise the two ONNX inference sessions used by the app.

    Returns:
        tuple: (background_session, face_session) — a Swin2SR 4x upscaler
        and a CodeFormer face-restoration session, both created with the
        CPU execution provider.
    """
    # Fetch model weights from the Hugging Face Hub (cached after first run).
    background_path = hf_hub_download(
        repo_id="Xenova/swin2SR-realworld-sr-x4-64-bsrgan-psnr",
        filename="onnx/model.onnx",
    )
    restorer_path = hf_hub_download(repo_id="maze/faceX", filename="codeformer.onnx")

    session_options = ort.SessionOptions()
    session_options.intra_op_num_threads = 2  # keep CPU usage modest on shared hosts

    background_session = ort.InferenceSession(
        background_path, session_options, providers=['CPUExecutionProvider']
    )
    restorer_session = ort.InferenceSession(
        restorer_path, session_options, providers=['CPUExecutionProvider']
    )
    return background_session, restorer_session

# Module-level sessions, shared by every request.
bg_session, face_session = load_models()
def upscale_bg_tiled(frame, tile_size=128, overlap=16):
    """Upscale `frame` 4x with the module-global Swin2SR `bg_session`, tile by tile.

    Tiling keeps peak memory bounded: each `tile_size`-wide window is read with
    `overlap` pixels of context on every side, run through the model, and only
    the central (non-overlap) core is written into the output canvas.

    Args:
        frame: H x W x 3 uint8 image. The code treats it as BGR —
            NOTE(review): the Gradio editor supplies RGB, so the model may
            actually receive swapped channels; confirm against the caller.
        tile_size: window edge length in input pixels.
        overlap: context margin added around each window's core region.

    Returns:
        (4*H) x (4*W) x 3 uint8 upscaled image.
    """
    h, w, c = frame.shape
    output_h, output_w = h * 4, w * 4
    upscaled_img = np.zeros((output_h, output_w, c), dtype=np.uint8)
    # Windows advance by the core (non-overlap) width so written regions abut.
    stride = tile_size - (overlap * 2)
    for y in range(0, h, stride):
        for x in range(0, w, stride):
            # Read window = core region plus overlap context, clipped to the image.
            y1, y2, x1, x2 = max(0, y-overlap), min(h, y+stride+overlap), max(0, x-overlap), min(w, x+stride+overlap)
            tile = frame[y1:y2, x1:x2]
            # Pad to multiple of 8 (reflect) so the model's window attention divides evenly.
            th, tw = tile.shape[:2]
            ph, pw = (8 - (th % 8)) % 8, (8 - (tw % 8)) % 8
            if ph > 0 or pw > 0: tile = cv2.copyMakeBorder(tile, 0, ph, 0, pw, cv2.BORDER_REFLECT)
            # Model input: NCHW float32 in [0, 1].
            img_in = np.transpose(cv2.cvtColor(tile, cv2.COLOR_BGR2RGB).astype(np.float32) / 255.0, (2, 0, 1))[np.newaxis, :]
            out = bg_session.run(None, {bg_session.get_inputs()[0].name: img_in})[0]
            # Back to HWC uint8; trim the 4x-scaled reflect padding off the output tile.
            tile_out = cv2.cvtColor((np.clip(np.squeeze(out), 0, 1).transpose(1, 2, 0) * 255.0).astype(np.uint8), cv2.COLOR_RGB2BGR)
            tile_out = tile_out[:(th*4), :(tw*4)]
            # Offset of the core region inside the overlapped tile, in output pixels.
            oy, ox = (y-y1)*4, (x-x1)*4
            # Destination rectangle of the core region in the output canvas.
            py1, py2, px1, px2 = y*4, min(output_h, (y+stride)*4), x*4, min(output_w, (x+stride)*4)
            upscaled_img[py1:py2, px1:px2] = tile_out[oy : oy+(py2-py1), ox : ox+(px2-px1)]
    return upscaled_img
def restore_face_core(img, fidelity=0.5):
    """Run CodeFormer on `img` and return a restored image at the original size.

    The crop is resized to the model's fixed 512x512 input, mapped from
    [0, 255] to [-1, 1], restored, mapped back, and resized to `img`'s shape.

    Args:
        img: face crop, H x W x 3 uint8.
        fidelity: CodeFormer fidelity weight; it is fed to every model input
            beyond the image tensor, honouring each input's declared width.

    Returns:
        Restored uint8 image with the same height/width as `img`.
    """
    resized = cv2.resize(img, (512, 512), interpolation=cv2.INTER_LINEAR)
    rgb = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB).astype(np.float32)
    # Normalise [0, 255] -> [-1, 1] as the model expects.
    tensor = np.transpose(rgb / 127.5 - 1.0, (2, 0, 1))[np.newaxis, :]

    # First input takes the image; every additional input gets the fidelity scalar.
    model_inputs = face_session.get_inputs()
    feed = {model_inputs[0].name: tensor}
    for extra in model_inputs[1:]:
        dtype = np.float64 if 'double' in extra.type else np.float32
        feed[extra.name] = np.array([fidelity], dtype=dtype)

    prediction = face_session.run(None, feed)[0]
    # De-normalise [-1, 1] -> [0, 1], then convert back to uint8 BGR.
    prediction = (np.squeeze(prediction) + 1.0) / 2.0
    restored = cv2.cvtColor(
        (np.clip(prediction, 0, 1).transpose(1, 2, 0) * 255.0).astype(np.uint8),
        cv2.COLOR_RGB2BGR,
    )
    return cv2.resize(restored, (img.shape[1], img.shape[0]), interpolation=cv2.INTER_LANCZOS4)
def hybrid_enhancer(img_data, mode, face_strength, progress=gr.Progress()):
    """Two-stage enhancer: Swin2SR for the whole frame, CodeFormer for faces.

    Args:
        img_data: gr.ImageEditor payload; its "composite" key holds the edited
            numpy image (may arrive as grayscale, RGB, or RGBA).
        mode: "Full Image (BG + Face)" to 4x-upscale the background first;
            any other value restores faces at the original resolution only.
        face_strength: weight in (0, 1] used both as the CodeFormer fidelity
            and as the alpha when blending the restored face back in.
        progress: Gradio progress reporter.

    Returns:
        The enhanced image, or None when no image was provided.
    """
    if img_data is None:
        return None
    img = img_data["composite"]
    if img is None:  # editor can yield an empty composite
        return None
    # Normalise channel layout. BUGFIX: indexing img.shape[2] on a 2-D
    # grayscale array raised IndexError before this guard.
    if img.ndim == 2:
        img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
    elif img.shape[2] == 4:
        img = cv2.cvtColor(img, cv2.COLOR_RGBA2RGB)

    # Stage 1: optional 4x background upscale.
    if mode == "Full Image (BG + Face)":
        progress(0, desc="Stage 1: Upscaling Background...")
        final_img = upscale_bg_tiled(img)
    else:
        final_img = img.copy()

    # Stage 2: locate faces with OpenCV's Haar cascade and restore each one.
    progress(0.5, desc="Stage 2: Scanning for Faces...")
    face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
    gray = cv2.cvtColor(final_img, cv2.COLOR_BGR2GRAY)
    faces = face_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=5, minSize=(64, 64))

    if len(faces) == 0:
        # Fallback: no detection — run CodeFormer over the whole frame and
        # blend softly so a cascade miss still gets some enhancement.
        print("No face detected! Running on whole image as fallback.")
        restored = restore_face_core(final_img, fidelity=face_strength)
        final_img = cv2.addWeighted(restored, face_strength, final_img, 1 - face_strength, 0)
    else:
        for (x, y, w, h) in faces:
            # Pad the detection box so the model sees the jawline and hair.
            pad_x = int(w * 0.2)
            pad_y = int(h * 0.2)
            x1 = max(0, x - pad_x)
            y1 = max(0, y - int(pad_y * 1.5))  # extra headroom for hair
            x2 = min(final_img.shape[1], x + w + pad_x)
            y2 = min(final_img.shape[0], y + h + pad_y)
            face_crop = final_img[y1:y2, x1:x2]
            restored_face = restore_face_core(face_crop, fidelity=face_strength)
            # Blend the restored crop back over the (possibly upscaled) frame.
            final_img[y1:y2, x1:x2] = cv2.addWeighted(restored_face, face_strength, face_crop, 1 - face_strength, 0)

    progress(1.0, desc="Done!")
    return final_img
# --- UI ---
# Gradio front-end: one column of inputs, one for the result.
with gr.Blocks(theme=gr.themes.Soft(primary_hue="indigo")) as demo:
    gr.Markdown("# 🏆 Hybrid Face & Background Enhancer")
    gr.Markdown("Combines **Swin2SR** for backgrounds and **CodeFormer** for faces.")
    with gr.Row():
        with gr.Column():
            # Inputs: editable image, processing mode, and face blend strength.
            image_in = gr.ImageEditor(label="Upload (Crop to the person's face)", type="numpy")
            mode = gr.Radio(["Full Image (BG + Face)", "Face Only"], value="Full Image (BG + Face)", label="Process Mode")
            strength = gr.Slider(0.1, 1.0, value=0.7, label="Face Detail Strength")
            btn = gr.Button("🚀 START ENHANCEMENT", variant="primary")
        with gr.Column():
            image_out = gr.Image(label="Pro Result")
    # Wire the button to the enhancement pipeline.
    btn.click(hybrid_enhancer, [image_in, mode, strength], image_out)
if __name__ == "__main__":
    # queue() serialises requests so the CPU-only models aren't oversubscribed.
    demo.queue().launch()