# Hugging Face Space: hybrid photo enhancer — Swin2SR (background 4x upscale) + CodeFormer (face restoration), CPU-only ONNX Runtime.
| import os, cv2, numpy as np, onnxruntime as ort, gradio as gr | |
| from huggingface_hub import hf_hub_download | |
# --- MODELS SETUP ---
def load_models():
    """Download both ONNX models and build CPU inference sessions.

    Returns a ``(background_session, face_session)`` pair:
      * Swin2SR x4 (BSRGAN/PSNR variant) for whole-image super-resolution,
      * CodeFormer for face restoration.
    """
    swin2sr_path = hf_hub_download(
        repo_id="Xenova/swin2SR-realworld-sr-x4-64-bsrgan-psnr",
        filename="onnx/model.onnx",
    )
    codeformer_path = hf_hub_download(repo_id="maze/faceX", filename="codeformer.onnx")

    # Keep thread usage low: free Spaces CPU hardware has few cores.
    session_opts = ort.SessionOptions()
    session_opts.intra_op_num_threads = 2
    providers = ['CPUExecutionProvider']
    return (
        ort.InferenceSession(swin2sr_path, session_opts, providers=providers),
        ort.InferenceSession(codeformer_path, session_opts, providers=providers),
    )


# Sessions are created once at import time and shared by every request.
bg_session, face_session = load_models()
def upscale_bg_tiled(frame, tile_size=128, overlap=16):
    """Swin2SR 4x Tiled Upscale.

    Upscales a 3-channel uint8 frame by a fixed factor of 4 using the
    module-level ``bg_session``, processing the image in overlapping tiles
    so peak memory stays bounded on CPU-only hardware.

    Args:
        frame: HxWx3 uint8 image (treated as BGR throughout this file).
        tile_size: nominal tile edge length in input pixels.
        overlap: context read on each side of a tile; the loop stride is
            ``tile_size - 2*overlap`` so the overlapped borders are
            discarded when stitching, hiding tile seams.

    Returns:
        (4H)x(4W)x3 uint8 upscaled image.
    """
    h, w, c = frame.shape
    output_h, output_w = h * 4, w * 4  # model scale factor is fixed at 4
    upscaled_img = np.zeros((output_h, output_w, c), dtype=np.uint8)
    stride = tile_size - (overlap * 2)
    for y in range(0, h, stride):
        for x in range(0, w, stride):
            # Read window: the stride-sized core plus `overlap` pixels of
            # context on every side, clamped to the frame borders.
            y1, y2, x1, x2 = max(0, y-overlap), min(h, y+stride+overlap), max(0, x-overlap), min(w, x+stride+overlap)
            tile = frame[y1:y2, x1:x2]
            # Pad to multiple of 8 — presumably the Swin2SR window-size
            # requirement (TODO confirm against the model's config).
            # Reflect-pad bottom/right only so (y1, x1) alignment is kept.
            th, tw = tile.shape[:2]
            ph, pw = (8 - (th % 8)) % 8, (8 - (tw % 8)) % 8
            if ph > 0 or pw > 0: tile = cv2.copyMakeBorder(tile, 0, ph, 0, pw, cv2.BORDER_REFLECT)
            # AI: model expects NCHW float32 RGB in [0, 1].
            img_in = np.transpose(cv2.cvtColor(tile, cv2.COLOR_BGR2RGB).astype(np.float32) / 255.0, (2, 0, 1))[np.newaxis, :]
            out = bg_session.run(None, {bg_session.get_inputs()[0].name: img_in})[0]
            # Stitch: back to uint8 BGR, then crop off the 8-multiple padding
            # (output dims are exactly 4x the unpadded tile dims).
            tile_out = cv2.cvtColor((np.clip(np.squeeze(out), 0, 1).transpose(1, 2, 0) * 255.0).astype(np.uint8), cv2.COLOR_RGB2BGR)
            tile_out = tile_out[:(th*4), :(tw*4)]
            # (oy, ox): offset of this tile's core region inside the upscaled
            # tile. Only the core (stride x stride, scaled 4x) is written to
            # the output, which discards the overlapped context borders.
            oy, ox = (y-y1)*4, (x-x1)*4
            py1, py2, px1, px2 = y*4, min(output_h, (y+stride)*4), x*4, min(output_w, (x+stride)*4)
            upscaled_img[py1:py2, px1:px2] = tile_out[oy : oy+(py2-py1), ox : ox+(px2-px1)]
    return upscaled_img
def restore_face_core(img, fidelity=0.5):
    """Run CodeFormer on a single crop and return it at the crop's original size.

    The crop is resized to the model's fixed 512x512 input, normalized from
    [0, 255] to [-1, 1], restored via the module-level ``face_session``,
    mapped back to [0, 255], and finally resized to match ``img``.

    Args:
        img: HxWx3 uint8 face crop (BGR channel order per this file).
        fidelity: CodeFormer fidelity weight, fed to every non-image input.

    Returns:
        Restored HxWx3 uint8 crop with the same spatial size as ``img``.
    """
    # Fixed 512x512 model input, normalized to [-1, 1] in RGB channel order.
    resized = cv2.resize(img, (512, 512), interpolation=cv2.INTER_LINEAR)
    rgb = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB).astype(np.float32)
    tensor = np.transpose(rgb / 127.5 - 1.0, (2, 0, 1))[np.newaxis, :]

    # First graph input is the image; any remaining inputs receive the
    # fidelity weight, cast to double when the graph declares a double input.
    specs = face_session.get_inputs()
    feed = {specs[0].name: tensor}
    for spec in specs[1:]:
        dtype = np.float64 if 'double' in spec.type else np.float32
        feed[spec.name] = np.array([fidelity], dtype=dtype)

    raw = face_session.run(None, feed)[0]

    # Undo the [-1, 1] normalization, clamp, and convert back to uint8 BGR.
    decoded = (np.squeeze(raw) + 1.0) / 2.0
    bgr = cv2.cvtColor(
        (np.clip(decoded, 0, 1).transpose(1, 2, 0) * 255.0).astype(np.uint8),
        cv2.COLOR_RGB2BGR,
    )
    return cv2.resize(bgr, (img.shape[1], img.shape[0]), interpolation=cv2.INTER_LANCZOS4)
def hybrid_enhancer(img_data, mode, face_strength, progress=gr.Progress()):
    """Two-stage enhancement: Swin2SR background upscale + CodeFormer faces.

    Args:
        img_data: ``gr.ImageEditor`` payload dict; its "composite" layer is used.
        mode: "Full Image (BG + Face)" to 4x-upscale everything first; any
            other value restores faces at the original resolution only.
        face_strength: blend weight for the restored face, also passed to
            CodeFormer as its fidelity input.
        progress: Gradio progress reporter.

    Returns:
        The enhanced image as a numpy array, or None when no image was given.
    """
    if img_data is None:
        return None
    img = img_data["composite"]
    # FIX: the editor can hand back an empty composite; bail out cleanly.
    if img is None:
        return None
    # FIX: normalize to 3 channels. The original indexed img.shape[2]
    # unconditionally, which raises IndexError on a 2-D grayscale array.
    if img.ndim == 2:
        img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
    elif img.shape[2] == 4:
        img = cv2.cvtColor(img, cv2.COLOR_RGBA2RGB)
    # NOTE(review): Gradio delivers RGB arrays while this pipeline labels them
    # BGR. The input/output channel swaps cancel for the final display, but
    # the models see R and B exchanged — confirm whether an explicit
    # RGB->BGR conversion is intended here.

    # Stage 1: Background
    if mode == "Full Image (BG + Face)":
        progress(0, desc="Stage 1: Upscaling Background...")
        final_img = upscale_bg_tiled(img)
    else:
        final_img = img.copy()

    # Stage 2: Smart Face Detection
    progress(0.5, desc="Stage 2: Scanning for Faces...")
    # Use OpenCV's bundled Haar cascade to locate frontal faces.
    face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
    gray = cv2.cvtColor(final_img, cv2.COLOR_BGR2GRAY)
    faces = face_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=5, minSize=(64, 64))

    if len(faces) == 0:
        # Fallback: no detection — run the restorer over the whole image.
        print("No face detected! Running on whole image as fallback.")
        restored = restore_face_core(final_img, fidelity=face_strength)
        final_img = cv2.addWeighted(restored, face_strength, final_img, 1 - face_strength, 0)
    else:
        # Process each face found.
        for (x, y, w, h) in faces:
            # Pad the box so the model sees jawline/hair context.
            pad_x = int(w * 0.2)
            pad_y = int(h * 0.2)
            x1 = max(0, x - pad_x)
            y1 = max(0, y - int(pad_y * 1.5))  # extra room at the top for hair
            x2 = min(final_img.shape[1], x + w + pad_x)
            y2 = min(final_img.shape[0], y + h + pad_y)
            face_crop = final_img[y1:y2, x1:x2]
            # FIX: skip degenerate boxes — an empty crop would crash cv2.resize.
            if face_crop.size == 0:
                continue
            restored_face = restore_face_core(face_crop, fidelity=face_strength)
            # Blend so face_strength < 1 keeps some of the original texture.
            final_img[y1:y2, x1:x2] = cv2.addWeighted(restored_face, face_strength, face_crop, 1 - face_strength, 0)
    progress(1.0, desc="Done!")
    return final_img
# --- UI ---
# Gradio front-end: one input column (editor + controls), one output column.
with gr.Blocks(theme=gr.themes.Soft(primary_hue="indigo")) as demo:
    gr.Markdown("# 🏆 Hybrid Face & Background Enhancer")
    gr.Markdown("Combines **Swin2SR** for backgrounds and **CodeFormer** for faces.")
    with gr.Row():
        with gr.Column():
            image_in = gr.ImageEditor(label="Upload (Crop to the person's face)", type="numpy")
            mode = gr.Radio(["Full Image (BG + Face)", "Face Only"], value="Full Image (BG + Face)", label="Process Mode")
            # Blend weight for restored faces; also passed as CodeFormer fidelity.
            strength = gr.Slider(0.1, 1.0, value=0.7, label="Face Detail Strength")
            btn = gr.Button("🚀 START ENHANCEMENT", variant="primary")
        with gr.Column():
            image_out = gr.Image(label="Pro Result")
    btn.click(hybrid_enhancer, [image_in, mode, strength], image_out)

if __name__ == "__main__":
    # queue() serializes requests so concurrent users don't contend for CPU.
    demo.queue().launch()