"""Hybrid image enhancer: Swin2SR (4x background) + CodeFormer (faces).

Both models run on CPU through onnxruntime; the UI is a Gradio Blocks app.

NOTE(review): Gradio numpy images are RGB, but this pipeline treats frames
as BGR throughout (cv2 convention). The BGR<->RGB conversions are symmetric,
so the displayed result keeps the input's channel order, but the models see
channel-swapped pixels — confirm end-to-end and fix if colors look off.
"""
import os

import cv2
import gradio as gr
import numpy as np
import onnxruntime as ort
from huggingface_hub import hf_hub_download


# --- MODELS SETUP ---
def load_models():
    """Download both ONNX models and build CPU inference sessions.

    Returns:
        tuple: ``(bg_session, face_session)`` — Swin2SR 4x upscaler and
        CodeFormer face restorer, both on ``CPUExecutionProvider``.
    """
    # 1. Background: Swin2SR (real-world 4x, BSRGAN-PSNR variant)
    bg_path = hf_hub_download(
        repo_id="Xenova/swin2SR-realworld-sr-x4-64-bsrgan-psnr",
        filename="onnx/model.onnx",
    )
    # 2. Face: CodeFormer
    face_path = hf_hub_download(repo_id="maze/faceX", filename="codeformer.onnx")

    opts = ort.SessionOptions()
    opts.intra_op_num_threads = 2  # keep CPU usage modest on shared hosts
    bg_sess = ort.InferenceSession(bg_path, opts, providers=['CPUExecutionProvider'])
    face_sess = ort.InferenceSession(face_path, opts, providers=['CPUExecutionProvider'])
    return bg_sess, face_sess


bg_session, face_session = load_models()


def upscale_bg_tiled(frame, tile_size=128, overlap=16):
    """Swin2SR 4x tiled upscale.

    The frame is processed in overlapping tiles so large images fit in
    memory; only the central ``stride``-sized region of each upscaled tile
    is written back, discarding the overlap context at the seams.

    Args:
        frame: HxWx3 uint8 image (cv2/BGR channel order assumed).
        tile_size: nominal tile edge, in input pixels.
        overlap: context margin read around each tile. Must satisfy
            ``tile_size > 2 * overlap`` so the stride stays positive.

    Returns:
        (4H)x(4W)x3 uint8 upscaled image.
    """
    h, w, c = frame.shape
    output_h, output_w = h * 4, w * 4
    upscaled_img = np.zeros((output_h, output_w, c), dtype=np.uint8)
    stride = tile_size - (overlap * 2)
    # Hoisted out of the loop: the input name is invariant per session.
    input_name = bg_session.get_inputs()[0].name

    for y in range(0, h, stride):
        for x in range(0, w, stride):
            # Tile bounds including overlap context, clamped to the image.
            y1, y2 = max(0, y - overlap), min(h, y + stride + overlap)
            x1, x2 = max(0, x - overlap), min(w, x + stride + overlap)
            tile = frame[y1:y2, x1:x2]

            # Swin2SR needs spatial dims divisible by 8; reflect-pad.
            th, tw = tile.shape[:2]
            ph, pw = (8 - (th % 8)) % 8, (8 - (tw % 8)) % 8
            if ph > 0 or pw > 0:
                tile = cv2.copyMakeBorder(tile, 0, ph, 0, pw, cv2.BORDER_REFLECT)

            # AI: NCHW float32 in [0, 1].
            img_in = np.transpose(
                cv2.cvtColor(tile, cv2.COLOR_BGR2RGB).astype(np.float32) / 255.0,
                (2, 0, 1),
            )[np.newaxis, :]
            out = bg_session.run(None, {input_name: img_in})[0]

            # Back to uint8 BGR, then drop the padding (x4 in output space).
            tile_out = cv2.cvtColor(
                (np.clip(np.squeeze(out), 0, 1).transpose(1, 2, 0) * 255.0).astype(np.uint8),
                cv2.COLOR_RGB2BGR,
            )
            tile_out = tile_out[:(th * 4), :(tw * 4)]

            # Stitch only the central region; skip the overlap margins.
            oy, ox = (y - y1) * 4, (x - x1) * 4
            py1, py2 = y * 4, min(output_h, (y + stride) * 4)
            px1, px2 = x * 4, min(output_w, (x + stride) * 4)
            upscaled_img[py1:py2, px1:px2] = tile_out[oy:oy + (py2 - py1), ox:ox + (px2 - px1)]
    return upscaled_img


def restore_face_core(img, fidelity=0.5):
    """CodeFormer face restoration with [-1, 1] input normalization.

    Resizes the crop to the model's fixed 512x512 input, runs CodeFormer,
    then resizes the result back to the crop's original size.

    Args:
        img: HxWx3 uint8 face crop (cv2/BGR assumed).
        fidelity: CodeFormer weight in [0, 1]; fed to every extra model
            input beyond the image tensor.

    Returns:
        Restored HxWx3 uint8 image, same size as ``img``.
    """
    img_512 = cv2.resize(img, (512, 512), interpolation=cv2.INTER_LINEAR)
    img_rgb = cv2.cvtColor(img_512, cv2.COLOR_BGR2RGB).astype(np.float32)

    # --- MATH: map colors from [0, 255] to [-1.0, 1.0] ---
    img_in = (img_rgb / 127.5) - 1.0
    img_in = np.transpose(img_in, (2, 0, 1))[np.newaxis, :]

    # Map inputs: image first, then the fidelity scalar for any extra
    # inputs (dtype chosen from the model's declared input type).
    inputs_info = face_session.get_inputs()
    input_feed = {inputs_info[0].name: img_in}
    for i in range(1, len(inputs_info)):
        dt = np.float64 if 'double' in inputs_info[i].type else np.float32
        input_feed[inputs_info[i].name] = np.array([fidelity], dtype=dt)

    # AI inference
    out = face_session.run(None, input_feed)[0]

    # --- MATH: map colors back from [-1.0, 1.0] to [0, 1] ---
    out = (np.squeeze(out) + 1.0) / 2.0
    res = cv2.cvtColor(
        (np.clip(out, 0, 1).transpose(1, 2, 0) * 255.0).astype(np.uint8),
        cv2.COLOR_RGB2BGR,
    )
    return cv2.resize(res, (img.shape[1], img.shape[0]), interpolation=cv2.INTER_LANCZOS4)


def hybrid_enhancer(img_data, mode, face_strength, progress=gr.Progress()):
    """Full pipeline: optional background upscale, then per-face restore.

    Args:
        img_data: Gradio ImageEditor dict; the ``"composite"`` layer is used.
        mode: "Full Image (BG + Face)" to upscale the background first,
            anything else to restore faces only.
        face_strength: blend weight for the restored face(s), in (0, 1].
        progress: Gradio progress reporter.

    Returns:
        Enhanced image array, or None when no image was provided.
    """
    if img_data is None:
        return None
    img = img_data["composite"]
    # Normalize channels: grayscale -> 3-channel (original crashed with
    # IndexError on 2-D input); RGBA -> drop alpha.
    if img.ndim == 2:
        img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
    elif img.shape[2] == 4:
        img = cv2.cvtColor(img, cv2.COLOR_RGBA2RGB)

    # Stage 1: Background
    if mode == "Full Image (BG + Face)":
        progress(0, desc="Stage 1: Upscaling Background...")
        final_img = upscale_bg_tiled(img)
    else:
        final_img = img.copy()

    # Stage 2: Smart face detection via OpenCV's Haar cascade.
    progress(0.5, desc="Stage 2: Scanning for Faces...")
    face_cascade = cv2.CascadeClassifier(
        cv2.data.haarcascades + 'haarcascade_frontalface_default.xml'
    )
    gray = cv2.cvtColor(final_img, cv2.COLOR_BGR2GRAY)
    faces = face_cascade.detectMultiScale(
        gray, scaleFactor=1.1, minNeighbors=5, minSize=(64, 64)
    )

    if len(faces) == 0:
        print("No face detected! Running on whole image as fallback.")
        restored = restore_face_core(final_img, fidelity=face_strength)
        final_img = cv2.addWeighted(restored, face_strength, final_img, 1 - face_strength, 0)
    else:
        # Process each face found.
        for (x, y, w, h) in faces:
            # Pad around the detection so the model sees jawline/hair.
            pad_x = int(w * 0.2)
            pad_y = int(h * 0.2)
            x1 = max(0, x - pad_x)
            y1 = max(0, y - int(pad_y * 1.5))  # extra room at the top for hair
            x2 = min(final_img.shape[1], x + w + pad_x)
            y2 = min(final_img.shape[0], y + h + pad_y)

            # Extract just the face box, restore it, blend it back in.
            face_crop = final_img[y1:y2, x1:x2]
            restored_face = restore_face_core(face_crop, fidelity=face_strength)
            final_img[y1:y2, x1:x2] = cv2.addWeighted(
                restored_face, face_strength, face_crop, 1 - face_strength, 0
            )

    progress(1.0, desc="Done!")
    return final_img


# --- UI ---
with gr.Blocks(theme=gr.themes.Soft(primary_hue="indigo")) as demo:
    gr.Markdown("# 🏆 Hybrid Face & Background Enhancer")
    gr.Markdown("Combines **Swin2SR** for backgrounds and **CodeFormer** for faces.")
    with gr.Row():
        with gr.Column():
            image_in = gr.ImageEditor(
                label="Upload (Crop to the person's face)", type="numpy"
            )
            mode = gr.Radio(
                ["Full Image (BG + Face)", "Face Only"],
                value="Full Image (BG + Face)",
                label="Process Mode",
            )
            strength = gr.Slider(0.1, 1.0, value=0.7, label="Face Detail Strength")
            btn = gr.Button("🚀 START ENHANCEMENT", variant="primary")
        with gr.Column():
            image_out = gr.Image(label="Pro Result")
    btn.click(hybrid_enhancer, [image_in, mode, strength], image_out)

if __name__ == "__main__":
    demo.queue().launch()