File size: 6,267 Bytes
c32e0c5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9b2c620
980761c
c32e0c5
980761c
c32e0c5
980761c
 
 
9b2c620
980761c
 
 
c34c17a
980761c
 
9b2c620
980761c
9b2c620
 
980761c
 
 
 
c32e0c5
 
980761c
c32e0c5
 
 
 
 
980761c
c32e0c5
 
980761c
c32e0c5
980761c
c32e0c5
980761c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c32e0c5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
import os, cv2, numpy as np, onnxruntime as ort, gradio as gr
from huggingface_hub import hf_hub_download

# --- MODELS SETUP ---
def load_models():
    """Download both ONNX models from the Hugging Face Hub and open sessions.

    Returns:
        A (background_session, face_session) tuple of onnxruntime
        InferenceSessions: Swin2SR x4 for backgrounds, CodeFormer for faces.
    """
    # 1. Background: Swin2SR (The one you liked best)
    bg_model_path = hf_hub_download(
        repo_id="Xenova/swin2SR-realworld-sr-x4-64-bsrgan-psnr",
        filename="onnx/model.onnx",
    )
    # 2. Face: CodeFormer (The current "King" of faces)
    face_model_path = hf_hub_download(repo_id="maze/faceX", filename="codeformer.onnx")

    session_opts = ort.SessionOptions()
    # Keep thread usage modest for small shared CPU hosts.
    session_opts.intra_op_num_threads = 2

    def open_session(path):
        # CPU-only execution provider in both cases.
        return ort.InferenceSession(path, session_opts, providers=['CPUExecutionProvider'])

    return open_session(bg_model_path), open_session(face_model_path)

# Module-level singletons: both sessions are created once at import time so
# the Gradio handlers below reuse them across requests.
bg_session, face_session = load_models()

def upscale_bg_tiled(frame, tile_size=128, overlap=16):
    """Swin2SR 4x Tiled Upscale.

    Upscales an HxWxC uint8 frame 4x by running the Swin2SR session on
    overlapping tiles and pasting back only each tile's central region,
    which hides seams at tile borders.

    Args:
        frame: HxWxC uint8 image; cvtColor below assumes C == 3 in BGR
            order -- NOTE(review): the Gradio caller supplies RGB, so the
            model actually sees swapped channels; confirm intent.
        tile_size: nominal tile edge fed to the model.
        overlap: context margin read around each tile but cropped on paste.

    Returns:
        A (4H)x(4W)xC uint8 upscaled image.
    """
    h, w, c = frame.shape
    output_h, output_w = h * 4, w * 4
    upscaled_img = np.zeros((output_h, output_w, c), dtype=np.uint8)
    # Tiles advance by the non-overlapping core size (96 px with defaults).
    stride = tile_size - (overlap * 2)
    
    for y in range(0, h, stride):
        for x in range(0, w, stride):
            # Read window = core tile plus `overlap` of context, clamped to the frame.
            y1, y2, x1, x2 = max(0, y-overlap), min(h, y+stride+overlap), max(0, x-overlap), min(w, x+stride+overlap)
            tile = frame[y1:y2, x1:x2]
            # Pad to multiple of 8
            th, tw = tile.shape[:2]
            ph, pw = (8 - (th % 8)) % 8, (8 - (tw % 8)) % 8
            if ph > 0 or pw > 0: tile = cv2.copyMakeBorder(tile, 0, ph, 0, pw, cv2.BORDER_REFLECT)
            
            # AI: NCHW float32 in [0, 1]; channels swapped here and swapped back after.
            img_in = np.transpose(cv2.cvtColor(tile, cv2.COLOR_BGR2RGB).astype(np.float32) / 255.0, (2, 0, 1))[np.newaxis, :]
            out = bg_session.run(None, {bg_session.get_inputs()[0].name: img_in})[0]
            
            # Stitch: back to HWC uint8, then drop the rows/cols added by the
            # multiple-of-8 padding (model output is 4x the padded tile).
            tile_out = cv2.cvtColor((np.clip(np.squeeze(out), 0, 1).transpose(1, 2, 0) * 255.0).astype(np.uint8), cv2.COLOR_RGB2BGR)
            tile_out = tile_out[:(th*4), :(tw*4)]
            # (oy, ox): offset of the tile's core within the overlap-expanded output tile.
            oy, ox = (y-y1)*4, (x-x1)*4
            # Destination rectangle of the core region in the full-res canvas.
            py1, py2, px1, px2 = y*4, min(output_h, (y+stride)*4), x*4, min(output_w, (x+stride)*4)
            upscaled_img[py1:py2, px1:px2] = tile_out[oy : oy+(py2-py1), ox : ox+(px2-px1)]
    return upscaled_img

def restore_face_core(img, fidelity=0.5):
    """CodeFormer Face Restoration with Correct Math Normalization.

    Resizes the crop to the model's fixed 512x512 input, normalizes to
    [-1, 1], runs the session, denormalizes, and resizes the result back
    to the crop's original dimensions.
    """
    resized = cv2.resize(img, (512, 512), interpolation=cv2.INTER_LINEAR)
    rgb = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB).astype(np.float32)

    # --- MATH FIX: Map colors from [0, 255] to [-1.0, 1.0] ---
    tensor = np.transpose(rgb / 127.5 - 1.0, (2, 0, 1))[np.newaxis, :]

    # First declared input gets the image; every remaining input receives the
    # fidelity scalar, cast to the dtype the graph declares for it.
    input_feed = {}
    for idx, info in enumerate(face_session.get_inputs()):
        if idx == 0:
            input_feed[info.name] = tensor
        else:
            scalar_dtype = np.float64 if 'double' in info.type else np.float32
            input_feed[info.name] = np.array([fidelity], dtype=scalar_dtype)

    # AI Inference
    raw = face_session.run(None, input_feed)[0]

    # --- MATH FIX: Map colors back from [-1.0, 1.0] to [0, 1] ---
    denorm = (np.squeeze(raw) + 1.0) / 2.0
    restored = cv2.cvtColor(
        (np.clip(denorm, 0, 1).transpose(1, 2, 0) * 255.0).astype(np.uint8),
        cv2.COLOR_RGB2BGR,
    )
    return cv2.resize(restored, (img.shape[1], img.shape[0]), interpolation=cv2.INTER_LANCZOS4)


def hybrid_enhancer(img_data, mode, face_strength, progress=gr.Progress()):
    """Two-stage enhancement pipeline driven by the Gradio UI.

    Stage 1 optionally 4x-upscales the whole frame with Swin2SR; stage 2
    finds faces with a Haar cascade and blends CodeFormer restorations over
    each detected region.

    Args:
        img_data: Gradio ImageEditor payload (dict with a "composite" array)
            or None when nothing was uploaded.
        mode: "Full Image (BG + Face)" to upscale the background first;
            any other value enhances faces only.
        face_strength: blend weight in (0, 1] for restored face pixels; also
            forwarded to CodeFormer as its fidelity input.
        progress: Gradio progress tracker (injected by the UI).

    Returns:
        The enhanced image as a numpy array, or None if no image was given.
    """
    if img_data is None:
        return None
    img = img_data.get("composite") if isinstance(img_data, dict) else img_data
    if img is None:  # editor dict can exist with an empty canvas
        return None
    # Normalize channel layout: promote grayscale (would IndexError on
    # shape[2] otherwise), drop alpha.
    if img.ndim == 2:
        img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
    elif img.shape[2] == 4:
        img = cv2.cvtColor(img, cv2.COLOR_RGBA2RGB)

    # Stage 1: Background (skipped entirely in "Face Only" mode)
    if mode == "Full Image (BG + Face)":
        progress(0, desc="Stage 1: Upscaling Background...")
        final_img = upscale_bg_tiled(img)
    else:
        final_img = img.copy()

    # Stage 2: Smart Face Detection
    progress(0.5, desc="Stage 2: Scanning for Faces...")

    # Use OpenCV's built-in cascade to find where the face actually is.
    face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
    gray = cv2.cvtColor(final_img, cv2.COLOR_BGR2GRAY)
    faces = face_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=5, minSize=(64, 64))

    if len(faces) == 0:
        # Best-effort fallback: restore the whole frame softly rather than
        # returning the input unchanged.
        print("No face detected! Running on whole image as fallback.")
        restored = restore_face_core(final_img, fidelity=face_strength)
        final_img = cv2.addWeighted(restored, face_strength, final_img, 1 - face_strength, 0)
    else:
        for box in faces:
            _restore_face_box(final_img, box, face_strength)

    progress(1.0, desc="Done!")
    return final_img


def _restore_face_box(final_img, box, face_strength):
    """Restore one detected face box in `final_img` in place.

    Pads the detection, runs CodeFormer on the crop, and pastes the result
    back blended by `face_strength`.
    """
    x, y, w, h = box
    # Add some padding around the face so the AI sees the jawline/hair.
    pad_x = int(w * 0.2)
    pad_y = int(h * 0.2)
    x1 = max(0, x - pad_x)
    y1 = max(0, y - int(pad_y * 1.5))  # Extra room at the top for hair
    x2 = min(final_img.shape[1], x + w + pad_x)
    y2 = min(final_img.shape[0], y + h + pad_y)

    # Extract just the face box and restore it.
    face_crop = final_img[y1:y2, x1:x2]
    restored_face = restore_face_core(face_crop, fidelity=face_strength)

    # Weighted paste back into the (possibly upscaled) background.
    final_img[y1:y2, x1:x2] = cv2.addWeighted(
        restored_face, face_strength, face_crop, 1 - face_strength, 0
    )
# --- UI ---
# Gradio layout: left column holds the inputs and the trigger button, right
# column shows the result. Component construction order defines the layout.
with gr.Blocks(theme=gr.themes.Soft(primary_hue="indigo")) as demo:
    gr.Markdown("# 🏆 Hybrid Face & Background Enhancer")
    gr.Markdown("Combines **Swin2SR** for backgrounds and **CodeFormer** for faces.")
    
    with gr.Row():
        with gr.Column():
            # ImageEditor (not Image) so the user can crop before enhancing.
            image_in = gr.ImageEditor(label="Upload (Crop to the person's face)", type="numpy")
            mode = gr.Radio(["Full Image (BG + Face)", "Face Only"], value="Full Image (BG + Face)", label="Process Mode")
            # Maps to both the CodeFormer fidelity input and the blend weight.
            strength = gr.Slider(0.1, 1.0, value=0.7, label="Face Detail Strength")
            btn = gr.Button("🚀 START ENHANCEMENT", variant="primary")
        with gr.Column():
            image_out = gr.Image(label="Pro Result")

    btn.click(hybrid_enhancer, [image_in, mode, strength], image_out)

if __name__ == "__main__":
    # queue() serializes requests -- the ONNX sessions are shared globals.
    demo.queue().launch()