File size: 8,795 Bytes
d6deb62
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
import numpy as np
from PIL import Image, ImageFilter
import torch
from transformers import AutoImageProcessor, AutoModelForSemanticSegmentation, AutoModelForDepthEstimation
from scipy.ndimage import gaussian_filter
import gradio as gr

# Global models (loaded once at startup)
# Every name below starts as None and is assigned by load_models(); the
# inference helpers in this file read them as module-level globals.
segmentation_model = None      # semantic segmentation network
segmentation_processor = None  # image processor paired with segmentation_model
depth_model = None             # depth estimation network
depth_processor = None         # image processor paired with depth_model
device = None                  # "cuda" or "cpu", chosen in load_models()

def load_models():
    """Populate the module-level model, processor and device globals.

    Picks CUDA when torch reports it as available (CPU otherwise), then
    loads the SegFormer segmentation checkpoint and the Depth Anything V2
    checkpoint, switching each model to eval mode and moving it to the
    selected device. Progress is reported with print().
    """
    global segmentation_model, segmentation_processor, depth_model, depth_processor, device

    device = "cuda" if torch.cuda.is_available() else "cpu"
    print(f"Using device: {device}")

    # --- semantic segmentation ---
    print("Loading segmentation model...")
    seg_checkpoint = "nvidia/segformer-b0-finetuned-ade-512-512"
    segmentation_processor = AutoImageProcessor.from_pretrained(seg_checkpoint)
    segmentation_model = AutoModelForSemanticSegmentation.from_pretrained(seg_checkpoint)

    # --- monocular depth estimation ---
    print("Loading depth estimation model...")
    depth_checkpoint = "depth-anything/Depth-Anything-V2-Base-hf"
    depth_processor = AutoImageProcessor.from_pretrained(depth_checkpoint)
    depth_model = AutoModelForDepthEstimation.from_pretrained(depth_checkpoint)

    # Inference only: disable dropout etc. and place both nets on the device.
    for net in (segmentation_model, depth_model):
        net.eval()
        net.to(device)

    print("Models loaded successfully!")

def get_person_mask(image):
    """Return a 512x512 "L"-mode mask that is 255 where a person is predicted.

    The image is resized to 512x512, run through the global segmentation
    model, and the per-pixel argmax class is compared against the model's
    person class. If the model's label set has no "person" / "people" /
    "human" entry, an all-zero mask is returned.
    """
    side = 512
    resized = image.resize((side, side), Image.BILINEAR)

    # Forward pass; logits are upsampled back to the working resolution
    # before taking the per-pixel winning class.
    model_inputs = segmentation_processor(images=resized, return_tensors="pt").to(device)
    with torch.no_grad():
        raw_logits = segmentation_model(**model_inputs).logits
        upsampled = torch.nn.functional.interpolate(
            raw_logits, size=(side, side), mode="bilinear", align_corners=False
        )
        class_map = upsampled.argmax(dim=1)[0].cpu().numpy()

    # Build a lowercase label -> class id lookup from the model config.
    ids_by_label = {}
    for class_id, label in segmentation_model.config.id2label.items():
        ids_by_label[label.lower()] = int(class_id)

    # First label (in config order) that names a person, if any.
    person_labels = [name for name in ids_by_label if name in ["person", "people", "human"]]
    if not person_labels:
        # Model has no person-like class: nothing can be kept sharp.
        return Image.new("L", (512, 512), 0)

    person_id = ids_by_label[person_labels[0]]
    binary = np.where(class_map == person_id, 255, 0).astype(np.uint8)

    return Image.fromarray(binary, mode="L")

def gaussian_blur_effect(image, blur_radius=15):
    """Blur everything except the detected person.

    Args:
        image: PIL image, or None.
        blur_radius: radius handed to PIL's ImageFilter.GaussianBlur.

    Returns:
        A 512x512 RGB PIL image in which pixels covered by the person mask
        come from the resized input and all other pixels come from its
        blurred copy; None when ``image`` is None.
    """
    if image is None:
        return None

    rgb = image if image.mode == "RGB" else image.convert("RGB")
    resized = rgb.resize((512, 512), Image.BILINEAR)

    person_mask = get_person_mask(resized)
    background = resized.filter(ImageFilter.GaussianBlur(radius=blur_radius))

    sharp_px = np.array(resized)
    soft_px = np.array(background)
    # Scale the mask to [0, 1] and add a trailing axis so it broadcasts
    # across the three colour channels.
    alpha = (np.array(person_mask) / 255.0)[..., np.newaxis]

    blended = sharp_px * alpha + soft_px * (1 - alpha)
    return Image.fromarray(blended.astype(np.uint8))

def get_depth_map(image):
    """Run the global depth model and return its prediction as a NumPy array.

    The image is resized to 512x512 before inference and the model's
    ``predicted_depth`` is bicubically resampled to 512x512 afterwards.
    Values are returned as produced by the model (no normalisation).
    """
    target_size = (512, 512)
    resized = image.resize(target_size, Image.BILINEAR)

    model_inputs = depth_processor(images=resized, return_tensors="pt").to(device)
    with torch.no_grad():
        raw_depth = depth_model(**model_inputs).predicted_depth

    # interpolate() needs a channel axis, so add one, resample, then drop
    # all singleton axes again.
    resampled = torch.nn.functional.interpolate(
        raw_depth.unsqueeze(1),
        size=target_size,
        mode="bicubic",
        align_corners=False,
    )

    return resampled.squeeze().cpu().numpy()

def _blur_by_depth(img_array, depth_map, max_blur, focus_threshold, num_blur_levels=20):
    """Blend progressively blurred copies of ``img_array`` according to depth.

    Args:
        img_array: float image array of shape (H, W, C).
        depth_map: (H, W) array from the depth model. It is inverted below,
            i.e. larger raw values are treated as nearer to the camera.
        max_blur: Gaussian sigma applied to the farthest pixels.
        focus_threshold: on the inverted depth scale ``[0, max_blur]``,
            pixels at or below this value stay perfectly sharp.
        num_blur_levels: number of discrete sigma bands used to approximate
            a continuously varying blur.

    Returns:
        A new float array with the same shape as ``img_array``.
    """
    output_array = img_array.copy()

    depth_max = depth_map.max()
    depth_range = depth_max - depth_map.min()
    blur_span = max_blur - focus_threshold
    # A flat (or NaN) depth map carries no near/far information, and a focus
    # threshold at or beyond max_blur puts every pixel in focus. In both
    # cases nothing should be blurred (and we avoid dividing by zero).
    if not depth_range > 0 or not blur_span > 0:
        return output_array

    # Invert depth (higher values = farther = more blur), scaled to [0, max_blur].
    farness = (depth_max - depth_map) / depth_range * max_blur

    # Per-pixel sigma: 0 inside the focus zone, ramping linearly up to
    # max_blur for the farthest pixels.
    blur_map = np.zeros_like(farness)
    far_mask = farness > focus_threshold
    blur_map[far_mask] = (farness[far_mask] - focus_threshold) / blur_span * max_blur

    for level in range(1, num_blur_levels + 1):
        sigma_min = (level - 1) * max_blur / num_blur_levels
        sigma_max = level * max_blur / num_blur_levels
        sigma_avg = (sigma_min + sigma_max) / 2.0
        if sigma_avg <= 0.1:
            # Blur this weak is not worth a filter pass; leave the band sharp.
            continue

        # `blur_map > 0` keeps in-focus pixels out of the first band so the
        # foreground really stays sharp.
        in_level = (blur_map > 0) & (blur_map >= sigma_min)
        if level < num_blur_levels:
            in_level &= blur_map < sigma_max
        # The last band has no upper bound, so pixels sitting exactly at
        # max_blur (the farthest ones) are blurred instead of being skipped.

        if not in_level.any():
            continue

        # sigma 0 on the channel axis blurs each channel independently.
        blurred = gaussian_filter(img_array, sigma=(sigma_avg, sigma_avg, 0))
        output_array[in_level] = blurred[in_level]

    return output_array


def lens_blur_effect(image, max_blur=15, focus_threshold=5.0):
    """Apply depth-based lens blur (foreground sharp, background blurred).

    Args:
        image: PIL image, or None.
        max_blur: Gaussian sigma used for the farthest pixels.
        focus_threshold: cut-off on the inverted, ``[0, max_blur]``-scaled
            depth below which pixels are left untouched. Values greater
            than or equal to ``max_blur`` leave the whole image sharp.

    Returns:
        A 512x512 RGB PIL image, or None when ``image`` is None.
    """
    if image is None:
        return None

    # Convert to RGB if needed
    if image.mode != "RGB":
        image = image.convert("RGB")

    # Resize to 512x512
    img_512 = image.resize((512, 512), Image.BILINEAR)

    # Get depth map
    depth_map = get_depth_map(img_512)

    # Work in float so blending does not wrap around in uint8.
    img_array = np.array(img_512).astype(np.float32)
    output_array = _blur_by_depth(img_array, depth_map, max_blur, focus_threshold)

    output_array = np.clip(output_array, 0, 255).astype(np.uint8)
    return Image.fromarray(output_array)

# Load models at startup
# NOTE: this call is at module level (not under the __main__ guard below), so
# merely importing this module also loads both models.
load_models()

# Create Gradio interface
# Layout: an intro text, two tabs (one per effect), and a footer. Each tab has
# an input column (image, sliders, button), an output column, a click handler
# wired to the matching effect function, and a set of example images.
with gr.Blocks(title="Image Blur Effects Demo") as demo:
    gr.Markdown("""
    # 🎨 Image Blur Effects Demo
    
    Upload an image to apply **Gaussian Blur** or **Lens Blur** effects.
    
    - **Gaussian Blur**: Detects people and blurs the background, keeping the person sharp.
    - **Lens Blur**: Uses depth estimation to simulate camera lens bokeh effect (foreground sharp, background blurred).
    """)
    
    with gr.Tab("Gaussian Blur"):
        gr.Markdown("### Background blur with person detection")
        with gr.Row():
            with gr.Column():
                # type="pil" makes Gradio hand the callback a PIL image.
                gaussian_input = gr.Image(type="pil", label="Input Image")
                # Forwarded to gaussian_blur_effect as `blur_radius`.
                gaussian_radius = gr.Slider(
                    minimum=5, maximum=30, value=15, step=1,
                    label="Blur Radius (σ)"
                )
                gaussian_btn = gr.Button("Apply Gaussian Blur", variant="primary")
            with gr.Column():
                gaussian_output = gr.Image(type="pil", label="Output Image")
        
        # Inputs are passed positionally: (image, blur_radius).
        gaussian_btn.click(
            fn=gaussian_blur_effect,
            inputs=[gaussian_input, gaussian_radius],
            outputs=gaussian_output
        )
        
        # Example images are referenced by relative path.
        # NOTE(review): presumably these files must sit next to this script — verify.
        gr.Examples(
            examples=[["self.jpg"], ["self-pic.jpg"]],
            inputs=gaussian_input,
            label="Example Images"
        )
    
    with gr.Tab("Lens Blur (Depth-Based)"):
        gr.Markdown("### Depth-based bokeh effect simulation")
        with gr.Row():
            with gr.Column():
                lens_input = gr.Image(type="pil", label="Input Image")
                # Forwarded to lens_blur_effect as `max_blur`.
                lens_max_blur = gr.Slider(
                    minimum=5, maximum=25, value=15, step=1,
                    label="Max Blur Intensity"
                )
                # Forwarded to lens_blur_effect as `focus_threshold`.
                # NOTE(review): this range (0-10) overlaps the Max Blur range
                # (5-25), so the threshold can equal or exceed max_blur.
                lens_focus = gr.Slider(
                    minimum=0, maximum=10, value=5.0, step=0.5,
                    label="Focus Threshold (lower = more blur)"
                )
                lens_btn = gr.Button("Apply Lens Blur", variant="primary")
            with gr.Column():
                lens_output = gr.Image(type="pil", label="Output Image")
        
        # Inputs are passed positionally: (image, max_blur, focus_threshold).
        lens_btn.click(
            fn=lens_blur_effect,
            inputs=[lens_input, lens_max_blur, lens_focus],
            outputs=lens_output
        )
        
        # Same example files as the Gaussian tab.
        gr.Examples(
            examples=[["self.jpg"], ["self-pic.jpg"]],
            inputs=lens_input,
            label="Example Images"
        )
    
    gr.Markdown("""
    ---
    **Technical Details:**
    - Segmentation: NVIDIA SegFormer (ADE20K)
    - Depth Estimation: Depth Anything V2
    - All images resized to 512×512 for processing
    """)

# Start the web server only when run as a script.
if __name__ == "__main__":
    demo.launch()