Update app.py

app.py CHANGED
@@ -13,6 +13,15 @@ from PIL import Image
 from huggingface_hub import snapshot_download
 import requests
 
+# For ESRGAN (requires pip install basicsr gfpgan)
+try:
+    from basicsr.archs.rrdbnet_arch import RRDBNet
+    from basicsr.utils import img2tensor, tensor2img
+    USE_ESRGAN = True
+except ImportError:
+    USE_ESRGAN = False
+    warnings.warn("basicsr not installed; falling back to LANCZOS interpolation.")
+
 css = """
 #col-container {
     margin: 0 auto;
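Review note on this hunk: the except branch calls warnings.warn, but no import of warnings appears in the visible context; if app.py does not already import it higher up, the fallback path itself raises NameError. A self-contained sketch of the optional import, assuming nothing above this hunk:

# Sketch: optional ESRGAN import carrying its own warnings import
import warnings

try:
    from basicsr.archs.rrdbnet_arch import RRDBNet
    from basicsr.utils import img2tensor, tensor2img
    USE_ESRGAN = True
except ImportError:
    USE_ESRGAN = False
    warnings.warn("basicsr not installed; falling back to LANCZOS interpolation.")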
@@ -70,8 +79,21 @@ pipe.enable_vae_slicing()
 
 print("✅ All models loaded successfully!")
 
+# Download ESRGAN model if using
+if USE_ESRGAN:
+    esrgan_path = "4x-UltraSharp.pth"
+    if not os.path.exists(esrgan_path):
+        url = "https://huggingface.co/uwg/upscaler/resolve/main/ESRGAN/4x-UltraSharp.pth"
+        with open(esrgan_path, "wb") as f:
+            f.write(requests.get(url).content)
+    esrgan_model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=23, num_grow_ch=32, scale=4)
+    state_dict = torch.load(esrgan_path)['params_ema']
+    esrgan_model.load_state_dict(state_dict)
+    esrgan_model.eval()
+    esrgan_model.to(device)
+
 MAX_SEED = 1000000
-MAX_PIXEL_BUDGET =
+MAX_PIXEL_BUDGET = 8192 * 8192  # Increased for tiling support
 
 
 def generate_caption(image):
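Two fragile points worth flagging here. The download path uses os.path.exists, which assumes os is imported earlier in app.py. And torch.load(esrgan_path)['params_ema'] assumes a Real-ESRGAN-style checkpoint wrapper; classic ESRGAN checkpoints such as 4x-UltraSharp are often saved as a bare state dict, in which case this line raises KeyError. A tolerant loader (a sketch; the key names are assumptions, not verified against this exact file):

# Sketch: handle both wrapped and bare ESRGAN checkpoints
ckpt = torch.load(esrgan_path, map_location="cpu")
if isinstance(ckpt, dict) and "params_ema" in ckpt:
    state_dict = ckpt["params_ema"]   # Real-ESRGAN EMA weights
elif isinstance(ckpt, dict) and "params" in ckpt:
    state_dict = ckpt["params"]       # Real-ESRGAN non-EMA weights
else:
    state_dict = ckpt                 # bare ESRGAN state dict
esrgan_model.load_state_dict(state_dict)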
@@ -115,33 +137,82 @@ def process_input(input_image, upscale_factor):
         gr.Info(
             f"Requested output image is too large. Resizing input to fit within pixel budget."
         )
-        input_image = input_image.resize(
-            [... arguments lost in extraction ...]
-            resample=Image.LANCZOS
-        )
+        target_input_pixels = MAX_PIXEL_BUDGET / (upscale_factor ** 2)
+        scale = (target_input_pixels / (w * h)) ** 0.5
+        new_w = int(w * scale) - int(w * scale) % 8
+        new_h = int(h * scale) - int(h * scale) % 8
+        input_image = input_image.resize((new_w, new_h), resample=Image.LANCZOS)
         was_resized = True
 
-    w, h = input_image.size
-    w = w - w % 8
-    h = h - h % 8
-
-    return input_image.resize((w, h), resample=Image.LANCZOS), w_original, h_original, was_resized
+    return input_image, w_original, h_original, was_resized
 
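A quick sanity check of the new budget arithmetic (a worked example, not part of the commit): with the 8192x8192 budget, a 4000x3000 input, and a 4x upscale, the input is shrunk so the final output just fits.

# Worked example of the pixel-budget resize above
MAX_PIXEL_BUDGET = 8192 * 8192                                   # 67,108,864 pixels
w, h, upscale_factor = 4000, 3000, 4

target_input_pixels = MAX_PIXEL_BUDGET / (upscale_factor ** 2)   # 4,194,304
scale = (target_input_pixels / (w * h)) ** 0.5                   # ~0.591
new_w = int(w * scale) - int(w * scale) % 8                      # 2360
new_h = int(h * scale) - int(h * scale) % 8                      # 1768

# 4x output would be 9440 x 7072 ~= 66.8M pixels, just under budget
assert new_w * new_h * upscale_factor ** 2 <= MAX_PIXEL_BUDGET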
 def load_image_from_url(url):
     """Load image from URL"""
     try:
-        response = requests.get(url)
+        response = requests.get(url, stream=True)
         response.raise_for_status()
-        return Image.open(
+        return Image.open(response.raw)
     except Exception as e:
         raise gr.Error(f"Failed to load image from URL: {e}")
 
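Image.open(response.raw) can fail even after a successful request: response.raw bypasses requests' content decoding, so gzip- or deflate-encoded responses hand PIL compressed bytes, and the raw stream is not seekable. A buffered variant (a sketch; the added timeout is a defensive assumption):

# Sketch: buffer the payload so PIL gets decoded, seekable bytes
import io

def load_image_from_url(url):
    """Load image from URL"""
    try:
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        return Image.open(io.BytesIO(response.content))
    except Exception as e:
        raise gr.Error(f"Failed to load image from URL: {e}")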
+def esrgan_upscale(image, scale=4):
+    if not USE_ESRGAN:
+        return image.resize((image.width * scale, image.height * scale), resample=Image.LANCZOS)
+    img = img2tensor(np.array(image) / 255., bgr2rgb=False, float32=True)
+    with torch.no_grad():
+        output = esrgan_model(img.unsqueeze(0)).squeeze()
+    output_img = tensor2img(output, rgb2bgr=False, min_max=(0, 1))
+    return Image.fromarray(output_img)
+
+
+def tiled_flux_img2img(pipe, prompt, image, strength, steps, guidance, generator, tile_size=1024, overlap=32):
+    """Tiled Img2Img to mimic Ultimate SD Upscaler tiling"""
+    w, h = image.size
+    output = image.copy()  # Start with the control image
+
+    for x in range(0, w, tile_size - overlap):
+        for y in range(0, h, tile_size - overlap):
+            tile_w = min(tile_size, w - x)
+            tile_h = min(tile_size, h - y)
+            tile = image.crop((x, y, x + tile_w, y + tile_h))
+
+            # Run Flux on tile
+            gen_tile = pipe(
+                prompt=prompt,
+                image=tile,
+                strength=strength,
+                num_inference_steps=steps,
+                guidance_scale=guidance,
+                height=tile_h,
+                width=tile_w,
+                generator=generator,
+            ).images[0]
+
+            # Paste with blending if overlap
+            if overlap > 0:
+                paste_box = (x, y, x + tile_w, y + tile_h)
+                if x > 0 or y > 0:
+                    # Simple linear blend on overlaps
+                    mask = Image.new('L', (tile_w, tile_h), 255)
+                    if x > 0:
+                        for i in range(overlap):
+                            for j in range(tile_h):
+                                mask.putpixel((i, j), int(255 * (i / overlap)))
+                    if y > 0:
+                        for i in range(tile_w):
+                            for j in range(overlap):
+                                mask.putpixel((i, j), int(255 * (j / overlap)))
+                    output.paste(gen_tile, paste_box, mask)
+                else:
+                    output.paste(gen_tile, paste_box)
+            else:
+                output.paste(gen_tile, (x, y))
+
+    return output
+
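As committed, esrgan_upscale builds its input tensor on the CPU while esrgan_model was moved to device, so the first ESRGAN call on GPU raises a device-mismatch error — a plausible cause of this Space's runtime-error status. A device-safe sketch, reusing the same basicsr helpers (and assuming numpy is imported as np, as the committed code already does):

# Sketch: move the input to the model's device and bring the result back
def esrgan_upscale(image, scale=4):
    if not USE_ESRGAN:
        return image.resize((image.width * scale, image.height * scale), resample=Image.LANCZOS)
    img = img2tensor(np.array(image) / 255., bgr2rgb=False, float32=True)
    model_device = next(esrgan_model.parameters()).device
    with torch.no_grad():
        output = esrgan_model(img.unsqueeze(0).to(model_device)).squeeze(0)
    return Image.fromarray(tensor2img(output, rgb2bgr=False, min_max=(0, 1)))

Note that scale is ignored on the ESRGAN path: the RRDBNet loaded above is fixed at 4x regardless of the argument.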
 @spaces.GPU(duration=120)
 def enhance_image(
     image_input,
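Two limits of tiled_flux_img2img as committed: FLUX pipelines expect height/width divisible by 16, while edge tiles here can have arbitrary sizes, and the putpixel blend loops are very slow at 1024-pixel tiles. Helpers like these (an assumed improvement, not in the commit) snap tile sizes and vectorize the mask:

# Sketch: 16-pixel alignment plus a NumPy blend mask
import numpy as np
from PIL import Image

def snap16(n):
    # FLUX img2img sizes should be multiples of 16; snap down (min 16)
    return max(16, n - n % 16)

def linear_blend_mask(tile_w, tile_h, overlap, blend_left, blend_top):
    # Vectorized equivalent of the putpixel ramps in tiled_flux_img2img
    o = min(overlap, tile_w, tile_h)
    mask = np.full((tile_h, tile_w), 255, dtype=np.float32)
    ramp = np.linspace(0, 255, o, endpoint=False)
    if blend_left:
        mask[:, :o] = np.minimum(mask[:, :o], ramp[None, :])
    if blend_top:
        mask[:o, :] = np.minimum(mask[:o, :], ramp[:, None])
    return Image.fromarray(mask.astype(np.uint8), mode="L")

Inside the loop this would be used as mask = linear_blend_mask(tile_w, tile_h, overlap, x > 0, y > 0) followed by output.paste(gen_tile, paste_box, mask).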
@@ -183,33 +254,35 @@ def enhance_image(
     else:
         prompt = custom_prompt if custom_prompt.strip() else ""
 
-    # Rescale with upscale factor using LANCZOS
-    w, h = input_image.size
-    control_image = input_image.resize((w * upscale_factor, h * upscale_factor), resample=Image.LANCZOS)
-
     generator = torch.Generator().manual_seed(seed)
 
     gr.Info("🚀 Upscaling image...")
-
-    # [single-pass pipeline call; deleted lines lost in extraction]
+
+    # Initial upscale
+    if USE_ESRGAN and upscale_factor == 4:
+        control_image = esrgan_upscale(input_image, upscale_factor)
+    else:
+        w, h = input_image.size
+        control_image = input_image.resize((w * upscale_factor, h * upscale_factor), resample=Image.LANCZOS)
+
+    # Tiled Flux Img2Img for refinement
+    image = tiled_flux_img2img(
+        pipe,
+        prompt,
+        control_image,
+        denoising_strength,
+        num_inference_steps,
+        guidance_scale,
+        generator,
+        tile_size=1024,
+        overlap=32
+    )
 
     if was_resized:
         gr.Info(f"📏 Resizing output to target size: {w_original * upscale_factor}x{h_original * upscale_factor}")
-
-        # Resize to target desired size
-        final_image = image.resize((w_original * upscale_factor, h_original * upscale_factor), resample=Image.LANCZOS)
+        image = image.resize((w_original * upscale_factor, h_original * upscale_factor), resample=Image.LANCZOS)
 
-    return [true_input_image,
+    return [true_input_image, image], seed, generated_caption if use_generated_caption else ""
 
 
 # Create Gradio interface
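One behavioral consequence of the dispatch above: ESRGAN is only used when upscale_factor == 4, so 2x requests silently fall back to LANCZOS even with basicsr installed. If that is unwanted, a possible extension (a sketch, not in the commit) runs the 4x model and downsamples:

# Sketch: serve 2x with the 4x ESRGAN model, then downsample
if USE_ESRGAN and upscale_factor == 4:
    control_image = esrgan_upscale(input_image, upscale_factor)
elif USE_ESRGAN and upscale_factor == 2:
    up4 = esrgan_upscale(input_image, 4)
    control_image = up4.resize((input_image.width * 2, input_image.height * 2), resample=Image.LANCZOS)
else:
    w, h = input_image.size
    control_image = input_image.resize((w * upscale_factor, h * upscale_factor), resample=Image.LANCZOS)

The call into tiled_flux_img2img also passes denoising_strength, num_inference_steps, and guidance_scale positionally; those names must match enhance_image parameters defined outside this hunk.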
@@ -380,10 +453,10 @@ with gr.Blocks(css=css, title="🎨 AI Image Enhancer - Florence-2 + FLUX") as d
             <h4>💡 How it works:</h4>
             <ol>
                 <li><strong>Florence-2</strong> analyzes your image and generates a detailed caption</li>
-                <li>Initial upscale with LANCZOS interpolation</li>
-                <li><strong>FLUX Img2Img</strong> enhances the upscaled image with AI diffusion guided by the caption</li>
+                <li>Initial upscale with LANCZOS interpolation (or ESRGAN if installed)</li>
+                <li><strong>FLUX Img2Img</strong> enhances the upscaled image with tiled AI diffusion guided by the caption</li>
             </ol>
-            <p><strong>Note:</strong> Output limited to
+            <p><strong>Note:</strong> Output limited to 8192x8192 pixels total budget. Tiling enables larger sizes.</p>
             </div>
             """)