"""Gradio demo for GenSIRR single-image reflection removal.

The module installs a PyTorch nightly at import time, loads the GenSIRR
pipeline together with its fine-tuned weights, and serves a small Gradio UI
(also exposed as an MCP server) around :func:`infer`.
"""

# PyTorch 2.8 (temporary hack)
# NOTE: the install must run before torch/torchvision are imported below.
import os
os.system('pip install --upgrade --pre --extra-index-url https://download.pytorch.org/whl/nightly/cu126 "torch<2.9" "torchvision" spaces')
os.environ['DIFFUSERS_ENABLE_HUB_KERNELS'] = 'yes'
import torchvision.transforms.functional as TF

# Actual demo code
import gradio as gr
import numpy as np
import spaces
import torch
import random
from PIL import Image

from pipeline import GenSIRR
from diffusers.utils import load_image
import torch.nn.functional as F
from optimization import optimize_pipeline_
from huggingface_hub import hf_hub_download

# Largest seed accepted by the UI slider and by random seed generation.
MAX_SEED = np.iinfo(np.int32).max

# Length in pixels that the shorter image side is resized to before inference.
INFERENCE_SIZE = 512

# The input tensor is padded so both spatial dims are divisible by this value.
PAD_MULTIPLE = 16

# Prefix stripped from checkpoint keys before loading them into the pipeline.
CHECKPOINT_KEY_PREFIX = "net_g."


def pad_for_model(image: torch.Tensor, multiple: int) -> tuple[torch.Tensor, tuple[int, int]]:
    """Pad the tensor image so height/width are divisible by ``multiple``.

    Padding is added on the bottom and right edges only, using reflection.

    Args:
        image: Tensor whose last two dimensions are height and width.
            The reflect padding below assumes a 3-D ``[C, H, W]`` tensor.
        multiple: Positive integer the padded height/width must be divisible by.

    Returns:
        A ``(padded, (pad_h, pad_w))`` tuple. When no padding is needed the
        input tensor is returned unchanged together with ``(0, 0)``.

    Raises:
        ValueError: If ``multiple`` is not a positive integer.
    """
    if multiple <= 0:
        raise ValueError(f"multiple must be a positive integer, got {multiple}")

    height, width = image.shape[-2:]
    # ``-x % m`` is the distance from x up to the next multiple of m (0 if aligned).
    pad_h = -height % multiple
    pad_w = -width % multiple
    if pad_h == 0 and pad_w == 0:
        return image, (0, 0)

    # F.pad's reflect mode needs a batch dimension for 2-D spatial padding,
    # so add one temporarily and strip it afterwards.
    padded = F.pad(image.unsqueeze(0), (0, pad_w, 0, pad_h), mode="reflect").squeeze(0)
    return padded, (pad_h, pad_w)


def tensor_to_image(tensor: torch.Tensor) -> Image.Image:
    """Convert a [C,H,W] tensor in [0,1] to a PIL image.

    Values outside ``[0, 1]`` are clamped; the scaled values are truncated
    (not rounded) when converted to 8-bit.
    """
    clamped = tensor.clamp(0.0, 1.0)
    array = clamped.mul(255).byte().permute(1, 2, 0).cpu().numpy()
    return Image.fromarray(array)


def load_deepspeed_weights(model, checkpoint_path) -> None:
    """Load weights stored under a checkpoint's ``module`` entry into ``model``.

    Only keys starting with ``net_g.`` are used; that prefix is removed so the
    remaining names match the model's own state dict (the original comment
    describes it as the Lightning prefix of a DeepSpeed ZeRO Stage 2
    checkpoint).

    Args:
        model: Object exposing ``load_state_dict(state, strict=...)``.
        checkpoint_path: Path to the checkpoint file readable by ``torch.load``.

    Raises:
        KeyError: If the checkpoint has no ``module`` entry.
    """
    # NOTE(security): torch.load unpickles the file. The checkpoint is fetched
    # from the Hub, so only point this at repositories you trust.
    raw_state = torch.load(checkpoint_path, map_location="cpu")
    module_state: dict[str, torch.Tensor] = raw_state.get("module")
    if module_state is None:
        raise KeyError("Checkpoint is missing the 'module' state dict")

    # Remove the Lightning prefix so it matches the FluxKontext state dict.
    cleaned_state = {
        key[len(CHECKPOINT_KEY_PREFIX):]: value
        for key, value in module_state.items()
        if key.startswith(CHECKPOINT_KEY_PREFIX)
    }
    # strict=True makes any missing/unexpected key an error, so the returned
    # key lists carry no extra information and are not kept.
    model.load_state_dict(cleaned_state, strict=True)


pipe = GenSIRR("black-forest-labs/FLUX.1-Kontext-dev")
load_deepspeed_weights(pipe, hf_hub_download(repo_id='lime-j/GenSIRR', filename="GenSIRR.pt"))
# Optional optimizations, currently disabled:
# pipe.transformer.fuse_qkv_projections()
# pipe.transformer.set_attention_backend("_flash_3_hub")
pipe = pipe.to("cuda")
# optimize_pipeline_(pipe, image=Image.new("RGB", (512, 512)), prompt='prompt')


def _resize_short_side(image: Image.Image, size: int) -> Image.Image:
    """Resize ``image`` so its shorter side equals ``size``, keeping aspect ratio."""
    if image.width < image.height:
        return image.resize((size, int(size * image.height / image.width)))
    return image.resize((int(size * image.width / image.height), size))


@spaces.GPU
def infer(input_image, seed=42, randomize_seed=False, steps=28, progress=gr.Progress(track_tqdm=True)):
    """
    Remove reflections from an image using the GenSIRR pipeline.

    The input image is converted to RGB, resized so that its shorter side is
    512 pixels, padded to a multiple of 16 and passed through the pipeline.
    The result is cropped back to the resized (unpadded) resolution.

    Args:
        input_image (PIL.Image.Image): The input image to remove reflections
            from. Will be converted to RGB format if not already in that format.
        seed (int, optional): Random seed for reproducible generation.
            Defaults to 42. Must be between 0 and MAX_SEED (2^31 - 1).
        randomize_seed (bool, optional): If True, generates a random seed instead
            of using the provided seed value. Defaults to False.
        steps (int, optional): Controls how many steps to run the diffusion model for.
            Range: 1-30. Defaults to 28.
        progress (gr.Progress, optional): Gradio progress tracker for monitoring
            generation progress. Defaults to gr.Progress(track_tqdm=True).

    Returns:
        tuple: A 3-tuple containing:
            - PIL.Image.Image: The reflection-removed image
            - int: The seed value used for generation (useful when randomize_seed=True)
            - gr.Button: Gradio update that makes the reuse button visible

    Raises:
        gr.Error: If no input image was provided.

    Example:
        >>> clean_image, used_seed, button_update = infer(
        ...     input_image=my_image,
        ...     seed=123,
        ...     randomize_seed=False,
        ...     steps=28,
        ... )
    """
    if input_image is None:
        # Surface a readable message in the UI instead of an AttributeError.
        raise gr.Error("Please upload an image before running reflection removal.")

    if randomize_seed:
        seed = random.randint(0, MAX_SEED)
    # API/MCP clients may send numeric values as floats; the generator and the
    # step count both need real integers.
    seed = int(seed)
    steps = int(steps)

    input_image = _resize_short_side(input_image.convert("RGB"), INFERENCE_SIZE)
    tensor = TF.to_tensor(input_image)

    with torch.inference_mode():
        original_size = tensor.shape[-2:]
        padded_tensor, _ = pad_for_model(tensor, PAD_MULTIPLE)
        batch_device = padded_tensor.unsqueeze(0).to('cuda')

        # NOTE(review): width/height are the unpadded dimensions while the
        # tensor itself is padded - confirm this is what GenSIRR expects.
        output = pipe(
            image=batch_device,
            width=input_image.size[0],
            height=input_image.size[1],
            num_inference_steps=steps,
            generator=torch.Generator().manual_seed(seed),
        )

        # The pipeline may hand back either the tensor itself or a tuple
        # whose first element is the tensor.
        output_tensor = output[0] if isinstance(output, tuple) else output
        output_tensor = output_tensor.squeeze(0).detach().cpu()

        # Crop away the bottom/right padding added before inference.
        h, w = original_size
        output_image = tensor_to_image(output_tensor[..., :h, :w])

    return output_image, seed, gr.Button(visible=True)


@spaces.GPU
def infer_example(input_image):
    """Run :func:`infer` with default settings and return ``(image, seed)``."""
    image, seed, _ = infer(input_image)
    return image, seed


css = """
#col-container {
    margin: 0 auto;
    max-width: 960px;
}
"""

with gr.Blocks(css=css) as demo:

    with gr.Column(elem_id="col-container"):
        gr.Markdown("""# GenSIRR: Rectifying Latent Space for Generative SIRR

This is a demo for our generative single-image reflection removal model.
To limit the running time, the model here runs at a 512px resolution. We strongly suggest you to use 768px or 1024px for better performance.
        """)
        with gr.Row():
            with gr.Column():
                input_image = gr.Image(label="Upload the image for reflection removal", type="pil")
                with gr.Row():
                    run_button = gr.Button("Run")

                with gr.Accordion("Advanced Settings", open=False):
                    seed = gr.Slider(
                        label="Seed",
                        minimum=0,
                        maximum=MAX_SEED,
                        step=1,
                        value=0,
                    )

                    randomize_seed = gr.Checkbox(label="Randomize seed", value=True)

                    steps = gr.Slider(
                        label="Steps",
                        minimum=1,
                        maximum=30,
                        value=28,
                        step=1,
                    )

            with gr.Column():
                result = gr.Image(label="Result", show_label=False, interactive=False)
                reuse_button = gr.Button("Reuse this image", visible=False)

    # Run inference on click; the seed slider is also an output so a
    # randomized seed is shown back to the user.
    gr.on(
        triggers=[run_button.click],
        fn=infer,
        inputs=[input_image, seed, randomize_seed, steps],
        outputs=[result, seed, reuse_button],
    )
    # Feed the result back in as the next input image.
    reuse_button.click(
        fn=lambda image: image,
        inputs=[result],
        outputs=[input_image],
    )

demo.launch(mcp_server=True)