Spaces:

X-HighVoltage-X
/

FLUX.1-Fill-dev-Inpaint-Tester

Running on Zero

App Files Files Community

X-HighVoltage-X committed 26 days ago

Commit

f3d4c16

verified ·

1 Parent(s): 1041613

Update app.py

Browse files

Files changed (1) hide show

app.py +73 -180

app.py CHANGED Viewed

@@ -10,132 +10,91 @@ from PIL import Image
 MAX_SEED = np.iinfo(np.int32).max
-pipe = FluxFillPipeline.from_pretrained(
-    "black-forest-labs/FLUX.1-Fill-dev",
-    torch_dtype=torch.bfloat16,
-)
 flux_keywords_available = ["IMG_1025.HEIC", "Selfie"]
-# ------------------------------------------------------------------
-# LATENT MANIPULATION
-# ------------------------------------------------------------------
 def pack_latents(latents, batch_size, num_channels, height, width):
     latents = latents.view(batch_size, num_channels, height // 2, 2, width // 2, 2)
     latents = latents.permute(0, 2, 4, 1, 3, 5)
-    latents = latents.reshape(
-        batch_size,
-        (height // 2) * (width // 2),
-        num_channels * 4,
-    )
     return latents
 def unpack_latents(latents, height, width, h_scale=2, w_scale=2):
     batch_size, seq_len, channels = latents.shape
     latents = latents.view(
-        batch_size,
-        height // h_scale,
-        width // w_scale,
-        channels // (h_scale * w_scale),
-        h_scale,
-        w_scale,
     )
     latents = latents.permute(0, 3, 1, 4, 2, 5)
-    latents = latents.reshape(
-        batch_size,
-        channels // (h_scale * w_scale),
-        height,
-        width,
-    )
     return latents
-# ------------------------------------------------------------------
-# HARD PRESERVE CALLBACK
-# ------------------------------------------------------------------
-def get_hard_preserve_callback(
     pipe,
     original_image,
     preserved_area_mask,
     total_steps,
     step_images_list,
 ):
     device = pipe.device
     dtype = pipe.transformer.dtype
-    with torch.no_grad():
-        img_tensor = (
-            torch.from_numpy(np.array(original_image).transpose(2, 0, 1))
-            .float()
-            / 127.5
-            - 1.0
-        )
-        img_tensor = img_tensor.unsqueeze(0).to(device, dtype)
-        init_latents = pipe.vae.encode(img_tensor).latent_dist.sample()
-        init_latents = (
-            init_latents - pipe.vae.config.shift_factor
-        ) * pipe.vae.config.scaling_factor
-        init_latents = init_latents.to(dtype)
-        _, _, h_latent, w_latent = init_latents.shape
-        packed_init_latents = pack_latents(
-            init_latents,
-            batch_size=1,
-            num_channels=16,
-            height=h_latent,
-            width=w_latent,
-        ).to(dtype)
-        mask_tensor = (
-            torch.from_numpy(np.array(preserved_area_mask.convert("L")))
-            .float()
-            / 255.0
-        )
-        mask_tensor = mask_tensor.unsqueeze(0).unsqueeze(0).to(device, dtype)
-        latent_mask = torch.nn.functional.interpolate(
-            mask_tensor,
-            size=(h_latent, w_latent),
-            mode="nearest",
-        )
-        packed_preserved_mask = pack_latents(
-            latent_mask,
-            batch_size=1,
-            num_channels=1,
-            height=h_latent,
-            width=w_latent,
-        )
-        packed_preserved_mask = (packed_preserved_mask > 0.5).to(dtype)
-        packed_preserved_mask = packed_preserved_mask.repeat(1, 1, 16)
     def callback_fn(pipe, step, timestep, callback_kwargs):
         latents = callback_kwargs["latents"]
-        latent_dtype = latents.dtype
-        latents = (
-            latents * (1.0 - packed_preserved_mask)
-            + packed_init_latents * packed_preserved_mask
-        ).to(latent_dtype)
         if step % 5 == 0 or step == total_steps - 1:
             with torch.no_grad():
                 unpacked = unpack_latents(latents, h_latent, w_latent)
-                unpacked = (
-                    unpacked / pipe.vae.config.scaling_factor
-                ) + pipe.vae.config.shift_factor
-                decoded = pipe.vae.decode(
-                    unpacked.to(pipe.vae.dtype)
-                ).sample
-                img_step = pipe.image_processor.postprocess(
-                    decoded, output_type="pil"
-                )[0]
                 step_images_list.append(img_step)
         callback_kwargs["latents"] = latents
@@ -144,10 +103,7 @@ def get_hard_preserve_callback(
     return callback_fn
-# ------------------------------------------------------------------
-# LORA UTILITIES
-# ------------------------------------------------------------------
 def activate_loras(pipe: FluxFillPipeline, loras_with_weights: list[tuple[LoRA, float]]):
     adapter_names = []
     adapter_weights = []
@@ -164,10 +120,7 @@ def deactivate_loras(pipe):
     return pipe
-# ------------------------------------------------------------------
-# GENERATION
-# ------------------------------------------------------------------
 def calculate_optimal_dimensions(image):
     original_width, original_height = image.size
     FIXED_DIMENSION = 1024
@@ -192,30 +145,25 @@ def inpaint(
 ):
     image = image.convert("RGB")
     mask = mask.convert("L")
     width, height = calculate_optimal_dimensions(image)
     image_resized = image.resize((width, height), Image.LANCZOS)
     pipe.to("cuda")
     step_images = []
     callback = None
     if preserved_area_mask is not None:
-        preserved_area_resized = preserved_area_mask.resize(
-            (width, height), Image.NEAREST
-        )
-        callback = get_hard_preserve_callback(
-            pipe,
-            image_resized,
-            preserved_area_resized,
-            num_inference_steps,
-            step_images,
         )
     result = pipe(
         image=image_resized,
-        mask_image=mask.resize((width, height), Image.NEAREST),
         prompt=prompt,
         width=width,
         height=height,
@@ -261,16 +209,12 @@ def inpaint_api(
     final_prompt = ""
     if flux_keywords:
         final_prompt += ", ".join(flux_keywords) + ", "
     if selected_loras_with_weights:
         for lora, _ in selected_loras_with_weights:
             if lora.keyword:
-                final_prompt += (
-                    lora.keyword
-                    if isinstance(lora.keyword, str)
-                    else ", ".join(lora.keyword)
-                ) + ", "
     final_prompt += prompt
     if not isinstance(seed, int) or seed < 0:
@@ -288,47 +232,17 @@ def inpaint_api(
     )
-# ------------------------------------------------------------------
-# UI
-# ------------------------------------------------------------------
-with gr.Blocks(
-    title="FLUX.1 Fill dev + HARD Area Preservation",
-    theme=gr.themes.Soft(),
-) as demo:
     with gr.Row():
         with gr.Column(scale=2):
-            prompt_input = gr.Text(
-                label="Prompt",
-                lines=4,
-                value="a 25 years old woman",
-            )
-            seed_slider = gr.Slider(
-                label="Seed",
-                minimum=-1,
-                maximum=MAX_SEED,
-                step=1,
-                value=-1,
-            )
-            num_inference_steps_input = gr.Number(
-                label="Inference steps",
-                value=40,
-            )
-            guidance_scale_input = gr.Number(
-                label="Guidance scale",
-                value=30,
-            )
-            strength_input = gr.Number(
-                label="Strength",
-                value=1.0,
-                maximum=1.0,
-            )
             gr.Markdown("### Flux Keywords")
-            flux_keywords_input = gr.CheckboxGroup(
-                choices=flux_keywords_available,
-                label="Flux Keywords",
-            )
             if loras:
                 gr.Markdown("### Available LoRAs")
@@ -343,32 +257,16 @@ with gr.Blocks(
                 )
         with gr.Column(scale=3):
-            image_input = gr.Image(
-                label="Original Image",
-                type="pil",
-            )
-            mask_input = gr.Image(
-                label="Inpaint Mask (Area to change)",
-                type="pil",
-            )
-            preserved_area_input = gr.Image(
-                label="Preserved Area Mask (Area to keep)",
-                type="pil",
-            )
-            run_btn = gr.Button(
-                "Generate",
-                variant="primary",
-            )
         with gr.Column(scale=3):
             result_image = gr.Image(label="Result")
             used_prompt_box = gr.Text(label="Final Prompt")
             used_seed_box = gr.Number(label="Used Seed")
-            steps_gallery = gr.Gallery(
-                label="Evolution (Steps)",
-                columns=3,
-                preview=True,
-            )
     run_btn.click(
         fn=inpaint_api,
@@ -384,12 +282,7 @@ with gr.Blocks(
             flux_keywords_input,
             loras_selected_input,
         ],
-        outputs=[
-            result_image,
-            steps_gallery,
-            used_prompt_box,
-            used_seed_box,
-        ],
     )
 if __name__ == "__main__":

 MAX_SEED = np.iinfo(np.int32).max
+pipe = FluxFillPipeline.from_pretrained("black-forest-labs/FLUX.1-Fill-dev", torch_dtype=torch.bfloat16)
 flux_keywords_available = ["IMG_1025.HEIC", "Selfie"]
+# --- LATENT MANIPULATION FUNCTIONS ---
 def pack_latents(latents, batch_size, num_channels, height, width):
     """Pack a latent map into a sequence of flattened 2x2 patches.

     Args:
         latents: Tensor holding ``batch_size * num_channels * height * width``
             elements, laid out as ``(batch_size, num_channels, height, width)``.
         batch_size: Size of the leading dimension.
         num_channels: Number of channels in ``latents``.
         height: Latent height; must be even for the 2x2 split to succeed.
         width: Latent width; must be even for the 2x2 split to succeed.

     Returns:
         Tensor of shape ``(batch_size, (height // 2) * (width // 2),
         num_channels * 4)``. Token ``i * (width // 2) + j`` holds the 2x2 patch
         whose top-left corner is ``(2 * i, 2 * j)``, flattened channel-major
         (channel, then row offset, then column offset).
     """
     # Split each spatial axis into (patch index, offset inside the patch).
     patches = latents.view(batch_size, num_channels, height // 2, 2, width // 2, 2)
     # Bring both patch indices forward so that one token == one 2x2 patch.
     patches = patches.permute(0, 2, 4, 1, 3, 5)
     # Merge the patch grid into a sequence axis and (channel, dy, dx) into features.
     return patches.reshape(batch_size, (height // 2) * (width // 2), num_channels * 4)
 def unpack_latents(latents, height, width, h_scale=2, w_scale=2):
     """Rebuild a latent map from a sequence of flattened patches.

     Args:
         latents: Tensor of shape ``(batch, seq_len, channels)`` where
             ``seq_len == (height // h_scale) * (width // w_scale)`` and
             ``channels`` is divisible by ``h_scale * w_scale``.
         height: Height of the unpacked latent map.
         width: Width of the unpacked latent map.
         h_scale: Patch height (2 by default, i.e. 2x2 patches).
         w_scale: Patch width (2 by default, i.e. 2x2 patches).

     Returns:
         Tensor of shape ``(batch, channels // (h_scale * w_scale), height,
         width)``.
     """
     batch_size, _, channels = latents.shape
     out_channels = channels // (h_scale * w_scale)
     # Split tokens back into the patch grid and features into (channel, dy, dx).
     grid = latents.view(
         batch_size, height // h_scale, width // w_scale, out_channels, h_scale, w_scale
     )
     # Interleave patch index and in-patch offset again for each spatial axis.
     grid = grid.permute(0, 3, 1, 4, 2, 5)
     return grid.reshape(batch_size, out_channels, height, width)
+# --- CALLBACK (PRESERVED AREA + STEP CAPTURE) ---
def get_gradual_blend_callback(
    pipe,
    original_image,
    preserved_area_mask,
    total_steps,
    step_images_list,
    start_alpha=1.0,
    end_alpha=0.2,
):
    """Build a step-end callback that blends preserved latents back in.

    After each denoising step the returned callback mixes the VAE latents of
    ``original_image`` into the current latents wherever ``preserved_area_mask``
    is set. The mix weight moves linearly from ``start_alpha`` on the first
    step to ``end_alpha`` on the last one. On every fifth step and on the final
    step it also decodes the latents and appends the resulting PIL image to
    ``step_images_list``.

    Args:
        pipe: Pipeline providing ``device``, ``transformer.dtype``, ``vae`` and
            ``image_processor``.
        original_image: RGB image used as the preservation source; it is
            converted with ``np.array(...).transpose(2, 0, 1)``, so it must be
            height x width x channels.
        preserved_area_mask: Image whose bright pixels mark the area to keep,
            or ``None`` to disable blending.
        total_steps: Number of inference steps of the run.
        step_images_list: List mutated in place with decoded preview images.
        start_alpha: Blend weight of the original latents at step 0.
        end_alpha: Blend weight of the original latents at the last step.

    Returns:
        A ``callback_fn(pipe, step, timestep, callback_kwargs)`` function that
        returns ``callback_kwargs`` with its ``"latents"`` entry updated.
    """
    device = pipe.device
    dtype = pipe.transformer.dtype

    packed_init_latents = None
    packed_preserved_mask = None
    h_latent = w_latent = None

    if preserved_area_mask is not None:
        with torch.no_grad():
            # HWC uint8 -> 1xCxHxW in [-1, 1].
            pixels = torch.from_numpy(np.array(original_image).transpose(2, 0, 1)).float()
            img_tensor = (pixels / 127.5 - 1.0).unsqueeze(0).to(device, dtype)

            init_latents = pipe.vae.encode(img_tensor).latent_dist.sample()
            init_latents = (init_latents - pipe.vae.config.shift_factor) * pipe.vae.config.scaling_factor
            _, num_channels, h_latent, w_latent = init_latents.shape
            packed_init_latents = pack_latents(
                init_latents, batch_size=1, num_channels=num_channels, height=h_latent, width=w_latent
            )

            mask_values = torch.from_numpy(np.array(preserved_area_mask.convert("L"))).float() / 255.0
            mask_tensor = mask_values.unsqueeze(0).unsqueeze(0).to(device, dtype)
            latent_preserved_mask = torch.nn.functional.interpolate(
                mask_tensor, size=(h_latent, w_latent), mode="nearest"
            )
            packed_preserved_mask = pack_latents(
                latent_preserved_mask, batch_size=1, num_channels=1, height=h_latent, width=w_latent
            )
            # Tile the single-channel mask across all latent channels once,
            # instead of repeating it on every denoising step.
            packed_preserved_mask = packed_preserved_mask.repeat(1, 1, num_channels)

    def callback_fn(pipe, step, timestep, callback_kwargs):
        latents = callback_kwargs["latents"]

        if packed_preserved_mask is not None:
            progress = step / max(1, total_steps - 1)
            current_alpha = start_alpha - (start_alpha - end_alpha) * progress
            effective_mask = packed_preserved_mask * current_alpha
            blended = (1 - effective_mask) * latents + effective_mask * packed_init_latents
            # Keep the dtype the pipeline handed us so the next step is unaffected.
            latents = blended.to(latents.dtype)

        capture_step = step % 5 == 0 or step == total_steps - 1
        # The latent grid size is only known when a mask was encoded above;
        # without it the latents cannot be unpacked, so the preview is skipped.
        if capture_step and h_latent is not None:
            with torch.no_grad():
                unpacked = unpack_latents(latents, h_latent, w_latent)
                unpacked = (unpacked / pipe.vae.config.scaling_factor) + pipe.vae.config.shift_factor
                decoded = pipe.vae.decode(unpacked.to(pipe.vae.dtype)).sample
                img_step = pipe.image_processor.postprocess(decoded, output_type="pil")[0]
                step_images_list.append(img_step)

        callback_kwargs["latents"] = latents
        # The pipeline reads the updated tensors from the returned dict.
        return callback_kwargs

    return callback_fn
+# --- LoRA FUNCTIONS ---
 def activate_loras(pipe: FluxFillPipeline, loras_with_weights: list[tuple[LoRA, float]]):
     adapter_names = []
     adapter_weights = []
     return pipe
+# --- GENERATION ---
 def calculate_optimal_dimensions(image):
     original_width, original_height = image.size
     FIXED_DIMENSION = 1024
 ):
     image = image.convert("RGB")
     mask = mask.convert("L")
     width, height = calculate_optimal_dimensions(image)
+    # Resize to match dimensions
     image_resized = image.resize((width, height), Image.LANCZOS)
     pipe.to("cuda")
+    # Setup callback if a preserved area mask is provided
     step_images = []
     callback = None
     if preserved_area_mask is not None:
+        preserved_area_resized = preserved_area_mask.resize((width, height), Image.NEAREST)
+        callback = get_gradual_blend_callback(
+            pipe, image_resized, preserved_area_resized, num_inference_steps, step_images
         )
     result = pipe(
         image=image_resized,
+        mask_image=mask.resize((width, height)),
         prompt=prompt,
         width=width,
         height=height,
     final_prompt = ""
     if flux_keywords:
         final_prompt += ", ".join(flux_keywords) + ", "
     if selected_loras_with_weights:
         for lora, _ in selected_loras_with_weights:
             if lora.keyword:
+                final_prompt += (lora.keyword if isinstance(lora.keyword, str) else ", ".join(lora.keyword)) + ", "
     final_prompt += prompt
     if not isinstance(seed, int) or seed < 0:
     )
+with gr.Blocks(title="FLUX.1 Fill dev + Area Preservation", theme=gr.themes.Soft()) as demo:
     with gr.Row():
         with gr.Column(scale=2):
+            prompt_input = gr.Text(label="Prompt", lines=4, value="a 25 years old woman")
+            seed_slider = gr.Slider(label="Seed", minimum=-1, maximum=MAX_SEED, step=1, value=-1)
+            num_inference_steps_input = gr.Number(label="Inference steps", value=40)
+            guidance_scale_input = gr.Number(label="Guidance scale", value=30)
+            strength_input = gr.Number(label="Strength", value=1.0, maximum=1.0)
             gr.Markdown("### Flux Keywords")
+            flux_keywords_input = gr.CheckboxGroup(choices=flux_keywords_available, label="Flux Keywords")
             if loras:
                 gr.Markdown("### Available LoRAs")
                 )
         with gr.Column(scale=3):
+            image_input = gr.Image(label="Original Image", type="pil")
+            mask_input = gr.Image(label="Inpaint Mask (Area to change)", type="pil")
+            preserved_area_input = gr.Image(label="Preserved Area Mask (Area to keep)", type="pil")
+            run_btn = gr.Button("Generate", variant="primary")
         with gr.Column(scale=3):
             result_image = gr.Image(label="Result")
             used_prompt_box = gr.Text(label="Final Prompt")
             used_seed_box = gr.Number(label="Used Seed")
+            steps_gallery = gr.Gallery(label="Evolution (Steps)", columns=3, preview=True)
     run_btn.click(
         fn=inpaint_api,
             flux_keywords_input,
             loras_selected_input,
         ],
+        outputs=[result_image, steps_gallery, used_prompt_box, used_seed_box],
     )
 if __name__ == "__main__":