Spaces:

X-HighVoltage-X
/

FLUX.1-Fill-dev-Inpaint-Tester

Running on Zero

App Files Community

X-HighVoltage-X committed on about 1 month ago

Commit

beb6b67

verified ·

1 Parent(s): 4ad6fb2

Update app.py

Browse files

Files changed (1) hide show

app.py +127 -61

app.py CHANGED Viewed

@@ -10,91 +10,135 @@ from PIL import Image
 MAX_SEED = np.iinfo(np.int32).max
-pipe = FluxFillPipeline.from_pretrained("black-forest-labs/FLUX.1-Fill-dev", torch_dtype=torch.bfloat16)
 flux_keywords_available = ["IMG_1025.HEIC", "Selfie"]
-# --- LATENT MANIPULATION FUNCTIONS ---
 def pack_latents(latents, batch_size, num_channels, height, width):
     latents = latents.view(batch_size, num_channels, height // 2, 2, width // 2, 2)
     latents = latents.permute(0, 2, 4, 1, 3, 5)
-    latents = latents.reshape(batch_size, (height // 2) * (width // 2), num_channels * 4)
     return latents
 def unpack_latents(latents, height, width, h_scale=2, w_scale=2):
     batch_size, seq_len, channels = latents.shape
-    # Flux uses a 2x2 patch, so the factor is 2
     latents = latents.view(
-        batch_size, height // h_scale, width // w_scale, channels // (h_scale * w_scale), h_scale, w_scale
     )
     latents = latents.permute(0, 3, 1, 4, 2, 5)
-    latents = latents.reshape(batch_size, channels // (h_scale * w_scale), height, width)
     return latents
-# --- CALLBACK (PRESERVED AREA + STEP CAPTURE) ---
-def get_gradual_blend_callback(
     pipe,
     original_image,
     preserved_area_mask,
     total_steps,
     step_images_list,
-    start_alpha=1.0,
-    end_alpha=0.2,
 ):
     device = pipe.device
     dtype = pipe.transformer.dtype
-    packed_init_latents = None
-    packed_preserved_mask = None
-    h_latent = w_latent = None
-    if preserved_area_mask is not None:
-        with torch.no_grad():
-            img_tensor = (
-                (torch.from_numpy(np.array(original_image).transpose(2, 0, 1)).float() / 127.5 - 1.0)
-                .unsqueeze(0)
-                .to(device, dtype)
-            )
-            init_latents = pipe.vae.encode(img_tensor).latent_dist.sample()
-            init_latents = (init_latents - pipe.vae.config.shift_factor) * pipe.vae.config.scaling_factor
-            _, _, h_latent, w_latent = init_latents.shape
-            packed_init_latents = pack_latents(
-                init_latents, batch_size=1, num_channels=16, height=h_latent, width=w_latent
-            )
-            mask_tensor = (
-                (torch.from_numpy(np.array(preserved_area_mask.convert("L"))).float() / 255.0)
-                .unsqueeze(0)
-                .unsqueeze(0)
-                .to(device, dtype)
-            )
-            latent_preserved_mask = torch.nn.functional.interpolate(
-                mask_tensor, size=(h_latent, w_latent), mode="nearest"
-            )
-            packed_preserved_mask = pack_latents(
-                latent_preserved_mask, batch_size=1, num_channels=1, height=h_latent, width=w_latent
-            )
     def callback_fn(pipe, step, timestep, callback_kwargs):
         latents = callback_kwargs["latents"]
-        if packed_preserved_mask is not None:
-            progress = step / max(1, total_steps - 1)
-            current_alpha = start_alpha - (start_alpha - end_alpha) * progress
-            effective_mask = (packed_preserved_mask * current_alpha).repeat(1, 1, 16)
-            latents = (1 - effective_mask) * latents + effective_mask * packed_init_latents
         if step % 5 == 0 or step == total_steps - 1:
             with torch.no_grad():
                 unpacked = unpack_latents(latents, h_latent, w_latent)
-                unpacked = (unpacked / pipe.vae.config.scaling_factor) + pipe.vae.config.shift_factor
-                decoded = pipe.vae.decode(unpacked.to(pipe.vae.dtype)).sample
-                img_step = pipe.image_processor.postprocess(decoded, output_type="pil")[0]
                 step_images_list.append(img_step)
         callback_kwargs["latents"] = latents
@@ -103,7 +147,10 @@ def get_gradual_blend_callback(
     return callback_fn
-# --- LoRA's FUNCTIONS ---
 def activate_loras(pipe: FluxFillPipeline, loras_with_weights: list[tuple[LoRA, float]]):
     adapter_names = []
     adapter_weights = []
@@ -120,7 +167,10 @@ def deactivate_loras(pipe):
     return pipe
-# --- GENERATION
 def calculate_optimal_dimensions(image):
     original_width, original_height = image.size
     FIXED_DIMENSION = 1024
@@ -145,25 +195,30 @@ def inpaint(
 ):
     image = image.convert("RGB")
     mask = mask.convert("L")
-    width, height = calculate_optimal_dimensions(image)
-    # Resize to match dimensions
     image_resized = image.resize((width, height), Image.LANCZOS)
     pipe.to("cuda")
-    # Setup callback if a preserved area mask is provided
     step_images = []
     callback = None
     if preserved_area_mask is not None:
-        preserved_area_resized = preserved_area_mask.resize((width, height), Image.NEAREST)
-        callback = get_gradual_blend_callback(
-            pipe, image_resized, preserved_area_resized, num_inference_steps, step_images
         )
     result = pipe(
         image=image_resized,
-        mask_image=mask.resize((width, height)),
         prompt=prompt,
         width=width,
         height=height,
@@ -209,12 +264,16 @@ def inpaint_api(
     final_prompt = ""
     if flux_keywords:
         final_prompt += ", ".join(flux_keywords) + ", "
     if selected_loras_with_weights:
         for lora, _ in selected_loras_with_weights:
             if lora.keyword:
-                final_prompt += (lora.keyword if isinstance(lora.keyword, str) else ", ".join(lora.keyword)) + ", "
     final_prompt += prompt
     if not isinstance(seed, int) or seed < 0:
@@ -232,7 +291,11 @@ def inpaint_api(
     )
-with gr.Blocks(title="FLUX.1 Fill dev + Area Preservation", theme=gr.themes.Soft()) as demo:
     with gr.Row():
         with gr.Column(scale=2):
             prompt_input = gr.Text(label="Prompt", lines=4, value="a 25 years old woman")
@@ -242,7 +305,10 @@ with gr.Blocks(title="FLUX.1 Fill dev + Area Preservation", theme=gr.themes.Soft
             strength_input = gr.Number(label="Strength", value=1.0, maximum=1.0)
             gr.Markdown("### Flux Keywords")
-            flux_keywords_input = gr.CheckboxGroup(choices=flux_keywords_available, label="Flux Keywords")
             if loras:
                 gr.Markdown("### Available LoRAs")

 MAX_SEED = np.iinfo(np.int32).max
+pipe = FluxFillPipeline.from_pretrained(
+    "black-forest-labs/FLUX.1-Fill-dev",
+    torch_dtype=torch.bfloat16,
+)
 flux_keywords_available = ["IMG_1025.HEIC", "Selfie"]
+# ------------------------------------------------------------------
+# LATENT MANIPULATION
+# ------------------------------------------------------------------
 def pack_latents(latents, batch_size, num_channels, height, width):
     latents = latents.view(batch_size, num_channels, height // 2, 2, width // 2, 2)
     latents = latents.permute(0, 2, 4, 1, 3, 5)
+    latents = latents.reshape(
+        batch_size,
+        (height // 2) * (width // 2),
+        num_channels * 4,
+    )
     return latents
 def unpack_latents(latents, height, width, h_scale=2, w_scale=2):
     batch_size, seq_len, channels = latents.shape
     latents = latents.view(
+        batch_size,
+        height // h_scale,
+        width // w_scale,
+        channels // (h_scale * w_scale),
+        h_scale,
+        w_scale,
     )
     latents = latents.permute(0, 3, 1, 4, 2, 5)
+    latents = latents.reshape(
+        batch_size,
+        channels // (h_scale * w_scale),
+        height,
+        width,
+    )
     return latents
+# ------------------------------------------------------------------
+# HARD PRESERVE CALLBACK (ABSOLUTE LOCK)
+# ------------------------------------------------------------------
+def get_hard_preserve_callback(
     pipe,
     original_image,
     preserved_area_mask,
     total_steps,
     step_images_list,
 ):
     device = pipe.device
     dtype = pipe.transformer.dtype
+    with torch.no_grad():
+        # IMAGE → LATENTS
+        img_tensor = (
+            torch.from_numpy(np.array(original_image).transpose(2, 0, 1))
+            .float()
+            / 127.5
+            - 1.0
+        )
+        img_tensor = img_tensor.unsqueeze(0).to(device, dtype)
+        init_latents = pipe.vae.encode(img_tensor).latent_dist.sample()
+        init_latents = (
+            init_latents - pipe.vae.config.shift_factor
+        ) * pipe.vae.config.scaling_factor
+        _, _, h_latent, w_latent = init_latents.shape
+        packed_init_latents = pack_latents(
+            init_latents,
+            batch_size=1,
+            num_channels=16,
+            height=h_latent,
+            width=w_latent,
+        )
+        # MASK → LATENT MASK (BINARY, HARD)
+        mask_tensor = (
+            torch.from_numpy(np.array(preserved_area_mask.convert("L")))
+            .float()
+            / 255.0
+        )
+        mask_tensor = mask_tensor.unsqueeze(0).unsqueeze(0).to(device, dtype)
+        latent_mask = torch.nn.functional.interpolate(
+            mask_tensor,
+            size=(h_latent, w_latent),
+            mode="nearest",  # CRITICAL
+        )
+        packed_preserved_mask = pack_latents(
+            latent_mask,
+            batch_size=1,
+            num_channels=1,
+            height=h_latent,
+            width=w_latent,
+        )
+        # strict binary
+        packed_preserved_mask = (packed_preserved_mask > 0.5).float()
+        packed_preserved_mask = packed_preserved_mask.repeat(1, 1, 16)
     def callback_fn(pipe, step, timestep, callback_kwargs):
         latents = callback_kwargs["latents"]
+        # ABSOLUTE OVERWRITE — EVERY STEP
+        latents = (
+            latents * (1.0 - packed_preserved_mask)
+            + packed_init_latents * packed_preserved_mask
+        )
+        # Debug steps
         if step % 5 == 0 or step == total_steps - 1:
             with torch.no_grad():
                 unpacked = unpack_latents(latents, h_latent, w_latent)
+                unpacked = (
+                    unpacked / pipe.vae.config.scaling_factor
+                ) + pipe.vae.config.shift_factor
+                decoded = pipe.vae.decode(
+                    unpacked.to(pipe.vae.dtype)
+                ).sample
+                img_step = pipe.image_processor.postprocess(
+                    decoded, output_type="pil"
+                )[0]
                 step_images_list.append(img_step)
         callback_kwargs["latents"] = latents
     return callback_fn
+# ------------------------------------------------------------------
+# LoRA UTILS
+# ------------------------------------------------------------------
 def activate_loras(pipe: FluxFillPipeline, loras_with_weights: list[tuple[LoRA, float]]):
     adapter_names = []
     adapter_weights = []
     return pipe
+# ------------------------------------------------------------------
+# GENERATION
+# ------------------------------------------------------------------
 def calculate_optimal_dimensions(image):
     original_width, original_height = image.size
     FIXED_DIMENSION = 1024
 ):
     image = image.convert("RGB")
     mask = mask.convert("L")
+    width, height = calculate_optimal_dimensions(image)
     image_resized = image.resize((width, height), Image.LANCZOS)
     pipe.to("cuda")
     step_images = []
     callback = None
     if preserved_area_mask is not None:
+        preserved_area_resized = preserved_area_mask.resize(
+            (width, height), Image.NEAREST
+        )
+        callback = get_hard_preserve_callback(
+            pipe,
+            image_resized,
+            preserved_area_resized,
+            num_inference_steps,
+            step_images,
         )
     result = pipe(
         image=image_resized,
+        mask_image=mask.resize((width, height), Image.NEAREST),
         prompt=prompt,
         width=width,
         height=height,
     final_prompt = ""
     if flux_keywords:
         final_prompt += ", ".join(flux_keywords) + ", "
     if selected_loras_with_weights:
         for lora, _ in selected_loras_with_weights:
             if lora.keyword:
+                final_prompt += (
+                    lora.keyword
+                    if isinstance(lora.keyword, str)
+                    else ", ".join(lora.keyword)
+                ) + ", "
     final_prompt += prompt
     if not isinstance(seed, int) or seed < 0:
     )
+# ------------------------------------------------------------------
+# UI
+# ------------------------------------------------------------------
+with gr.Blocks(title="FLUX.1 Fill dev + HARD Area Preservation", theme=gr.themes.Soft()) as demo:
     with gr.Row():
         with gr.Column(scale=2):
             prompt_input = gr.Text(label="Prompt", lines=4, value="a 25 years old woman")
             strength_input = gr.Number(label="Strength", value=1.0, maximum=1.0)
             gr.Markdown("### Flux Keywords")
+            flux_keywords_input = gr.CheckboxGroup(
+                choices=flux_keywords_available,
+                label="Flux Keywords",
+            )
             if loras:
                 gr.Markdown("### Available LoRAs")