FLUX.MF-Lightning-Fast-Upscaler

Running on Zero

App Files Files Community

LPX55 committed on Jul 8

Commit

faeb252

verified ·

1 Parent(s): 92cabbc

Update app_v4.py

Browse files

Files changed (1) hide show

app_v4.py +16 -61

app_v4.py CHANGED Viewed

@@ -6,10 +6,9 @@ import spaces
 import os
 import datetime
 import io
-import numpy as np
 import moondream as md
 from transformers import T5EncoderModel
-from diffusers import FluxControlNetPipeline, FluxControlNetInpaintPipeline, FluxTransformer2DModel
 from diffusers.utils import load_image
 from PIL import Image
 from threading import Thread
@@ -71,29 +70,14 @@ text_encoder_2_unquant = T5EncoderModel.from_pretrained(
     torch_dtype=torch.bfloat16,
     token=huggingface_token
 )
-transformer = FluxTransformer2DModel.from_pretrained(
-        "LPX55/FLUX.1-merged_lightning_v2", subfolder='transformer', torch_dytpe=torch.bfloat16
-    )
-pipe_upscaler = FluxControlNetPipeline.from_pretrained(
     "LPX55/FLUX.1M-8step_upscaler-cnet",
     torch_dtype=torch.bfloat16,
     text_encoder_2=text_encoder_2_unquant,
     token=huggingface_token
 )
-pipe_upscaler.to("cuda")
-controlnet = FluxControlNetModel.from_pretrained("alimama-creative/FLUX.1-dev-Controlnet-Inpainting-Beta", torch_dtype=torch.bfloat16)
-pipe = FluxControlNetInpaintPipeline.from_pretrained(
-    "LPX55/FLUX.1-merged_lightning_v2",
-    controlnet=controlnet,
-    transformer=transformer,
-    torch_dtype=torch.bfloat16,
-    token=huggingface_token
-)
 pipe.to("cuda")
-pipe.transformer.to(torch.bfloat16)
-pipe.controlnet.to(torch.bfloat16)
 try:
     dump_environment_info()
@@ -164,51 +148,28 @@ def generate_focus(control_image, focus_list):
     except Exception as e:
         print(f"Error generating focus: {e}")
         return "highly detailed photo, raw photography.", "Original Image Dimensions: N/A"
 @spaces.GPU(duration=6, progress=gr.Progress(track_tqdm=True))
 @torch.no_grad()
 def generate_image(prompt, scale, steps, control_image, controlnet_conditioning_scale, guidance_scale, seed, guidance_end):
     generator = torch.Generator().manual_seed(seed)
-    # Ensure transparency is preserved
-    control_image = control_image.convert("RGBA")
     # Resize the image to a maximum longest side of 1024 pixels
     control_image = resize_image_to_max_side(control_image, max_side_length=1024)
     w, h = control_image.size
-    # Crop to nearest multiple of 32
     w = w - w % 32
     h = h - h % 32
-    # Corrected resizing code
-    control_image = control_image.resize((w, h), resample=2)
-    print(f"Resized image dimensions: {control_image.size[0]}x{control_image.size[1]}")
     print(f"PromptLog: {repr(prompt)}")
-    # Convert image to RGB for processing, but keep alpha channel for transparency
-    control_image_rgb = control_image.convert("RGB")
-    control_image_alpha = control_image.split()[-1]
-    # Convert alpha channel to a mask (transparent = white, opaque = black)
-    # White corresponds to 1 (to be inpainted), black corresponds to 0 (to be preserved)
-    # Convert alpha to numpy array for processing
-    alpha_array = np.array(control_image_alpha)
-    # Create binary mask (1 for transparent, 0 for opaque)
-    mask = (alpha_array > 128).astype(np.float32)  # 1 for transparent (to be inpainted), 0 for opaque
-    # Optional: Visualize the mask (for debugging purposes)
-    # mask_image = Image.fromarray((mask * 255).astype(np.uint8))
-    # mask_image.show()
     with torch.inference_mode():
         image = pipe(
-            image=control_image_rgb,
             generator=generator,
             prompt=prompt,
-            control_image=control_image_rgb,
-            mask_image=mask,  # Pass the numpy array as the mask
             controlnet_conditioning_scale=controlnet_conditioning_scale,
             num_inference_steps=steps,
             guidance_scale=guidance_scale,
@@ -217,13 +178,9 @@ def generate_image(prompt, scale, steps, control_image, controlnet_conditioning_
             control_guidance_start=0.0,
             control_guidance_end=guidance_end,
         ).images[0]
-        # Reapply the alpha channel to the generated image
-        image = image.convert("RGBA")
-        image.putalpha(control_image_alpha)
-        return image
 def update_parameters(preset):
     if preset in presets:
         params = presets[preset]
@@ -266,12 +223,10 @@ def process_image(control_image, user_prompt, system_prompt, scale, steps,
             seed=seed,
             guidance_end=guidance_end
         )
         try:
-            # Ensure the image is saved with transparency
-            with io.BytesIO() as output:
-                image.save(output, format="PNG")
-                debug_img = Image.open(output).convert("RGBA")
-                save_image("/tmp/" + str(seed) + "output.png", debug_img)
         except Exception as e:
             print("Error 160: " + str(e))
         log_params(final_prompt, scale, steps, controlnet_conditioning_scale, guidance_scale, seed, guidance_end, control_image, image)

 import os
 import datetime
 import io
 import moondream as md
 from transformers import T5EncoderModel
+from diffusers import FluxControlNetPipeline
 from diffusers.utils import load_image
 from PIL import Image
 from threading import Thread
     torch_dtype=torch.bfloat16,
     token=huggingface_token
 )
+pipe = FluxControlNetPipeline.from_pretrained(
     "LPX55/FLUX.1M-8step_upscaler-cnet",
     torch_dtype=torch.bfloat16,
     text_encoder_2=text_encoder_2_unquant,
     token=huggingface_token
 )
 pipe.to("cuda")
 try:
     dump_environment_info()
     except Exception as e:
         print(f"Error generating focus: {e}")
         return "highly detailed photo, raw photography.", "Original Image Dimensions: N/A"
 @spaces.GPU(duration=6, progress=gr.Progress(track_tqdm=True))
 @torch.no_grad()
 def generate_image(prompt, scale, steps, control_image, controlnet_conditioning_scale, guidance_scale, seed, guidance_end):
     generator = torch.Generator().manual_seed(seed)
+    # Load control image
+    control_image = load_image(control_image)
     # Resize the image to a maximum longest side of 1024 pixels
     control_image = resize_image_to_max_side(control_image, max_side_length=1024)
     w, h = control_image.size
     w = w - w % 32
     h = h - h % 32
+    control_image = control_image.resize((int(w * scale), int(h * scale)), resample=2)  # Resample.BILINEAR
+    print("Size to: " + str(control_image.size[0]) + ", " + str(control_image.size[1]))
     print(f"PromptLog: {repr(prompt)}")
     with torch.inference_mode():
         image = pipe(
             generator=generator,
             prompt=prompt,
+            control_image=control_image,
             controlnet_conditioning_scale=controlnet_conditioning_scale,
             num_inference_steps=steps,
             guidance_scale=guidance_scale,
             control_guidance_start=0.0,
             control_guidance_end=guidance_end,
         ).images[0]
+        # print("Type: " + str(type(image)))
+    return image
 def update_parameters(preset):
     if preset in presets:
         params = presets[preset]
             seed=seed,
             guidance_end=guidance_end
         )
         try:
+            debug_img = Image.open(image.save("/tmp/" + str(seed) + "output.png"))
+            save_image("/tmp/" + str(seed) + "output.png", debug_img)
         except Exception as e:
             print("Error 160: " + str(e))
         log_params(final_prompt, scale, steps, controlnet_conditioning_scale, guidance_scale, seed, guidance_end, control_image, image)