| | import argparse |
| | import time |
| | import torch |
| | from diffusers import FluxTransformer2DModel |
| | from transformers import CLIPModel |
| | from pathlib import Path |
| | from PIL import Image |
| | from open_flux_pipeline import FluxWithCFGPipeline |
| |
|
# Module-level FluxWithCFGPipeline instance. Created once in main() and read
# by generate(); None until main() has loaded the models.
pipe = None
|
def generate(prompt, image_prompt=None, guidance_scale=2, num_images=4, resolution=512):
    """Generate images with the module-level `pipe` and save a tiled strip.

    Args:
        prompt: Text prompt (empty string is allowed).
        image_prompt: Optional PIL image used as an image prompt. When None,
            a blank black image is substituted — the pipeline appears to
            require an image prompt either way.
        guidance_scale: Classifier-free-guidance scale passed to the pipeline.
        num_images: Number of images to generate for the prompt.
        resolution: Width and height in pixels of each generated image.

    Side effects:
        Writes a PNG under image-outputs/ (directory created if missing) and
        prints the output path.
    """
    # The pipeline is always handed image prompts; a black placeholder stands
    # in for both the positive prompt (when none is given) and the negative
    # prompt. NOTE(review): assumes a black image is a neutral prompt for this
    # model — confirm against FluxWithCFGPipeline.
    blank = Image.new("RGB", (resolution, resolution))
    image_prompt_kwargs = {
        "image_prompt": image_prompt if image_prompt is not None else blank,
        "negative_image_prompt": Image.new("RGB", (resolution, resolution)),
    }

    with torch.no_grad():  # inference only; skip autograd bookkeeping
        images = pipe(
            prompt=prompt,
            negative_prompt="",
            height=resolution,
            width=resolution,
            max_sequence_length=256,
            guidance_scale=guidance_scale,
            num_images_per_prompt=num_images,
            **image_prompt_kwargs,
        ).images

    out = _hstack(images)

    # When an image prompt was supplied, stack it (resized) above the results
    # so the saved file shows the input together with the outputs.
    if image_prompt is not None:
        combined = Image.new("RGB", (out.width, out.height + 1 + resolution))
        resized_prompt = image_prompt.resize((resolution, resolution), Image.Resampling.BILINEAR)
        combined.paste(resized_prompt, (0, 0))
        combined.paste(out, (0, resolution + 1))
        out = combined

    out_dir = Path("image-outputs")
    out_dir.mkdir(parents=True, exist_ok=True)
    # Sanitize the prompt before using it as a filename: the raw prompt may
    # contain path separators or other characters the filesystem rejects, and
    # an empty prompt would otherwise yield a hidden dotfile name.
    slug = "".join(c if c.isalnum() else "_" for c in prompt[:40]) or "untitled"
    output_path = out_dir / f"{slug}.{int(time.time())}.png"
    out.save(output_path)
    print(f"Saved output to {output_path}")


def _hstack(images):
    """Tile PIL images horizontally onto one RGB canvas with a 1px gap."""
    widths, heights = zip(*(img.size for img in images))
    canvas = Image.new("RGB", (sum(widths) + len(images) - 1, max(heights)))
    x_offset = 0
    for img in images:
        canvas.paste(img, (x_offset, 0))
        x_offset += img.width + 1
    return canvas
| |
|
def main():
    """CLI entry point: parse arguments, build the pipeline, run generation.

    Loads CLIP as the text encoder and a separately fine-tuned Flux
    transformer ("flux-image-variations"), assigns the assembled pipeline to
    the module-level `pipe`, then delegates to generate().
    """
    parser = argparse.ArgumentParser(description="Generate images using an image and a text prompt (Flux Image Variations).")
    parser.add_argument("--prompt", type=str, default="", help='The text prompt for image generation (default "")')
    parser.add_argument("--image_prompt", type=str, default=None,
                        help="Path to an optional image to use as a prompt")
    parser.add_argument("--guidance_scale", type=float, default=2,
                        help="Guidance scale for image generation (default: 2)")
    parser.add_argument("--num_images", type=int, default=4,
                        help="Number of images to generate (default: 4)")
    parser.add_argument("--resolution", type=int, default=512,
                        help="Resolution for generated images (default: 512)")
    args = parser.parse_args()

    # Build the pipeline once and expose it via the module-level `pipe` that
    # generate() reads. The transformer is loaded as None first and then
    # replaced with the fine-tuned variations weights.
    global pipe
    clip = CLIPModel.from_pretrained("openai/clip-vit-large-patch14", torch_dtype=torch.bfloat16)
    pipe = FluxWithCFGPipeline.from_pretrained("ostris/OpenFLUX.1", text_encoder=clip, transformer=None, torch_dtype=torch.bfloat16)
    pipe.transformer = FluxTransformer2DModel.from_pretrained("flux-image-variations", torch_dtype=torch.bfloat16)
    pipe.to("cuda")

    # Image.open is lazy and keeps the underlying file handle open; load the
    # pixel data inside a context manager so the file is closed promptly.
    if args.image_prompt:
        with Image.open(args.image_prompt) as f:
            img_prompt = f.convert("RGB")  # forces a full decode into memory
    else:
        img_prompt = None

    generate(args.prompt, image_prompt=img_prompt, guidance_scale=args.guidance_scale,
             num_images=args.num_images, resolution=args.resolution)


if __name__ == "__main__":
    main()
| |
|