| | import argparse |
| | import time |
| | import torch |
| | from diffusers import FluxTransformer2DModel |
| | from transformers import CLIPModel |
| | from pathlib import Path |
| | from PIL import Image |
| | from open_flux_pipeline import FluxWithCFGPipeline |
| |
|
# Module-level FluxWithCFGPipeline instance. Created once in main() and read
# by generate(); None until main() has loaded the models.
pipe = None
|
def generate(prompt, image_prompt=None, guidance_scale=2, num_images=4, resolution=512):
    """Generate images with the module-level `pipe` and save a tiled strip.

    Args:
        prompt: Text prompt (empty string is allowed).
        image_prompt: Optional PIL image used as an image prompt. When None,
            a blank black image is substituted — the pipeline appears to
            require an image prompt either way.
        guidance_scale: Classifier-free-guidance scale passed to the pipeline.
        num_images: Number of images to generate for the prompt.
        resolution: Width and height in pixels of each generated image.

    Side effects:
        Writes a PNG under image-outputs/ (directory created if missing) and
        prints the output path.
    """
    # The pipeline is always handed image prompts; a black placeholder stands
    # in for both the positive prompt (when none is given) and the negative
    # prompt. NOTE(review): assumes a black image is a neutral prompt for this
    # model — confirm against FluxWithCFGPipeline.
    blank = Image.new("RGB", (resolution, resolution))
    image_prompt_kwargs = {
        "image_prompt": image_prompt if image_prompt is not None else blank,
        "negative_image_prompt": Image.new("RGB", (resolution, resolution)),
    }

    with torch.no_grad():  # inference only; skip autograd bookkeeping
        images = pipe(
            prompt=prompt,
            negative_prompt="",
            height=resolution,
            width=resolution,
            max_sequence_length=256,
            guidance_scale=guidance_scale,
            num_images_per_prompt=num_images,
            **image_prompt_kwargs,
        ).images

    out = _hstack(images)

    # When an image prompt was supplied, stack it (resized) above the results
    # so the saved file shows the input together with the outputs.
    if image_prompt is not None:
        combined = Image.new("RGB", (out.width, out.height + 1 + resolution))
        resized_prompt = image_prompt.resize((resolution, resolution), Image.Resampling.BILINEAR)
        combined.paste(resized_prompt, (0, 0))
        combined.paste(out, (0, resolution + 1))
        out = combined

    out_dir = Path("image-outputs")
    out_dir.mkdir(parents=True, exist_ok=True)
    # Sanitize the prompt before using it as a filename: the raw prompt may
    # contain path separators or other characters the filesystem rejects, and
    # an empty prompt would otherwise yield a hidden dotfile name.
    slug = "".join(c if c.isalnum() else "_" for c in prompt[:40]) or "untitled"
    output_path = out_dir / f"{slug}.{int(time.time())}.png"
    out.save(output_path)
    print(f"Saved output to {output_path}")


def _hstack(images):
    """Tile PIL images horizontally onto one RGB canvas with a 1px gap."""
    widths, heights = zip(*(img.size for img in images))
    canvas = Image.new("RGB", (sum(widths) + len(images) - 1, max(heights)))
    x_offset = 0
    for img in images:
        canvas.paste(img, (x_offset, 0))
        x_offset += img.width + 1
    return canvas
| |
|
def main():
    """CLI entry point: parse arguments, build the pipeline, run generation.

    Loads CLIP as the text encoder and a separately fine-tuned Flux
    transformer ("flux-image-variations"), assigns the assembled pipeline to
    the module-level `pipe`, then delegates to generate().
    """
    parser = argparse.ArgumentParser(description="Generate images using an image and a text prompt (Flux Image Variations).")
    parser.add_argument("--prompt", type=str, default="", help='The text prompt for image generation (default "")')
    parser.add_argument("--image_prompt", type=str, default=None,
                        help="Path to an optional image to use as a prompt")
    parser.add_argument("--guidance_scale", type=float, default=2,
                        help="Guidance scale for image generation (default: 2)")
    parser.add_argument("--num_images", type=int, default=4,
                        help="Number of images to generate (default: 4)")
    parser.add_argument("--resolution", type=int, default=512,
                        help="Resolution for generated images (default: 512)")
    args = parser.parse_args()

    # Build the pipeline once and expose it via the module-level `pipe` that
    # generate() reads. The transformer is loaded as None first and then
    # replaced with the fine-tuned variations weights.
    global pipe
    clip = CLIPModel.from_pretrained("openai/clip-vit-large-patch14", torch_dtype=torch.bfloat16)
    pipe = FluxWithCFGPipeline.from_pretrained("ostris/OpenFLUX.1", text_encoder=clip, transformer=None, torch_dtype=torch.bfloat16)
    pipe.transformer = FluxTransformer2DModel.from_pretrained("flux-image-variations", torch_dtype=torch.bfloat16)
    pipe.to("cuda")

    # Image.open is lazy and keeps the underlying file handle open; load the
    # pixel data inside a context manager so the file is closed promptly.
    if args.image_prompt:
        with Image.open(args.image_prompt) as f:
            img_prompt = f.convert("RGB")  # forces a full decode into memory
    else:
        img_prompt = None

    generate(args.prompt, image_prompt=img_prompt, guidance_scale=args.guidance_scale,
             num_images=args.num_images, resolution=args.resolution)


if __name__ == "__main__":
    main()
| |
|