import os

import torch
from diffusers import FluxPipeline
from huggingface_hub import login

# Run configuration: edit these values rather than the code below.
MODEL_ID = "black-forest-labs/FLUX.1-schnell"
PROMPT = "A cat holding a sign that says hello world"
OUTPUT_PATH = "flux-schnell-output.png"
SEED = 0  # fixed seed so repeated runs use the same initial noise


def resolve_hf_token(environ=None):
    """Return the Hugging Face access token from the environment, if any.

    Args:
        environ: Mapping to read ``HF_TOKEN`` from. Defaults to
            ``os.environ``; a plain dict can be passed for testing.

    Returns:
        The token string, or ``None`` when ``HF_TOKEN`` is unset or empty.
    """
    env = os.environ if environ is None else environ
    # Treat an empty value the same as "not set".
    return env.get("HF_TOKEN") or None


def main():
    """Generate one image with FLUX.1-schnell and save it to OUTPUT_PATH."""
    # Authenticate only when a token is supplied via the environment.
    # Secrets must never be committed to source; without HF_TOKEN we skip
    # the explicit login and leave credential lookup to the library.
    token = resolve_hf_token()
    if token is not None:
        login(token=token)

    # Load the model (bfloat16 for faster inference and less VRAM).
    pipe = FluxPipeline.from_pretrained(
        MODEL_ID,
        torch_dtype=torch.bfloat16,
    )

    # Enable CPU offloading for machines with limited GPU memory.
    pipe.enable_model_cpu_offload()

    # Seeded CPU generator for repeatable sampling.
    generator = torch.Generator("cpu").manual_seed(SEED)

    # NOTE(review): guidance_scale=0.0, 4 steps and max_sequence_length=256
    # appear to be the settings recommended for the schnell checkpoint;
    # confirm against the diffusers Flux documentation before changing.
    result = pipe(
        PROMPT,
        guidance_scale=0.0,
        num_inference_steps=4,
        max_sequence_length=256,
        generator=generator,
    )
    image = result.images[0]

    # Save the output.
    image.save(OUTPUT_PATH)
    print("Image saved successfully!")


if __name__ == "__main__":
    main()