"""Generate an image with the INT8 W8A8 Ideogram 4 DiT.

  python download_deps.py            # one time (gated base-repo access required)
  python usage.py "a poster that says HELLO"

Memory: the base pipeline (loaded from the FP8 repo before the INT8 weights are
applied) is large; on a 24 GB card you may need an offload or sequential-load
recipe (see recipe.json).
"""
import sys
import torch
from ideogram4 import Ideogram4Pipeline, Ideogram4PipelineConfig
from safetensors_loader import load_int8

# Local INT8 checkpoint file handed to load_int8 further down.
WEIGHTS = "ideogram4-int8-w8a8.safetensors"

# The first command-line argument, when given, is the prompt; otherwise fall
# back to a built-in sample prompt.
if len(sys.argv) > 1:
    prompt = sys.argv[1]
else:
    prompt = 'a storefront sign that says "FRESH COFFEE"'

# Build the pipeline from the base weights repo, on the GPU, in bfloat16.
pipeline_config = Ideogram4PipelineConfig(weights_repo="ideogram-ai/ideogram-4-fp8")
pipe = Ideogram4Pipeline.from_pretrained(
    config=pipeline_config,
    device="cuda",
    dtype=torch.bfloat16,
)

# load_int8 yields two values; they are reported below as the number of
# quantized and protected linear layers (presumably counts -- confirm against
# safetensors_loader).
sw, pr = load_int8(pipe, WEIGHTS)
print(f"loaded INT8: {sw} quantized + {pr} protected linears")

# Fixed step count, resolution and seed; only the first returned item is kept.
outputs = pipe(prompt, num_steps=48, height=1024, width=1024, seed=1000)
img = outputs[0]
img.save("out.png")
print("saved out.png")