Spaces:
Sleeping
Sleeping
File size: 2,029 Bytes
7a1d414 9bc957e 7a1d414 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 |
"""Text-to-image generation with clean metadata output."""
from __future__ import annotations

import time
from typing import Any, Tuple

import torch
from PIL import Image

from sdgen.sd.models import GenerationMetadata, Txt2ImgConfig
from sdgen.utils.common import validate_resolution
from sdgen.utils.logger import get_logger
logger = get_logger(__name__)
def generate_image(
    pipe: Any,
    cfg: Txt2ImgConfig,
) -> Tuple[Image.Image, GenerationMetadata]:
    """Generate an image from text using a Stable Diffusion pipeline.

    Args:
        pipe: A diffusers StableDiffusionPipeline instance.
        cfg: Structured configuration for text-to-image generation.

    Returns:
        A tuple of (PIL image, GenerationMetadata).
    """
    width, height = validate_resolution(cfg.width, cfg.height)
    # perf_counter is monotonic — immune to wall-clock adjustments while timing.
    start = time.perf_counter()

    # Draw a fresh seed when none was supplied so the run is reproducible from
    # the metadata we return. torch.seed() re-seeds the global RNG and returns
    # the value; mask to 63 bits to keep it a non-negative signed int.
    seed = cfg.seed
    if seed is None:
        seed = int(torch.seed() & ((1 << 63) - 1))

    device = cfg.device
    # torch.Generator accepts the device string directly; the previous
    # ("cpu" if device == "cpu" else device) conditional was a no-op.
    gen = torch.Generator(device).manual_seed(int(seed))

    logger.info(
        "txt2img: steps=%s cfg=%s res=%sx%s seed=%s",
        cfg.steps,
        cfg.guidance_scale,
        width,
        height,
        seed,
    )

    # Mixed precision is only intended on CUDA here. The old code fell back to
    # CPU autocast for every non-CUDA device (including "mps"), which silently
    # switched compute to bfloat16; instead, disable autocast entirely off-CUDA.
    use_cuda = device == "cuda"
    with torch.autocast(
        device_type="cuda" if use_cuda else "cpu",
        enabled=use_cuda,
    ):
        out = pipe(
            prompt=cfg.prompt,
            # diffusers treats None as "no negative prompt"; normalize "" to None.
            negative_prompt=cfg.negative_prompt or None,
            width=width,
            height=height,
            num_inference_steps=int(cfg.steps),
            guidance_scale=float(cfg.guidance_scale),
            generator=gen,
        )
    img = out.images[0]

    elapsed = time.perf_counter() - start
    meta = GenerationMetadata(
        mode="txt2img",
        prompt=cfg.prompt,
        negative_prompt=cfg.negative_prompt or "",
        steps=int(cfg.steps),
        guidance_scale=float(cfg.guidance_scale),
        width=width,
        height=height,
        seed=int(seed),
        elapsed_seconds=float(elapsed),
    )
    return img, meta
|