Spaces:
Sleeping
Sleeping
| """Text-to-image generation with clean metadata output.""" | |
| from __future__ import annotations | |
| import time | |
| from typing import Tuple | |
| import torch | |
| from PIL import Image | |
| from sdgen.sd.models import GenerationMetadata, Txt2ImgConfig | |
| from sdgen.utils.common import validate_resolution | |
| from sdgen.utils.logger import get_logger | |
| logger = get_logger(__name__) | |
def generate_image(
    pipe: any,
    cfg: Txt2ImgConfig,
) -> Tuple[Image.Image, GenerationMetadata]:
    """Generate an image from text using a Stable Diffusion pipeline.

    Args:
        pipe: A diffusers StableDiffusionPipeline instance. It is called with
            the keyword arguments built below and must return an object
            exposing an ``images`` sequence.
        cfg: Structured configuration for text-to-image generation. The
            fields read here are ``prompt``, ``negative_prompt``, ``width``,
            ``height``, ``steps``, ``guidance_scale``, ``seed`` and
            ``device``.

    Returns:
        A tuple of (PIL image, GenerationMetadata). The metadata records the
        resolved resolution, the seed actually used and the elapsed time in
        seconds.
    """
    width, height = validate_resolution(cfg.width, cfg.height)

    # perf_counter is monotonic, so the elapsed time cannot be skewed by
    # wall-clock adjustments that happen while the pipeline is running.
    start = time.perf_counter()

    seed = cfg.seed
    if seed is None:
        # Draw a fresh seed from a throwaway generator. torch.seed() would
        # also reseed the *global* RNG on every device, silently breaking
        # any reproducibility the caller had set up. Masked to 63 bits so
        # the value always fits a signed 64-bit integer.
        seed = torch.Generator().seed() & ((1 << 63) - 1)
    seed = int(seed)

    device = cfg.device
    gen = torch.Generator(device).manual_seed(seed)

    logger.info(
        "txt2img: steps=%s cfg=%s res=%sx%s seed=%s",
        cfg.steps,
        cfg.guidance_scale,
        width,
        height,
        seed,
    )

    # autocast wants the bare device *type*. Normalising through
    # torch.device makes "cuda:0" (or a torch.device object) select CUDA
    # autocast instead of silently falling back to the CPU branch. Every
    # non-CUDA device keeps using CPU autocast, as before.
    device_type = torch.device(device).type
    autocast_device = "cuda" if device_type == "cuda" else "cpu"

    with torch.autocast(device_type=autocast_device):
        out = pipe(
            prompt=cfg.prompt,
            negative_prompt=cfg.negative_prompt or None,
            width=width,
            height=height,
            num_inference_steps=int(cfg.steps),
            guidance_scale=float(cfg.guidance_scale),
            generator=gen,
        )

    img = out.images[0]
    elapsed = time.perf_counter() - start

    meta = GenerationMetadata(
        mode="txt2img",
        prompt=cfg.prompt,
        negative_prompt=cfg.negative_prompt or "",
        steps=int(cfg.steps),
        guidance_scale=float(cfg.guidance_scale),
        width=width,
        height=height,
        seed=seed,
        elapsed_seconds=float(elapsed),
    )
    return img, meta