# NOTE(review): the three lines below are web-page residue (author avatar
# caption, commit message, commit hash) accidentally captured with the source.
# Kept as a comment so the file parses as valid Python; safe to delete.
# SanskarModi's picture
# updated app
# 9bc957e
"""Text-to-image generation with clean metadata output."""
from __future__ import annotations

import time
from typing import Any, Tuple

import torch
from PIL import Image

from sdgen.sd.models import GenerationMetadata, Txt2ImgConfig
from sdgen.utils.common import validate_resolution
from sdgen.utils.logger import get_logger
logger = get_logger(__name__)
def generate_image(
    pipe: Any,
    cfg: Txt2ImgConfig,
) -> Tuple[Image.Image, GenerationMetadata]:
    """Generate an image from text using a Stable Diffusion pipeline.

    Args:
        pipe: A diffusers StableDiffusionPipeline instance (duck-typed here:
            only invoked as ``pipe(prompt=..., ...)`` and expected to return
            an object with an ``images`` list).
        cfg: Structured configuration for text-to-image generation.

    Returns:
        A tuple of (PIL image, GenerationMetadata).
    """
    width, height = validate_resolution(cfg.width, cfg.height)
    # perf_counter() is monotonic; time.time() can jump if the wall clock
    # is adjusted, which would corrupt elapsed_seconds.
    start = time.perf_counter()

    seed = cfg.seed
    if seed is None:
        # Draw a fresh random seed; mask to 63 bits so manual_seed() accepts
        # it (it requires a non-negative value that fits a signed int64).
        seed = int(torch.seed() & ((1 << 63) - 1))

    device = cfg.device
    # torch.Generator accepts the device string directly ("cpu", "cuda", ...);
    # no special-casing of "cpu" is needed.
    gen = torch.Generator(device).manual_seed(int(seed))

    # Lazy %-style args so formatting is skipped when INFO is disabled.
    logger.info(
        "txt2img: steps=%s cfg=%s res=%sx%s seed=%s",
        cfg.steps,
        cfg.guidance_scale,
        width,
        height,
        seed,
    )

    # Autocast is only meaningful on CUDA here; any other device (cpu, mps,
    # ...) falls back to the cpu autocast context, matching prior behavior.
    autocast_device = device if device == "cuda" else "cpu"
    with torch.autocast(device_type=autocast_device):
        out = pipe(
            prompt=cfg.prompt,
            # Diffusers treats None as "no negative prompt"; normalize "" too.
            negative_prompt=cfg.negative_prompt or None,
            width=width,
            height=height,
            num_inference_steps=int(cfg.steps),
            guidance_scale=float(cfg.guidance_scale),
            generator=gen,
        )
    img = out.images[0]
    elapsed = time.perf_counter() - start

    meta = GenerationMetadata(
        mode="txt2img",
        prompt=cfg.prompt,
        # Metadata stores "" (not None) for an absent negative prompt.
        negative_prompt=cfg.negative_prompt or "",
        steps=int(cfg.steps),
        guidance_scale=float(cfg.guidance_scale),
        width=width,
        height=height,
        seed=int(seed),
        elapsed_seconds=float(elapsed),
    )
    return img, meta