Spaces:
Sleeping
Sleeping
Commit ·
2a72dcc
1
Parent(s): 35862c8
completed text to image pipeline using SD
Browse files
- app/generator.py +83 -1
- app/pipeline.py +122 -1
- app/utils/logger.py +51 -1
- requirements.txt +1 -0
app/generator.py
CHANGED
|
@@ -1 +1,83 @@
|
|
| 1 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Image generation wrapper around a loaded StableDiffusionPipeline.
|
| 2 |
+
|
| 3 |
+
Provides:
|
| 4 |
+
- generate_image(...) -> (PIL.Image, metadata)
|
| 5 |
+
- deterministic seed handling
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
import time
|
| 9 |
+
from typing import Any, Dict, Optional
|
| 10 |
+
|
| 11 |
+
import torch
|
| 12 |
+
|
| 13 |
+
from app.utils.logger import get_logger
|
| 14 |
+
|
| 15 |
+
logger = get_logger(__name__)
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
def _validate_resolution(width: int, height: int):
|
| 19 |
+
# clamp and snap to multiples of 64 (SD requirement)
|
| 20 |
+
width = max(256, min(width, 768))
|
| 21 |
+
height = max(256, min(height, 768))
|
| 22 |
+
width = (width // 64) * 64
|
| 23 |
+
height = (height // 64) * 64
|
| 24 |
+
return int(width), int(height)
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
def generate_image(
    pipe,
    prompt: str,
    negative_prompt: Optional[str] = None,
    steps: int = 30,
    guidance_scale: float = 7.5,
    width: int = 512,
    height: int = 512,
    seed: Optional[int] = None,
    device: str = "cuda",
):
    """Generate a single image and return ``(image, metadata)``.

    Args:
        pipe: Callable pipeline. It is invoked with the keyword arguments
            ``prompt``, ``negative_prompt``, ``num_inference_steps``,
            ``guidance_scale``, ``width``, ``height`` and ``generator`` and
            must return an object exposing ``.images``.
        prompt: Text prompt forwarded to the pipeline.
        negative_prompt: Optional negative prompt; an empty string is
            forwarded as ``None``.
        steps: Number of inference steps (coerced to ``int``).
        guidance_scale: Guidance scale (coerced to ``float``).
        width: Requested width; adjusted by ``_validate_resolution``.
        height: Requested height; adjusted by ``_validate_resolution``.
        seed: Seed for the random generator. When ``None`` a fresh seed is
            drawn and reported in the metadata so the run can be reproduced.
        device: Device string used for the generator and for autocast.

    Returns:
        A tuple ``(image, metadata)`` where ``image`` is the first entry of
        ``result.images`` (expected to be a PIL image) and ``metadata`` is a
        dict with the effective generation settings and the elapsed seconds.
    """
    # perf_counter is monotonic, so the duration cannot be skewed by
    # wall-clock adjustments happening mid-generation.
    start = time.perf_counter()
    width, height = _validate_resolution(width, height)

    if seed is None:
        # Draw the seed from a throwaway generator: torch.seed() would also
        # reseed the process-wide default RNG as a side effect.
        seed = torch.Generator().seed() & ((1 << 63) - 1)
    seed = int(seed)

    # Dedicated generator so the result depends only on ``seed``.
    gen = torch.Generator(device).manual_seed(seed)

    # Implicit string concatenation keeps the message on one line; a
    # backslash continuation inside the literal would embed the source
    # indentation in the log output.
    logger.info(
        f"Generating: steps={steps}, cfg={guidance_scale}, "
        f"res={width}x{height}, seed={seed}"
    )

    # Use autocast for speed/precision management. Every non-CPU device is
    # treated as CUDA here.
    # NOTE(review): other accelerators (e.g. "mps") are not handled - confirm
    # whether they need to be supported.
    device_type = "cuda" if device != "cpu" else "cpu"
    with torch.autocast(device_type=device_type):
        result = pipe(
            prompt=prompt,
            negative_prompt=negative_prompt if negative_prompt else None,
            num_inference_steps=int(steps),
            guidance_scale=float(guidance_scale),
            width=width,
            height=height,
            generator=gen,
        )

    img = result.images[0]
    elapsed = time.perf_counter() - start

    metadata: Dict[str, Any] = {
        "prompt": prompt,
        "negative_prompt": negative_prompt,
        "steps": steps,
        "guidance_scale": guidance_scale,
        "width": width,
        "height": height,
        "seed": seed,
        "elapsed_seconds": elapsed,
    }

    logger.info(f"Generation finished in {elapsed:.2f}s")
    return img, metadata
|
app/pipeline.py
CHANGED
|
@@ -1 +1,122 @@
|
|
| 1 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Model pipeline loader for Stable Diffusion (HuggingFace Diffusers).
|
| 2 |
+
|
| 3 |
+
load_pipeline(...) returns a GPU-ready pipeline with memory optimizations.
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import os
|
| 7 |
+
from typing import Optional
|
| 8 |
+
|
| 9 |
+
import torch
|
| 10 |
+
from diffusers import (
|
| 11 |
+
DPMSolverMultistepScheduler,
|
| 12 |
+
StableDiffusionPipeline,
|
| 13 |
+
)
|
| 14 |
+
from dotenv import load_dotenv
|
| 15 |
+
|
| 16 |
+
from app.utils.logger import get_logger
|
| 17 |
+
|
| 18 |
+
logger = get_logger(__name__)
|
| 19 |
+
load_dotenv()
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
def _try_enable_xformers(pipe):
    """Best-effort switch to xFormers memory-efficient attention.

    If ``pipe`` lacks ``enable_xformers_memory_efficient_attention`` the step
    is skipped. Exceptions raised while enabling it are caught and reported
    at INFO level instead of being propagated.
    """
    try:
        if not hasattr(pipe, "enable_xformers_memory_efficient_attention"):
            logger.info("xFormers not available via API; skipping.")
            return
        pipe.enable_xformers_memory_efficient_attention()
        logger.info("Enabled xFormers memory-efficient attention.")
    except Exception as exc:
        logger.info(f"xFormers not enabled: {exc}")
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
def load_pipeline(
    model_id: str = "runwayml/stable-diffusion-v1-5",
    device: str = "cuda",
    use_fp16: bool = True,
    enable_xformers: bool = False,
    torch_dtype: Optional[torch.dtype] = None,
    scheduler=None,
):
    """Load a ``StableDiffusionPipeline`` and apply memory optimizations.

    Args:
        model_id: Model identifier passed to ``from_pretrained``.
        device: Target device the pipeline is moved to.
        use_fp16: Use ``torch.float16`` when ``torch_dtype`` is not given and
            the device is a CUDA device; otherwise ``torch.float32`` is used.
        enable_xformers: Try to enable xFormers attention (best effort).
        torch_dtype: Explicit dtype; overrides ``use_fp16``.
        scheduler: Scheduler instance to use. When ``None`` a
            ``DPMSolverMultistepScheduler`` is loaded from the model's
            ``scheduler`` subfolder; if that fails the pipeline keeps its own
            default scheduler.

    Returns:
        The loaded pipeline, moved to ``device``.
    """
    # Accept indexed devices such as "cuda:0" as CUDA as well.
    on_cuda = str(device).startswith("cuda")

    if torch_dtype is None:
        torch_dtype = torch.float16 if use_fp16 and on_cuda else torch.float32

    if scheduler is None:
        try:
            scheduler = DPMSolverMultistepScheduler.from_pretrained(
                model_id,
                subfolder="scheduler",
            )
        except Exception as err:
            # Best effort: report the reason and fall back to the default.
            logger.warning(
                f"Could not load DPMSolverMultistepScheduler for {model_id}: "
                f"{err}. Falling back to the pipeline's default scheduler."
            )
            scheduler = None

    logger.info(f"Loading pipeline {model_id} " f"dtype={torch_dtype} on {device} ...")

    # NOTE(review): newer diffusers/huggingface_hub releases prefer ``token=``
    # over ``use_auth_token=`` - confirm against the pinned versions.
    load_kwargs = {
        "torch_dtype": torch_dtype,
        "safety_checker": None,
        "use_auth_token": os.getenv("HUGGINGFACE_HUB_TOKEN"),
    }
    # Only forward a scheduler that was actually obtained. An explicit
    # ``scheduler=None`` counts as a passed component and would leave the
    # pipeline without any scheduler instead of using the model's default.
    if scheduler is not None:
        load_kwargs["scheduler"] = scheduler

    pipe = StableDiffusionPipeline.from_pretrained(model_id, **load_kwargs)

    pipe = pipe.to(device)

    try:
        pipe.enable_attention_slicing()
        logger.info("Enabled attention slicing.")
    except Exception:
        logger.info("Attention slicing not available.")

    if enable_xformers:
        _try_enable_xformers(pipe)

    try:
        if hasattr(pipe.vae, "enable_tiling"):
            pipe.vae.enable_tiling()
            logger.info("Enabled VAE tiling.")
    except Exception as err:
        # Optional optimization: keep going, but leave a trace of the reason.
        logger.info(f"VAE tiling not enabled: {err}")

    if on_cuda:
        torch.backends.cudnn.benchmark = True

    logger.info("Pipeline loaded.")
    return pipe
|
| 87 |
+
|
| 88 |
+
|
| 89 |
+
def warmup_pipeline(
    pipe,
    prompt: str = "A photo of a cat",
    height: int = 512,
    width: int = 512,
):
    """Run a single-step inference so later calls start warm.

    Warmup is best effort: any failure is logged as a warning and the
    function returns normally.

    Args:
        pipe: Callable pipeline; invoked with ``prompt``,
            ``num_inference_steps=1``, ``guidance_scale=1.0``, ``height``,
            ``width`` and ``generator``.
        prompt: Prompt used for the throwaway inference.
        height: Image height for the warmup run.
        width: Image width for the warmup run.
    """
    device = None
    try:
        # Diffusers pipelines are not nn.Modules and expose ``.device``
        # rather than ``.parameters()``; support both shapes of object.
        if getattr(pipe, "device", None) is not None:
            device = torch.device(pipe.device)
        elif hasattr(pipe, "parameters"):
            device = next(pipe.parameters()).device
    except Exception:
        device = None

    if device is None:
        # Could not inspect the pipeline: pick CUDA only when it exists so
        # CPU-only hosts do not fail while creating the generator.
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    try:
        gen = torch.Generator(device).manual_seed(0)

        logger.info("Warmup: running one-step inference to initialize kernels.")

        _ = pipe(
            prompt=prompt,
            num_inference_steps=1,
            guidance_scale=1.0,
            height=height,
            width=width,
            generator=gen,
        )

        # Releasing cached blocks only makes sense for CUDA runs.
        if device.type == "cuda":
            torch.cuda.empty_cache()
        logger.info("Warmup complete.")
    except Exception as err:
        logger.warning(f"Warmup failed: {err}")
|
app/utils/logger.py
CHANGED
|
@@ -1 +1,51 @@
|
|
| 1 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Centralized logging utility for the project.
|
| 2 |
+
|
| 3 |
+
Features:
|
| 4 |
+
- Colored console logs
|
| 5 |
+
- File logs (logs/app.log)
|
| 6 |
+
- Timestamped + module-aware output
|
| 7 |
+
"""
|
| 8 |
+
|
| 9 |
+
import logging
|
| 10 |
+
import os
|
| 11 |
+
from logging.handlers import RotatingFileHandler
|
| 12 |
+
|
| 13 |
+
LOG_DIR = "logs"
LOG_FILE = os.path.join(LOG_DIR, "app.log")

os.makedirs(LOG_DIR, exist_ok=True)

_DATE_FORMAT = "%Y-%m-%d %H:%M:%S"
# Built with implicit string concatenation: a backslash continuation inside
# the literal would embed the source indentation in every console line.
_CONSOLE_FORMAT = (
    "\033[36m[%(asctime)s] [%(name)s] [%(levelname)s]\033[0m "
    "%(message)s"
)
_FILE_FORMAT = "[%(asctime)s] [%(name)s] [%(levelname)s] %(message)s"

# Handlers are created once and shared by every logger returned from
# get_logger, so only a single RotatingFileHandler ever owns LOG_FILE.
_shared_handlers: list = []


def _get_shared_handlers() -> list:
    """Create the console and file handlers on first use and return them."""
    if not _shared_handlers:
        console_handler = logging.StreamHandler()
        console_handler.setFormatter(logging.Formatter(_CONSOLE_FORMAT, _DATE_FORMAT))

        file_handler = RotatingFileHandler(
            LOG_FILE,
            maxBytes=5_000_000,
            backupCount=3,
        )
        file_handler.setFormatter(logging.Formatter(_FILE_FORMAT, _DATE_FORMAT))

        _shared_handlers.extend((console_handler, file_handler))
    return _shared_handlers


def get_logger(name: str = "app", level=logging.INFO) -> logging.Logger:
    """Return a logger that writes to the console and to ``LOG_FILE``.

    Safe to call repeatedly and from any module: handlers are attached to a
    given logger only once.

    Args:
        name: Logger name, typically ``__name__`` of the calling module.
        level: Level applied to the logger on every call.

    Returns:
        The configured ``logging.Logger`` instance.
    """
    logger = logging.getLogger(name)
    logger.setLevel(level)

    # Inspect this logger's own handlers. ``hasHandlers()`` also consults
    # ancestor loggers, so a configured root logger would make this function
    # skip attaching the console/file handlers altogether.
    if logger.handlers:
        return logger

    for handler in _get_shared_handlers():
        logger.addHandler(handler)

    # Each logger carries the shared handlers itself; propagating to parent
    # loggers would emit the same record more than once.
    logger.propagate = False

    return logger
|
requirements.txt
CHANGED
|
@@ -8,6 +8,7 @@ torchaudio==2.5.1
|
|
| 8 |
|
| 9 |
|
| 10 |
# HUGGINGFACE DIFFUSION ECOSYSTEM
|
|
|
|
| 11 |
diffusers==0.26.3
|
| 12 |
transformers==4.39.3
|
| 13 |
accelerate==0.28.0
|
|
|
|
| 8 |
|
| 9 |
|
| 10 |
# HUGGINGFACE DIFFUSION ECOSYSTEM
|
| 11 |
+
huggingface_hub==0.20.3
|
| 12 |
diffusers==0.26.3
|
| 13 |
transformers==4.39.3
|
| 14 |
accelerate==0.28.0
|