SanskarModi commited on
Commit
2a72dcc
·
1 Parent(s): 35862c8

Completed the text-to-image pipeline using Stable Diffusion

Browse files
Files changed (4) hide show
  1. app/generator.py +83 -1
  2. app/pipeline.py +122 -1
  3. app/utils/logger.py +51 -1
  4. requirements.txt +1 -0
app/generator.py CHANGED
@@ -1 +1,83 @@
1
- """Auto-generated placeholder module for Stable Diffusion Image Generator."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Image generation wrapper around a loaded StableDiffusionPipeline.
2
+
3
+ Provides:
4
+ - generate_image(...) -> (PIL.Image, metadata)
5
+ - deterministic seed handling
6
+ """
7
+
8
+ import time
9
+ from typing import Any, Dict, Optional
10
+
11
+ import torch
12
+
13
+ from app.utils.logger import get_logger
14
+
15
+ logger = get_logger(__name__)
16
+
17
+
18
+ def _validate_resolution(width: int, height: int):
19
+ # clamp and snap to multiples of 64 (SD requirement)
20
+ width = max(256, min(width, 768))
21
+ height = max(256, min(height, 768))
22
+ width = (width // 64) * 64
23
+ height = (height // 64) * 64
24
+ return int(width), int(height)
25
+
26
+
27
def generate_image(
    pipe,
    prompt: str,
    negative_prompt: Optional[str] = None,
    steps: int = 30,
    guidance_scale: float = 7.5,
    width: int = 512,
    height: int = 512,
    seed: Optional[int] = None,
    device: str = "cuda",
):
    """Generate a single image and return (PIL.Image, metadata dict).

    Args:
        pipe: A loaded StableDiffusionPipeline (or compatible callable).
        prompt: Text prompt for generation.
        negative_prompt: Optional negative prompt; empty strings are forwarded
            as None so the pipeline skips negative conditioning.
        steps: Number of denoising steps.
        guidance_scale: Classifier-free guidance scale.
        width / height: Requested resolution; clamped and snapped to multiples
            of 64 by _validate_resolution.
        seed: RNG seed. When None a fresh seed is drawn and recorded in the
            metadata so the run can be reproduced.
        device: "cuda" or "cpu"; also selects the autocast device type.

    Returns:
        Tuple of the generated PIL image and a metadata dict describing the
        exact parameters used (including the resolved seed and elapsed time).
    """
    start = time.time()
    width, height = _validate_resolution(width, height)

    # Generator for reproducibility.
    if seed is None:
        # Draw a fresh seed and keep it for the metadata. Mask to 63 bits
        # because Generator.manual_seed rejects values >= 2**63.
        seed = int(torch.seed() & ((1 << 63) - 1))
    gen = torch.Generator(device if device != "cpu" else "cpu").manual_seed(int(seed))

    # BUG FIX: the original f-string used a backslash line continuation, which
    # baked the source indentation into the log message. Single-line, lazy
    # %-style logging instead.
    logger.info(
        "Generating: steps=%s, cfg=%s, res=%sx%s, seed=%s",
        steps,
        guidance_scale,
        width,
        height,
        seed,
    )

    # autocast manages mixed precision on CUDA; it is effectively a no-op
    # for fp32 CPU inference.
    device_type = "cuda" if device != "cpu" else "cpu"
    with torch.autocast(device_type=device_type):
        result = pipe(
            prompt=prompt,
            negative_prompt=negative_prompt if negative_prompt else None,
            num_inference_steps=int(steps),
            guidance_scale=float(guidance_scale),
            width=width,
            height=height,
            generator=gen,
        )

    img = result.images[0]  # diffusers returns a list of PIL images
    elapsed = time.time() - start

    metadata: Dict[str, Any] = {
        "prompt": prompt,
        "negative_prompt": negative_prompt,
        "steps": steps,
        "guidance_scale": guidance_scale,
        "width": width,
        "height": height,
        "seed": int(seed),
        "elapsed_seconds": elapsed,
    }

    logger.info("Generation finished in %.2fs", elapsed)
    return img, metadata
app/pipeline.py CHANGED
@@ -1 +1,122 @@
1
- """Auto-generated placeholder module for Stable Diffusion Image Generator."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Model pipeline loader for Stable Diffusion (HuggingFace Diffusers).
2
+
3
+ load_pipeline(...) returns a GPU-ready pipeline with memory optimizations.
4
+ """
5
+
6
+ import os
7
+ from typing import Optional
8
+
9
+ import torch
10
+ from diffusers import (
11
+ DPMSolverMultistepScheduler,
12
+ StableDiffusionPipeline,
13
+ )
14
+ from dotenv import load_dotenv
15
+
16
+ from app.utils.logger import get_logger
17
+
18
+ logger = get_logger(__name__)
19
+ load_dotenv()
20
+
21
+
22
def _try_enable_xformers(pipe):
    """Best-effort switch to xFormers memory-efficient attention.

    Any failure is logged and swallowed: the pipeline still works without
    xFormers, just with higher attention memory use.
    """
    enable = getattr(pipe, "enable_xformers_memory_efficient_attention", None)
    if enable is None:
        logger.info("xFormers not available via API; skipping.")
        return
    try:
        enable()
    except Exception as err:
        logger.info(f"xFormers not enabled: {err}")
    else:
        logger.info("Enabled xFormers memory-efficient attention.")
31
+
32
+
33
def load_pipeline(
    model_id: str = "runwayml/stable-diffusion-v1-5",
    device: str = "cuda",
    use_fp16: bool = True,
    enable_xformers: bool = False,
    torch_dtype: Optional[torch.dtype] = None,
    scheduler=None,
):
    """Load and return an optimized StableDiffusionPipeline.

    Args:
        model_id: HuggingFace Hub model id or local path.
        device: Target device ("cuda" or "cpu").
        use_fp16: Use float16 weights when on CUDA (ignored on CPU).
        enable_xformers: Attempt to enable xFormers attention (best effort).
        torch_dtype: Explicit dtype override; derived from use_fp16/device
            when None.
        scheduler: Optional scheduler instance; defaults to DPMSolverMultistep
            loaded from the model repo, falling back to the model's bundled
            scheduler if that fails.

    Returns:
        The pipeline moved to `device` with memory optimizations applied.
    """
    if torch_dtype is None:
        # fp16 only makes sense on CUDA; CPU kernels want fp32.
        torch_dtype = torch.float16 if use_fp16 and device == "cuda" else torch.float32

    if scheduler is None:
        try:
            scheduler = DPMSolverMultistepScheduler.from_pretrained(
                model_id,
                subfolder="scheduler",
            )
        except Exception:
            # Fall back to whatever scheduler ships with the model.
            scheduler = None

    logger.info(f"Loading pipeline {model_id} " f"dtype={torch_dtype} on {device} ...")

    load_kwargs = {
        "torch_dtype": torch_dtype,
        # safety_checker=None deliberately disables the NSFW checker.
        "safety_checker": None,
        # NOTE(review): `use_auth_token` is deprecated in newer huggingface_hub
        # releases in favor of `token` — confirm against the pinned versions.
        "use_auth_token": os.getenv("HUGGINGFACE_HUB_TOKEN"),
    }
    # BUG FIX: only forward `scheduler` when one was actually resolved.
    # Passing scheduler=None explicitly overrides the pipeline's bundled
    # scheduler component with None instead of letting the default load.
    if scheduler is not None:
        load_kwargs["scheduler"] = scheduler

    pipe = StableDiffusionPipeline.from_pretrained(model_id, **load_kwargs)

    pipe = pipe.to(device)

    # Attention slicing trades a little speed for a large VRAM reduction.
    try:
        pipe.enable_attention_slicing()
        logger.info("Enabled attention slicing.")
    except Exception:
        logger.info("Attention slicing not available.")

    if enable_xformers:
        _try_enable_xformers(pipe)

    # VAE tiling lets large decodes run in chunks (best effort).
    try:
        if hasattr(pipe.vae, "enable_tiling"):
            pipe.vae.enable_tiling()
            logger.info("Enabled VAE tiling.")
    except Exception:
        pass

    if device == "cuda":
        # Fixed input sizes benefit from cuDNN autotuning.
        torch.backends.cudnn.benchmark = True

    logger.info("Pipeline loaded.")
    return pipe
87
+
88
+
89
def warmup_pipeline(
    pipe,
    prompt: str = "A photo of a cat",
    height: int = 512,
    width: int = 512,
):
    """Run a quick inference to allocate CUDA kernels and memory.

    Failures are logged as warnings and never propagate — warmup is purely an
    optimization.
    """
    # BUG FIX: DiffusionPipeline is not an nn.Module and has no .parameters(),
    # so the original probe always fell back to "cuda" and warmup always
    # failed on CPU-only machines. Prefer the pipeline's `.device` property,
    # then fall back to probing parameters (e.g. for a raw nn.Module).
    device = getattr(pipe, "device", None)
    if device is None:
        try:
            if hasattr(pipe, "parameters"):
                device = next(pipe.parameters()).device
            else:
                device = "cuda"
        except Exception:
            device = "cuda"

    try:
        # torch.device("cpu") == "cpu" holds, so this works for both str and
        # torch.device values.
        gen = torch.Generator(device if device != "cpu" else "cpu").manual_seed(0)

        logger.info("Warmup: running one-step inference to initialize kernels.")

        _ = pipe(
            prompt=prompt,
            num_inference_steps=1,
            guidance_scale=1.0,
            height=height,
            width=width,
            generator=gen,
        )

        # Only touch the CUDA allocator when CUDA is actually present.
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
        logger.info("Warmup complete.")
    except Exception as err:
        logger.warning(f"Warmup failed: {err}")
app/utils/logger.py CHANGED
@@ -1 +1,51 @@
1
- """Auto-generated placeholder module for Stable Diffusion Image Generator."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Centralized logging utility for the project.
2
+
3
+ Features:
4
+ - Colored console logs
5
+ - File logs (logs/app.log)
6
+ - Timestamped + module-aware output
7
+ """
8
+
9
+ import logging
10
+ import os
11
+ from logging.handlers import RotatingFileHandler
12
+
13
+ LOG_DIR = "logs"
14
+ LOG_FILE = os.path.join(LOG_DIR, "app.log")
15
+
16
+ os.makedirs(LOG_DIR, exist_ok=True)
17
+
18
+
19
+ def get_logger(name: str = "app", level=logging.INFO) -> logging.Logger:
20
+ """Returns a configured logger instance.
21
+
22
+ Safe to call from any module.
23
+ """
24
+ logger = logging.getLogger(name)
25
+ logger.setLevel(level)
26
+
27
+ if logger.hasHandlers():
28
+ return logger
29
+
30
+ # Console handler
31
+ console_handler = logging.StreamHandler()
32
+ console_format = (
33
+ "\033[36m[%(asctime)s] [%(name)s] \
34
+ [%(levelname)s]\033[0m "
35
+ "%(message)s"
36
+ )
37
+ console_handler.setFormatter(logging.Formatter(console_format, "%Y-%m-%d %H:%M:%S"))
38
+
39
+ # File handler
40
+ file_handler = RotatingFileHandler(
41
+ LOG_FILE,
42
+ maxBytes=5_000_000,
43
+ backupCount=3,
44
+ )
45
+ file_format = "[%(asctime)s] [%(name)s] [%(levelname)s] %(message)s"
46
+ file_handler.setFormatter(logging.Formatter(file_format, "%Y-%m-%d %H:%M:%S"))
47
+
48
+ logger.addHandler(console_handler)
49
+ logger.addHandler(file_handler)
50
+
51
+ return logger
requirements.txt CHANGED
@@ -8,6 +8,7 @@ torchaudio==2.5.1
8
 
9
 
10
  # HUGGINGFACE DIFFUSION ECOSYSTEM
 
11
  diffusers==0.26.3
12
  transformers==4.39.3
13
  accelerate==0.28.0
 
8
 
9
 
10
  # HUGGINGFACE DIFFUSION ECOSYSTEM
11
+ huggingface_hub==0.20.3
12
  diffusers==0.26.3
13
  transformers==4.39.3
14
  accelerate==0.28.0