SanskarModi commited on
Commit
7a1d414
·
1 Parent(s): 7eb78e5
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. app/core/__init__.py +0 -1
  2. app/generator.py +0 -83
  3. app/img2img.py +0 -175
  4. app/models/__init__.py +0 -1
  5. app/models/metadata.py +0 -1
  6. app/presets/__init__.py +0 -1
  7. app/presets/styles.py +0 -1
  8. app/ui.py +0 -1
  9. app/upscaler/__init__.py +0 -1
  10. app/upscaler/realesrgan.py +0 -55
  11. app/upscaler/upscaler.py +0 -39
  12. app/utils/__init__.py +0 -1
  13. app/utils/history.py +0 -1
  14. app/utils/logger.py +0 -51
  15. app/utils/seed.py +0 -1
  16. assets/__init__.py +0 -1
  17. assets/lora/__init__.py +0 -1
  18. main.py +0 -1
  19. pyproject.toml +12 -0
  20. requirements.txt +3 -1
  21. src/assets/history/entries/30517a2b-2b3f-468d-a22c-0365852e9fd4.json +15 -0
  22. src/assets/history/entries/6c2372b4-ad89-4f9a-845d-729447fbfc42.json +15 -0
  23. src/assets/history/entries/763d106c-d607-4a42-a4c7-4264c54d0033.json +15 -0
  24. src/assets/history/entries/a6861b54-0afb-4b32-bb33-cacdadd7e639.json +15 -0
  25. src/assets/history/entries/dbc7d811-d607-4432-a10f-94245b06a629.json +15 -0
  26. src/assets/history/index.json +52 -0
  27. src/sdgen/__init__.py +5 -0
  28. src/sdgen/config/__init__.py +29 -0
  29. src/sdgen/config/paths.py +44 -0
  30. src/sdgen/config/settings.py +31 -0
  31. src/sdgen/main.py +65 -0
  32. src/sdgen/presets/__init__.py +5 -0
  33. src/sdgen/presets/styles.py +95 -0
  34. src/sdgen/sd/__init__.py +18 -0
  35. src/sdgen/sd/generator.py +76 -0
  36. src/sdgen/sd/img2img.py +136 -0
  37. src/sdgen/sd/models.py +121 -0
  38. {app → src/sdgen/sd}/pipeline.py +42 -33
  39. src/sdgen/ui/__init__.py +5 -0
  40. src/sdgen/ui/layout.py +184 -0
  41. src/sdgen/ui/tabs/__init__.py +15 -0
  42. src/sdgen/ui/tabs/history_tab.py +162 -0
  43. src/sdgen/ui/tabs/img2img_tab.py +122 -0
  44. src/sdgen/ui/tabs/presets_tab.py +119 -0
  45. src/sdgen/ui/tabs/txt2img_tab.py +112 -0
  46. src/sdgen/ui/tabs/upscaler_tab.py +36 -0
  47. {app → src/sdgen/upscaler}/__init__.py +0 -0
  48. src/sdgen/upscaler/realesrgan.py +85 -0
  49. src/sdgen/upscaler/upscaler.py +95 -0
  50. src/sdgen/utils/__init__.py +0 -0
app/core/__init__.py DELETED
@@ -1 +0,0 @@
1
- """Package initialization file for Stable Diffusion Image Generator."""
 
 
app/generator.py DELETED
@@ -1,83 +0,0 @@
1
- """Image generation wrapper around a loaded StableDiffusionPipeline.
2
-
3
- Provides:
4
- - generate_image(...) -> (PIL.Image, metadata)
5
- - deterministic seed handling
6
- """
7
-
8
- import time
9
- from typing import Any, Dict, Optional
10
-
11
- import torch
12
-
13
- from app.utils.logger import get_logger
14
-
15
- logger = get_logger(__name__)
16
-
17
-
18
- def _validate_resolution(width: int, height: int):
19
- # clamp and snap to multiples of 64 (SD requirement)
20
- width = max(256, min(width, 768))
21
- height = max(256, min(height, 768))
22
- width = (width // 64) * 64
23
- height = (height // 64) * 64
24
- return int(width), int(height)
25
-
26
-
27
- def generate_image(
28
- pipe,
29
- prompt: str,
30
- negative_prompt: Optional[str] = None,
31
- steps: int = 30,
32
- guidance_scale: float = 7.5,
33
- width: int = 512,
34
- height: int = 512,
35
- seed: Optional[int] = None,
36
- device: str = "cuda",
37
- ):
38
- """Generate a single image and return (PIL.Image, metadata dict)."""
39
- start = time.time()
40
- width, height = _validate_resolution(width, height)
41
-
42
- # Generator for reproducibility
43
- if seed is None:
44
- # create a new seed and use it
45
- seed = int(torch.seed() & ((1 << 63) - 1))
46
- gen = torch.Generator(device if device != "cpu" else "cpu").manual_seed(int(seed))
47
-
48
- logger.info(
49
- (
50
- f"Generating: steps={steps}, cfg={guidance_scale},\
51
- res={width}x{height}, seed={seed}"
52
- )
53
- )
54
-
55
- # Use autocast for speed/precision management
56
- device_type = "cuda" if device != "cpu" else "cpu"
57
- with torch.autocast(device_type=device_type):
58
- result = pipe(
59
- prompt=prompt,
60
- negative_prompt=negative_prompt if negative_prompt else None,
61
- num_inference_steps=int(steps),
62
- guidance_scale=float(guidance_scale),
63
- width=width,
64
- height=height,
65
- generator=gen,
66
- )
67
-
68
- img = result.images[0] # PIL image
69
- elapsed = time.time() - start
70
-
71
- metadata: Dict[str, Any] = {
72
- "prompt": prompt,
73
- "negative_prompt": negative_prompt,
74
- "steps": steps,
75
- "guidance_scale": guidance_scale,
76
- "width": width,
77
- "height": height,
78
- "seed": int(seed),
79
- "elapsed_seconds": elapsed,
80
- }
81
-
82
- logger.info(f"Generation finished in {elapsed:.2f}s")
83
- return img, metadata
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app/img2img.py DELETED
@@ -1,175 +0,0 @@
1
- """Image-to-image generation using Stable Diffusion.
2
-
3
- This module provides:
4
- - prepare_img2img_pipeline: build an Img2Img pipeline from an existing txt2img pipe.
5
- - generate_img2img: run image-to-image generation and return (PIL.Image, metadata).
6
- """
7
-
8
- from __future__ import annotations
9
-
10
- import time
11
- from pathlib import Path
12
- from typing import Any, Dict, Optional, Union
13
-
14
- import torch
15
- from diffusers import StableDiffusionImg2ImgPipeline
16
- from PIL import Image
17
-
18
- from app.utils.logger import get_logger
19
-
20
- logger = get_logger(__name__)
21
-
22
-
23
- def _validate_resolution(width: int, height: int) -> tuple[int, int]:
24
- """Clamp resolution to a safe range and snap to multiples of 64."""
25
- width = max(256, min(width, 768))
26
- height = max(256, min(height, 768))
27
- width = (width // 64) * 64
28
- height = (height // 64) * 64
29
- return int(width), int(height)
30
-
31
-
32
- def _load_init_image(
33
- image: Union[Image.Image, str, Path],
34
- width: int,
35
- height: int,
36
- ) -> Image.Image:
37
- """Load and preprocess the init image for img2img."""
38
- if isinstance(image, (str, Path)):
39
- image = Image.open(image)
40
-
41
- if not isinstance(image, Image.Image):
42
- raise TypeError("init_image must be a PIL.Image or a valid image path.")
43
-
44
- image = image.convert("RGB")
45
- image = image.resize((width, height), resample=Image.LANCZOS)
46
- return image
47
-
48
-
49
- def prepare_img2img_pipeline(
50
- base_pipe,
51
- model_id: str = "runwayml/stable-diffusion-v1-5",
52
- ) -> StableDiffusionImg2ImgPipeline:
53
- """Create an Img2Img pipeline that shares weights with the base txt2img pipe.
54
-
55
- Tries to use StableDiffusionImg2ImgPipeline.from_pipe to reuse:
56
- - UNet
57
- - VAE
58
- - text encoder
59
- - tokenizer
60
- - scheduler
61
- """
62
- try:
63
- img2img_pipe = StableDiffusionImg2ImgPipeline.from_pipe(base_pipe)
64
- logger.info("Created Img2Img pipeline from existing base pipeline.")
65
- except Exception as err:
66
- logger.info("from_pipe failed (%s); falling back to from_pretrained.", err)
67
- img2img_pipe = StableDiffusionImg2ImgPipeline.from_pretrained(
68
- model_id,
69
- torch_dtype=base_pipe.unet.dtype,
70
- safety_checker=None,
71
- )
72
- device = next(base_pipe.unet.parameters()).device
73
- img2img_pipe = img2img_pipe.to(device)
74
-
75
- # memory optimizations similar to txt2img pipeline
76
- try:
77
- img2img_pipe.enable_attention_slicing()
78
- logger.info("Enabled attention slicing on Img2Img pipeline.")
79
- except Exception:
80
- logger.info("Attention slicing not available on Img2Img pipeline.")
81
-
82
- try:
83
- if hasattr(img2img_pipe.vae, "enable_tiling"):
84
- img2img_pipe.vae.enable_tiling()
85
- logger.info("Enabled VAE tiling on Img2Img pipeline.")
86
- except Exception:
87
- pass
88
-
89
- return img2img_pipe
90
-
91
-
92
- def generate_img2img(
93
- pipe: StableDiffusionImg2ImgPipeline,
94
- init_image: Union[Image.Image, str, Path],
95
- prompt: str,
96
- negative_prompt: Optional[str] = None,
97
- strength: float = 0.7,
98
- steps: int = 30,
99
- guidance_scale: float = 7.5,
100
- width: int = 512,
101
- height: int = 512,
102
- seed: Optional[int] = None,
103
- device: str = "cuda",
104
- ) -> tuple[Image.Image, Dict[str, Any]]:
105
- """Run image-to-image generation.
106
-
107
- Args:
108
- pipe: A StableDiffusionImg2ImgPipeline.
109
- init_image: Base image (PIL or path).
110
- prompt: Text prompt to guide the transformation.
111
- negative_prompt: What to avoid in the output.
112
- strength: How strong the transformation is (0-1).
113
- steps: Number of inference steps.
114
- guidance_scale: Prompt adherence strength.
115
- width: Target width (snapped to 64 multiple).
116
- height: Target height (snapped to 64 multiple).
117
- seed: Optional random seed for reproducibility.
118
- device: "cuda" or "cpu".
119
-
120
- Returns:
121
- (PIL.Image, metadata dict)
122
- """
123
- if not (0.0 < strength <= 1.0):
124
- raise ValueError("strength must be in (0, 1].")
125
-
126
- start = time.time()
127
- width, height = _validate_resolution(width, height)
128
- init_image = _load_init_image(init_image, width, height)
129
-
130
- # Seed handling
131
- if seed is None:
132
- seed = int(torch.seed() & ((1 << 63) - 1))
133
-
134
- gen = torch.Generator(device if device != "cpu" else "cpu").manual_seed(int(seed))
135
-
136
- logger.info(
137
- "Img2Img: steps=%s cfg=%s strength=%.2f res=%sx%s seed=%s",
138
- steps,
139
- guidance_scale,
140
- strength,
141
- width,
142
- height,
143
- seed,
144
- )
145
-
146
- device_type = "cuda" if device != "cpu" else "cpu"
147
- with torch.autocast(device_type=device_type):
148
- result = pipe(
149
- prompt=prompt,
150
- negative_prompt=negative_prompt if negative_prompt else None,
151
- image=init_image,
152
- strength=float(strength),
153
- num_inference_steps=int(steps),
154
- guidance_scale=float(guidance_scale),
155
- generator=gen,
156
- )
157
-
158
- out_image = result.images[0]
159
- elapsed = time.time() - start
160
-
161
- metadata: Dict[str, Any] = {
162
- "mode": "img2img",
163
- "prompt": prompt,
164
- "negative_prompt": negative_prompt,
165
- "steps": steps,
166
- "guidance_scale": guidance_scale,
167
- "width": width,
168
- "height": height,
169
- "seed": int(seed),
170
- "strength": float(strength),
171
- "elapsed_seconds": elapsed,
172
- }
173
-
174
- logger.info("Img2Img finished in %.2fs", elapsed)
175
- return out_image, metadata
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app/models/__init__.py DELETED
@@ -1 +0,0 @@
1
- """Package initialization file for Stable Diffusion Image Generator."""
 
 
app/models/metadata.py DELETED
@@ -1 +0,0 @@
1
- """Auto-generated placeholder module for Stable Diffusion Image Generator."""
 
 
app/presets/__init__.py DELETED
@@ -1 +0,0 @@
1
- """Package initialization file for Stable Diffusion Image Generator."""
 
 
app/presets/styles.py DELETED
@@ -1 +0,0 @@
1
- """Auto-generated placeholder module for Stable Diffusion Image Generator."""
 
 
app/ui.py DELETED
@@ -1 +0,0 @@
1
- """Auto-generated placeholder module for Stable Diffusion Image Generator."""
 
 
app/upscaler/__init__.py DELETED
@@ -1 +0,0 @@
1
- """Package initialization file for Stable Diffusion Image Generator."""
 
 
app/upscaler/realesrgan.py DELETED
@@ -1,55 +0,0 @@
1
- """NCNN RealESRGAN upscaler wrapper.
2
-
3
- This module exposes:
4
- - NCNNUpscaler: provides lightweight 2x/4x super-resolution via realesrgan-ncnn-py.
5
- """
6
-
7
- from __future__ import annotations
8
-
9
- from PIL import Image
10
- from realesrgan_ncnn_py import Realesrgan
11
-
12
- from app.utils.logger import get_logger
13
-
14
- logger = get_logger(__name__)
15
-
16
- # Supported scales mapped to internal model indices
17
- SCALE_TO_MODEL = {
18
- 2.0: 3, # realesrgan-x2plus
19
- 4.0: 0, # realesrgan-x4plus
20
- }
21
-
22
-
23
- class NCNNUpscaler:
24
- """Lightweight NCNN RealESRGAN engine using realesrgan-ncnn-py.
25
-
26
- Args:
27
- scale (float): Supported values = 2.0 or 4.0.
28
- """
29
-
30
- def __init__(self, scale: float = 2.0):
31
- """Initialize the NCNN upscaler."""
32
- if scale not in SCALE_TO_MODEL:
33
- raise ValueError("Only 2.0x and 4.0x supported for your NCNN build")
34
-
35
- self.scale = scale
36
- self.model_index = SCALE_TO_MODEL[scale]
37
-
38
- logger.info(
39
- f"[NCNN] Loading RealESRGAN model index={self.model_index} \
40
- for scale={scale}x"
41
- )
42
-
43
- self.model = Realesrgan(model=self.model_index)
44
-
45
- def upscale(self, image: Image.Image) -> Image.Image:
46
- """Upscale a PIL image using NCNN RealESRGAN."""
47
- if not isinstance(image, Image.Image):
48
- raise TypeError("Input must be a PIL.Image")
49
-
50
- logger.info(
51
- f"[NCNN] Upscaling ({image.width}x{image.height}) "
52
- f"by {self.scale}x using model={self.model_index}"
53
- )
54
-
55
- return self.model.process_pil(image)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app/upscaler/upscaler.py DELETED
@@ -1,39 +0,0 @@
1
- """Unified upscaler interface.
2
-
3
- Chooses between:
4
- - NCNN RealESRGAN (fastest, works on NVIDIA/AMD/Intel)
5
- - Future SD-upscaler backend
6
- """
7
-
8
- from __future__ import annotations
9
-
10
- from PIL import Image
11
-
12
- from app.upscaler.realesrgan import NCNNUpscaler
13
- from app.utils.logger import get_logger
14
-
15
- logger = get_logger(__name__)
16
-
17
-
18
- class Upscaler:
19
- """Unified high-level upscaling wrapper."""
20
-
21
- def __init__(self, scale: float = 2.0, prefer: str = "ncnn"):
22
- """Initialize the upscaler with given backend preference."""
23
- logger.info(f"Upscaler initializing (prefer={prefer}, scale={scale})")
24
-
25
- self.engine = None
26
-
27
- if prefer in ("ncnn", "auto"):
28
- try:
29
- self.engine = NCNNUpscaler(scale=scale)
30
- logger.info("Using NCNN RealESRGAN engine.")
31
- return
32
- except Exception as err:
33
- logger.warning(f"NCNN RealESRGAN init failed: {err}")
34
-
35
- raise RuntimeError("No valid upscaler engine available.")
36
-
37
- def upscale(self, image: Image.Image) -> Image.Image:
38
- """Upscale the given image."""
39
- return self.engine.upscale(image)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app/utils/__init__.py DELETED
@@ -1 +0,0 @@
1
- """Package initialization file for Stable Diffusion Image Generator."""
 
 
app/utils/history.py DELETED
@@ -1 +0,0 @@
1
- """Auto-generated placeholder module for Stable Diffusion Image Generator."""
 
 
app/utils/logger.py DELETED
@@ -1,51 +0,0 @@
1
- """Centralized logging utility for the project.
2
-
3
- Features:
4
- - Colored console logs
5
- - File logs (logs/app.log)
6
- - Timestamped + module-aware output
7
- """
8
-
9
- import logging
10
- import os
11
- from logging.handlers import RotatingFileHandler
12
-
13
- LOG_DIR = "logs"
14
- LOG_FILE = os.path.join(LOG_DIR, "app.log")
15
-
16
- os.makedirs(LOG_DIR, exist_ok=True)
17
-
18
-
19
- def get_logger(name: str = "app", level=logging.INFO) -> logging.Logger:
20
- """Returns a configured logger instance.
21
-
22
- Safe to call from any module.
23
- """
24
- logger = logging.getLogger(name)
25
- logger.setLevel(level)
26
-
27
- if logger.hasHandlers():
28
- return logger
29
-
30
- # Console handler
31
- console_handler = logging.StreamHandler()
32
- console_format = (
33
- "\033[36m[%(asctime)s] [%(name)s] \
34
- [%(levelname)s]\033[0m "
35
- "%(message)s"
36
- )
37
- console_handler.setFormatter(logging.Formatter(console_format, "%Y-%m-%d %H:%M:%S"))
38
-
39
- # File handler
40
- file_handler = RotatingFileHandler(
41
- LOG_FILE,
42
- maxBytes=5_000_000,
43
- backupCount=3,
44
- )
45
- file_format = "[%(asctime)s] [%(name)s] [%(levelname)s] %(message)s"
46
- file_handler.setFormatter(logging.Formatter(file_format, "%Y-%m-%d %H:%M:%S"))
47
-
48
- logger.addHandler(console_handler)
49
- logger.addHandler(file_handler)
50
-
51
- return logger
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app/utils/seed.py DELETED
@@ -1 +0,0 @@
1
- """Auto-generated placeholder module for Stable Diffusion Image Generator."""
 
 
assets/__init__.py DELETED
@@ -1 +0,0 @@
1
- """Package initialization file for Stable Diffusion Image Generator."""
 
 
assets/lora/__init__.py DELETED
@@ -1 +0,0 @@
1
- """Package initialization file for Stable Diffusion Image Generator."""
 
 
main.py DELETED
@@ -1 +0,0 @@
1
- """Auto-generated placeholder module for Stable Diffusion Image Generator."""
 
 
pyproject.toml ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [project]
2
+ name = "sdgen"
3
+ version = "0.0.0"
4
+ requires-python = ">=3.10"
5
+ dependencies = []
6
+
7
+ [build-system]
8
+ requires = ["setuptools", "wheel"]
9
+ build-backend = "setuptools.build_meta"
10
+
11
+ [project.scripts]
12
+ sdgen = "sdgen.main:main"
requirements.txt CHANGED
@@ -16,7 +16,7 @@ safetensors==0.4.2
16
 
17
 
18
  # UI FRAMEWORK
19
- gradio==4.29.0
20
 
21
 
22
  # IMAGE PROCESSING & UTILITIES
@@ -36,3 +36,5 @@ realesrgan-ncnn-py==2.0.0
36
  black==24.3.0
37
  ruff==0.3.5
38
  pre-commit==3.7.0
 
 
 
16
 
17
 
18
  # UI FRAMEWORK
19
+ gradio==3.50.2
20
 
21
 
22
  # IMAGE PROCESSING & UTILITIES
 
36
  black==24.3.0
37
  ruff==0.3.5
38
  pre-commit==3.7.0
39
+
40
+ -e .
src/assets/history/entries/30517a2b-2b3f-468d-a22c-0365852e9fd4.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "mode": "txt2img",
3
+ "prompt": "oil painting, impasto brush strokes, classical lighting, Rembrandt style",
4
+ "negative_prompt": "blurry, cartoonish, digital artifacts",
5
+ "steps": 40,
6
+ "guidance_scale": 8.5,
7
+ "width": 512,
8
+ "height": 768,
9
+ "seed": 7008176382479260353,
10
+ "elapsed_seconds": 20.270400285720825,
11
+ "timestamp": "2025-12-04T10:18:33.634636",
12
+ "id": "30517a2b-2b3f-468d-a22c-0365852e9fd4",
13
+ "thumbnail": "/home/sanskar-modi/current_working_personal_projects/stable-diffusion-image-generator/src/assets/history/thumbnails/30517a2b-2b3f-468d-a22c-0365852e9fd4.png",
14
+ "full_image": "/home/sanskar-modi/current_working_personal_projects/stable-diffusion-image-generator/src/assets/history/full/30517a2b-2b3f-468d-a22c-0365852e9fd4.png"
15
+ }
src/assets/history/entries/6c2372b4-ad89-4f9a-845d-729447fbfc42.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "mode": "txt2img",
3
+ "prompt": "oil painting, impasto brush strokes, classical lighting, Rembrandt style",
4
+ "negative_prompt": "blurry, cartoonish, digital artifacts",
5
+ "steps": 40,
6
+ "guidance_scale": 8.5,
7
+ "width": 512,
8
+ "height": 768,
9
+ "seed": 8697126389267085321,
10
+ "elapsed_seconds": 18.847933292388916,
11
+ "timestamp": "2025-12-04T07:59:00.004141",
12
+ "id": "6c2372b4-ad89-4f9a-845d-729447fbfc42",
13
+ "thumbnail": "/home/sanskar-modi/current_working_personal_projects/stable-diffusion-image-generator/src/assets/history/thumbnails/6c2372b4-ad89-4f9a-845d-729447fbfc42.png",
14
+ "full_image": "/home/sanskar-modi/current_working_personal_projects/stable-diffusion-image-generator/src/assets/history/full/6c2372b4-ad89-4f9a-845d-729447fbfc42.png"
15
+ }
src/assets/history/entries/763d106c-d607-4a42-a4c7-4264c54d0033.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "mode": "txt2img",
3
+ "prompt": "ultra realistic, 35mm photography, photorealistic, cinematic lighting",
4
+ "negative_prompt": "low quality, blurry, deformed, extra limbs",
5
+ "steps": 28,
6
+ "guidance_scale": 7.5,
7
+ "width": 512,
8
+ "height": 512,
9
+ "seed": 7647575900507438056,
10
+ "elapsed_seconds": 8.190003871917725,
11
+ "timestamp": "2025-12-04T07:58:10.667954",
12
+ "id": "763d106c-d607-4a42-a4c7-4264c54d0033",
13
+ "thumbnail": "/home/sanskar-modi/current_working_personal_projects/stable-diffusion-image-generator/src/assets/history/thumbnails/763d106c-d607-4a42-a4c7-4264c54d0033.png",
14
+ "full_image": "/home/sanskar-modi/current_working_personal_projects/stable-diffusion-image-generator/src/assets/history/full/763d106c-d607-4a42-a4c7-4264c54d0033.png"
15
+ }
src/assets/history/entries/a6861b54-0afb-4b32-bb33-cacdadd7e639.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "mode": "txt2img",
3
+ "prompt": "dramatic cinematic lighting, moody, film grain, Kodak Portra, filmic color grading",
4
+ "negative_prompt": "oversaturated, low detail, flat lighting",
5
+ "steps": 30,
6
+ "guidance_scale": 7.0,
7
+ "width": 768,
8
+ "height": 512,
9
+ "seed": 2005184672833822731,
10
+ "elapsed_seconds": 16.372806072235107,
11
+ "timestamp": "2025-12-04T10:26:46.533003",
12
+ "id": "a6861b54-0afb-4b32-bb33-cacdadd7e639",
13
+ "thumbnail": "/home/sanskar-modi/current_working_personal_projects/stable-diffusion-image-generator/src/assets/history/thumbnails/a6861b54-0afb-4b32-bb33-cacdadd7e639.png",
14
+ "full_image": "/home/sanskar-modi/current_working_personal_projects/stable-diffusion-image-generator/src/assets/history/full/a6861b54-0afb-4b32-bb33-cacdadd7e639.png"
15
+ }
src/assets/history/entries/dbc7d811-d607-4432-a10f-94245b06a629.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "mode": "txt2img",
3
+ "prompt": "cyberpunk city, neon reflections, wet streets, high detail, synthwave",
4
+ "negative_prompt": "low detail, daytime, blurry",
5
+ "steps": 50,
6
+ "guidance_scale": 15.0,
7
+ "width": 768,
8
+ "height": 768,
9
+ "seed": 1759688396546594556,
10
+ "elapsed_seconds": 43.618977308273315,
11
+ "timestamp": "2025-12-04T09:47:30.607670",
12
+ "id": "dbc7d811-d607-4432-a10f-94245b06a629",
13
+ "thumbnail": "/home/sanskar-modi/current_working_personal_projects/stable-diffusion-image-generator/src/assets/history/thumbnails/dbc7d811-d607-4432-a10f-94245b06a629.png",
14
+ "full_image": "/home/sanskar-modi/current_working_personal_projects/stable-diffusion-image-generator/src/assets/history/full/dbc7d811-d607-4432-a10f-94245b06a629.png"
15
+ }
src/assets/history/index.json ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "id": "a6861b54-0afb-4b32-bb33-cacdadd7e639",
4
+ "prompt": "dramatic cinematic lighting, moody, film grain, Kodak Portra, filmic color grading",
5
+ "mode": "txt2img",
6
+ "seed": 2005184672833822731,
7
+ "width": 768,
8
+ "height": 512,
9
+ "timestamp": "2025-12-04T10:26:46.533003",
10
+ "thumbnail": "/home/sanskar-modi/current_working_personal_projects/stable-diffusion-image-generator/src/assets/history/thumbnails/a6861b54-0afb-4b32-bb33-cacdadd7e639.png"
11
+ },
12
+ {
13
+ "id": "30517a2b-2b3f-468d-a22c-0365852e9fd4",
14
+ "prompt": "oil painting, impasto brush strokes, classical lighting, Rembrandt style",
15
+ "mode": "txt2img",
16
+ "seed": 7008176382479260353,
17
+ "width": 512,
18
+ "height": 768,
19
+ "timestamp": "2025-12-04T10:18:33.634636",
20
+ "thumbnail": "/home/sanskar-modi/current_working_personal_projects/stable-diffusion-image-generator/src/assets/history/thumbnails/30517a2b-2b3f-468d-a22c-0365852e9fd4.png"
21
+ },
22
+ {
23
+ "id": "dbc7d811-d607-4432-a10f-94245b06a629",
24
+ "prompt": "cyberpunk city, neon reflections, wet streets, high detail, synthwave",
25
+ "mode": "txt2img",
26
+ "seed": 1759688396546594556,
27
+ "width": 768,
28
+ "height": 768,
29
+ "timestamp": "2025-12-04T09:47:30.607670",
30
+ "thumbnail": "/home/sanskar-modi/current_working_personal_projects/stable-diffusion-image-generator/src/assets/history/thumbnails/dbc7d811-d607-4432-a10f-94245b06a629.png"
31
+ },
32
+ {
33
+ "id": "6c2372b4-ad89-4f9a-845d-729447fbfc42",
34
+ "prompt": "oil painting, impasto brush strokes, classical lighting, Rembrandt style",
35
+ "mode": "txt2img",
36
+ "seed": 8697126389267085321,
37
+ "width": 512,
38
+ "height": 768,
39
+ "timestamp": "2025-12-04T07:59:00.004141",
40
+ "thumbnail": "/home/sanskar-modi/current_working_personal_projects/stable-diffusion-image-generator/src/assets/history/thumbnails/6c2372b4-ad89-4f9a-845d-729447fbfc42.png"
41
+ },
42
+ {
43
+ "id": "763d106c-d607-4a42-a4c7-4264c54d0033",
44
+ "prompt": "ultra realistic, 35mm photography, photorealistic, cinematic lighting",
45
+ "mode": "txt2img",
46
+ "seed": 7647575900507438056,
47
+ "width": 512,
48
+ "height": 512,
49
+ "timestamp": "2025-12-04T07:58:10.667954",
50
+ "thumbnail": "/home/sanskar-modi/current_working_personal_projects/stable-diffusion-image-generator/src/assets/history/thumbnails/763d106c-d607-4a42-a4c7-4264c54d0033.png"
51
+ }
52
+ ]
src/sdgen/__init__.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from .main import main
4
+
5
+ __all__ = ["main"]
src/sdgen/config/__init__.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Configuration exports for the sdgen package.
2
+
3
+ This module re-exports commonly used configuration paths and settings
4
+ so they can be imported directly from `sdgen.config`.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from .paths import (
10
+ ASSETS_ROOT,
11
+ HISTORY_ENTRIES_DIR,
12
+ HISTORY_FULL_DIR,
13
+ HISTORY_ROOT,
14
+ HISTORY_THUMBS_DIR,
15
+ LOGS_ROOT,
16
+ PROJECT_ROOT,
17
+ )
18
+ from .settings import AppSettings
19
+
20
+ __all__ = [
21
+ "AppSettings",
22
+ "PROJECT_ROOT",
23
+ "ASSETS_ROOT",
24
+ "HISTORY_ROOT",
25
+ "HISTORY_ENTRIES_DIR",
26
+ "HISTORY_THUMBS_DIR",
27
+ "HISTORY_FULL_DIR",
28
+ "LOGS_ROOT",
29
+ ]
src/sdgen/config/paths.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Path configuration for sdgen.
2
+
3
+ All filesystem paths are resolved relative to the project root.
4
+ The project root is detected by walking upward until a marker
5
+ file (e.g., `pyproject.toml` or `.git`) is found.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from pathlib import Path
11
+
12
+
13
+ def _detect_project_root() -> Path:
14
+ """Return the project root by scanning upward for a marker file."""
15
+ current = Path(__file__).resolve()
16
+
17
+ for parent in current.parents:
18
+ if (parent / "pyproject.toml").exists() or (parent / ".git").exists():
19
+ return parent
20
+
21
+ # Fallback: use the last resolved parent
22
+ return current.parents[-1]
23
+
24
+
25
+ PROJECT_ROOT: Path = _detect_project_root()
26
+
27
+ ASSETS_ROOT: Path = PROJECT_ROOT / "src" / "assets"
28
+ ASSETS_ROOT.mkdir(parents=True, exist_ok=True)
29
+
30
+ HISTORY_ROOT: Path = ASSETS_ROOT / "history"
31
+ HISTORY_ENTRIES_DIR: Path = HISTORY_ROOT / "entries"
32
+ HISTORY_THUMBS_DIR: Path = HISTORY_ROOT / "thumbnails"
33
+ HISTORY_FULL_DIR: Path = HISTORY_ROOT / "full"
34
+
35
+ for p in [
36
+ HISTORY_ROOT,
37
+ HISTORY_ENTRIES_DIR,
38
+ HISTORY_THUMBS_DIR,
39
+ HISTORY_FULL_DIR,
40
+ ]:
41
+ p.mkdir(parents=True, exist_ok=True)
42
+
43
+ LOGS_ROOT: Path = PROJECT_ROOT / "logs"
44
+ LOGS_ROOT.mkdir(parents=True, exist_ok=True)
src/sdgen/config/settings.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Application runtime settings for sdgen.
2
+
3
+ AppSettings reads configuration values from environment variables at
4
+ process start and exposes them as strongly typed attributes.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import os
10
+ from dataclasses import dataclass
11
+
12
+
13
+ @dataclass
14
+ class AppSettings:
15
+ """Config values for the Stable Diffusion app.
16
+
17
+ Supported environment variables:
18
+ - MODEL_ID: HuggingFace model name
19
+ - XFORMERS: 1/0 to enable xformers
20
+ - WARMUP: 1/0 to warm up CUDA kernels
21
+ - PORT: server port for Gradio
22
+ - HOST: server host address
23
+ - SHARE: enable Gradio public sharing link
24
+ """
25
+
26
+ model_id: str = os.getenv("MODEL_ID", "runwayml/stable-diffusion-v1-5")
27
+ enable_xformers: bool = bool(int(os.getenv("XFORMERS", "0")))
28
+ warmup: bool = bool(int(os.getenv("WARMUP", "1")))
29
+ server_port: int = int(os.getenv("PORT", "7860"))
30
+ server_host: str = os.getenv("HOST", "0.0.0.0")
31
+ share: bool = bool(int(os.getenv("SHARE", "1")))
src/sdgen/main.py ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Main entrypoint for the Stable Diffusion application.
2
+
3
+ This module initializes the text-to-image and image-to-image pipelines,
4
+ sets up the UI, and launches the Gradio interface.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import torch
10
+ from dotenv import load_dotenv
11
+
12
+ from sdgen.config import AppSettings
13
+ from sdgen.sd.img2img import prepare_img2img_pipeline
14
+ from sdgen.sd.pipeline import load_pipeline, warmup_pipeline
15
+ from sdgen.ui import build_ui
16
+ from sdgen.utils.logger import get_logger
17
+
18
+ logger = get_logger(__name__)
19
+ load_dotenv()
20
+
21
+
22
+ def detect_device() -> str:
23
+ """Return `"cuda"` if a GPU is available, otherwise `"cpu"`.
24
+
25
+ Returns:
26
+ The selected device string.
27
+ """
28
+ if torch.cuda.is_available():
29
+ logger.info("CUDA available → using GPU")
30
+ return "cuda"
31
+
32
+ logger.warning("CUDA not detected → falling back to CPU")
33
+ return "cpu"
34
+
35
+
36
+ def main() -> None:
37
+ """Start the Stable Diffusion UI and initialize inference pipelines."""
38
+ settings = AppSettings()
39
+ model_id = settings.model_id
40
+
41
+ device = detect_device()
42
+
43
+ logger.info("Loading pipeline %s", model_id)
44
+ pipe = load_pipeline(
45
+ model_id=model_id,
46
+ device=device,
47
+ use_fp16=device == "cuda",
48
+ enable_xformers=settings.enable_xformers,
49
+ )
50
+
51
+ if device == "cuda" and settings.warmup:
52
+ warmup_pipeline(pipe)
53
+
54
+ img2img_pipe = prepare_img2img_pipeline(pipe)
55
+
56
+ demo = build_ui(pipe, img2img_pipe)
57
+ demo.launch(
58
+ server_name=settings.server_host,
59
+ server_port=settings.server_port,
60
+ share=settings.share,
61
+ )
62
+
63
+
64
+ if __name__ == "__main__":
65
+ main()
src/sdgen/presets/__init__.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from .styles import get_preset, list_presets
4
+
5
+ __all__ = ["get_preset", "list_presets"]
src/sdgen/presets/styles.py ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Preset configurations for text-to-image generation.
2
+
3
+ This module defines a collection of named presets including prompt,
4
+ negative prompt, sampler parameters, and recommended resolutions.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from typing import Any, Dict, List
10
+
11
+ # Global preset registry: {preset_name: parameters}
12
+ PRESETS: Dict[str, Dict[str, Any]] = {
13
+ "Realistic Photo": {
14
+ "prompt": (
15
+ "ultra realistic, 35mm photography, \
16
+ photorealistic, "
17
+ "cinematic lighting"
18
+ ),
19
+ "negative_prompt": "low quality, blurry, deformed, extra limbs",
20
+ "steps": 28,
21
+ "guidance_scale": 7.5,
22
+ "width": 512,
23
+ "height": 512,
24
+ "note": "Natural lighting, sharp details, realistic skin texture",
25
+ "tags": ["realistic", "photo"],
26
+ },
27
+ "Anime": {
28
+ "prompt": (
29
+ "high quality anime, clean lines, vibrant colors, \
30
+ soft rim lighting, "
31
+ "studio lighting"
32
+ ),
33
+ "negative_prompt": "blurry, low detail, mutation, deformed",
34
+ "steps": 30,
35
+ "guidance_scale": 8.0,
36
+ "width": 512,
37
+ "height": 512,
38
+ "note": "Use for anime-style character generation",
39
+ "tags": ["anime", "stylized"],
40
+ },
41
+ "Cinematic / Moody": {
42
+ "prompt": (
43
+ "dramatic cinematic lighting, moody, film grain, \
44
+ Kodak Portra, "
45
+ "filmic color grading"
46
+ ),
47
+ "negative_prompt": "oversaturated, low detail, flat lighting",
48
+ "steps": 30,
49
+ "guidance_scale": 7.0,
50
+ "width": 768,
51
+ "height": 512,
52
+ "note": "Wider aspect ratio for cinematic feel",
53
+ "tags": ["cinematic", "moody"],
54
+ },
55
+ "Oil Painting / Classic Art": {
56
+ "prompt": (
57
+ "oil painting, impasto brush strokes, classical \
58
+ lighting, "
59
+ "Rembrandt style"
60
+ ),
61
+ "negative_prompt": "blurry, cartoonish, digital artifacts",
62
+ "steps": 40,
63
+ "guidance_scale": 8.5,
64
+ "width": 512,
65
+ "height": 768,
66
+ "note": "Painterly aesthetic reminiscent of classical oil art",
67
+ "tags": ["art", "oil", "painterly"],
68
+ },
69
+ "Cyberpunk / Neon": {
70
+ "prompt": (
71
+ "cyberpunk city, neon reflections, wet streets, \
72
+ high detail, "
73
+ "synthwave aesthetic"
74
+ ),
75
+ "negative_prompt": "low detail, daytime, blurry",
76
+ "steps": 30,
77
+ "guidance_scale": 7.5,
78
+ "width": 512,
79
+ "height": 768,
80
+ "note": "Vibrant neon-lit futuristic look",
81
+ "tags": ["cyberpunk", "neon"],
82
+ },
83
+ }
84
+
85
+
86
+ def get_preset(name: str) -> Dict[str, Any] | None:
87
+ """Return a shallow copy of a preset by name."""
88
+ data = PRESETS.get(name)
89
+ return dict(data) if data else None
90
+
91
+
92
+ def list_presets() -> List[str]:
93
+ """List preset names in a stable UI order."""
94
+ # Avoid unexpected reordering: use insertion order
95
+ return list(PRESETS.keys())
src/sdgen/sd/__init__.py ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from .generator import generate_image
4
+ from .img2img import generate_img2img, prepare_img2img_pipeline
5
+ from .models import GenerationMetadata, HistorySummary, Img2ImgConfig, Txt2ImgConfig
6
+ from .pipeline import load_pipeline, warmup_pipeline
7
+
8
+ __all__ = [
9
+ "Txt2ImgConfig",
10
+ "Img2ImgConfig",
11
+ "GenerationMetadata",
12
+ "HistorySummary",
13
+ "generate_image",
14
+ "generate_img2img",
15
+ "prepare_img2img_pipeline",
16
+ "load_pipeline",
17
+ "warmup_pipeline",
18
+ ]
src/sdgen/sd/generator.py ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Text-to-image generation with clean metadata output."""
2
+
3
from __future__ import annotations

import time
from typing import Any, Tuple

import torch
from PIL import Image

from sdgen.sd.models import GenerationMetadata, Txt2ImgConfig
from sdgen.utils.common import validate_resolution
from sdgen.utils.logger import get_logger
14
+
15
+ logger = get_logger(__name__)
16
+
17
+
18
def generate_image(
    pipe: Any,
    cfg: Txt2ImgConfig,
) -> Tuple[Image.Image, GenerationMetadata]:
    """Generate an image from text using a Stable Diffusion pipeline.

    Args:
        pipe: A diffusers StableDiffusionPipeline instance.
        cfg: Structured configuration for text-to-image generation.

    Returns:
        A tuple of (PIL image, GenerationMetadata). The metadata records the
        resolved seed, so runs are reproducible even when ``cfg.seed`` is None.
    """
    # FIX: the parameter was annotated ``pipe: any`` — that is the builtin
    # any() function, not a type. Use typing.Any instead.
    width, height = validate_resolution(cfg.width, cfg.height)
    start = time.time()

    # Resolve the seed up front: torch.seed() returns an unsigned 64-bit
    # value; mask to 63 bits so manual_seed() always gets a valid signed int.
    seed = cfg.seed
    if seed is None:
        seed = int(torch.seed() & ((1 << 63) - 1))

    device = cfg.device
    gen = torch.Generator("cpu" if device == "cpu" else device).manual_seed(int(seed))

    logger.info(
        "txt2img: steps=%s cfg=%s res=%sx%s seed=%s",
        cfg.steps,
        cfg.guidance_scale,
        width,
        height,
        seed,
    )

    # NOTE(review): autocast is entered on CPU as well, where it defaults to
    # bfloat16 — confirm this is intended for float32 CPU pipelines.
    autocast_device = device if device == "cuda" else "cpu"
    with torch.autocast(device_type=autocast_device):
        out = pipe(
            prompt=cfg.prompt,
            negative_prompt=cfg.negative_prompt or None,
            width=width,
            height=height,
            num_inference_steps=int(cfg.steps),
            guidance_scale=float(cfg.guidance_scale),
            generator=gen,
        )

    img = out.images[0]
    elapsed = time.time() - start

    meta = GenerationMetadata(
        mode="txt2img",
        prompt=cfg.prompt,
        negative_prompt=cfg.negative_prompt or "",
        steps=int(cfg.steps),
        guidance_scale=float(cfg.guidance_scale),
        width=width,
        height=height,
        seed=int(seed),
        elapsed_seconds=float(elapsed),
    )
    return img, meta
src/sdgen/sd/img2img.py ADDED
@@ -0,0 +1,136 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Img2Img pipeline setup and generation utilities."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import time
6
+
7
+ import torch
8
+ from diffusers import StableDiffusionImg2ImgPipeline
9
+ from PIL import Image
10
+
11
+ from sdgen.sd.models import GenerationMetadata, Img2ImgConfig
12
+ from sdgen.utils.common import validate_resolution
13
+ from sdgen.utils.logger import get_logger
14
+
15
+ logger = get_logger(__name__)
16
+
17
+
18
def prepare_img2img_pipeline(
    base_pipe: StableDiffusionImg2ImgPipeline,
    model_id: str = "runwayml/stable-diffusion-v1-5",
) -> StableDiffusionImg2ImgPipeline:
    """Create an Img2Img pipeline that shares components with ``base_pipe``.

    The cheap path (``from_pipe``) reuses the already-loaded weights; if that
    fails for any reason, a fresh ``from_pretrained`` load is performed with
    the same dtype as the base pipeline's UNet.

    Args:
        base_pipe: Loaded text-to-image Stable Diffusion pipeline.
        model_id: Fallback Hugging Face model ID.

    Returns:
        Configured `StableDiffusionImg2ImgPipeline`.
    """
    try:
        img_pipe = StableDiffusionImg2ImgPipeline.from_pipe(base_pipe)
        logger.info("Img2Img pipeline created via from_pipe().")
    except Exception as exc:
        logger.warning("from_pipe() failed: %s → falling back.", exc)
        img_pipe = StableDiffusionImg2ImgPipeline.from_pretrained(
            model_id,
            torch_dtype=base_pipe.unet.dtype,
            safety_checker=None,
        )

    # Co-locate the new pipeline with the base pipeline's device.
    target_device = next(base_pipe.unet.parameters()).device
    img_pipe = img_pipe.to(target_device)

    # Best-effort memory optimizations; neither is fatal if unsupported.
    try:
        img_pipe.enable_attention_slicing()
    except Exception:
        pass

    try:
        if hasattr(img_pipe.vae, "enable_tiling"):
            img_pipe.vae.enable_tiling()
    except Exception:
        pass

    return img_pipe
60
+
61
+
62
def generate_img2img(
    pipe: StableDiffusionImg2ImgPipeline,
    cfg: Img2ImgConfig,
    init_image: Image.Image,
) -> tuple[Image.Image, GenerationMetadata]:
    """Run Img2Img generation using the configured pipeline and metadata config.

    Args:
        pipe: Stable Diffusion Img2Img pipeline.
        cfg: Img2Img inference settings (prompt, steps, etc.).
        init_image: The source image to transform.

    Raises:
        ValueError: If strength is outside (0, 1].

    Returns:
        A tuple of `(output_image, metadata)`.
    """
    if not (0.0 < cfg.strength <= 1.0):
        raise ValueError("strength must be in (0, 1].")

    width, height = validate_resolution(cfg.width, cfg.height)
    start = time.time()

    # Deterministic seed: mask torch.seed() to 63 bits so manual_seed()
    # always receives a valid signed value.
    seed = cfg.seed
    if seed is None:
        seed = int(torch.seed() & ((1 << 63) - 1))

    # Resize input to the validated target resolution.
    init = init_image.convert("RGB").resize((width, height), Image.LANCZOS)

    # Correct generator device (anything unrecognized falls back to CUDA).
    device = cfg.device if cfg.device in ("cuda", "cpu") else "cuda"
    generator = torch.Generator(device).manual_seed(int(seed))

    logger.info(
        "img2img: steps=%s cfg=%s strength=%.2f res=%sx%s seed=%s",
        cfg.steps,
        cfg.guidance_scale,
        cfg.strength,
        width,
        height,
        seed,
    )

    # Autocast context
    autocast_device = "cuda" if device == "cuda" else "cpu"
    with torch.autocast(device_type=autocast_device):
        out = pipe(
            prompt=cfg.prompt,
            # BUG FIX: Img2ImgConfig declares `negative_prompt`; the previous
            # `cfg.neg_prompt` raised AttributeError on every call.
            negative_prompt=cfg.negative_prompt or None,
            image=init,
            strength=float(cfg.strength),
            num_inference_steps=int(cfg.steps),
            guidance_scale=float(cfg.guidance_scale),
            generator=generator,
        )

    img = out.images[0]
    elapsed = time.time() - start

    meta = GenerationMetadata(
        mode="img2img",
        prompt=cfg.prompt,
        negative_prompt=cfg.negative_prompt or "",
        steps=int(cfg.steps),
        guidance_scale=float(cfg.guidance_scale),
        width=width,
        height=height,
        seed=int(seed),
        strength=float(cfg.strength),
        elapsed_seconds=float(elapsed),
    )
    return img, meta
src/sdgen/sd/models.py ADDED
@@ -0,0 +1,121 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Configuration dataclasses for Stable Diffusion execution and history storage."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import asdict, dataclass, field
6
+ from datetime import datetime
7
+ from typing import Any, Dict, Optional
8
+
9
+
10
@dataclass
class Txt2ImgConfig:
    """Configuration for text-to-image generation.

    Attributes:
        prompt: Positive prompt text.
        negative_prompt: Negative prompt text.
        steps: Number of diffusion steps.
        guidance_scale: Classifier-free guidance scale.
        width: Requested image width.
        height: Requested image height.
        seed: Optional random seed.
        device: Target torch device ("cuda" or "cpu").
    """

    prompt: str
    negative_prompt: str = ""
    steps: int = 30
    guidance_scale: float = 7.5
    width: int = 512
    height: int = 512
    # None means "randomize": generate_image resolves a concrete seed at run
    # time and records it in GenerationMetadata for reproducibility.
    seed: Optional[int] = None
    device: str = "cuda"
33
+
34
+
35
@dataclass
class Img2ImgConfig:
    """Configuration for image-to-image generation.

    Attributes:
        prompt: Positive prompt text.
        init_image_path: Optional file path to source image.
        negative_prompt: Negative prompt text.
        strength: Img2Img blend strength in (0, 1].
        steps: Number of diffusion steps.
        guidance_scale: CFG scale.
        width: Requested image width.
        height: Requested image height.
        seed: Optional random seed.
        device: Target device.
    """

    prompt: str
    # Optional on-disk source; callers may instead pass a PIL image directly
    # to the generation function.
    init_image_path: Optional[str] = None
    negative_prompt: str = ""
    # Validated by the img2img generator: must lie in (0, 1].
    strength: float = 0.7
    steps: int = 30
    guidance_scale: float = 7.5
    width: int = 512
    height: int = 512
    # None means "randomize"; the resolved seed is recorded in the metadata.
    seed: Optional[int] = None
    device: str = "cuda"
62
+
63
+
64
@dataclass
class GenerationMetadata:
    """Output metadata for a generated image.

    Attributes:
        mode: Generation mode ("txt2img", "img2img", "upscale", ...).
        prompt: Prompt text.
        negative_prompt: Negative prompt text.
        steps: Number of diffusion steps.
        guidance_scale: CFG scale.
        width: Output width.
        height: Output height.
        seed: Resolved random seed.
        strength: Img2Img strength; None for Txt2Img.
        elapsed_seconds: Wall-clock runtime.
        timestamp: UTC timestamp.
        id: Unique entry ID.
        thumbnail: Local thumbnail path.
        full_image: Local full-size image path.
    """

    mode: str
    prompt: str
    negative_prompt: str = ""
    steps: int = 30
    guidance_scale: float = 7.5
    width: int = 512
    height: int = 512
    seed: Optional[int] = None
    strength: Optional[float] = None
    elapsed_seconds: float = 0.0
    timestamp: str = field(default_factory=lambda: datetime.utcnow().isoformat())
    id: Optional[str] = None
    thumbnail: Optional[str] = None
    full_image: Optional[str] = None

    def to_dict(self) -> Dict[str, Any]:
        """Serialize to a plain dict, dropping any field whose value is None."""
        result: Dict[str, Any] = {}
        for name, value in asdict(self).items():
            if value is None:
                continue
            result[name] = value
        return result
104
+
105
+
106
@dataclass
class HistorySummary:
    """Minimal entry used for UI history lists.

    A lightweight projection of GenerationMetadata: only the fields the
    history dropdown/thumbnail view needs.
    """

    id: str
    prompt: str
    mode: str
    seed: Optional[int]
    width: int
    height: int
    timestamp: str
    # Local path to the thumbnail image on disk.
    thumbnail: str

    def to_dict(self) -> Dict[str, Any]:
        """Return a serializable dict representation."""
        return asdict(self)
{app → src/sdgen/sd}/pipeline.py RENAMED
@@ -1,33 +1,26 @@
1
- """Model pipeline loader for Stable Diffusion (HuggingFace Diffusers).
2
 
3
- load_pipeline(...) returns a GPU-ready pipeline with memory optimizations.
4
- """
5
 
6
  import os
7
  from typing import Optional
8
 
9
  import torch
10
- from diffusers import (
11
- DPMSolverMultistepScheduler,
12
- StableDiffusionPipeline,
13
- )
14
- from dotenv import load_dotenv
15
 
16
- from app.utils.logger import get_logger
17
 
18
  logger = get_logger(__name__)
19
- load_dotenv()
20
 
21
 
22
- def _try_enable_xformers(pipe):
 
23
  try:
24
  if hasattr(pipe, "enable_xformers_memory_efficient_attention"):
25
  pipe.enable_xformers_memory_efficient_attention()
26
  logger.info("Enabled xFormers memory-efficient attention.")
27
- else:
28
- logger.info("xFormers not available via API; skipping.")
29
- except Exception as err:
30
- logger.info(f"xFormers not enabled: {err}")
31
 
32
 
33
  def load_pipeline(
@@ -36,9 +29,21 @@ def load_pipeline(
36
  use_fp16: bool = True,
37
  enable_xformers: bool = False,
38
  torch_dtype: Optional[torch.dtype] = None,
39
- scheduler=None,
40
- ):
41
- """Load and return an optimized StableDiffusionPipeline."""
 
 
 
 
 
 
 
 
 
 
 
 
42
  if torch_dtype is None:
43
  torch_dtype = torch.float16 if use_fp16 and device == "cuda" else torch.float32
44
 
@@ -51,7 +56,12 @@ def load_pipeline(
51
  except Exception:
52
  scheduler = None
53
 
54
- logger.info(f"Loading pipeline {model_id} " f"dtype={torch_dtype} on {device} ...")
 
 
 
 
 
55
 
56
  pipe = StableDiffusionPipeline.from_pretrained(
57
  model_id,
@@ -59,9 +69,7 @@ def load_pipeline(
59
  safety_checker=None,
60
  scheduler=scheduler,
61
  use_auth_token=os.getenv("HUGGINGFACE_HUB_TOKEN"),
62
- )
63
-
64
- pipe = pipe.to(device)
65
 
66
  try:
67
  pipe.enable_attention_slicing()
@@ -87,36 +95,37 @@ def load_pipeline(
87
 
88
 
89
  def warmup_pipeline(
90
- pipe,
91
  prompt: str = "A photo of a cat",
92
  height: int = 512,
93
  width: int = 512,
94
- ):
95
- """Run a quick inference to allocate CUDA kernels and memory."""
96
  try:
97
  if hasattr(pipe, "parameters"):
98
  device = next(pipe.parameters()).device
99
  else:
100
  device = "cuda"
101
-
102
  except Exception:
103
  device = "cuda"
104
 
105
  try:
106
- gen = torch.Generator(device if device != "cpu" else "cpu").manual_seed(0)
 
107
 
108
  logger.info("Warmup: running one-step inference to initialize kernels.")
109
-
110
- _ = pipe(
111
  prompt=prompt,
112
  num_inference_steps=1,
113
  guidance_scale=1.0,
114
  height=height,
115
  width=width,
116
- generator=gen,
117
  )
118
 
119
- torch.cuda.empty_cache()
 
 
120
  logger.info("Warmup complete.")
121
- except Exception as err:
122
- logger.warning(f"Warmup failed: {err}")
 
1
+ """Stable Diffusion pipeline loading and warmup helpers."""
2
 
3
+ from __future__ import annotations
 
4
 
5
  import os
6
  from typing import Optional
7
 
8
  import torch
9
+ from diffusers import DPMSolverMultistepScheduler, StableDiffusionPipeline
 
 
 
 
10
 
11
+ from sdgen.utils.logger import get_logger
12
 
13
  logger = get_logger(__name__)
 
14
 
15
 
16
+ def _try_enable_xformers(pipe: StableDiffusionPipeline) -> None:
17
+ """Enable xFormers memory-efficient attention if available."""
18
  try:
19
  if hasattr(pipe, "enable_xformers_memory_efficient_attention"):
20
  pipe.enable_xformers_memory_efficient_attention()
21
  logger.info("Enabled xFormers memory-efficient attention.")
22
+ except Exception as exc:
23
+ logger.info("xFormers not enabled: %s", exc)
 
 
24
 
25
 
26
  def load_pipeline(
 
29
  use_fp16: bool = True,
30
  enable_xformers: bool = False,
31
  torch_dtype: Optional[torch.dtype] = None,
32
+ scheduler: Optional[DPMSolverMultistepScheduler] = None,
33
+ ) -> StableDiffusionPipeline:
34
+ """Load the Stable Diffusion pipeline with optional scheduler and xFormers.
35
+
36
+ Args:
37
+ model_id: HuggingFace model ID.
38
+ device: Execution device ("cuda" or "cpu").
39
+ use_fp16: Enable float16 precision on CUDA.
40
+ enable_xformers: Whether to enable xFormers attention.
41
+ torch_dtype: Explicit dtype override.
42
+ scheduler: Optional preconfigured scheduler.
43
+
44
+ Returns:
45
+ A configured `StableDiffusionPipeline` instance.
46
+ """
47
  if torch_dtype is None:
48
  torch_dtype = torch.float16 if use_fp16 and device == "cuda" else torch.float32
49
 
 
56
  except Exception:
57
  scheduler = None
58
 
59
+ logger.info(
60
+ "Loading pipeline %s dtype=%s on %s",
61
+ model_id,
62
+ torch_dtype,
63
+ device,
64
+ )
65
 
66
  pipe = StableDiffusionPipeline.from_pretrained(
67
  model_id,
 
69
  safety_checker=None,
70
  scheduler=scheduler,
71
  use_auth_token=os.getenv("HUGGINGFACE_HUB_TOKEN"),
72
+ ).to(device)
 
 
73
 
74
  try:
75
  pipe.enable_attention_slicing()
 
95
 
96
 
97
def warmup_pipeline(
    pipe: StableDiffusionPipeline,
    prompt: str = "A photo of a cat",
    height: int = 512,
    width: int = 512,
) -> None:
    """Run a one-step warmup pass to initialize CUDA kernels.

    Failures are logged and swallowed: warmup is an optimization, never a
    hard requirement for the app to start.
    """
    # Best-effort device detection; fall back to CUDA when the object does
    # not expose parameters() (e.g. wrapped or mocked pipelines).
    try:
        if hasattr(pipe, "parameters"):
            device = next(pipe.parameters()).device
        else:
            device = "cuda"
    except Exception:
        device = "cuda"

    try:
        gen_device = "cpu" if str(device) == "cpu" else device
        generator = torch.Generator(gen_device).manual_seed(0)

        logger.info("Warmup: running one-step inference to initialize kernels.")
        pipe(
            prompt=prompt,
            num_inference_steps=1,
            guidance_scale=1.0,
            height=height,
            width=width,
            generator=generator,
        )

        # BUG FIX: `device` may be a torch.device such as cuda:0, so the
        # previous `device == "cuda"` string comparison could silently skip
        # the cache flush. Compare on the stringified prefix instead.
        if str(device).startswith("cuda"):
            torch.cuda.empty_cache()

        logger.info("Warmup complete.")
    except Exception as exc:
        logger.warning("Warmup failed: %s", exc)
src/sdgen/ui/__init__.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from sdgen.ui.layout import build_ui
4
+
5
+ __all__ = ["build_ui"]
src/sdgen/ui/layout.py ADDED
@@ -0,0 +1,184 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """UI layout builder for the Stable Diffusion Gradio app."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Any, Dict, Tuple
6
+
7
+ import gradio as gr
8
+
9
+ from sdgen.sd.generator import generate_image
10
+ from sdgen.sd.img2img import generate_img2img
11
+ from sdgen.sd.models import Img2ImgConfig, Txt2ImgConfig
12
+ from sdgen.ui.tabs import (
13
+ build_history_tab,
14
+ build_img2img_tab,
15
+ build_presets_tab,
16
+ build_txt2img_tab,
17
+ build_upscaler_tab,
18
+ )
19
+ from sdgen.ui.tabs.img2img_tab import Img2ImgControls
20
+ from sdgen.ui.tabs.txt2img_tab import Txt2ImgControls
21
+ from sdgen.upscaler.upscaler import Upscaler
22
+ from sdgen.utils.common import pretty_json, to_pil
23
+ from sdgen.utils.history import save_history_entry
24
+ from sdgen.utils.logger import get_logger
25
+
26
+ logger = get_logger(__name__)
27
+
28
+
29
+ def _resolve_seed(value: Any) -> int | None:
30
+ """Return integer seed if valid, otherwise None."""
31
+ if value is None:
32
+ return None
33
+ if isinstance(value, int):
34
+ return value
35
+ text = str(value).strip()
36
+ if not text:
37
+ return None
38
+ try:
39
+ return int(text)
40
+ except ValueError:
41
+ logger.warning("Invalid seed input: %s", value)
42
+ return None
43
+
44
+
45
def _txt2img_handler(
    pipe: Any,
    prompt: str,
    negative: str,
    steps: int,
    guidance: float,
    width: int,
    height: int,
    seed: Any,
) -> Tuple[Any, str]:
    """Run text-to-image generation."""
    # Coerce the raw Gradio values into a typed config; the pipeline's own
    # device decides where generation runs.
    config = Txt2ImgConfig(
        prompt=prompt or "",
        negative_prompt=negative or "",
        steps=int(steps),
        guidance_scale=float(guidance),
        width=int(width),
        height=int(height),
        seed=_resolve_seed(seed),
        device=pipe.device.type,
    )

    result_image, metadata = generate_image(pipe, config)

    # History persistence is best-effort: a storage failure must not lose
    # the generated image.
    try:
        save_history_entry(metadata, result_image)
    except Exception as exc:  # noqa: BLE001
        logger.exception("Failed to save history entry: %s", exc)

    return result_image, pretty_json(metadata.to_dict())
75
+
76
+
77
def _img2img_handler(
    pipe: Any,
    input_image: Any,
    prompt: str,
    negative: str,
    strength: float,
    steps: int,
    guidance: float,
    seed: Any,
) -> Tuple[Any, str]:
    """Run image-to-image generation."""
    if input_image is None:
        raise gr.Error("Upload an image to continue.")

    source = to_pil(input_image)

    # Target resolution follows the uploaded image's own dimensions.
    config = Img2ImgConfig(
        prompt=prompt or "",
        negative_prompt=negative or "",
        strength=float(strength),
        steps=int(steps),
        guidance_scale=float(guidance),
        width=source.width,
        height=source.height,
        seed=_resolve_seed(seed),
        device=pipe.device.type,
    )

    result_image, metadata = generate_img2img(pipe, config, source)

    # History persistence is best-effort; never fail the generation for it.
    try:
        save_history_entry(metadata, result_image)
    except Exception as exc:  # noqa: BLE001
        logger.exception("Failed to save history entry: %s", exc)

    return result_image, pretty_json(metadata.to_dict())
113
+
114
+
115
def _upscale_handler(
    input_image: Any,
    scale: str,
) -> Tuple[Any, str]:
    """Run image upscaling."""
    if input_image is None:
        raise gr.Error("Upload an image to continue.")

    source = to_pil(input_image)

    # The UI delivers scale as a string; accept "2", "4", "2.0", etc.
    try:
        scale_factor = int(float(scale))
    except Exception as exc:  # noqa: BLE001
        raise gr.Error("Scale must be numeric (2 or 4).") from exc

    result = Upscaler(scale=scale_factor, prefer="ncnn").upscale(source)

    metadata: Dict[str, Any] = {
        "mode": "upscale",
        "scale": scale_factor,
        "width": result.width,
        "height": result.height,
    }

    # History persistence is best-effort; never fail the upscale for it.
    try:
        save_history_entry(metadata, result)
    except Exception as exc:  # noqa: BLE001
        logger.exception("Failed to save history entry: %s", exc)

    return result, pretty_json(metadata)
147
+
148
+
149
def build_ui(txt2img_pipe: Any, img2img_pipe: Any) -> gr.Blocks:
    """Build the entire Gradio UI.

    Args:
        txt2img_pipe: Loaded text-to-image pipeline, bound into the
            txt2img handler via a closure.
        img2img_pipe: Loaded image-to-image pipeline, bound into the
            img2img handler via a closure.

    Returns:
        The assembled `gr.Blocks` application (not yet launched).
    """
    with gr.Blocks() as demo:
        gr.Markdown(
            "# Stable Diffusion Generator\n"
            "Clean, local Stable \
            Diffusion toolkit."
        )

        # The lambdas capture the pipelines so tab builders stay pipe-agnostic.
        txt_controls: Txt2ImgControls = build_txt2img_tab(
            handler=lambda *args: _txt2img_handler(txt2img_pipe, *args),
        )

        img_controls: Img2ImgControls = build_img2img_tab(
            handler=lambda *args: _img2img_handler(img2img_pipe, *args),
        )

        build_upscaler_tab(
            handler=_upscale_handler,
        )

        # Presets need handles to both tabs' controls so a single "apply"
        # can populate them together.
        build_presets_tab(
            txt_controls=txt_controls,
            img_controls=img_controls,
        )

        build_history_tab()

        gr.Markdown(
            "### Notes\n"
            "- Seeds left blank will be randomized.\n"
            "- Use **History → Refresh History** if new thumbnails do not appear.\n"
            "- Presets apply to both **Text → Image** and **Image → Image** tabs.\n"
        )

    return demo
src/sdgen/ui/tabs/__init__.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from .history_tab import build_history_tab
4
+ from .img2img_tab import build_img2img_tab
5
+ from .presets_tab import build_presets_tab
6
+ from .txt2img_tab import build_txt2img_tab
7
+ from .upscaler_tab import build_upscaler_tab
8
+
9
+ __all__ = [
10
+ "build_txt2img_tab",
11
+ "build_img2img_tab",
12
+ "build_upscaler_tab",
13
+ "build_presets_tab",
14
+ "build_history_tab",
15
+ ]
src/sdgen/ui/tabs/history_tab.py ADDED
@@ -0,0 +1,162 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """UI for History section."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Any, Dict, List, Optional, Tuple
6
+
7
+ import gradio as gr
8
+ from PIL import Image
9
+
10
+ from sdgen.utils.common import pretty_json, short_prompt
11
+ from sdgen.utils.history import (
12
+ delete_history_entry,
13
+ list_history,
14
+ load_entry,
15
+ )
16
+ from sdgen.utils.logger import get_logger
17
+
18
+ logger = get_logger(__name__)
19
+
20
+
21
+ # Internal helpers
22
+
23
+
24
def _label(entry: Dict[str, Any]) -> str:
    """Build the human-readable dropdown label for one history entry."""
    # "YYYY-MM-DDTHH:MM:SS..." → "YYYY-MM-DD HH:MM:SS"
    when = entry.get("timestamp", "")[:19].replace("T", " ")
    mode = entry.get("mode", "unknown")
    summary = short_prompt(entry.get("prompt", ""), 60)
    base = f"{when} — {mode}"
    return f"{base} — {summary}" if summary else base
30
+
31
+
32
def _build_index(limit: int = 500) -> Tuple[List[str], List[str], List[Dict[str, Any]]]:
    """Load the history index and return (ids, labels, raw entries)."""
    entries = list_history(limit)
    ids: List[str] = []
    labels: List[str] = []
    for entry in entries:
        ids.append(entry.get("id", ""))
        labels.append(_label(entry))
    return ids, labels, entries
38
+
39
+
40
def _id_from_label(label: str, entries: List[Dict[str, Any]]) -> Optional[str]:
    """Resolve an entry ID from its rendered label text, or None if absent."""
    return next(
        (entry.get("id") for entry in entries if _label(entry) == label),
        None,
    )
+
47
+
48
+ # Operations
49
+
50
+
51
def load_from_dropdown(selected_label: str, entries: List[Dict[str, Any]]):
    """Load the history entry matching the selected dropdown label.

    Returns the entry's thumbnail image (or None) and its metadata as a
    JSON string for the gr.JSON output.
    """
    if not selected_label:
        raise gr.Error("No entry selected.")

    entry_id = _id_from_label(selected_label, entries)
    if not entry_id:
        raise gr.Error("Entry not found.")

    record = load_entry(entry_id)
    if not record:
        raise gr.Error("Entry JSON missing.")

    preview = None
    thumb_path = record.get("thumbnail")
    if thumb_path:
        preview = Image.open(thumb_path)

    # pretty_json returns string → JSON component will parse it
    return preview, pretty_json(record)
+
70
+
71
def refresh_history():
    """Rebuild the dropdown choices and entry state, clearing the outputs."""
    _, labels, entries = _build_index()
    # Select the newest entry when any exist; otherwise empty the dropdown.
    selected = labels[0] if labels else None
    dropdown_update = gr.update(choices=labels, value=selected)
    return dropdown_update, entries, None, ""
+
84
+
85
def delete_entry(selected_label: str, entries: List[Dict[str, Any]]):
    """Delete the selected history entry, then refresh the dropdown/state."""
    if not selected_label:
        raise gr.Error("Select an entry first.")

    entry_id = _id_from_label(selected_label, entries)
    if not entry_id:
        raise gr.Error("Entry not found.")

    if not delete_history_entry(entry_id):
        raise gr.Error("Delete failed.")

    # Re-read the index so the UI reflects the deletion immediately.
    _, labels, remaining = _build_index()
    selected = labels[0] if labels else None
    dropdown_update = gr.update(choices=labels, value=selected)

    return None, "", dropdown_update, remaining
+
107
+
108
+ # UI
109
+
110
+
111
def build_history_tab() -> None:
    """History tab: dropdown, load button, delete, refresh.

    Loads the index once at build time to seed the dropdown; subsequent
    changes go through the Refresh/Delete event handlers.
    """
    _, labels, entries = _build_index()
    initial = labels[0] if labels else None

    with gr.Tab("History"):
        with gr.Row():
            # Left panel: controls
            with gr.Column(scale=1):
                dropdown = gr.Dropdown(
                    label="History entries",
                    choices=labels,
                    value=initial,
                    interactive=True,
                )

                load_btn = gr.Button("Load entry")
                refresh_btn = gr.Button("Refresh")
                delete_btn = gr.Button("Delete selected", variant="stop")

            # Right panel: output
            with gr.Column(scale=2):
                thumb = gr.Image(
                    label="Thumbnail",
                    show_label=True,
                    type="pil",
                )
                meta = gr.JSON(
                    label="Metadata",
                )

        # Session-local copy of the raw entries, used to map labels → IDs.
        state = gr.State(entries)

        # Events

        load_btn.click(
            fn=load_from_dropdown,
            inputs=[dropdown, state],
            outputs=[thumb, meta],
        )

        refresh_btn.click(
            fn=refresh_history,
            inputs=None,
            outputs=[dropdown, state, thumb, meta],
        )

        delete_btn.click(
            fn=delete_entry,
            inputs=[dropdown, state],
            outputs=[thumb, meta, dropdown, state],
        )
+ )
src/sdgen/ui/tabs/img2img_tab.py ADDED
@@ -0,0 +1,122 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """UI for image to image generation section."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass
6
+ from typing import Any, Callable, Tuple
7
+
8
+ import gradio as gr
9
+
10
+
11
@dataclass
class Img2ImgControls:
    """References to Image → Image controls used by the presets tab.

    Holds the live Gradio components so other tabs (Presets) can target
    them as event outputs and populate their values.
    """

    input_image: gr.Image
    prompt: gr.Textbox
    negative: gr.Textbox
    strength: gr.Slider
    steps: gr.Slider
    guidance: gr.Slider
    seed: gr.Textbox
22
+
23
+
24
def build_img2img_tab(handler: Callable[..., Tuple[Any, dict]]) -> Img2ImgControls:
    """Build the Image → Image tab and connect it to the provided handler.

    Args:
        handler: A callable accepting the UI inputs and returning:
            (output_image, metadata_dict)

    Returns:
        Img2ImgControls: A container with references to UI components.
    """
    with gr.Tab("Image → Image"):
        with gr.Row():
            # Left: Controls
            with gr.Column(scale=1):
                # NOTE(review): the `tool` kwarg was removed from gr.Image in
                # Gradio 4.x — confirm the pinned Gradio version accepts it.
                input_image = gr.Image(
                    label="Input Image",
                    type="numpy",
                    tool="editor",
                )

                prompt = gr.Textbox(
                    label="Prompt",
                    placeholder="Describe desired changes...",
                )

                negative = gr.Textbox(
                    label="Negative Prompt",
                    placeholder="Artifacts to avoid...",
                )

                strength = gr.Slider(
                    minimum=0.1,
                    maximum=1.0,
                    value=0.6,
                    step=0.05,
                    label="Strength",
                )
                gr.Markdown(
                    "Controls how strongly the prompt \
                    alters the original image."
                )

                steps = gr.Slider(
                    minimum=10,
                    maximum=50,
                    value=25,
                    step=1,
                    label="Steps",
                )

                guidance = gr.Slider(
                    minimum=1,
                    maximum=15,
                    value=7.0,
                    step=0.5,
                    label="Guidance Scale",
                )

                seed = gr.Textbox(
                    label="Seed",
                    value="",
                    placeholder="Leave empty for random",
                )

                generate_button = gr.Button("Generate")

            # Right: Output preview
            with gr.Column(scale=2):
                out_image = gr.Image(
                    label="Output",
                    type="pil",
                )
                out_metadata = gr.JSON(
                    label="Metadata",
                )

        # Input order here must match the handler's parameter order.
        generate_button.click(
            fn=handler,
            inputs=[
                input_image,
                prompt,
                negative,
                strength,
                steps,
                guidance,
                seed,
            ],
            outputs=[out_image, out_metadata],
        )

    # Expose the controls so the Presets tab can populate them.
    return Img2ImgControls(
        input_image=input_image,
        prompt=prompt,
        negative=negative,
        strength=strength,
        steps=steps,
        guidance=guidance,
        seed=seed,
    )
src/sdgen/ui/tabs/presets_tab.py ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """UI for presets section."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Any, Tuple
6
+
7
+ import gradio as gr
8
+
9
+ from sdgen.presets.styles import get_preset, list_presets
10
+ from sdgen.ui.tabs.img2img_tab import Img2ImgControls
11
+ from sdgen.ui.tabs.txt2img_tab import Txt2ImgControls
12
+
13
+
14
def apply_preset(preset_name: Any) -> Tuple[Any, ...]:
    """Return values to populate txt2img and img2img controls.

    Args:
        preset_name: A string or a one-element list representing the preset key.

    Returns:
        A tuple with values mapped to Text→Image and Image→Image UI controls.
    """
    # Some Gradio dropdown configurations deliver a list — unwrap it.
    if isinstance(preset_name, (list, tuple)):
        preset_name = preset_name[0] if preset_name else None

    if not preset_name:
        raise gr.Error("Select a preset first.")

    preset = get_preset(str(preset_name))
    if preset is None:
        raise gr.Error("Invalid preset selected.")

    prompt = preset.get("prompt", "")
    negative = preset.get("negative_prompt", "")
    steps = int(preset.get("steps", 30))
    guidance = float(preset.get("guidance_scale", 7.5))
    width = int(preset.get("width", 512))
    height = int(preset.get("height", 512))

    # Img2Img gets the same prompt settings, a floor on step count, and
    # neutral defaults for strength/seed.
    img_steps = max(10, steps)
    img_guidance = guidance
    img_strength = 0.6  # neutral default
    img_seed = ""

    status_msg = f"Applied preset: {preset_name}"

    # Order must mirror the outputs list wired up in build_presets_tab:
    # txt2img fields, then img2img fields, then the status markdown.
    return (
        prompt,
        negative,
        steps,
        guidance,
        width,
        height,
        prompt,
        negative,
        img_steps,
        img_guidance,
        img_strength,
        img_seed,
        status_msg,
    )
69
+
70
+
71
def build_presets_tab(
    txt_controls: Txt2ImgControls,
    img_controls: Img2ImgControls,
) -> None:
    """Construct the Presets tab and link values to both txt2img and img2img controls.

    Args:
        txt_controls: References to Text→Image input controls.
        img_controls: References to Image→Image input controls.
    """
    with gr.Tab("Presets"):
        with gr.Row():
            with gr.Column():
                preset_name = gr.Dropdown(
                    choices=list_presets(),
                    label="Select style",
                )
                apply_button = gr.Button("Apply Preset")
                status_box = gr.Markdown("")

            with gr.Column():
                gr.Markdown(
                    "Applying a preset fills prompt, negative prompt, steps, "
                    "guidance, and resolution for both **Text → Image** "
                    "and **Image → Image** tabs.",
                )

        # The outputs order must match apply_preset's return tuple exactly:
        # six txt2img values, six img2img values, then the status markdown.
        apply_button.click(
            fn=apply_preset,
            inputs=[preset_name],
            outputs=[
                # txt2img
                txt_controls.prompt,
                txt_controls.negative,
                txt_controls.steps,
                txt_controls.guidance,
                txt_controls.width,
                txt_controls.height,
                # img2img
                img_controls.prompt,
                img_controls.negative,
                img_controls.steps,
                img_controls.guidance,
                img_controls.strength,
                img_controls.seed,
                # status markdown
                status_box,
            ],
        )
src/sdgen/ui/tabs/txt2img_tab.py ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """UI for text to image generation section."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass
6
+ from typing import Callable, Tuple
7
+
8
+ import gradio as gr
9
+
10
+
11
@dataclass
class Txt2ImgControls:
    """UI element references for the Text → Image tab.

    These allow the Presets tab to populate the fields programmatically.
    """

    # Positive text prompt textbox.
    prompt: gr.components.Textbox
    # Negative prompt textbox (features to steer away from).
    negative: gr.components.Textbox
    # Diffusion step-count slider.
    steps: gr.components.Slider
    # Classifier-free guidance (CFG) scale slider.
    guidance: gr.components.Slider
    # Output width slider, in pixels.
    width: gr.components.Slider
    # Output height slider, in pixels.
    height: gr.components.Slider
    # Optional seed textbox; empty string means random.
    seed: gr.components.Textbox
25
+
26
+
27
def build_txt2img_tab(handler: Callable[..., tuple]) -> Txt2ImgControls:
    """Construct the Text → Image tab and bind the Generate button.

    The handler annotation uses builtin ``tuple`` (not deprecated
    ``typing.Tuple``) for consistency with ``build_upscaler_tab``; the module
    has ``from __future__ import annotations``, so this is safe everywhere.

    Args:
        handler: Function that performs txt2img and returns (image, metadata).

    Returns:
        A Txt2ImgControls instance containing references to all UI controls.
    """
    with gr.Tab("Text → Image"):
        with gr.Row():
            with gr.Column():
                prompt = gr.Textbox(
                    label="Prompt",
                    placeholder="A futuristic city at dusk, cinematic lighting",
                )
                negative = gr.Textbox(
                    label="Negative prompt",
                    placeholder="low quality, blurry, extra limbs",
                )

                steps = gr.Slider(
                    minimum=10,
                    maximum=50,
                    value=30,
                    step=1,
                    label="Steps",
                )
                gr.Markdown(
                    "More steps → finer detail, slower runtime. 20–40 is typical.",
                )

                guidance = gr.Slider(
                    minimum=1,
                    maximum=15,
                    value=7.5,
                    step=0.5,
                    label="Guidance Scale (CFG)",
                )
                gr.Markdown(
                    "Higher values make generation match the prompt more strictly. "
                    "7–9 is a common range.",
                )

                width = gr.Slider(
                    minimum=256,
                    maximum=768,
                    value=512,
                    step=64,
                    label="Width",
                )
                height = gr.Slider(
                    minimum=256,
                    maximum=768,
                    value=512,
                    step=64,
                    label="Height",
                )

                seed = gr.Textbox(
                    label="Seed (optional)",
                    value="",
                    placeholder="Leave empty for random",
                )

                generate_button = gr.Button("Generate")

            with gr.Column():
                out_image = gr.Image(label="Output")
                out_meta = gr.JSON(label="Metadata (JSON)")

        # Input order must match the handler's positional signature.
        generate_button.click(
            fn=handler,
            inputs=[prompt, negative, steps, guidance, width, height, seed],
            outputs=[out_image, out_meta],
        )

    return Txt2ImgControls(
        prompt=prompt,
        negative=negative,
        steps=steps,
        guidance=guidance,
        width=width,
        height=height,
        seed=seed,
    )
src/sdgen/ui/tabs/upscaler_tab.py ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """UI for upscaler section."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Callable
6
+
7
+ import gradio as gr
8
+
9
+
10
def build_upscaler_tab(handler: Callable[..., tuple]) -> None:
    """Render the Upscaler tab and connect its button to the given handler."""
    with gr.Tab("Upscaler"):
        with gr.Row():
            with gr.Column():
                src_image = gr.Image(
                    label="Upload Image to Upscale",
                    type="numpy",
                )
                factor = gr.Radio(
                    choices=["2.0", "4.0"],
                    value="2.0",
                    label="Upscale Factor",
                )
                btn_upscale = gr.Button("Upscale")

            with gr.Column():
                result_image = gr.Image(label="Upscaled Image")
                result_meta = gr.JSON(label="Metadata (JSON)")

        btn_upscale.click(
            fn=handler,
            inputs=[src_image, factor],
            outputs=[result_image, result_meta],
        )
{app → src/sdgen/upscaler}/__init__.py RENAMED
File without changes
src/sdgen/upscaler/realesrgan.py ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """NCNN RealESRGAN upscaler wrapper.
2
+
3
+ This module exposes:
4
+ - NCNNUpscaler: lightweight RealESRGAN upscaling (2× or 4×)
5
+ backed by realesrgan-ncnn-py.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from typing import Final
11
+
12
+ from PIL import Image
13
+ from realesrgan_ncnn_py import Realesrgan
14
+
15
+ from sdgen.utils.logger import get_logger
16
+
17
+ logger = get_logger(__name__)
18
+
19
# Map scale → realesrgan-ncnn model index
_SCALE_MODEL_MAP: Final[dict[int, int]] = {
    2: 3,  # realesrgan-x2plus
    4: 0,  # realesrgan-x4plus
}


class NCNNUpscaler:
    """NCNN RealESRGAN engine using realesrgan-ncnn-py.

    This class provides 2× or 4× super-resolution on CPU/GPU
    without requiring the full PyTorch RealESRGAN stack.

    Args:
        scale: Target scale factor. Valid values: 2 or 4.

    Raises:
        ValueError: If an unsupported scale is provided.
        RuntimeError: If the model cannot be loaded.
    """

    def __init__(self, scale: int = 2) -> None:
        """Initialize realesrgan."""
        if scale not in _SCALE_MODEL_MAP:
            raise ValueError(
                f"Scale must be 2 or 4 for NCNN RealESRGAN, got: {scale}",
            )

        self.scale: int = scale
        chosen_model = _SCALE_MODEL_MAP[scale]

        logger.info(
            "Initializing NCNN RealESRGAN (scale=%s, model_index=%s)",
            scale,
            chosen_model,
        )

        try:
            self.model = Realesrgan(model=chosen_model)
        except Exception as exc:  # noqa: BLE001
            logger.error("Failed to initialize Realesrgan engine: %s", exc)
            raise RuntimeError(
                f"Failed to initialize Realesrgan engine: {exc}",
            ) from exc

    def upscale(self, image: Image.Image) -> Image.Image:
        """Upscale a PIL image using the NCNN RealESRGAN engine.

        Args:
            image: A PIL.Image instance.

        Returns:
            The upscaled PIL.Image.

        Raises:
            TypeError: If the input is not a PIL.Image.
        """
        if not isinstance(image, Image.Image):
            raise TypeError(
                f"Input must be a PIL.Image, got: {type(image).__name__}",
            )

        logger.info(
            "Upscaling image (%sx%s) by %sx",
            image.width,
            image.height,
            self.scale,
        )

        return self.model.process_pil(image)
src/sdgen/upscaler/upscaler.py ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Unified interface for image upscaling.
2
+
3
+ This module selects an upscaling backend at runtime.
4
+ Currently supported:
5
+ - NCNN RealESRGAN (recommended)
6
+
7
+ Planned:
8
+ - Stable Diffusion-based upscaler
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ from typing import Optional
14
+
15
+ from PIL import Image
16
+
17
+ from sdgen.upscaler.realesrgan import NCNNUpscaler
18
+ from sdgen.utils.logger import get_logger
19
+
20
+ logger = get_logger(__name__)
21
+
22
+
23
class Upscaler:
    """Unified high-level upscaler wrapper.

    Args:
        scale: Target scale factor. Typically 2 or 4.
        prefer: Preferred backend name:
            - "ncnn": NCNN RealESRGAN (local, fast)
            - "auto": Try known engines in order

    Raises:
        RuntimeError: If no backend could be initialized.
        ValueError: Invalid scale value given.
    """

    _VALID_SCALES = {2, 4}
    _BACKENDS_ORDER = ("ncnn",)

    def __init__(self, scale: float = 2.0, prefer: str = "ncnn") -> None:
        """Initialize upscaler class."""
        normalized = int(scale)
        if normalized not in self._VALID_SCALES:
            raise ValueError(f"Scale must be 2 or 4 for RealESRGAN. Got: {scale}")

        self.scale = normalized
        self.engine: Optional[object] = None

        logger.info("Upscaler init (prefer=%s, scale=%s)", prefer, self.scale)

        if prefer == "auto":
            self._init_auto()
        elif prefer == "ncnn":
            self._init_ncnn()
        else:
            raise ValueError(f"Unknown upscaler backend: {prefer}")

        # Whatever path was taken, an engine must exist by now.
        if self.engine is None:
            raise RuntimeError("No valid upscaler engine available.")

    def _init_auto(self) -> None:
        """Try available engines in priority order."""
        for candidate in self._BACKENDS_ORDER:
            try:
                if candidate == "ncnn":
                    self._init_ncnn()
                    return
            except Exception as err:  # noqa: BLE001
                logger.warning("Upscaler init failed (%s): %s", candidate, err)

    def _init_ncnn(self) -> None:
        """Initialize RealESRGAN NCNN backend."""
        try:
            self.engine = NCNNUpscaler(scale=self.scale)
            logger.info("Using NCNN RealESRGAN engine.")
        except Exception as err:  # noqa: BLE001
            logger.warning("NCNN RealESRGAN init failed: %s", err)
            self.engine = None

    def upscale(self, image: Image.Image) -> Image.Image:
        """Upscale the given image.

        Args:
            image: Input PIL image.

        Returns:
            The upscaled PIL image.

        Raises:
            RuntimeError: If the engine is not initialized.
        """
        if self.engine is None:
            raise RuntimeError("Upscaler is not initialized.")
        return self.engine.upscale(image)
src/sdgen/utils/__init__.py ADDED
File without changes