SanskarModi commited on
Commit
600587b
·
1 Parent(s): 09a9be4

updated code to auto download loras

Browse files
.gitignore ADDED
@@ -0,0 +1,214 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[codz]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py.cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+ cover/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ .pybuilder/
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ # For a library or package, you might want to ignore these files since the code is
87
+ # intended to run in multiple environments; otherwise, check them in:
88
+ # .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
+ # install all needed dependencies.
95
+ #Pipfile.lock
96
+
97
+ # UV
98
+ # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
99
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
100
+ # commonly ignored for libraries.
101
+ #uv.lock
102
+
103
+ # poetry
104
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
105
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
106
+ # commonly ignored for libraries.
107
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
108
+ #poetry.lock
109
+ #poetry.toml
110
+
111
+ # pdm
112
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
113
+ # pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
114
+ # https://pdm-project.org/en/latest/usage/project/#working-with-version-control
115
+ #pdm.lock
116
+ #pdm.toml
117
+ .pdm-python
118
+ .pdm-build/
119
+
120
+ # pixi
121
+ # Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
122
+ #pixi.lock
123
+ # Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
124
+ # in the .venv directory. It is recommended not to include this directory in version control.
125
+ .pixi
126
+
127
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
128
+ __pypackages__/
129
+
130
+ # Celery stuff
131
+ celerybeat-schedule
132
+ celerybeat.pid
133
+
134
+ # SageMath parsed files
135
+ *.sage.py
136
+
137
+ # Environments
138
+ .env
139
+ .envrc
140
+ .venv
141
+ env/
142
+ venv/
143
+ ENV/
144
+ env.bak/
145
+ venv.bak/
146
+
147
+ # Spyder project settings
148
+ .spyderproject
149
+ .spyproject
150
+
151
+ # Rope project settings
152
+ .ropeproject
153
+
154
+ # mkdocs documentation
155
+ /site
156
+
157
+ # mypy
158
+ .mypy_cache/
159
+ .dmypy.json
160
+ dmypy.json
161
+
162
+ # Pyre type checker
163
+ .pyre/
164
+
165
+ # pytype static type analyzer
166
+ .pytype/
167
+
168
+ # Cython debug symbols
169
+ cython_debug/
170
+
171
+ # PyCharm
172
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
173
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
174
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
175
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
176
+ #.idea/
177
+
178
+ # Abstra
179
+ # Abstra is an AI-powered process automation framework.
180
+ # Ignore directories containing user credentials, local state, and settings.
181
+ # Learn more at https://abstra.io/docs
182
+ .abstra/
183
+
184
+ # Visual Studio Code
185
+ # Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
186
+ # that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
187
+ # and can be added to the global gitignore or merged into this file. However, if you prefer,
188
+ # you could uncomment the following to ignore the entire vscode folder
189
+ # .vscode/
190
+
191
+ # Ruff stuff:
192
+ .ruff_cache/
193
+
194
+ # PyPI configuration file
195
+ .pypirc
196
+
197
+ # Cursor
198
+ # Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to
199
+ # exclude from AI features like autocomplete and code analysis. Recommended for sensitive data
200
+ # refer to https://docs.cursor.com/context/ignore-files
201
+ .cursorignore
202
+ .cursorindexingignore
203
+
204
+ # Marimo
205
+ marimo/_static/
206
+ marimo/_lsp/
207
+ __marimo__/
208
+
209
+ src/assets
210
+ *.ckpt
211
+ *.safetensors
212
+ *.pt
213
+ *.bin
214
+ *.pth
README.md CHANGED
@@ -5,162 +5,179 @@ sdk: gradio
5
  sdk_version: 6.0.2
6
  ---
7
 
8
- # Stable Diffusion Image Generator Toolkit
9
 
10
  ![appdemo](https://drive.google.com/uc?export=view&id=1dO2bnYmEEj3fNU0-dV692icUPSwyP93G)
11
 
 
12
 
13
- A modular image generation system built on **HuggingFace Diffusers**, with support for multiple Stable Diffusion pipelines, configurable inference parameters, a clean **Gradio UI**, and a lightweight local **history/metadata store**.
 
 
14
 
15
- The system supports **text-to-image**, **image-to-image**, and **super-resolution upscaling** using **Real-ESRGAN (NCNN)**.
16
- Designed with a focus on **extensibility**, **clean code**, and **practical deployment constraints** (CPU or low-memory environments).
17
 
18
- [Visit App](https://huggingface.co/spaces/SanskarModi/sd-image-gen-toolkit)
19
 
20
  ---
21
 
22
- # Core Features
23
 
24
- ## Text-to-Image Generation
 
 
 
 
 
 
 
 
 
25
 
26
- * Stable Diffusion pipelines (SD 1.5, Turbo)
27
- * Adjustable **CFG scale**, **inference steps**, resolution, and seed
28
- * Structured metadata (JSON) for reproducibility
29
- * Style presets with recommended parameters
 
 
30
 
31
- ## Image-to-Image (Img2Img)
 
 
 
 
32
 
33
- * Pipeline reuse to avoid model reload cost
34
- * Alpha-preserving prompt transforms
35
- * Configurable denoising strength
36
- * Deterministic or stochastic sampling
 
 
37
 
38
- ## Upscaling (Real-ESRGAN NCNN)
39
 
40
- * Lightweight **NCNN backend** (GPU not required)
41
- * Supports 2× and 4× scaling
42
- * Optional SD-upscaler backend planned
43
- * Minimal dependencies, fast on CPU
44
 
45
- ## Prompt History & Metadata Tracking
 
 
 
 
 
 
 
46
 
47
- * Local metadata index with atomic writes
48
- * Thumbnail + full-size image storage
49
- * JSON schema for portability
50
- * History browser UI
51
 
52
- ## Multi-Model Runtime Switching
53
 
54
- * Multiple pipelines loaded once
55
- * Selection at inference without reload
56
- * Shared tokenizer/encoder where possible
57
- * Warm-up logic for fast Turbo inference
58
 
59
  ---
60
 
61
- # Architecture Overview
62
 
63
  ```
64
- src/sdgen/
65
-
66
- ├── sd/
67
- ├── pipeline.py # pipeline loader, warmup, dtype/device logic
68
- │ ├── generator.py # text-to-image
69
- │ ├── img2img.py # image-to-image
70
- └── models.py # config/metadata dataclasses
 
 
71
 
72
- ├── ui/
73
- │ ├── layout.py # top-level UI composition
74
- │ └── tabs/ # individual UI components
 
 
 
 
 
75
 
76
- ├── presets/
77
- │ └── styles.py # curated style presets
78
 
79
- ├── upscaler/
80
- └── realesrgan.py # NCNN Real-ESRGAN backend
 
81
 
82
- ├── utils/
83
- │ ├── history.py # persistence layer
84
- │ ├── common.py # PIL/NumPy helpers
85
- │ └── logger.py # structured logging
86
 
87
- └── config/
88
- ├── settings.py # runtime config/env
89
- └── paths.py # project paths
90
- ```
91
 
92
- ---
93
-
94
- # Technical Highlights
95
 
96
- ### Efficient CPU Deployment
97
 
98
- HF Spaces have **no GPU**, 16 GB RAM.
99
- Generation speed is optimized via:
100
 
101
- * latent consistency (Turbo)
102
- * reduced step ranges
103
- * VAE tiling for memory distribution
104
- * attention slicing
105
- * deferring safety checker if private
106
 
107
- This reduces **CPU inference from ~220s → <70s** for 512px prompts, without unacceptable quality loss.
 
108
 
109
- ### Multi-Pipeline Switching
110
 
111
- Both SD pipelines are instantiated once.
112
- The UI passes `model_choice` to the handler, which selects the correct pipeline **without rebuilding**.
 
 
 
 
113
 
114
- This avoids 4-7 GB reload cost per click.
 
115
 
116
  ---
117
 
118
- # Local Installation
119
-
120
- ### 1. Clone
121
 
 
122
  ```bash
123
  git clone https://github.com/sanskarmodi8/stable-diffusion-image-generator
124
  cd stable-diffusion-image-generator
125
- ```
126
 
127
- ### 2. Environment
128
 
129
  ```bash
130
  python -m venv .venv
131
  source .venv/bin/activate
132
  ```
133
 
134
- ### 3. Install Dependencies
135
-
136
- Install PyTorch for GPU (leave if on CPU):
137
-
138
- ```bash
139
- pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121
140
- ```
141
-
142
- Install core libs:
143
 
144
  ```bash
145
  pip install -r requirements.txt
146
  pip install -e .
147
  ```
148
 
149
- ### 4. HuggingFace Login (optional)
150
 
151
  ```bash
152
- huggingface-cli login
 
153
  ```
154
 
155
  ---
156
 
157
- # Running
158
 
159
  ```bash
160
  python src/sdgen/main.py
161
  ```
162
 
163
- UI available at:
164
 
165
  ```
166
  http://127.0.0.1:7860
@@ -168,191 +185,62 @@ http://127.0.0.1:7860
168
 
169
  ---
170
 
171
- # Roadmap (Focused, High-Impact Features)
172
-
173
- This project is under active development. The next milestones focus on **practical model customization and multi-model support**, optimized for **CPU-only deployment environments** such as Hugging Face Spaces.
174
-
175
- The roadmap is intentionally **lean** to maximize value within limited compute constraints.
176
-
177
- ---
178
-
179
- ## 1. LoRA Runtime Inference (Core Feature)
180
 
181
- Add lightweight **Low-Rank Adaptation** support for Stable Diffusion pipelines without modifying base model weights.
182
 
183
- ### Scope
184
- - Load external **`.safetensors` LoRA adapters** into UNet
185
- - Apply LoRA modules dynamically at inference
186
- - **Alpha (weight) slider** to control influence
187
- - **UI dropdown** for selecting LoRA adapters
188
- - **Automatic discovery** of LoRAs under:
189
  ```
190
-
191
  src/assets/loras/
192
-
193
- ```
194
-
195
- ### Deliverables
196
- - `lora_loader.py` utility
197
- - integration into existing `load_pipeline()`
198
- - UI: LoRA selector + alpha parameter
199
- - history metadata with:
200
- - `lora_paths`
201
- - `lora_weights`
202
-
203
- ---
204
-
205
- ## 2. Multi-LoRA Mixing (2 adapters)
206
-
207
- Support mixing **two LoRA adapters** with independent weights.
208
-
209
- ### Scope
210
- - Simple weighted merge at attention processors
211
- - UI:
212
- - LoRA A dropdown + alpha
213
- - LoRA B dropdown + alpha
214
- - Conflict handling for overlapping layers
215
-
216
- ### Deliverables
217
- - `apply_lora_mix()` utility
218
- - metadata persistence
219
-
220
- ---
221
-
222
- ## 3. SDXL-Turbo Pipeline Support
223
-
224
- Add a **third runtime model**:
225
  ```
226
 
227
- stabilityai/stable-diffusion-xl-base
228
- stabilityai/sdxl-turbo
229
 
230
- ````
231
-
232
- ### Scope
233
- - instantiate SDXL Turbo pipeline
234
- - auto configure:
235
- - steps (1-4)
236
- - CFG (0-1)
237
- - model selection integrated in UI
238
- - reproducible metadata
239
-
240
- ### Notes
241
- SDXL Turbo is optimized for **fast generation** and works well on constrained environments with reduced steps.
242
 
243
  ---
244
 
245
- ## 4. Enhanced Presets
246
 
247
- Presets currently define only prompts. Extend them to define **full recommended parameter sets** per use case.
 
248
 
249
- ### Scope
250
- Each preset can define:
251
- - prompt
252
- - negative prompt
253
- - inference steps
254
- - CFG scale
255
- - resolution
256
- - recommended model
257
- - recommended LoRA (+alpha)
258
-
259
- ### Example
260
- ```json
261
- {
262
- "preset": "Anime Portrait",
263
- "prompt": "...",
264
- "negative": "...",
265
- "steps": 15,
266
- "cfg": 6,
267
- "width": 512,
268
- "height": 768,
269
- "model": "SD1.5",
270
- "lora": {
271
- "path": "anime_face.safetensors",
272
- "alpha": 0.8
273
- }
274
- }
275
- ````
276
-
277
- ---
278
-
279
- ## 5. Metadata Improvements
280
-
281
- Enhance metadata tracking for **reproducibility**.
282
 
283
- ### Added Fields
284
-
285
- * `model_id`
286
- * `lora_names`
287
- * `lora_alphas`
288
- * `preset_used`
289
- * `resolution`
290
- * provenance timestamp
291
-
292
- This enables exact replication of generated images.
293
 
294
  ---
295
 
296
- ## 6. Example LoRA & Training Scripts (No UI)
297
-
298
- Provide **self-contained example** to demonstrate training:
299
-
300
- * a Colab notebook for **LoRA fine-tuning**
301
- * a small 20-image dataset
302
- * training duration < 45 minutes on free GPU
303
- * export `.safetensors` file
304
- * use it in presets
305
 
306
- ### Deliverables
307
 
308
- * `examples/train_lora.ipynb`
309
- * resulting LoRA stored at `assets/loras/example.safetensors`
310
-
311
- ---
312
-
313
- # Contributing
314
 
315
- This repo is configured with **pre-commit**:
316
 
317
- * black
318
  * ruff
 
319
  * isort
320
- * docstring linting (Google style)
321
 
322
- Install hooks:
323
-
324
- ```bash
325
- pre-commit install
326
- ```
327
-
328
- Test formatting:
329
 
330
  ```bash
331
  ruff check .
332
  black .
333
  ```
334
 
335
- Branching convention:
336
-
337
- ```
338
- feat/<feature>
339
- fix/<issue>
340
- refactor/<module>
341
- ```
342
-
343
  ---
344
 
345
- # License
346
 
347
- This project is licensed under [MIT License](LICENSE).
 
348
 
349
  ---
350
 
351
- # Author
352
-
353
- **Sanskar Modi**
354
-
355
- Machine Learning Engineer
356
- Focused on production-grade ML systems.
357
 
358
- GitHub: [https://github.com/sanskarmodi8](https://github.com/sanskarmodi8)
 
5
  sdk_version: 6.0.2
6
  ---
7
 
8
+ # Stable Diffusion Image Generation Toolkit
9
 
10
  ![appdemo](https://drive.google.com/uc?export=view&id=1dO2bnYmEEj3fNU0-dV692icUPSwyP93G)
11
 
12
+ [**Live Demo**](https://huggingface.co/spaces/SanskarModi/sd-image-gen-toolkit)
13
 
14
+ ---
15
+
16
+ ## Overview
17
 
18
+ A modular, lightweight image generation toolkit built on **Hugging Face Diffusers**, designed for **CPU-friendly deployment**, clean architecture, and practical usability.
 
19
 
20
+ It supports **Text → Image**, **Image → Image**, and **Upscaling**, with a **preset system**, optional **LoRA adapters**, and a local **metadata history** for reproducibility.
21
 
22
  ---
23
 
24
+ ## Features
25
 
26
+ ### Text → Image
27
+ - Stable Diffusion **1.5** and **Turbo**
28
+ - Configurable prompt parameters:
29
+ - prompt / negative prompt
30
+ - steps
31
+ - guidance (CFG)
32
+ - resolution
33
+ - seed (optional)
34
+ - JSON metadata output
35
+ - Style presets for quick experimentation
36
 
37
+ ### Image → Image
38
+ - Modify existing images via the SD Img2Img pipeline
39
+ - Denoising strength control
40
+ - Full parameter configuration
41
+ - Shared preset system
42
+ - History saved for reproducibility
43
 
44
+ ### Upscaling (Real-ESRGAN NCNN)
45
+ - **2× and 4×** upscaling
46
+ - NCNN backend (no GPU required)
47
+ - Minimal dependencies
48
+ - Fast on CPU environments (HF Spaces)
49
 
50
+ ### LoRA Adapter Support
51
+ - Runtime loading of `.safetensors` adapters
52
+ - Up to **two adapters** with independent weights
53
+ - Alpha range `-2 → +2` per adapter
54
+ - Automatic discovery under:
55
+ ```
56
 
57
+ src/assets/loras/
58
 
59
+ ```
60
+ - LoRA UI is **disabled for Turbo**, since Turbo does not benefit from LoRA injection
 
 
61
 
62
+ ### Metadata History
63
+ Every generation stores:
64
+ - model id
65
+ - prompt + negative prompt
66
+ - steps, cfg, resolution
67
+ - seed
68
+ - LoRA names + weights
69
+ - timestamp
70
 
71
+ All generated data is stored in a tree structure under:
72
+ ```
 
 
73
 
74
+ src/assets/history/
75
 
76
+ ```
 
 
 
77
 
78
  ---
79
 
80
+ ## Architecture
81
 
82
  ```
83
+
84
+ src/
85
+ └── sdgen/
86
+ ├── sd/ # Stable Diffusion runtime
87
+ │ ├── pipeline.py # model loading, device config
88
+ │ ├── generator.py # text-to-image inference
89
+ ├── img2img.py # image-to-image inference
90
+ │ ├── lora_loader.py # LoRA discovery & injection
91
+ │ └── models.py # typed config & metadata objects
92
 
93
+ ├── ui/ # Gradio UI components
94
+ │ ├── layout.py # composition root for UI
95
+ │ └── tabs/ # modular tabs
96
+ │ ├── txt2img_tab.py
97
+ │ ├── img2img_tab.py
98
+ │ ├── upscaler_tab.py
99
+ │ ├── presets_tab.py
100
+ │ └── history_tab.py
101
 
102
+ ├── presets/ # curated basic presets
103
+ │ └── styles.py # preset registry
104
 
105
+ ├── upscaler/ # Real-ESRGAN NCNN backend
106
+ ├── upscaler.py # interface + metadata
107
+ │ └── realesrgan.py # NCNN wrapper
108
 
109
+ ├── utils/ # shared utilities
110
+ │ ├── history.py # atomic storage format
111
+ │ ├── common.py # PIL helpers
112
+ │ └── logger.py # structured logging
113
 
114
+ └── config/ # static configuration
115
+ ├── paths.py # resolved directories
116
+ └── settings.py # environment settings
 
117
 
118
+ ````
 
 
119
 
120
+ ---
121
 
122
+ ## Presets (Included)
 
123
 
124
+ The project includes **four style presets**, each defining:
 
 
 
 
125
 
126
+ - prompt
127
+ - negative prompt
128
 
129
+ These presets are neutral and work with both **SD1.5** and **Turbo**:
130
 
131
+ | Name | Style |
132
+ |--------------------|----------------------------|
133
+ | Realistic Photo | 35mm, photorealistic |
134
+ | Anime | clean anime illustration |
135
+ | Cinematic / Moody | cinematic lighting/grain |
136
+ | Oil Painting | classical oil painting |
137
 
138
+ Presets do **not include LoRA parameters**.
139
+ Users may manually combine presets with LoRA adapters.
140
 
141
  ---
142
 
143
+ ## Installation
 
 
144
 
145
+ ### Clone
146
  ```bash
147
  git clone https://github.com/sanskarmodi8/stable-diffusion-image-generator
148
  cd stable-diffusion-image-generator
149
+ ````
150
 
151
+ ### Environment
152
 
153
  ```bash
154
  python -m venv .venv
155
  source .venv/bin/activate
156
  ```
157
 
158
+ ### Install Dependencies (CPU)
 
 
 
 
 
 
 
 
159
 
160
  ```bash
161
  pip install -r requirements.txt
162
  pip install -e .
163
  ```
164
 
165
+ ### GPU (optional)
166
 
167
  ```bash
168
+ pip install torch torchvision torchaudio \
169
+ --index-url https://download.pytorch.org/whl/cu121
170
  ```
171
 
172
  ---
173
 
174
+ ## Run
175
 
176
  ```bash
177
  python src/sdgen/main.py
178
  ```
179
 
180
+ Open in browser:
181
 
182
  ```
183
  http://127.0.0.1:7860
 
185
 
186
  ---
187
 
188
+ ## Adding LoRA Models
 
 
 
 
 
 
 
 
189
 
190
+ Place `.safetensors` files here:
191
 
 
 
 
 
 
 
192
  ```
 
193
  src/assets/loras/
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
194
  ```
195
 
196
+ They will be automatically detected and displayed in the UI (SD1.5 only).
 
197
 
198
+ This repository **does not include** LoRA files.
 
 
 
 
 
 
 
 
 
 
 
199
 
200
  ---
201
 
202
+ ## Third-Party LoRA Models
203
 
204
+ The app supports optional LoRA adapters.
205
+ LoRA weights are **not included** and are **the property of their respective authors**.
206
 
207
+ If you choose to download LoRA files automatically (see `lora_urls.py`), they are fetched directly from their original sources (**Civitai**).
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
208
 
209
+ This project does **not** redistribute LoRA weights.
210
+ Refer to each model’s license on Civitai.
 
 
 
 
 
 
 
 
211
 
212
  ---
213
 
214
+ ## Development
 
 
 
 
 
 
 
 
215
 
216
+ The repo uses `pre-commit` hooks for consistency:
217
 
218
+ ```bash
219
+ pre-commit install
220
+ ```
 
 
 
221
 
222
+ Tools:
223
 
 
224
  * ruff
225
+ * black
226
  * isort
 
227
 
228
+ Check formatting:
 
 
 
 
 
 
229
 
230
  ```bash
231
  ruff check .
232
  black .
233
  ```
234
 
 
 
 
 
 
 
 
 
235
  ---
236
 
237
+ ## License
238
 
239
+ This project is licensed under the **MIT License**.
240
+ See the [`LICENSE`](LICENSE) file.
241
 
242
  ---
243
 
244
+ ## Author
 
 
 
 
 
245
 
246
+ [**Sanskar Modi**](https://github.com/sanskarmodi8)
src/sdgen/config/__init__.py CHANGED
@@ -6,6 +6,7 @@ so they can be imported directly from `sdgen.config`.
6
 
7
  from __future__ import annotations
8
 
 
9
  from .paths import (
10
  ASSETS_ROOT,
11
  HISTORY_ENTRIES_DIR,
@@ -26,4 +27,5 @@ __all__ = [
26
  "HISTORY_THUMBS_DIR",
27
  "HISTORY_FULL_DIR",
28
  "LOGS_ROOT",
 
29
  ]
 
6
 
7
  from __future__ import annotations
8
 
9
+ from .lora_urls import LORA_URLS
10
  from .paths import (
11
  ASSETS_ROOT,
12
  HISTORY_ENTRIES_DIR,
 
27
  "HISTORY_THUMBS_DIR",
28
  "HISTORY_FULL_DIR",
29
  "LOGS_ROOT",
30
+ "LORA_URLS",
31
  ]
src/sdgen/config/lora_urls.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """URLs for external LoRA adapters.
2
+
3
+ These URLs point directly to the original creators' downloads on Civitai.
4
+ We download the adapters at runtime if they are not present locally.
5
+
6
+ Legal note:
7
+ We do NOT redistribute LoRA weights in this repository.
8
+ Users download them from the original source at runtime.
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ from typing import Dict
14
+
15
+ # Direct API download endpoints from Civitai.
16
+ # we expect to have under `assets/loras/`.
17
+ LORA_URLS: Dict[str, str] = {
18
+ "DetailTweak.safetensors": "https://civitai.com/api/download/models/62833?type=Model&format=SafeTensor",
19
+ "MangaPanels.safetensors": "https://civitai.com/api/download/models/28907?type=Model&format=SafeTensor&size=full&fp=fp16",
20
+ "AnimeTarotCards.safetensors": "https://civitai.com/api/download/models/28609?type=Model&format=SafeTensor&size=full&fp=fp16",
21
+ }
src/sdgen/main.py CHANGED
@@ -16,10 +16,10 @@ import torch
16
  from dotenv import load_dotenv
17
 
18
  from sdgen.config import AppSettings
19
- from sdgen.sd.img2img import prepare_img2img_pipeline
20
- from sdgen.sd.pipeline import load_pipeline, warmup_pipeline
21
  from sdgen.ui import build_ui
22
  from sdgen.utils.logger import get_logger
 
23
 
24
  logger = get_logger(__name__)
25
  load_dotenv()
@@ -47,6 +47,12 @@ def main() -> None:
47
 
48
  device = "cpu"
49
 
 
 
 
 
 
 
50
  logger.info("Loading pipeline %s", model_id1)
51
  pipes = {
52
  "SD1.5": load_pipeline(
 
16
  from dotenv import load_dotenv
17
 
18
  from sdgen.config import AppSettings
19
+ from sdgen.sd import load_pipeline, prepare_img2img_pipeline, warmup_pipeline
 
20
  from sdgen.ui import build_ui
21
  from sdgen.utils.logger import get_logger
22
+ from sdgen.utils.lora_downloader import ensure_loras
23
 
24
  logger = get_logger(__name__)
25
  load_dotenv()
 
47
 
48
  device = "cpu"
49
 
50
+ # Download LoRAs (runtime)
51
+ try:
52
+ ensure_loras()
53
+ except Exception as exc:
54
+ logger.warning("LoRA download issue: %s", exc)
55
+
56
  logger.info("Loading pipeline %s", model_id1)
57
  pipes = {
58
  "SD1.5": load_pipeline(
src/sdgen/utils/lora_downloader.py ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Runtime downloader for external LoRA adapters.
2
+
3
+ If a LoRA file is not found locally under `assets/loras/`,
4
+ we download it from the original source URL.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import os
10
+ import shutil
11
+ from pathlib import Path
12
+
13
+ import requests
14
+
15
+ from sdgen.config.lora_urls import LORA_URLS
16
+ from sdgen.config.paths import ASSETS_ROOT
17
+ from sdgen.utils.logger import get_logger
18
+
19
+ logger = get_logger(__name__)
20
+
21
+ LORA_DIR: Path = ASSETS_ROOT / "loras"
22
+
23
+
24
+ def ensure_lora_dir() -> None:
25
+ """Create lora directory if missing."""
26
+ LORA_DIR.mkdir(parents=True, exist_ok=True)
27
+
28
+
29
+ def download_file(url: str, dst: Path, chunk: int = 8192) -> None:
30
+ """Stream download a file to destination path."""
31
+ with requests.get(url, stream=True, timeout=60) as r:
32
+ r.raise_for_status()
33
+ with open(dst, "wb") as f:
34
+ shutil.copyfileobj(r.raw, f)
35
+ logger.info("Downloaded LoRA: %s", dst.name)
36
+
37
+
38
+ def ensure_loras() -> None:
39
+ """Download missing LoRA weights at runtime."""
40
+ ensure_lora_dir()
41
+
42
+ for filename, url in LORA_URLS.items():
43
+ path = LORA_DIR / filename
44
+ if path.exists() and path.stat().st_size > 0:
45
+ logger.info("LoRA exists: %s", filename)
46
+ continue
47
+
48
+ logger.info("Downloading LoRA: %s", filename)
49
+ try:
50
+ download_file(url, path)
51
+ except Exception as exc: # noqa: BLE001
52
+ logger.exception("Failed to download LoRA %s: %s", filename, exc)
53
+ # cleanup partial file
54
+ if path.exists():
55
+ try:
56
+ os.remove(path)
57
+ except Exception:
58
+ pass