basimazam
/

safe-diffusion-guidance

Model card Files Files and versions

xet

Community

basimazam committed on Aug 11, 2025

Commit

febc264

verified ·

1 Parent(s): 01836d7

Upload folder using huggingface_hub

Browse files

Files changed (2) hide show

README.md +54 -48
safe_diffusion_guidance.py +45 -33

README.md CHANGED Viewed

@@ -1,48 +1,54 @@
----
-library_name: diffusers
-pipeline_tag: text-to-image
-tags:
-  - safety
-  - classifier-guidance
-  - stable-diffusion
-  - plug-and-play
-license: apache-2.0
----
-# Safe Diffusion Guidance (SDG) —
-**Safe Diffusion Guidance (SDG)** is a *classifier-guided denoising* layer that steers the sampling trajectory away from unsafe content **without retraining** the base model.
-It works **standalone** with SD 1.4 / 1.5 / 2.1.
-## Quickstart (SD 1.5)
-```python
-import torch
-from diffusers import StableDiffusionPipeline
-# 1) Load base SD pipeline (disable default safety checker)
-base = StableDiffusionPipeline.from_pretrained(
-    "runwayml/stable-diffusion-v1-5",
-    torch_dtype=torch.float16,
-    safety_checker=None
-).to("cuda")
-# 2) Load SDG custom pipeline from Hub (this repo)
-sdg = StableDiffusionPipeline.from_pretrained(
-    "basimazam/safe-diffusion-guidance",
-    custom_pipeline="safe_diffusion_guidance",
-    torch_dtype=torch.float16
-).to("cuda")
-img = sdg(
-    base_pipe=base,
-    prompt="portrait photograph, studio light, 85mm, realistic",
-    num_inference_steps=50,
-    guidance_scale=7.5,
-    safety_scale=5.0,
-    mid_fraction=1.0,
-    safe_class_index=3
-).images[0]
-img.save("sdg_safe_output.png")

+---
+library_name: diffusers
+pipeline_tag: text-to-image
+tags:
+  - safety
+  - classifier-guidance
+  - stable-diffusion
+  - plug-and-play
+license: apache-2.0
+---
+# Safe Diffusion Guidance (SDG) — plug-and-play safety layer for Stable Diffusion
+**Safe Diffusion Guidance (SDG)** is a *classifier-guided denoising* layer that steers the sampling trajectory away from unsafe content **without retraining** the base model.
+It works **standalone** with SD 1.4 / 1.5 / 2.1 and **composes** cleanly with ESD/UCE/SLD.
+- **Safety signal:** a 5-class mid-UNet feature classifier (classes: `gore, hate, medical, safe, sexual`) trained on (1280×8×8) features.
+- **Controls:** `safety_scale` (strength), `mid_fraction` (fraction of steps guided).
+- **Plug-in:** drop into any SD pipeline, or stack on top of ESD/UCE/SLD.
+- **No retraining:** small gradient nudges to latents during denoising.
+> **Note on metrics** (matching our paper): FID/KID are computed vs. _baseline model outputs_ rather than real images; baseline FID/KID are ≈0 by construction.
+## Quickstart (SD 1.5)
+```python
+import torch
+from diffusers import StableDiffusionPipeline
+# 1) Load base SD pipeline (disable default safety checker)
+base = StableDiffusionPipeline.from_pretrained(
+    "runwayml/stable-diffusion-v1-5",
+    torch_dtype=torch.float16,
+    safety_checker=None
+).to("cuda")
+# 2) Load SDG custom pipeline from Hub (this repo)
+sdg = StableDiffusionPipeline.from_pretrained(
+    "basimazam/safe-diffusion-guidance",
+    custom_pipeline="safe_diffusion_guidance",
+    torch_dtype=torch.float16
+).to("cuda")
+img = sdg(
+    base_pipe=base,
+    prompt="portrait photograph, studio light, 85mm, realistic",
+    num_inference_steps=50,
+    guidance_scale=7.5,
+    safety_scale=5.0,        # strength: ~2–8 (Light→Strong)
+    mid_fraction=1.0,        # guide fraction of steps: 0.5, 0.8, 1.0
+    safe_class_index=3       # index of 'safe' in [gore,hate,medical,safe,sexual]
+).images[0]
+img.save("sdg_safe_output.png")
+```

safe_diffusion_guidance.py CHANGED Viewed

@@ -1,30 +1,25 @@
 # safe_diffusion_guidance.py
 import torch
 from typing import Optional, List
 from diffusers import DiffusionPipeline, StableDiffusionPipeline
 from diffusers.utils import BaseOutput
-# utils/adaptive_classifiers.py
-import torch
 import torch.nn as nn
-from typing import Optional
 CLASS_NAMES = ['gore', 'hate', 'medical', 'safe', 'sexual']
 class SafetyClassifier1280(nn.Module):
-    """
-    Unified safety classifier for mid-UNet features of shape (B, 1280, H, W).
-    Robust to variable HxW via AdaptiveAvgPool2d((8,8)) before the head.
-    """
     def __init__(self, num_classes: int = 5):
         super().__init__()
         self.pre = nn.AdaptiveAvgPool2d((8, 8))
         self.net = nn.Sequential(
             nn.Conv2d(1280, 512, 3, padding=1),
-            nn.BatchNorm2d(512), nn.ReLU(inplace=True), nn.MaxPool2d(2),    # 512 x 4 x 4
             nn.Conv2d(512, 256, 3, padding=1),
-            nn.BatchNorm2d(256), nn.ReLU(inplace=True), nn.MaxPool2d(2),    # 256 x 2 x 2
-            nn.AdaptiveAvgPool2d(1), nn.Flatten(),                          # 256
             nn.Linear(256, 128), nn.ReLU(inplace=True), nn.Dropout(0.3),
             nn.Linear(128, num_classes)
         )
@@ -41,9 +36,32 @@ class SafetyClassifier1280(nn.Module):
             nn.init.ones_(m.weight); nn.init.zeros_(m.bias)
     def forward(self, x: torch.Tensor) -> torch.Tensor:
-        x = self.pre(x)  # (B, 1280, 8, 8)
         return self.net(x)
 def load_classifier_1280(
     weights_path: str,
     device: Optional[torch.device] = None,
@@ -58,28 +76,23 @@ def load_classifier_1280(
     return model
 def pick_weights_for_pipe(pipe) -> str:
-    """
-    Optional helper: return a default weights file based on the base SD pipeline id.
-    You can also use a single shared file 'classifiers/safety_classifier_1280.pth'.
-    """
-    name = str(getattr(pipe, "_internal_dict", {}).get("_name_or_path", "")).lower()
-    # Adjust logic as you like — default to a single shared file:
-    return "classifiers/safety_classifier_1280.pth"
 class SDGOutput(BaseOutput):
     images: List
 class SafeDiffusionGuidance(DiffusionPipeline):
-    """
-    Pure custom pipeline. No pre-saved components in the repo.
-    It auto-loads a base SD pipeline if `base_pipe` is None.
-    """
-    def __init__(self, **kwargs):
-        # Accept any extra kwargs Diffusers might pass; we ignore them.
         super().__init__()
-        self.base_pipe_ = None  # lazy cache
     def _ensure_base(self, base_pipe, base_model_id, torch_dtype):
         if base_pipe is not None:
@@ -87,9 +100,7 @@ class SafeDiffusionGuidance(DiffusionPipeline):
             return self.base_pipe_
         if self.base_pipe_ is None:
             self.base_pipe_ = StableDiffusionPipeline.from_pretrained(
-                base_model_id,
-                torch_dtype=torch_dtype,
-                safety_checker=None
             ).to(self.device)
         return self.base_pipe_
@@ -108,7 +119,6 @@ class SafeDiffusionGuidance(DiffusionPipeline):
         generator: Optional[torch.Generator] = None,
         **kwargs
     ) -> SDGOutput:
         base = self._ensure_base(base_pipe, base_model_id, torch_dtype=torch.float16)
         device = getattr(base, "_execution_device", base.device)
         dtype  = base.unet.dtype
@@ -133,9 +143,9 @@ class SafeDiffusionGuidance(DiffusionPipeline):
         base.scheduler.set_timesteps(num_inference_steps, device=device)
         timesteps = base.scheduler.timesteps
-        # classifier (fp32)
-        weights = classifier_weights or pick_weights_for_pipe(base)
-        clf = load_classifier_1280(weights, device=device, dtype=torch.float32).eval()
         # mid-block hook
         mid = {}
@@ -186,3 +196,5 @@ class SafeDiffusionGuidance(DiffusionPipeline):
             img = base.decode_latents(latents)
             img = base.image_processor.postprocess(img, output_type="pil")[0]
         return SDGOutput(images=[img])

 # safe_diffusion_guidance.py
+import os
 import torch
 from typing import Optional, List
 from diffusers import DiffusionPipeline, StableDiffusionPipeline
 from diffusers.utils import BaseOutput
+# ---- Classifier (unchanged) ----
 import torch.nn as nn
 CLASS_NAMES = ['gore', 'hate', 'medical', 'safe', 'sexual']
 class SafetyClassifier1280(nn.Module):
     def __init__(self, num_classes: int = 5):
         super().__init__()
         self.pre = nn.AdaptiveAvgPool2d((8, 8))
         self.net = nn.Sequential(
             nn.Conv2d(1280, 512, 3, padding=1),
+            nn.BatchNorm2d(512), nn.ReLU(inplace=True), nn.MaxPool2d(2),
             nn.Conv2d(512, 256, 3, padding=1),
+            nn.BatchNorm2d(256), nn.ReLU(inplace=True), nn.MaxPool2d(2),
+            nn.AdaptiveAvgPool2d(1), nn.Flatten(),
             nn.Linear(256, 128), nn.ReLU(inplace=True), nn.Dropout(0.3),
             nn.Linear(128, num_classes)
         )
             nn.init.ones_(m.weight); nn.init.zeros_(m.bias)
     def forward(self, x: torch.Tensor) -> torch.Tensor:
+        x = self.pre(x)
         return self.net(x)
+# ---- NEW: robust path resolution for weights ----
+def _resolve_repo_path(rel_path: str) -> str:  # rel_path is joined onto this module's directory, or used as the hub filename
+    """Return an absolute path inside the cached repo; fallback to hf_hub_download."""
+    here = os.path.dirname(__file__)  # directory containing this module file
+    local_path = os.path.join(here, rel_path)
+    if os.path.exists(local_path):
+        return local_path  # fast path: the file sits next to this module, no hub access needed
+    # Fallback: try hub download (works even if code is executed outside repo root)
+    try:
+        from huggingface_hub import hf_hub_download  # imported inside the try, so an ImportError is also wrapped below
+        # Best effort to get repo id; default to your public repo if unknown
+        repo_id = getattr(_resolve_repo_path, "_repo_id", None)  # optional override read from a function attribute; nothing visible here sets it
+        if repo_id is None:
+            # Diffusers stores name or path in internal dict sometimes:
+            repo_id = getattr(SafeDiffusionGuidance, "__repo_id__", "basimazam/safe-diffusion-guidance")  # class attribute that SafeDiffusionGuidance.__init__ tries to set
+        return hf_hub_download(repo_id=repo_id, filename=rel_path)
+    except Exception as e:  # any failure in the fallback (import, lookup, download) surfaces as FileNotFoundError
+        raise FileNotFoundError(
+            f"Could not find classifier weights at '{rel_path}'. "
+            f"Make sure the file exists in the repo, or pass `classifier_weights=...`. "
+            f"Original error: {e}"
+        )
 def load_classifier_1280(
     weights_path: str,
     device: Optional[torch.device] = None,
     return model
 def pick_weights_for_pipe(pipe) -> str:  # `pipe` is accepted but not read by this body
+    # Always use the standard path inside the repo
+    return _resolve_repo_path("classifiers/safety_classifier_1280.pth")  # FileNotFoundError from _resolve_repo_path propagates to the caller
 class SDGOutput(BaseOutput):  # return type annotated on the pipeline call (`-> SDGOutput`)
     images: List  # generated images; the visible return site passes a one-element list
 class SafeDiffusionGuidance(DiffusionPipeline):
+    """Pure custom pipeline; loads base SD lazily at runtime."""
+    def __init__(self):                    # <-- IMPORTANT: no **kwargs
         super().__init__()
+        self.base_pipe_ = None  # cache for the base pipeline; _ensure_base fills it on first use
+        # Hint for the fallback downloader (optional)
+        try:
+            SafeDiffusionGuidance.__repo_id__ = self.config._name_or_path  # diffusers sets this sometimes; stored on the class, not the instance
+        except Exception:  # best-effort: on failure _resolve_repo_path falls back to its hard-coded default repo id
+            pass
     def _ensure_base(self, base_pipe, base_model_id, torch_dtype):
         if base_pipe is not None:
             return self.base_pipe_
         if self.base_pipe_ is None:
             self.base_pipe_ = StableDiffusionPipeline.from_pretrained(
+                base_model_id, torch_dtype=torch_dtype, safety_checker=None
             ).to(self.device)
         return self.base_pipe_
         generator: Optional[torch.Generator] = None,
         **kwargs
     ) -> SDGOutput:
         base = self._ensure_base(base_pipe, base_model_id, torch_dtype=torch.float16)
         device = getattr(base, "_execution_device", base.device)
         dtype  = base.unet.dtype
         base.scheduler.set_timesteps(num_inference_steps, device=device)
         timesteps = base.scheduler.timesteps
+        # classifier (fp32) — use provided path or default resolved path
+        weights_file = classifier_weights or pick_weights_for_pipe(base)
+        clf = load_classifier_1280(weights_file, device=device, dtype=torch.float32).eval()
         # mid-block hook
         mid = {}
             img = base.decode_latents(latents)
             img = base.image_processor.postprocess(img, output_type="pil")[0]
         return SDGOutput(images=[img])
+__all__ = ["SafeDiffusionGuidance"]