basimazam committed on
Commit
855692d
·
verified ·
1 Parent(s): b8877ca

Upload folder using huggingface_hub

Browse files
Files changed (2) hide show
  1. model_index.json +3 -9
  2. safe_diffusion_guidance.py +54 -70
model_index.json CHANGED
@@ -1,11 +1,5 @@
1
  {
2
- "library_name": "diffusers",
3
- "pipeline": "StableDiffusionPipeline",
4
- "tags": [
5
- "safety",
6
- "classifier-guidance",
7
- "stable-diffusion",
8
- "plug-and-play"
9
- ],
10
- "inference": "Use `custom_pipeline='safe_diffusion_guidance'` and pass your base SD pipeline via `base_pipe=...`."
11
  }
 
1
  {
2
+ "_class_name": "SafeDiffusionGuidance",
3
+ "_diffusers_version": "0.29.0",
4
+ "custom_pipeline": "safe_diffusion_guidance"
 
 
 
 
 
 
5
  }
safe_diffusion_guidance.py CHANGED
@@ -1,40 +1,30 @@
1
  # safe_diffusion_guidance.py
2
  import torch
3
  from typing import Optional, List
4
- from diffusers import StableDiffusionPipeline, DiffusionPipeline
5
  from diffusers.utils import BaseOutput
6
 
7
- from utils.adaptive_classifiers import (
8
- load_classifier_1280, pick_weights_for_pipe
9
- )
10
 
11
class SDGOutput(BaseOutput):
    """Pipeline output container.

    Attributes:
        images: list of generated images (PIL, per the decode path below).
    """
    images: List
13
 
14
- class SafeDiffusionGuidancePipeline(StableDiffusionPipeline):
15
  """
16
- Plug-and-play safety guidance for Stable Diffusion.
17
- - If `base_pipe` is None, we auto-load the base SD checkpoint specified by `base_model_id`.
18
- - Classifier weights are shipped in this repo; no extra installs required.
19
-
20
- Typical use:
21
- sdg = DiffusionPipeline.from_pretrained("your-org/safe-diffusion-guidance",
22
- custom_pipeline="safe_diffusion_guidance",
23
- torch_dtype=torch.float16).to("cuda")
24
- out = sdg(prompt="...", base_model_id="runwayml/stable-diffusion-v1-5",
25
- safety_scale=5.0, mid_fraction=1.0, safe_class_index=3)
26
  """
27
 
28
def __init__(self, *args, **kwargs):
    """Construct the pipeline, forwarding everything to StableDiffusionPipeline.

    Also prepares the cache slot used by `_ensure_base` for the lazily
    loaded base pipeline.
    """
    super().__init__(*args, **kwargs)
    # Populated on first use by _ensure_base; None means "not loaded yet".
    self.base_pipe_ = None
 
31
 
32
  def _ensure_base(self, base_pipe, base_model_id, torch_dtype):
33
  if base_pipe is not None:
34
  self.base_pipe_ = base_pipe
35
  return self.base_pipe_
36
  if self.base_pipe_ is None:
37
- # lazy load chosen SD checkpoint
38
  self.base_pipe_ = StableDiffusionPipeline.from_pretrained(
39
  base_model_id,
40
  torch_dtype=torch_dtype,
@@ -52,92 +42,86 @@ class SafeDiffusionGuidancePipeline(StableDiffusionPipeline):
52
  mid_fraction: float = 1.0,
53
  safe_class_index: int = 3,
54
  classifier_weights: Optional[str] = None,
55
- # new convenience arg (optional):
56
  base_pipe: Optional[StableDiffusionPipeline] = None,
57
  base_model_id: str = "runwayml/stable-diffusion-v1-5",
58
  generator: Optional[torch.Generator] = None,
59
  **kwargs
60
  ) -> SDGOutput:
61
 
62
- # 0) choose / load base SD
63
- base = self._ensure_base(base_pipe, base_model_id, torch_dtype=self.unet.dtype)
64
- device = base._execution_device if hasattr(base, "_execution_device") else base.device
65
- dtype = base.unet.dtype
66
 
67
- # 1) Text embeddings (CFG)
68
  tok = base.tokenizer
69
  max_len = tok.model_max_length
70
- text_inputs = tok([prompt], padding="max_length", max_length=max_len, return_tensors="pt")
71
- text_embeds = base.text_encoder(text_inputs.input_ids.to(device)).last_hidden_state
72
-
73
  if negative_prompt is not None:
74
- uncond_inputs = tok([negative_prompt], padding="max_length", max_length=max_len, return_tensors="pt")
75
  else:
76
- uncond_inputs = tok([""], padding="max_length", max_length=max_len, return_tensors="pt")
77
- uncond_embeds = base.text_encoder(uncond_inputs.input_ids.to(device)).last_hidden_state
78
- cond_embeds = torch.cat([uncond_embeds, text_embeds], dim=0)
79
 
80
- # 2) Latent init
81
- height = kwargs.pop("height", 512); width = kwargs.pop("width", 512)
82
- latents = torch.randn(
83
- (1, base.unet.in_channels, height // 8, width // 8),
84
- device=device, generator=generator, dtype=dtype
85
- )
86
 
87
  base.scheduler.set_timesteps(num_inference_steps, device=device)
88
  timesteps = base.scheduler.timesteps
89
 
90
- # 3) Load classifier (fp32) from this repo
91
- weights_path = classifier_weights or pick_weights_for_pipe(base)
92
- classifier = load_classifier_1280(weights_path, device=device, dtype=torch.float32).eval()
93
 
94
- # 4) Hook mid-block features
95
- mid_cache = {}
96
- def mid_hook(module, inputs, output):
97
- mid_cache["feat"] = output[0] if isinstance(output, tuple) else output
98
- h = base.unet.mid_block.register_forward_hook(mid_hook)
99
 
100
- base_alpha = 1e-3 # small step
101
 
102
  with torch.no_grad():
103
  for i, t in enumerate(timesteps):
104
- latent_in = base.scheduler.scale_model_input(latents, t)
105
- latent_pair = torch.cat([latent_in, latent_in], dim=0)
106
 
107
  do_guide = (i / len(timesteps)) <= mid_fraction and safety_scale > 0
108
  if do_guide:
109
  with torch.enable_grad():
110
- latents_g = latents.detach().clone().requires_grad_(True)
111
- latent_in_g = base.scheduler.scale_model_input(latents_g, t)
112
- latent_pair_g = torch.cat([latent_in_g, latent_in_g], dim=0)
113
 
114
- _ = base.unet(latent_pair_g, t, encoder_hidden_states=cond_embeds).sample
115
- feat = mid_cache["feat"].detach().float()
116
- logits = classifier(feat)
117
  probs = torch.softmax(logits, dim=-1)
118
- unsafe_prob = 1.0 - probs[:, safe_class_index].mean()
119
 
120
- loss = safety_scale * unsafe_prob
121
  loss.backward()
122
 
123
  alpha = base_alpha
124
  if hasattr(base.scheduler, "sigmas"):
125
- step_idx = min(i, len(base.scheduler.sigmas) - 1)
126
- alpha = base_alpha * float(base.scheduler.sigmas[step_idx])
127
- latents = (latents_g - alpha * latents_g.grad).detach()
128
 
129
- latent_in = base.scheduler.scale_model_input(latents, t)
130
- latent_pair = torch.cat([latent_in, latent_in], dim=0)
131
- noise_pred = base.unet(latent_pair, t, encoder_hidden_states=cond_embeds).sample
132
  else:
133
- noise_pred = base.unet(latent_pair, t, encoder_hidden_states=cond_embeds).sample
134
 
135
- noise_uncond, noise_text = noise_pred.chunk(2)
136
- noise = noise_uncond + guidance_scale * (noise_text - noise_uncond)
137
  latents = base.scheduler.step(noise, t, latents).prev_sample
138
 
139
- h.remove()
140
  with torch.no_grad():
141
- image = base.decode_latents(latents)
142
- image = base.image_processor.postprocess(image, output_type="pil")[0]
143
- return SDGOutput(images=[image])
 
1
  # safe_diffusion_guidance.py
2
  import torch
3
  from typing import Optional, List
4
+ from diffusers import DiffusionPipeline, StableDiffusionPipeline
5
  from diffusers.utils import BaseOutput
6
 
7
+ from utils.adaptive_classifiers import load_classifier_1280, pick_weights_for_pipe
 
 
8
 
9
class SDGOutput(BaseOutput):
    """Output of SafeDiffusionGuidance.

    Attributes:
        images: list of generated images (PIL, per the decode path below).
    """
    images: List
11
 
12
+ class SafeDiffusionGuidance(DiffusionPipeline):
13
  """
14
+ Pure custom pipeline. No pre-saved components in the repo.
15
+ It auto-loads a base SD pipeline if `base_pipe` is None.
 
 
 
 
 
 
 
 
16
  """
17
 
18
def __init__(self, **kwargs):
    """Initialize the custom pipeline.

    Extra keyword arguments that Diffusers may forward during
    `from_pretrained` are accepted and deliberately ignored — this
    pipeline ships no pre-saved components of its own.
    """
    super().__init__()
    # Slot for the lazily loaded base SD pipeline (filled by _ensure_base).
    self.base_pipe_ = None
22
 
23
  def _ensure_base(self, base_pipe, base_model_id, torch_dtype):
24
  if base_pipe is not None:
25
  self.base_pipe_ = base_pipe
26
  return self.base_pipe_
27
  if self.base_pipe_ is None:
 
28
  self.base_pipe_ = StableDiffusionPipeline.from_pretrained(
29
  base_model_id,
30
  torch_dtype=torch_dtype,
 
42
  mid_fraction: float = 1.0,
43
  safe_class_index: int = 3,
44
  classifier_weights: Optional[str] = None,
 
45
  base_pipe: Optional[StableDiffusionPipeline] = None,
46
  base_model_id: str = "runwayml/stable-diffusion-v1-5",
47
  generator: Optional[torch.Generator] = None,
48
  **kwargs
49
  ) -> SDGOutput:
50
 
51
+ base = self._ensure_base(base_pipe, base_model_id, torch_dtype=torch.float16)
52
+ device = getattr(base, "_execution_device", base.device)
53
+ dtype = base.unet.dtype
 
54
 
55
+ # text embeds (CFG)
56
  tok = base.tokenizer
57
  max_len = tok.model_max_length
58
+ txt = tok([prompt], padding="max_length", max_length=max_len, return_tensors="pt")
59
+ cond = base.text_encoder(txt.input_ids.to(device)).last_hidden_state
 
60
  if negative_prompt is not None:
61
+ uncond_txt = tok([negative_prompt], padding="max_length", max_length=max_len, return_tensors="pt")
62
  else:
63
+ uncond_txt = tok([""], padding="max_length", max_length=max_len, return_tensors="pt")
64
+ uncond = base.text_encoder(uncond_txt.input_ids.to(device)).last_hidden_state
65
+ cond_embeds = torch.cat([uncond, cond], dim=0)
66
 
67
+ # latents
68
+ h = kwargs.pop("height", 512); w = kwargs.pop("width", 512)
69
+ latents = torch.randn((1, base.unet.in_channels, h // 8, w // 8),
70
+ device=device, generator=generator, dtype=dtype)
 
 
71
 
72
  base.scheduler.set_timesteps(num_inference_steps, device=device)
73
  timesteps = base.scheduler.timesteps
74
 
75
+ # classifier (fp32)
76
+ weights = classifier_weights or pick_weights_for_pipe(base)
77
+ clf = load_classifier_1280(weights, device=device, dtype=torch.float32).eval()
78
 
79
+ # mid-block hook
80
+ mid = {}
81
+ def hook(_, __, out): mid["feat"] = out[0] if isinstance(out, tuple) else out
82
+ handle = base.unet.mid_block.register_forward_hook(hook)
 
83
 
84
+ base_alpha = 1e-3
85
 
86
  with torch.no_grad():
87
  for i, t in enumerate(timesteps):
88
+ lat_in = base.scheduler.scale_model_input(latents, t)
89
+ lat_cat = torch.cat([lat_in, lat_in], dim=0)
90
 
91
  do_guide = (i / len(timesteps)) <= mid_fraction and safety_scale > 0
92
  if do_guide:
93
  with torch.enable_grad():
94
+ lg = latents.detach().clone().requires_grad_(True)
95
+ lin = base.scheduler.scale_model_input(lg, t)
96
+ lcat = torch.cat([lin, lin], dim=0)
97
 
98
+ _ = base.unet(lcat, t, encoder_hidden_states=cond_embeds).sample
99
+ feat = mid["feat"].detach().float()
100
+ logits = clf(feat)
101
  probs = torch.softmax(logits, dim=-1)
102
+ unsafe = 1.0 - probs[:, safe_class_index].mean()
103
 
104
+ loss = safety_scale * unsafe
105
  loss.backward()
106
 
107
  alpha = base_alpha
108
  if hasattr(base.scheduler, "sigmas"):
109
+ idx = min(i, len(base.scheduler.sigmas) - 1)
110
+ alpha = base_alpha * float(base.scheduler.sigmas[idx])
111
+ latents = (lg - alpha * lg.grad).detach()
112
 
113
+ lat_in = base.scheduler.scale_model_input(latents, t)
114
+ lat_cat = torch.cat([lat_in, lat_in], dim=0)
115
+ noise_pred = base.unet(lat_cat, t, encoder_hidden_states=cond_embeds).sample
116
  else:
117
+ noise_pred = base.unet(lat_cat, t, encoder_hidden_states=cond_embeds).sample
118
 
119
+ n_uncond, n_text = noise_pred.chunk(2)
120
+ noise = n_uncond + guidance_scale * (n_text - n_uncond)
121
  latents = base.scheduler.step(noise, t, latents).prev_sample
122
 
123
+ handle.remove()
124
  with torch.no_grad():
125
+ img = base.decode_latents(latents)
126
+ img = base.image_processor.postprocess(img, output_type="pil")[0]
127
+ return SDGOutput(images=[img])