Upload SDG pipeline + classifier weights
safe_diffusion_guidance.py (+61 −66)
CHANGED
@@ -7,93 +7,87 @@ import torch.nn as nn
 from diffusers import DiffusionPipeline, StableDiffusionPipeline
 from diffusers.utils import BaseOutput
 
+import torch, torch.nn as nn, os
+from typing import Optional
 
-CLASS_NAMES = ["gore", "hate", "medical", "safe", "sexual"]
+CLASS_NAMES = ['gore', 'hate', 'medical', 'safe', 'sexual']
 
-class
+class AdaptiveClassifier1280(nn.Module):
     """
+    Same CNN topology you trained (keys start with 'model.*').
+    Input (B,1280,H,W) -> AdaptiveAvgPool2d(8,8) -> conv stack -> head
     """
     def __init__(self, num_classes: int = 5):
        super().__init__()
        self.pre = nn.AdaptiveAvgPool2d((8, 8))
-            nn.
-            nn.
-            nn.
-            nn.
+        # Keep the attribute name 'model' to match the checkpoint keys.
+        self.model = nn.Sequential(
+            nn.Conv2d(1280, 512, kernel_size=3, padding=1),
+            nn.BatchNorm2d(512), nn.ReLU(inplace=True), nn.MaxPool2d(2),  # (512,4,4)
+            nn.Dropout2d(0.1),
+
+            nn.Conv2d(512, 256, kernel_size=3, padding=1),
+            nn.BatchNorm2d(256), nn.ReLU(inplace=True), nn.MaxPool2d(2),  # (256,2,2)
+            nn.Dropout2d(0.1),
+
+            nn.AdaptiveAvgPool2d(1),  # -> (256,1,1)
+            nn.Flatten(),             # -> (256,)
+            nn.Linear(256, 128), nn.ReLU(inplace=True), nn.Dropout(0.5),
             nn.Linear(128, num_classes)
         )
-        self.apply(self.
+        self.apply(self._init)
 
     @staticmethod
-    def
+    def _init(m):
         if isinstance(m, nn.Linear):
-            nn.init.xavier_uniform_(m.weight)
-            if m.bias is not None: nn.init.zeros_(m.bias)
+            nn.init.xavier_uniform_(m.weight); nn.init.zeros_(m.bias)
         elif isinstance(m, nn.Conv2d):
-            nn.init.kaiming_normal_(m.weight, mode=
+            nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
             if m.bias is not None: nn.init.zeros_(m.bias)
         elif isinstance(m, nn.BatchNorm2d):
             nn.init.ones_(m.weight); nn.init.zeros_(m.bias)
 
-    def forward(self, x
-        x = self.pre(x)  # (B,
-        return self.
+    def forward(self, x):
+        x = self.pre(x)  # (B,1280,8,8)
+        return self.model(x)
+
+def _find_weights_path() -> str:
+    # 1) explicit env; 2) repo root file; 3) classifiers/ subdir
+    env_p = os.getenv("SDG_CLASSIFIER_WEIGHTS")
+    if env_p and os.path.exists(env_p): return env_p
+    for p in ["safety_classifier_1280.pth", os.path.join("classifiers","safety_classifier_1280.pth")]:
+        if os.path.exists(p): return p
+    # If running from HF cache, these paths are relative to the cached repo folder.
+    raise FileNotFoundError(
+        "Safety-classifier weights not found. Provide via env SDG_CLASSIFIER_WEIGHTS, "
+        "place 'safety_classifier_1280.pth' at repo root or 'classifiers/', "
+        "or pass `classifier_weights=...` to the pipeline call."
+    )
 
 def load_classifier_1280(
-    weights_path: str,
-    device
+    weights_path: Optional[str],
+    device: torch.device,
     dtype: torch.dtype = torch.float32
-) ->
-    ""
-    try:
-        import numpy as np
-        from torch.serialization import add_safe_globals
-        add_safe_globals([np.dtype, np.number])
-    except Exception:
-        pass
-    state = torch.load(weights_path, map_location="cpu", weights_only=False)
-
-    # --- unwrap training checkpoint ---
-    if isinstance(state, dict) and "model_state_dict" in state:
-        state = state["model_state_dict"]
-
-    # --- normalize keys: 'module.' -> '', 'model.' -> 'net.' ---
-    norm_state = {}
-    for k, v in state.items():
-        k = k.replace("module.", "")
-        k = k.replace("model.", "net.")
-        norm_state[k] = v
-
-    # --- load tolerating tiny diffs (e.g., extra keys from older heads) ---
-    missing, unexpected = model.load_state_dict(norm_state, strict=False)
+) -> AdaptiveClassifier1280:
+    path = weights_path or _find_weights_path()
+    ckpt = torch.load(path, map_location="cpu", weights_only=False)
+
+    # Extract actual state dict
+    if isinstance(ckpt, dict) and "model_state_dict" in ckpt:
+        state = ckpt["model_state_dict"]
+    elif isinstance(ckpt, dict) and any(k.startswith("model.") for k in ckpt.keys()):
+        state = ckpt
+    else:
+        # Fallback: allow whole-object saves (only if trusted)
+        state = ckpt
+
+    model = AdaptiveClassifier1280().to(device=device, dtype=torch.float32)  # keep classifier in fp32
+    missing, unexpected = model.load_state_dict(state, strict=False)
     if missing or unexpected:
-        print(f"[SDG]
-              f"Missing: {len(missing)}, Unexpected: {len(unexpected)}")
+        print(f"[SDG] load_state_dict: missing={missing[:4]}... ({len(missing)}), unexpected={unexpected[:4]}... ({len(unexpected)})")
     model.eval()
     return model
 
 def _here(*paths: str) -> str:
     return os.path.join(os.path.dirname(__file__), *paths)

@@ -197,8 +191,9 @@ class SafeDiffusionGuidance(DiffusionPipeline):
         timesteps = base.scheduler.timesteps
 
         # 4) classifier (run in fp32)
-        clf = load_classifier_1280(
+        weights = classifier_weights or pick_weights_for_pipe(base)
+        clf = load_classifier_1280(weights, device=device, dtype=torch.float32).eval()
+
 
         # 5) mid-block hook
         mid = {}

@@ -222,7 +217,7 @@ class SafeDiffusionGuidance(DiffusionPipeline):
             lcat = torch.cat([lin, lin], dim=0)
 
             _ = base.unet(lcat, t, encoder_hidden_states=cond_embeds).sample
-            feat = mid["feat"].detach().
+            feat = mid["feat"].detach().to(torch.float32)
             logits = clf(feat)
             probs = torch.softmax(logits, dim=-1)
             unsafe = 1.0 - probs[:, safe_class_index].mean()  # encourage "safe"
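Note on the checkpoint layout: load_classifier_1280 accepts either a raw state dict whose parameter keys start with 'model.' or a training checkpoint wrapped under 'model_state_dict'. A minimal sketch of writing a compatible file (training omitted; the import assumes this module is importable as safe_diffusion_guidance):

import torch
from safe_diffusion_guidance import AdaptiveClassifier1280

clf = AdaptiveClassifier1280(num_classes=5)
# ... train on (B, 1280, H, W) mid-block features ...

# Both layouts round-trip through load_classifier_1280; the only trainable
# submodule is 'self.model', so raw keys all start with 'model.*'.
torch.save({"model_state_dict": clf.state_dict()}, "safety_classifier_1280.pth")  # wrapped checkpoint
torch.save(clf.state_dict(), "safety_classifier_1280.pth")                        # raw 'model.*' keys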
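Caller-side usage, as a sketch: the explicit classifier_weights argument wins; otherwise the pipeline falls back to pick_weights_for_pipe and, inside the loader, to _find_weights_path (env var, repo root, classifiers/). The repo id below is a placeholder, and the classifier_weights kwarg and .images output follow the conventions the diff implies rather than a confirmed signature:

import os, torch
from diffusers import DiffusionPipeline

os.environ["SDG_CLASSIFIER_WEIGHTS"] = "/path/to/safety_classifier_1280.pth"  # option 1: env var

pipe = DiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",      # assumed SD v1.x base (1280-channel mid-block)
    custom_pipeline="<user>/<this-repo>",  # placeholder: the Hub repo hosting this file
    torch_dtype=torch.float16,
).to("cuda")

# Option 2: pass the path per call, as the FileNotFoundError message suggests.
image = pipe(
    "a quiet harbor at dawn",
    classifier_weights="/path/to/safety_classifier_1280.pth",
).images[0]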
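The mid-block hook (step 5) is what fills mid["feat"]; the registration itself sits outside the excerpt. A minimal sketch of the capture pattern with diffusers' UNet2DConditionModel, assuming the hook hangs off unet.mid_block (the helper name is hypothetical):

import torch

def classify_mid_features(unet, latents, t, text_embeds, clf):
    """Run one UNet pass and classify the captured mid-block features (hypothetical helper)."""
    mid = {}

    def _capture(module, args, output):
        mid["feat"] = output  # (B, 1280, h, w) on SD v1.x UNets

    handle = unet.mid_block.register_forward_hook(_capture)
    try:
        _ = unet(latents, t, encoder_hidden_states=text_embeds).sample
    finally:
        handle.remove()  # never leak hooks across sampling steps

    feat = mid["feat"].detach().to(torch.float32)  # classifier stays in fp32
    return torch.softmax(clf(feat), dim=-1)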
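Two closing observations on the scoring lines. First, with CLASS_NAMES ordered as above, "safe" sits at index 3, so safe_class_index is presumably CLASS_NAMES.index('safe'). Second, because feat is detached, unsafe is a per-step scalar score rather than a differentiable objective; one plausible use, shown here purely as an assumed sketch (the diff ends before this point), is to modulate classifier-free guidance strength with it:

import torch

CLASS_NAMES = ['gore', 'hate', 'medical', 'safe', 'sexual']
safe_class_index = CLASS_NAMES.index('safe')  # == 3

def guided_noise(noise_uncond, noise_cond, unsafe, base_scale=7.5, safety_boost=5.0):
    """Blend CFG noise predictions, tightening guidance as the unsafe score rises.

    `unsafe` in [0, 1] comes from 1 - P(safe) over the classifier's softmax;
    the modulation rule itself is an assumption, not taken from the diff.
    """
    scale = base_scale + safety_boost * float(unsafe)
    return noise_uncond + scale * (noise_cond - noise_uncond)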