Spaces:

anthonym21
/

safety-lens

Running on Zero

App Files Community

anthonym21 committed on 20 days ago

Commit

4328168

verified ·

1 Parent(s): 30b5d0d

Upload app.py with huggingface_hub

Browse files

Files changed (1) hide show

app.py +41 -20

app.py CHANGED Viewed

@@ -22,6 +22,33 @@ DEFAULT_LAYER = 6
 NUM_GENERATE_TOKENS = 30
 def load_model(model_id: str, layer_idx: int):
     """Load a model and calibrate persona vectors."""
     status_lines = [f"Loading {model_id}..."]
@@ -31,30 +58,24 @@ def load_model(model_id: str, layer_idx: int):
     if tokenizer.pad_token is None:
         tokenizer.pad_token = tokenizer.eos_token
-    dtype = torch.float16 if torch.cuda.is_available() else torch.float32
-    device_map = "auto" if torch.cuda.is_available() else "cpu"
-    model = AutoModelForCausalLM.from_pretrained(
-        model_id, torch_dtype=dtype, device_map=device_map
-    )
-    model.eval()
-    lens = SafetyLens(model, tokenizer)
-    _state["lens"] = lens
-    _state["model"] = model
-    _state["tokenizer"] = tokenizer
-    _state["vectors"] = {}
-    status_lines.append(f"Loaded on {lens.device}. Calibrating persona vectors on layer {layer_idx}...")
     yield "\n".join(status_lines), None, None
-    for name, stim in STIMULUS_SETS.items():
-        vec = lens.extract_persona_vector(stim["pos"], stim["neg"], layer_idx)
-        _state["vectors"][name] = vec
-        status_lines.append(f"  Calibrated: {name}")
-        yield "\n".join(status_lines), None, None
-    status_lines.append("Ready for scanning.")
     yield "\n".join(status_lines), None, None

 NUM_GENERATE_TOKENS = 30
+def _calibrate_on_gpu(model, tokenizer, layer_idx: int):
+    """Calibrate persona vectors — runs inside @spaces.GPU on ZeroGPU."""
+    if torch.cuda.is_available():
+        model = model.half().to("cuda")
+    model.eval()
+    lens = SafetyLens(model, tokenizer)
+    _state["lens"] = lens
+    _state["model"] = model
+    _state["tokenizer"] = tokenizer
+    _state["vectors"] = {}
+    vectors = {}
+    for name, stim in STIMULUS_SETS.items():
+        vec = lens.extract_persona_vector(stim["pos"], stim["neg"], layer_idx)
+        vectors[name] = vec
+    _state["vectors"] = vectors
+    return lens.device, list(vectors.keys())
+# Wrap calibration for ZeroGPU when on HF Spaces
+if IS_HF_SPACE:
+    _calibrate_on_gpu = spaces.GPU()(_calibrate_on_gpu)
 def load_model(model_id: str, layer_idx: int):
     """Load a model and calibrate persona vectors."""
     status_lines = [f"Loading {model_id}..."]
     if tokenizer.pad_token is None:
         tokenizer.pad_token = tokenizer.eos_token
+    # On ZeroGPU, load on CPU first — GPU is only available inside @spaces.GPU
+    if IS_HF_SPACE:
+        model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float32)
+    else:
+        dtype = torch.float16 if torch.cuda.is_available() else torch.float32
+        device_map = "auto" if torch.cuda.is_available() else "cpu"
+        model = AutoModelForCausalLM.from_pretrained(
+            model_id, torch_dtype=dtype, device_map=device_map
+        )
+    status_lines.append(f"Model loaded. Calibrating persona vectors on layer {layer_idx}...")
     yield "\n".join(status_lines), None, None
+    device, calibrated = _calibrate_on_gpu(model, tokenizer, layer_idx)
+    for name in calibrated:
+        status_lines.append(f"  Calibrated: {name}")
+    status_lines.append(f"Ready for scanning on {device}.")
     yield "\n".join(status_lines), None, None