Spaces:
Running on Zero
Running on Zero
Upload 133 files
Browse files- app.py +49 -2
- obliteratus/abliterate.py +7 -2
- obliteratus/reporting/report.py +12 -2
- pyproject.toml +1 -1
- tests/test_abliterate.py +13 -4
app.py
CHANGED
|
@@ -164,6 +164,7 @@ _state: dict = {
|
|
| 164 |
"model_name": None,
|
| 165 |
"method": None,
|
| 166 |
"status": "idle", # idle | obliterating | ready
|
|
|
|
| 167 |
"log": [],
|
| 168 |
# Activation steering metadata (survives model reload)
|
| 169 |
"steering": None, # dict with refusal_directions, strong_layers, steering_strength
|
|
@@ -755,6 +756,27 @@ def _should_quantize(model_id: str, is_preset: bool = False) -> str | None:
|
|
| 755 |
# Obliteration
|
| 756 |
# ---------------------------------------------------------------------------
|
| 757 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 758 |
def _clear_gpu():
|
| 759 |
"""Free GPU/accelerator memory. Resilient to device errors."""
|
| 760 |
with _lock:
|
|
@@ -1913,6 +1935,9 @@ def obliterate(model_choice: str, method_choice: str,
|
|
| 1913 |
use_custom = custom_harmful and custom_harmful.strip()
|
| 1914 |
dataset_key = get_source_key_from_label(dataset_source_choice) if dataset_source_choice else "builtin"
|
| 1915 |
|
|
|
|
|
|
|
|
|
|
| 1916 |
_clear_gpu()
|
| 1917 |
with _lock:
|
| 1918 |
if _state["status"] == "obliterating":
|
|
@@ -1920,6 +1945,7 @@ def obliterate(model_choice: str, method_choice: str,
|
|
| 1920 |
return
|
| 1921 |
_state["log"] = []
|
| 1922 |
_state["status"] = "obliterating"
|
|
|
|
| 1923 |
_state["model_name"] = model_choice
|
| 1924 |
_state["method"] = method
|
| 1925 |
|
|
@@ -2094,6 +2120,7 @@ def obliterate(model_choice: str, method_choice: str,
|
|
| 2094 |
log_lines.append(f"\nERROR: {err_msg}")
|
| 2095 |
with _lock:
|
| 2096 |
_state["status"] = "idle"
|
|
|
|
| 2097 |
_state["log"] = log_lines
|
| 2098 |
yield f"**Error:** {err_msg}", "\n".join(log_lines), get_chat_header(), gr.update(), gr.update(), gr.update()
|
| 2099 |
return
|
|
@@ -2107,6 +2134,7 @@ def obliterate(model_choice: str, method_choice: str,
|
|
| 2107 |
# (e.g. import failure caught internally, or early return in worker).
|
| 2108 |
with _lock:
|
| 2109 |
_state["status"] = "idle"
|
|
|
|
| 2110 |
log_lines.append("\nERROR: Pipeline completed but produced no result.")
|
| 2111 |
with _lock:
|
| 2112 |
_state["log"] = log_lines
|
|
@@ -2200,6 +2228,7 @@ def obliterate(model_choice: str, method_choice: str,
|
|
| 2200 |
_state["model"] = pipeline.handle.model
|
| 2201 |
_state["tokenizer"] = pipeline.handle.tokenizer
|
| 2202 |
_state["status"] = "ready"
|
|
|
|
| 2203 |
else:
|
| 2204 |
# Model too large for generation at full precision. Free it and
|
| 2205 |
# reload a smaller copy so the KV cache fits in GPU.
|
|
@@ -2252,6 +2281,7 @@ def obliterate(model_choice: str, method_choice: str,
|
|
| 2252 |
_state["model"] = model_reloaded
|
| 2253 |
_state["tokenizer"] = tokenizer_reloaded
|
| 2254 |
_state["status"] = "ready"
|
|
|
|
| 2255 |
can_generate = True
|
| 2256 |
log_lines.append("Reloaded in 4-bit — chat is ready!")
|
| 2257 |
except Exception as e:
|
|
@@ -2293,6 +2323,7 @@ def obliterate(model_choice: str, method_choice: str,
|
|
| 2293 |
_state["model"] = model_reloaded
|
| 2294 |
_state["tokenizer"] = tokenizer_reloaded
|
| 2295 |
_state["status"] = "ready"
|
|
|
|
| 2296 |
can_generate = True
|
| 2297 |
log_lines.append("Reloaded with CPU offload — chat is ready (may be slower).")
|
| 2298 |
except Exception as e:
|
|
@@ -2300,6 +2331,7 @@ def obliterate(model_choice: str, method_choice: str,
|
|
| 2300 |
log_lines.append("Chat unavailable. Load the saved model on a larger instance.")
|
| 2301 |
with _lock:
|
| 2302 |
_state["status"] = "idle"
|
|
|
|
| 2303 |
|
| 2304 |
# Build metrics summary card while pipeline is still alive
|
| 2305 |
metrics_card = _format_obliteration_metrics(pipeline, method, _elapsed())
|
|
@@ -2346,6 +2378,7 @@ def obliterate(model_choice: str, method_choice: str,
|
|
| 2346 |
log_lines.append(f"\nERROR (post-pipeline): {err_msg}")
|
| 2347 |
with _lock:
|
| 2348 |
_state["status"] = "idle"
|
|
|
|
| 2349 |
_state["log"] = log_lines
|
| 2350 |
yield f"**Error:** {err_msg}", "\n".join(log_lines), get_chat_header(), gr.update(), gr.update(), gr.update()
|
| 2351 |
|
|
@@ -2402,6 +2435,9 @@ def chat_respond(message: str, history: list[dict], system_prompt: str,
|
|
| 2402 |
|
| 2403 |
On ZeroGPU, allocates a GPU for up to 2 minutes per response.
|
| 2404 |
"""
|
|
|
|
|
|
|
|
|
|
| 2405 |
with _lock:
|
| 2406 |
model = _state["model"]
|
| 2407 |
tokenizer = _state["tokenizer"]
|
|
@@ -2418,7 +2454,12 @@ def chat_respond(message: str, history: list[dict], system_prompt: str,
|
|
| 2418 |
if model_dev.type == "meta":
|
| 2419 |
_needs_reload = True
|
| 2420 |
elif dev.is_gpu_available() and model_dev.type not in ("cuda", "mps"):
|
| 2421 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2422 |
except Exception:
|
| 2423 |
_needs_reload = True
|
| 2424 |
|
|
@@ -2707,6 +2748,9 @@ def load_bench_into_chat(choice: str, progress=gr.Progress()):
|
|
| 2707 |
)
|
| 2708 |
return
|
| 2709 |
|
|
|
|
|
|
|
|
|
|
| 2710 |
with _lock:
|
| 2711 |
if _state["status"] == "obliterating":
|
| 2712 |
yield "**Error:** An obliteration is already in progress.", ""
|
|
@@ -2888,7 +2932,10 @@ def ab_chat_respond(message: str, history_left: list[dict], history_right: list[
|
|
| 2888 |
if model_dev.type == "meta":
|
| 2889 |
_needs_reload = True
|
| 2890 |
elif dev.is_gpu_available() and model_dev.type not in ("cuda", "mps"):
|
| 2891 |
-
|
|
|
|
|
|
|
|
|
|
| 2892 |
except Exception:
|
| 2893 |
_needs_reload = True
|
| 2894 |
|
|
|
|
| 164 |
"model_name": None,
|
| 165 |
"method": None,
|
| 166 |
"status": "idle", # idle | obliterating | ready
|
| 167 |
+
"obliterate_started_at": None, # time.time() when obliteration started
|
| 168 |
"log": [],
|
| 169 |
# Activation steering metadata (survives model reload)
|
| 170 |
"steering": None, # dict with refusal_directions, strong_layers, steering_strength
|
|
|
|
| 756 |
# Obliteration
|
| 757 |
# ---------------------------------------------------------------------------
|
| 758 |
|
| 759 |
+
def _unstick_stale_obliterating(max_age: float = 360.0) -> bool:
    """Recover from an 'obliterating' status that has outlived its run.

    ZeroGPU can kill the obliterate generator mid-execution (duration=300s
    timeout), which leaves _state["status"] stuck at "obliterating" forever.
    When that is detected the status is put back to "idle" so the Chat tab
    and later obliterations are not blocked.

    Args:
        max_age: Seconds an obliteration may stay in progress before it is
            considered stale.

    Returns:
        True if the status was reset to "idle", False otherwise.
    """
    with _lock:
        in_progress = _state["status"] == "obliterating"
        if not in_progress:
            return False

        began_at = _state.get("obliterate_started_at")
        # A missing start timestamp is handled the same way as an expired one.
        expired = began_at is None or (time.time() - began_at) > max_age
        if not expired:
            return False

        _state["status"] = "idle"
        _state["obliterate_started_at"] = None
        return True
|
| 778 |
+
|
| 779 |
+
|
| 780 |
def _clear_gpu():
|
| 781 |
"""Free GPU/accelerator memory. Resilient to device errors."""
|
| 782 |
with _lock:
|
|
|
|
| 1935 |
use_custom = custom_harmful and custom_harmful.strip()
|
| 1936 |
dataset_key = get_source_key_from_label(dataset_source_choice) if dataset_source_choice else "builtin"
|
| 1937 |
|
| 1938 |
+
# Unstick stale "obliterating" status left behind by ZeroGPU timeout
|
| 1939 |
+
_unstick_stale_obliterating()
|
| 1940 |
+
|
| 1941 |
_clear_gpu()
|
| 1942 |
with _lock:
|
| 1943 |
if _state["status"] == "obliterating":
|
|
|
|
| 1945 |
return
|
| 1946 |
_state["log"] = []
|
| 1947 |
_state["status"] = "obliterating"
|
| 1948 |
+
_state["obliterate_started_at"] = time.time()
|
| 1949 |
_state["model_name"] = model_choice
|
| 1950 |
_state["method"] = method
|
| 1951 |
|
|
|
|
| 2120 |
log_lines.append(f"\nERROR: {err_msg}")
|
| 2121 |
with _lock:
|
| 2122 |
_state["status"] = "idle"
|
| 2123 |
+
_state["obliterate_started_at"] = None
|
| 2124 |
_state["log"] = log_lines
|
| 2125 |
yield f"**Error:** {err_msg}", "\n".join(log_lines), get_chat_header(), gr.update(), gr.update(), gr.update()
|
| 2126 |
return
|
|
|
|
| 2134 |
# (e.g. import failure caught internally, or early return in worker).
|
| 2135 |
with _lock:
|
| 2136 |
_state["status"] = "idle"
|
| 2137 |
+
_state["obliterate_started_at"] = None
|
| 2138 |
log_lines.append("\nERROR: Pipeline completed but produced no result.")
|
| 2139 |
with _lock:
|
| 2140 |
_state["log"] = log_lines
|
|
|
|
| 2228 |
_state["model"] = pipeline.handle.model
|
| 2229 |
_state["tokenizer"] = pipeline.handle.tokenizer
|
| 2230 |
_state["status"] = "ready"
|
| 2231 |
+
_state["obliterate_started_at"] = None
|
| 2232 |
else:
|
| 2233 |
# Model too large for generation at full precision. Free it and
|
| 2234 |
# reload a smaller copy so the KV cache fits in GPU.
|
|
|
|
| 2281 |
_state["model"] = model_reloaded
|
| 2282 |
_state["tokenizer"] = tokenizer_reloaded
|
| 2283 |
_state["status"] = "ready"
|
| 2284 |
+
_state["obliterate_started_at"] = None
|
| 2285 |
can_generate = True
|
| 2286 |
log_lines.append("Reloaded in 4-bit — chat is ready!")
|
| 2287 |
except Exception as e:
|
|
|
|
| 2323 |
_state["model"] = model_reloaded
|
| 2324 |
_state["tokenizer"] = tokenizer_reloaded
|
| 2325 |
_state["status"] = "ready"
|
| 2326 |
+
_state["obliterate_started_at"] = None
|
| 2327 |
can_generate = True
|
| 2328 |
log_lines.append("Reloaded with CPU offload — chat is ready (may be slower).")
|
| 2329 |
except Exception as e:
|
|
|
|
| 2331 |
log_lines.append("Chat unavailable. Load the saved model on a larger instance.")
|
| 2332 |
with _lock:
|
| 2333 |
_state["status"] = "idle"
|
| 2334 |
+
_state["obliterate_started_at"] = None
|
| 2335 |
|
| 2336 |
# Build metrics summary card while pipeline is still alive
|
| 2337 |
metrics_card = _format_obliteration_metrics(pipeline, method, _elapsed())
|
|
|
|
| 2378 |
log_lines.append(f"\nERROR (post-pipeline): {err_msg}")
|
| 2379 |
with _lock:
|
| 2380 |
_state["status"] = "idle"
|
| 2381 |
+
_state["obliterate_started_at"] = None
|
| 2382 |
_state["log"] = log_lines
|
| 2383 |
yield f"**Error:** {err_msg}", "\n".join(log_lines), get_chat_header(), gr.update(), gr.update(), gr.update()
|
| 2384 |
|
|
|
|
| 2435 |
|
| 2436 |
On ZeroGPU, allocates a GPU for up to 2 minutes per response.
|
| 2437 |
"""
|
| 2438 |
+
# Unstick stale "obliterating" status left behind by ZeroGPU timeout
|
| 2439 |
+
_unstick_stale_obliterating()
|
| 2440 |
+
|
| 2441 |
with _lock:
|
| 2442 |
model = _state["model"]
|
| 2443 |
tokenizer = _state["tokenizer"]
|
|
|
|
| 2454 |
if model_dev.type == "meta":
|
| 2455 |
_needs_reload = True
|
| 2456 |
elif dev.is_gpu_available() and model_dev.type not in ("cuda", "mps"):
|
| 2457 |
+
# Only move to GPU if the model wasn't loaded with device_map
|
| 2458 |
+
# (distributed models can't be moved with a single .to() call).
|
| 2459 |
+
if hasattr(model, "hf_device_map"):
|
| 2460 |
+
_needs_reload = True
|
| 2461 |
+
else:
|
| 2462 |
+
model.to(dev.get_device())
|
| 2463 |
except Exception:
|
| 2464 |
_needs_reload = True
|
| 2465 |
|
|
|
|
| 2748 |
)
|
| 2749 |
return
|
| 2750 |
|
| 2751 |
+
# Unstick stale "obliterating" status left behind by ZeroGPU timeout
|
| 2752 |
+
_unstick_stale_obliterating()
|
| 2753 |
+
|
| 2754 |
with _lock:
|
| 2755 |
if _state["status"] == "obliterating":
|
| 2756 |
yield "**Error:** An obliteration is already in progress.", ""
|
|
|
|
| 2932 |
if model_dev.type == "meta":
|
| 2933 |
_needs_reload = True
|
| 2934 |
elif dev.is_gpu_available() and model_dev.type not in ("cuda", "mps"):
|
| 2935 |
+
if hasattr(abliterated_model, "hf_device_map"):
|
| 2936 |
+
_needs_reload = True
|
| 2937 |
+
else:
|
| 2938 |
+
abliterated_model.to(dev.get_device())
|
| 2939 |
except Exception:
|
| 2940 |
_needs_reload = True
|
| 2941 |
|
obliteratus/abliterate.py
CHANGED
|
@@ -1452,8 +1452,13 @@ class AbliterationPipeline:
|
|
| 1452 |
|
| 1453 |
device = self._get_model_device(model)
|
| 1454 |
|
| 1455 |
-
# Batch prompts for throughput — hooks unbatch per-prompt activations
|
| 1456 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1457 |
# Left-pad so position -1 is always the last real token in every batch element
|
| 1458 |
orig_padding_side = getattr(tokenizer, "padding_side", "right")
|
| 1459 |
if batch_size > 1:
|
|
|
|
| 1452 |
|
| 1453 |
device = self._get_model_device(model)
|
| 1454 |
|
| 1455 |
+
# Batch prompts for throughput — hooks unbatch per-prompt activations.
|
| 1456 |
+
# On CPU-only (free_gb=0), batch_size=4 is safe since system RAM is
|
| 1457 |
+
# typically more abundant than GPU VRAM.
|
| 1458 |
+
if not dev.is_gpu_available():
|
| 1459 |
+
batch_size = 4
|
| 1460 |
+
else:
|
| 1461 |
+
batch_size = 16 if free_gb > _tight_gb else 8 if free_gb > _low_gb else 1
|
| 1462 |
# Left-pad so position -1 is always the last real token in every batch element
|
| 1463 |
orig_padding_side = getattr(tokenizer, "padding_side", "right")
|
| 1464 |
if batch_size > 1:
|
obliteratus/reporting/report.py
CHANGED
|
@@ -144,7 +144,12 @@ class AblationReport:
|
|
| 144 |
if output_path:
|
| 145 |
matplotlib.use("Agg")
|
| 146 |
import matplotlib.pyplot as plt
|
| 147 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 148 |
|
| 149 |
if metric is None:
|
| 150 |
metric = list(self.baseline_metrics.keys())[0]
|
|
@@ -182,7 +187,12 @@ class AblationReport:
|
|
| 182 |
if output_path:
|
| 183 |
matplotlib.use("Agg")
|
| 184 |
import matplotlib.pyplot as plt
|
| 185 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 186 |
|
| 187 |
df = self.to_dataframe()
|
| 188 |
pct_cols = [c for c in df.columns if c.endswith("_pct_change")]
|
|
|
|
| 144 |
if output_path:
|
| 145 |
matplotlib.use("Agg")
|
| 146 |
import matplotlib.pyplot as plt
|
| 147 |
+
try:
|
| 148 |
+
import seaborn as sns
|
| 149 |
+
except ImportError:
|
| 150 |
+
raise ImportError(
|
| 151 |
+
"seaborn is required for plotting. Install it with: pip install seaborn>=0.12"
|
| 152 |
+
)
|
| 153 |
|
| 154 |
if metric is None:
|
| 155 |
metric = list(self.baseline_metrics.keys())[0]
|
|
|
|
| 187 |
if output_path:
|
| 188 |
matplotlib.use("Agg")
|
| 189 |
import matplotlib.pyplot as plt
|
| 190 |
+
try:
|
| 191 |
+
import seaborn as sns
|
| 192 |
+
except ImportError:
|
| 193 |
+
raise ImportError(
|
| 194 |
+
"seaborn is required for plotting. Install it with: pip install seaborn>=0.12"
|
| 195 |
+
)
|
| 196 |
|
| 197 |
df = self.to_dataframe()
|
| 198 |
pct_cols = [c for c in df.columns if c.endswith("_pct_change")]
|
pyproject.toml
CHANGED
|
@@ -34,7 +34,6 @@ dependencies = [
|
|
| 34 |
"numpy>=1.24",
|
| 35 |
"scikit-learn>=1.3",
|
| 36 |
"tqdm>=4.64",
|
| 37 |
-
"bitsandbytes>=0.46.1",
|
| 38 |
]
|
| 39 |
|
| 40 |
[project.urls]
|
|
@@ -44,6 +43,7 @@ dependencies = [
|
|
| 44 |
|
| 45 |
[project.optional-dependencies]
|
| 46 |
dev = ["pytest>=7.0", "pytest-cov", "ruff", "mypy"]
|
|
|
|
| 47 |
spaces = ["gradio>=5.0,<6.0"]
|
| 48 |
|
| 49 |
[project.scripts]
|
|
|
|
| 34 |
"numpy>=1.24",
|
| 35 |
"scikit-learn>=1.3",
|
| 36 |
"tqdm>=4.64",
|
|
|
|
| 37 |
]
|
| 38 |
|
| 39 |
[project.urls]
|
|
|
|
| 43 |
|
| 44 |
[project.optional-dependencies]
|
| 45 |
dev = ["pytest>=7.0", "pytest-cov", "ruff", "mypy"]
|
| 46 |
+
cuda = ["bitsandbytes>=0.46.1"]
|
| 47 |
spaces = ["gradio>=5.0,<6.0"]
|
| 48 |
|
| 49 |
[project.scripts]
|
tests/test_abliterate.py
CHANGED
|
@@ -42,10 +42,19 @@ def _make_tiny_handle():
|
|
| 42 |
tokenizer = MagicMock()
|
| 43 |
tokenizer.pad_token = "<pad>"
|
| 44 |
tokenizer.eos_token = "<eos>"
|
| 45 |
-
tokenizer.
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 49 |
tokenizer.decode.return_value = "The capital of France is Paris, a beautiful city"
|
| 50 |
|
| 51 |
handle = ModelHandle(
|
|
|
|
| 42 |
tokenizer = MagicMock()
|
| 43 |
tokenizer.pad_token = "<pad>"
|
| 44 |
tokenizer.eos_token = "<eos>"
|
| 45 |
+
tokenizer.pad_token_id = 0
|
| 46 |
+
tokenizer.eos_token_id = 1
|
| 47 |
+
# Return batch-aware tensors: if called with a list, batch_size = len(list)
|
| 48 |
+
def _mock_tokenize(text_or_list, **kwargs):
|
| 49 |
+
if isinstance(text_or_list, list):
|
| 50 |
+
bs = len(text_or_list)
|
| 51 |
+
else:
|
| 52 |
+
bs = 1
|
| 53 |
+
return {
|
| 54 |
+
"input_ids": torch.randint(0, 1000, (bs, 10)),
|
| 55 |
+
"attention_mask": torch.ones(bs, 10, dtype=torch.long),
|
| 56 |
+
}
|
| 57 |
+
tokenizer.side_effect = _mock_tokenize
|
| 58 |
tokenizer.decode.return_value = "The capital of France is Paris, a beautiful city"
|
| 59 |
|
| 60 |
handle = ModelHandle(
|