pliny-the-prompter committed on
Commit
a55d60a
·
verified ·
1 Parent(s): 0df9125

Upload 129 files

Browse files
README.md CHANGED
@@ -6,7 +6,7 @@ colorTo: gray
6
  sdk: gradio
7
  sdk_version: "5.29.0"
8
  app_file: app.py
9
- persistent_storage: true
10
  pinned: true
11
  license: agpl-3.0
12
  tags:
 
6
  sdk: gradio
7
  sdk_version: "5.29.0"
8
  app_file: app.py
9
+ persistent_storage: large
10
  pinned: true
11
  license: agpl-3.0
12
  tags:
app.py CHANGED
@@ -1577,16 +1577,6 @@ def obliterate(model_choice: str, method_choice: str, hub_repo: str,
1577
  _ts = datetime.now().strftime("%H:%M")
1578
  _short_model = model_id.split("/")[-1] if "/" in model_id else model_id
1579
  _cache_label = f"{method} on {_short_model} ({_ts})"
1580
- _last_obliterated_label = _cache_label
1581
- _session_models[_cache_label] = {
1582
- "model_id": model_id,
1583
- "model_choice": model_choice,
1584
- "method": method,
1585
- "dataset_key": dataset_key if not use_custom else "custom",
1586
- "prompt_volume": prompt_volume,
1587
- "output_dir": save_dir,
1588
- "source": "obliterate",
1589
- }
1590
 
1591
  # Preserve activation steering metadata for re-installation after reload
1592
  steering_meta = None
@@ -1601,6 +1591,16 @@ def obliterate(model_choice: str, method_choice: str, hub_repo: str,
1601
  "steering_strength": pipeline.steering_strength,
1602
  }
1603
  with _lock:
 
 
 
 
 
 
 
 
 
 
1604
  _state["steering"] = steering_meta
1605
  _state["output_dir"] = save_dir # for ZeroGPU checkpoint reload
1606
 
@@ -3813,13 +3813,15 @@ To opt out, set the environment variable `OBLITERATUS_TELEMETRY=0` before launch
3813
  def _load_leaderboard():
3814
  """Load leaderboard data and format as markdown table."""
3815
  try:
3816
- from obliteratus.telemetry import get_leaderboard_data, is_telemetry_enabled
3817
  if not is_telemetry_enabled():
3818
  return "Telemetry is disabled. Remove `OBLITERATUS_TELEMETRY=0` or set it to `1` to re-enable.", ""
3819
 
3820
  data = get_leaderboard_data()
3821
  if not data:
3822
- return "No benchmark results yet. Run a benchmark to populate the leaderboard!", ""
 
 
3823
 
3824
  # Build markdown table
3825
  lines = [
@@ -3848,16 +3850,20 @@ To opt out, set the environment variable `OBLITERATUS_TELEMETRY=0` before launch
3848
  unique_models = len(set(r['model_id'] for r in data))
3849
  unique_methods = len(set(r['method'] for r in data))
3850
 
3851
- # Check data source
3852
  from obliteratus.telemetry import _TELEMETRY_REPO
3853
  source_note = ""
3854
  if _TELEMETRY_REPO:
3855
  source_note = f" | Data source: local + [{_TELEMETRY_REPO}](https://huggingface.co/datasets/{_TELEMETRY_REPO})"
3856
 
 
 
 
 
3857
  summary = (
3858
  f"**{total_runs}** total runs across "
3859
  f"**{unique_models}** models and "
3860
- f"**{unique_methods}** methods{source_note}"
3861
  )
3862
  return table, summary
3863
  except Exception as e:
 
1577
  _ts = datetime.now().strftime("%H:%M")
1578
  _short_model = model_id.split("/")[-1] if "/" in model_id else model_id
1579
  _cache_label = f"{method} on {_short_model} ({_ts})"
 
 
 
 
 
 
 
 
 
 
1580
 
1581
  # Preserve activation steering metadata for re-installation after reload
1582
  steering_meta = None
 
1591
  "steering_strength": pipeline.steering_strength,
1592
  }
1593
  with _lock:
1594
+ _last_obliterated_label = _cache_label
1595
+ _session_models[_cache_label] = {
1596
+ "model_id": model_id,
1597
+ "model_choice": model_choice,
1598
+ "method": method,
1599
+ "dataset_key": dataset_key if not use_custom else "custom",
1600
+ "prompt_volume": prompt_volume,
1601
+ "output_dir": save_dir,
1602
+ "source": "obliterate",
1603
+ }
1604
  _state["steering"] = steering_meta
1605
  _state["output_dir"] = save_dir # for ZeroGPU checkpoint reload
1606
 
 
3813
  def _load_leaderboard():
3814
  """Load leaderboard data and format as markdown table."""
3815
  try:
3816
+ from obliteratus.telemetry import get_leaderboard_data, is_telemetry_enabled, storage_diagnostic
3817
  if not is_telemetry_enabled():
3818
  return "Telemetry is disabled. Remove `OBLITERATUS_TELEMETRY=0` or set it to `1` to re-enable.", ""
3819
 
3820
  data = get_leaderboard_data()
3821
  if not data:
3822
+ diag = storage_diagnostic()
3823
+ storage_info = f"Storage: `{diag['telemetry_dir']}` (persistent={diag['is_persistent']})"
3824
+ return f"No benchmark results yet. Run a benchmark to populate the leaderboard!\n\n{storage_info}", ""
3825
 
3826
  # Build markdown table
3827
  lines = [
 
3850
  unique_models = len(set(r['model_id'] for r in data))
3851
  unique_methods = len(set(r['method'] for r in data))
3852
 
3853
+ # Check data source and storage status
3854
  from obliteratus.telemetry import _TELEMETRY_REPO
3855
  source_note = ""
3856
  if _TELEMETRY_REPO:
3857
  source_note = f" | Data source: local + [{_TELEMETRY_REPO}](https://huggingface.co/datasets/{_TELEMETRY_REPO})"
3858
 
3859
+ diag = storage_diagnostic()
3860
+ persistent_badge = "persistent" if diag["is_persistent"] else "**EPHEMERAL**"
3861
+ storage_note = f" | Storage: `{diag['telemetry_dir']}` ({persistent_badge})"
3862
+
3863
  summary = (
3864
  f"**{total_runs}** total runs across "
3865
  f"**{unique_models}** models and "
3866
+ f"**{unique_methods}** methods{source_note}{storage_note}"
3867
  )
3868
  return table, summary
3869
  except Exception as e:
hf-spaces/README.md CHANGED
@@ -7,7 +7,7 @@ sdk: gradio
7
  sdk_version: "5.29.0"
8
  app_file: app.py
9
  hardware: zero-a10g
10
- persistent_storage: true
11
  pinned: true
12
  license: agpl-3.0
13
  tags:
 
7
  sdk_version: "5.29.0"
8
  app_file: app.py
9
  hardware: zero-a10g
10
+ persistent_storage: large
11
  pinned: true
12
  license: agpl-3.0
13
  tags:
obliteratus/abliterate.py CHANGED
@@ -961,15 +961,23 @@ class AbliterationPipeline:
961
  n_profiled = sum(1 for v in self._routing_harmful.values() if v)
962
  self.log(f" Router profiling complete: {n_profiled} MoE layers profiled")
963
 
 
964
  for idx in range(n_layers):
965
  if self._harmful_acts[idx] and self._harmless_acts[idx]:
966
  self._harmful_means[idx] = torch.stack(self._harmful_acts[idx]).mean(dim=0)
967
  self._harmless_means[idx] = torch.stack(self._harmless_acts[idx]).mean(dim=0)
968
  else:
969
  # Layer produced no activations (hook failure or skipped layer)
 
970
  hidden = self._harmful_acts[0][0].shape[-1] if self._harmful_acts.get(0) else 768
971
  self._harmful_means[idx] = torch.zeros(1, hidden)
972
  self._harmless_means[idx] = torch.zeros(1, hidden)
 
 
 
 
 
 
973
 
974
  # ── Jailbreak-contrastive probing ─────────────────────────────────
975
  if self.use_jailbreak_contrast:
@@ -1421,6 +1429,31 @@ class AbliterationPipeline:
1421
  norms: dict[int, float] = {}
1422
  n_dirs = self.n_directions
1423
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1424
  # Optionally use Wasserstein-optimal direction extraction
1425
  wasserstein_extractor = None
1426
  if self.use_wasserstein_optimal:
@@ -1653,7 +1686,7 @@ class AbliterationPipeline:
1653
  if (is_small_by_layers or is_small_by_capacity or is_small_by_params) and len(self._strong_layers) > 0:
1654
  if is_small_by_layers:
1655
  max_layer_frac = 0.25
1656
- reason = f"≤16 layers"
1657
  else:
1658
  max_layer_frac = 0.20
1659
  reasons = []
@@ -2877,13 +2910,39 @@ class AbliterationPipeline:
2877
  if self.spectral_cascade and self._strong_layers:
2878
  self._apply_spectral_cascade_weights()
2879
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2880
  # Track previous directions for cosine-similarity early-exit
2881
  _prev_directions: dict[int, torch.Tensor] = {}
2882
 
2883
- for pass_num in range(self.refinement_passes):
2884
  modified_this_pass = 0
2885
- if self.refinement_passes > 1:
2886
- self.log(f"Refinement pass {pass_num + 1}/{self.refinement_passes}")
2887
 
2888
  # True iterative refinement: re-probe and re-distill after first pass
2889
  if pass_num > 0 and self.true_iterative_refinement:
@@ -3439,6 +3498,21 @@ class AbliterationPipeline:
3439
  norms: dict[int, float] = {}
3440
  n_dirs = self.n_directions
3441
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3442
  # Use Wasserstein-optimal extraction when enabled (matching main _distill)
3443
  wasserstein_extractor = None
3444
  if self.use_wasserstein_optimal:
 
961
  n_profiled = sum(1 for v in self._routing_harmful.values() if v)
962
  self.log(f" Router profiling complete: {n_profiled} MoE layers profiled")
963
 
964
+ empty_layers = []
965
  for idx in range(n_layers):
966
  if self._harmful_acts[idx] and self._harmless_acts[idx]:
967
  self._harmful_means[idx] = torch.stack(self._harmful_acts[idx]).mean(dim=0)
968
  self._harmless_means[idx] = torch.stack(self._harmless_acts[idx]).mean(dim=0)
969
  else:
970
  # Layer produced no activations (hook failure or skipped layer)
971
+ empty_layers.append(idx)
972
  hidden = self._harmful_acts[0][0].shape[-1] if self._harmful_acts.get(0) else 768
973
  self._harmful_means[idx] = torch.zeros(1, hidden)
974
  self._harmless_means[idx] = torch.zeros(1, hidden)
975
+ if empty_layers:
976
+ self.log(
977
+ f"WARNING: {len(empty_layers)} layers produced no activations "
978
+ f"(layers {empty_layers[:5]}{'...' if len(empty_layers) > 5 else ''}). "
979
+ f"These will be skipped during direction extraction."
980
+ )
981
 
982
  # ── Jailbreak-contrastive probing ─────────────────────────────────
983
  if self.use_jailbreak_contrast:
 
1429
  norms: dict[int, float] = {}
1430
  n_dirs = self.n_directions
1431
 
1432
+ # ── Small-model direction cap ──────────────────────────────────
1433
+ # On small models, each SVD direction removes a proportionally
1434
+ # larger fraction of weight energy. With norm preservation, this
1435
+ # amplifies noise in the remaining dimensions. Cap n_directions
1436
+ # to prevent over-ablation that destroys coherence.
1437
+ hidden_size = self.handle.hidden_size if self.handle else 0
1438
+ total_params = getattr(self.handle, 'total_params', 0) if self.handle else 0
1439
+ if total_params == 0 and self.handle:
1440
+ try:
1441
+ total_params = sum(p.numel() for p in self.handle.model.parameters())
1442
+ except Exception:
1443
+ pass
1444
+ if n_dirs > 1 and (
1445
+ (0 < hidden_size < 2048)
1446
+ or (0 < total_params < 2_000_000_000)
1447
+ or n_layers <= 16
1448
+ ):
1449
+ max_dirs = max(1, min(n_dirs, 2))
1450
+ if max_dirs < n_dirs:
1451
+ self.log(
1452
+ f"Capped n_directions from {n_dirs} to {max_dirs} for small model "
1453
+ f"(hidden={hidden_size}, params={total_params / 1e9:.1f}B, layers={n_layers})"
1454
+ )
1455
+ n_dirs = max_dirs
1456
+
1457
  # Optionally use Wasserstein-optimal direction extraction
1458
  wasserstein_extractor = None
1459
  if self.use_wasserstein_optimal:
 
1686
  if (is_small_by_layers or is_small_by_capacity or is_small_by_params) and len(self._strong_layers) > 0:
1687
  if is_small_by_layers:
1688
  max_layer_frac = 0.25
1689
+ reason = "≤16 layers"
1690
  else:
1691
  max_layer_frac = 0.20
1692
  reasons = []
 
2910
  if self.spectral_cascade and self._strong_layers:
2911
  self._apply_spectral_cascade_weights()
2912
 
2913
+ # ── Guard: compound norm amplification ────────────────────────
2914
+ # When true_iterative_refinement is disabled, subsequent passes
2915
+ # re-apply the SAME projection directions without re-probing.
2916
+ # With norm_preserve=True and regularization > 0, this creates
2917
+ # pathological amplification: each pass removes residual refusal
2918
+ # energy (reg% of previous), then norm-restoration rescales the
2919
+ # entire weight matrix UP to compensate, amplifying non-refusal
2920
+ # components. On small models (< 2B params) where refusal is a
2921
+ # significant fraction of total weight energy, this compounds into
2922
+ # inf perplexity and destroyed coherence.
2923
+ #
2924
+ # Fix: cap to 1 pass when not re-probing + norm-preserving + partial
2925
+ # regularization, since extra passes are purely destructive noise
2926
+ # amplification in this configuration.
2927
+ effective_passes = self.refinement_passes
2928
+ if (effective_passes > 1
2929
+ and not self.true_iterative_refinement
2930
+ and self.norm_preserve
2931
+ and self.regularization > 0):
2932
+ self.log(
2933
+ f"Capping refinement_passes from {effective_passes} to 1: "
2934
+ f"norm_preserve + regularization without re-probing causes "
2935
+ f"compound amplification (directions are not re-extracted)"
2936
+ )
2937
+ effective_passes = 1
2938
+
2939
  # Track previous directions for cosine-similarity early-exit
2940
  _prev_directions: dict[int, torch.Tensor] = {}
2941
 
2942
+ for pass_num in range(effective_passes):
2943
  modified_this_pass = 0
2944
+ if effective_passes > 1:
2945
+ self.log(f"Refinement pass {pass_num + 1}/{effective_passes}")
2946
 
2947
  # True iterative refinement: re-probe and re-distill after first pass
2948
  if pass_num > 0 and self.true_iterative_refinement:
 
3498
  norms: dict[int, float] = {}
3499
  n_dirs = self.n_directions
3500
 
3501
+ # Small-model direction cap (matching main _distill)
3502
+ hidden_size = self.handle.hidden_size if self.handle else 0
3503
+ total_params = getattr(self.handle, 'total_params', 0) if self.handle else 0
3504
+ if total_params == 0 and self.handle:
3505
+ try:
3506
+ total_params = sum(p.numel() for p in self.handle.model.parameters())
3507
+ except Exception:
3508
+ pass
3509
+ if n_dirs > 1 and (
3510
+ (0 < hidden_size < 2048)
3511
+ or (0 < total_params < 2_000_000_000)
3512
+ or n_layers <= 16
3513
+ ):
3514
+ n_dirs = max(1, min(n_dirs, 2))
3515
+
3516
  # Use Wasserstein-optimal extraction when enabled (matching main _distill)
3517
  wasserstein_extractor = None
3518
  if self.use_wasserstein_optimal:
obliteratus/telemetry.py CHANGED
@@ -72,31 +72,101 @@ _hub_sync_lock = threading.Lock()
72
  _hub_repo_created: bool = False
73
 
74
  # Locate writable telemetry directory
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
75
  def _telemetry_dir() -> Path:
76
  """Find a writable directory for telemetry storage.
77
 
78
- Prefers HuggingFace Spaces persistent storage (/data) when available,
79
- so that telemetry and leaderboard data survive container rebuilds.
 
 
 
 
 
 
 
 
80
  """
81
- candidates = [
82
- # HF Spaces persistent storage β€” survives container rebuilds
83
- Path("/data/obliteratus"),
84
- Path.home() / ".obliteratus",
85
- Path("/tmp/obliteratus_telemetry"),
86
- ]
87
- for d in candidates:
88
- try:
89
- d.mkdir(parents=True, exist_ok=True)
90
- # Test writability
91
- test_file = d / ".write_test"
92
- test_file.write_text("ok")
93
- test_file.unlink()
94
- return d
95
- except (PermissionError, OSError):
96
- continue
97
- # Last resort
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
98
  fallback = Path("/tmp/obliteratus_telemetry")
99
  fallback.mkdir(parents=True, exist_ok=True)
 
 
 
 
 
 
 
 
100
  return fallback
101
 
102
 
@@ -107,6 +177,35 @@ TELEMETRY_FILE = _TELEMETRY_DIR / "telemetry.jsonl"
107
  _write_lock = threading.Lock()
108
 
109
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
110
  def disable_telemetry():
111
  """Disable telemetry collection."""
112
  global _TELEMETRY_ENABLED, _enabled
@@ -371,6 +470,97 @@ def fetch_hub_records(max_records: int = 10000) -> list[dict[str, Any]]:
371
  return []
372
 
373
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
374
  # ── Hardware detection ────────────────────────────────────────────────
375
 
376
  def _detect_gpu() -> tuple[str, float]:
 
72
  _hub_repo_created: bool = False
73
 
74
  # Locate writable telemetry directory
75
+ def _is_mount_point(path: Path) -> bool:
76
+ """Check if a path is a mount point (different device from parent)."""
77
+ try:
78
+ if not path.exists():
79
+ return False
80
+ return path.stat().st_dev != path.parent.stat().st_dev
81
+ except (OSError, ValueError):
82
+ return False
83
+
84
+
85
+ def _test_writable(d: Path) -> bool:
86
+ """Test if a directory exists and is writable."""
87
+ try:
88
+ d.mkdir(parents=True, exist_ok=True)
89
+ test_file = d / ".write_test"
90
+ test_file.write_text("ok")
91
+ test_file.unlink()
92
+ return True
93
+ except (PermissionError, OSError):
94
+ return False
95
+
96
+
97
  def _telemetry_dir() -> Path:
98
  """Find a writable directory for telemetry storage.
99
 
100
+ Priority order:
101
+ 1. ``OBLITERATUS_DATA_DIR`` env var (explicit override)
102
+ 2. HuggingFace Spaces persistent storage (``/data/obliteratus``)
103
+ — survives container rebuilds and factory resets
104
+ 3. ``~/.obliteratus`` (local installs)
105
+ 4. ``/tmp/obliteratus_telemetry`` (last resort — does NOT survive rebuilds)
106
+
107
+ On HF Spaces, ``/data`` is the persistent storage mount point. If it
108
+ exists as a real mount but isn't writable yet (race during boot), we
109
+ retry briefly before falling through.
110
  """
111
+ # 1. Explicit override — always wins
112
+ explicit = os.environ.get("OBLITERATUS_DATA_DIR")
113
+ if explicit:
114
+ p = Path(explicit)
115
+ if _test_writable(p):
116
+ logger.info("Telemetry storage: %s (OBLITERATUS_DATA_DIR)", p)
117
+ return p
118
+ logger.warning(
119
+ "OBLITERATUS_DATA_DIR=%s is not writable, falling through", explicit
120
+ )
121
+
122
+ # 2. HF Spaces persistent storage at /data
123
+ if _ON_HF_SPACES:
124
+ data_root = Path("/data")
125
+ hf_dir = data_root / "obliteratus"
126
+ # On Spaces, /data may take a moment to mount after container start.
127
+ # Retry a few times if the directory exists as a mount point but
128
+ # isn't writable yet.
129
+ if data_root.exists():
130
+ for attempt in range(3):
131
+ if _test_writable(hf_dir):
132
+ if attempt > 0:
133
+ logger.info(
134
+ "Telemetry storage: %s (HF persistent, ready after %d retries)",
135
+ hf_dir, attempt,
136
+ )
137
+ else:
138
+ logger.info("Telemetry storage: %s (HF persistent storage)", hf_dir)
139
+ return hf_dir
140
+ # Brief wait for mount to become ready
141
+ if attempt < 2:
142
+ time.sleep(1)
143
+ # /data exists but isn't writable — warn loudly
144
+ is_mount = _is_mount_point(data_root)
145
+ logger.warning(
146
+ "/data exists (mount_point=%s) but /data/obliteratus is NOT writable. "
147
+ "Persistent storage may not be enabled for this Space. "
148
+ "Data will NOT survive factory rebuilds! "
149
+ "Enable persistent storage in Space settings or set OBLITERATUS_DATA_DIR.",
150
+ is_mount,
151
+ )
152
+
153
+ # 3. Home directory (local installs)
154
+ home_dir = Path.home() / ".obliteratus"
155
+ if _test_writable(home_dir):
156
+ logger.info("Telemetry storage: %s (home directory)", home_dir)
157
+ return home_dir
158
+
159
+ # 4. Last resort — /tmp does NOT survive rebuilds
160
  fallback = Path("/tmp/obliteratus_telemetry")
161
  fallback.mkdir(parents=True, exist_ok=True)
162
+ if _ON_HF_SPACES:
163
+ logger.warning(
164
+ "Telemetry storage: %s — this is EPHEMERAL and will be lost on rebuild! "
165
+ "Enable persistent storage in your Space settings.",
166
+ fallback,
167
+ )
168
+ else:
169
+ logger.info("Telemetry storage: %s (temporary)", fallback)
170
  return fallback
171
 
172
 
 
177
  _write_lock = threading.Lock()
178
 
179
 
180
+ def _is_persistent_storage() -> bool:
181
+ """Check if the current telemetry directory is on persistent storage."""
182
+ return str(_TELEMETRY_DIR).startswith("/data")
183
+
184
+
185
+ def storage_diagnostic() -> dict[str, Any]:
186
+ """Return a diagnostic dict about the current storage configuration.
187
+
188
+ Useful for debugging persistent storage issues on HF Spaces.
189
+ """
190
+ data_root = Path("/data")
191
+ return {
192
+ "telemetry_dir": str(_TELEMETRY_DIR),
193
+ "telemetry_file": str(TELEMETRY_FILE),
194
+ "telemetry_file_exists": TELEMETRY_FILE.exists(),
195
+ "telemetry_file_size_bytes": (
196
+ TELEMETRY_FILE.stat().st_size if TELEMETRY_FILE.exists() else 0
197
+ ),
198
+ "is_persistent": _is_persistent_storage(),
199
+ "on_hf_spaces": _ON_HF_SPACES,
200
+ "data_dir_exists": data_root.exists(),
201
+ "data_dir_is_mount": _is_mount_point(data_root),
202
+ "data_dir_writable": os.access(data_root, os.W_OK) if data_root.exists() else False,
203
+ "explicit_data_dir": os.environ.get("OBLITERATUS_DATA_DIR", ""),
204
+ "telemetry_repo": _TELEMETRY_REPO,
205
+ "telemetry_enabled": is_enabled(),
206
+ }
207
+
208
+
209
  def disable_telemetry():
210
  """Disable telemetry collection."""
211
  global _TELEMETRY_ENABLED, _enabled
 
470
  return []
471
 
472
 
473
+ # ── Hub restore (warm-start after rebuild) ────────────────────────────
474
+
475
+ _restore_done = False
476
+ _restore_lock = threading.Lock()
477
+
478
+
479
+ def restore_from_hub() -> int:
480
+ """Download community records from Hub into the local JSONL file.
481
+
482
+ This is the critical path for surviving factory rebuilds: even if
483
+ ``/data`` is wiped or unavailable, we can reconstruct the leaderboard
484
+ from the central Hub dataset on startup.
485
+
486
+ Records already present locally (by ``(session_id, timestamp)`` key)
487
+ are skipped to avoid duplicates.
488
+
489
+ Returns the number of new records restored.
490
+ """
491
+ global _restore_done
492
+ if _restore_done:
493
+ return 0
494
+ with _restore_lock:
495
+ if _restore_done:
496
+ return 0
497
+ _restore_done = True
498
+
499
+ repo = _TELEMETRY_REPO
500
+ if not repo:
501
+ return 0
502
+
503
+ try:
504
+ # Read existing local keys for dedup
505
+ existing_keys: set[tuple[str, str]] = set()
506
+ if TELEMETRY_FILE.exists():
507
+ try:
508
+ with open(TELEMETRY_FILE) as f:
509
+ for line in f:
510
+ line = line.strip()
511
+ if not line:
512
+ continue
513
+ try:
514
+ r = json.loads(line)
515
+ existing_keys.add(
516
+ (r.get("session_id", ""), r.get("timestamp", ""))
517
+ )
518
+ except json.JSONDecodeError:
519
+ continue
520
+ except Exception:
521
+ pass
522
+
523
+ hub_records = fetch_hub_records()
524
+ if not hub_records:
525
+ return 0
526
+
527
+ new_count = 0
528
+ with _write_lock:
529
+ with open(TELEMETRY_FILE, "a") as f:
530
+ for r in hub_records:
531
+ key = (r.get("session_id", ""), r.get("timestamp", ""))
532
+ if key in existing_keys:
533
+ continue
534
+ existing_keys.add(key)
535
+ f.write(json.dumps(r, default=str) + "\n")
536
+ new_count += 1
537
+
538
+ if new_count:
539
+ logger.info(
540
+ "Restored %d records from Hub repo %s to local storage at %s",
541
+ new_count, repo, TELEMETRY_FILE,
542
+ )
543
+ return new_count
544
+ except Exception as e:
545
+ logger.debug("Hub restore failed: %s", e)
546
+ return 0
547
+
548
+
549
+ def _restore_from_hub_bg() -> None:
550
+ """Background thread: restore Hub records to local on startup."""
551
+ try:
552
+ restore_from_hub()
553
+ except Exception as e:
554
+ logger.debug("Background Hub restore failed: %s", e)
555
+
556
+
557
+ # Auto-restore on HF Spaces startup (background, non-blocking).
558
+ # This ensures the leaderboard has data even after a factory rebuild.
559
+ if _ON_HF_SPACES and is_enabled() and _TELEMETRY_REPO:
560
+ _restore_thread = threading.Thread(target=_restore_from_hub_bg, daemon=True)
561
+ _restore_thread.start()
562
+
563
+
564
  # ── Hardware detection ────────────────────────────────────────────────
565
 
566
  def _detect_gpu() -> tuple[str, float]:
tests/test_abliterate.py CHANGED
@@ -1756,7 +1756,12 @@ class TestDistillBasic:
1756
 
1757
  class TestDistillSVD:
1758
  def test_multi_direction_svd(self, handle):
1759
- """Advanced method: SVD extracts multiple refusal directions."""
 
 
 
 
 
1760
  from obliteratus.strategies.utils import get_layer_modules
1761
 
1762
  pipeline = AbliterationPipeline(
@@ -1775,10 +1780,10 @@ class TestDistillSVD:
1775
 
1776
  n_layers = len(get_layer_modules(handle))
1777
  assert len(pipeline.refusal_subspaces) == n_layers
 
 
1778
  for idx, subspace in pipeline.refusal_subspaces.items():
1779
- # Should have min(n_directions, n_prompts, hidden_dim) directions
1780
- n_dirs = min(pipeline.n_directions, 5, handle.hidden_size)
1781
- assert subspace.shape[0] == n_dirs
1782
  assert subspace.shape[1] == handle.hidden_size
1783
 
1784
  # Primary direction should still be a unit vector
 
1756
 
1757
  class TestDistillSVD:
1758
  def test_multi_direction_svd(self, handle):
1759
+ """Advanced method: SVD extracts multiple refusal directions.
1760
+
1761
+ Note: on small models (hidden_size < 2048 or < 2B params), n_directions
1762
+ is automatically capped to 2 to prevent over-ablation. The test model
1763
+ (hidden_size=64, 4 layers) triggers this safeguard.
1764
+ """
1765
  from obliteratus.strategies.utils import get_layer_modules
1766
 
1767
  pipeline = AbliterationPipeline(
 
1780
 
1781
  n_layers = len(get_layer_modules(handle))
1782
  assert len(pipeline.refusal_subspaces) == n_layers
1783
+ # Small-model cap: n_directions capped to 2 for tiny test model
1784
+ expected_dirs = min(2, pipeline.n_directions, 5, handle.hidden_size)
1785
  for idx, subspace in pipeline.refusal_subspaces.items():
1786
+ assert subspace.shape[0] == expected_dirs
 
 
1787
  assert subspace.shape[1] == handle.hidden_size
1788
 
1789
  # Primary direction should still be a unit vector
tests/test_telemetry.py CHANGED
@@ -2,7 +2,9 @@
2
 
3
  import json
4
  import os
 
5
  from dataclasses import dataclass, field
 
6
  from unittest.mock import MagicMock, patch
7
 
8
  import torch
@@ -13,13 +15,17 @@ from obliteratus.telemetry import (
13
  _extract_excise_details,
14
  _extract_prompt_counts,
15
  _extract_analysis_insights,
 
 
16
  build_report,
17
  disable_telemetry,
18
  enable_telemetry,
19
  is_enabled,
20
  maybe_send_informed_report,
21
  maybe_send_pipeline_report,
 
22
  send_report,
 
23
  )
24
 
25
 
@@ -597,3 +603,94 @@ class TestStageDurationTracking:
597
 
598
  p._emit("summon", "running", "loading...", duration=0)
599
  assert p._stage_durations == {}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
 
3
  import json
4
  import os
5
+ import tempfile
6
  from dataclasses import dataclass, field
7
+ from pathlib import Path
8
  from unittest.mock import MagicMock, patch
9
 
10
  import torch
 
15
  _extract_excise_details,
16
  _extract_prompt_counts,
17
  _extract_analysis_insights,
18
+ _is_mount_point,
19
+ _test_writable,
20
  build_report,
21
  disable_telemetry,
22
  enable_telemetry,
23
  is_enabled,
24
  maybe_send_informed_report,
25
  maybe_send_pipeline_report,
26
+ restore_from_hub,
27
  send_report,
28
+ storage_diagnostic,
29
  )
30
 
31
 
 
603
 
604
  p._emit("summon", "running", "loading...", duration=0)
605
  assert p._stage_durations == {}
606
+
607
+
608
+ # ── Storage helpers ──────────────────────────────────────────────────────
609
+
610
+
611
+ class TestStorageHelpers:
612
+ """Test persistent storage helper functions."""
613
+
614
+ def test_test_writable_valid_dir(self):
615
+ with tempfile.TemporaryDirectory() as d:
616
+ assert _test_writable(Path(d) / "subdir")
617
+
618
+ def test_test_writable_unwritable(self):
619
+ # /proc is never writable for arbitrary files
620
+ assert not _test_writable(Path("/proc/obliteratus_test"))
621
+
622
+ def test_is_mount_point_existing_path(self):
623
+ # Should return a bool without raising for any existing path
624
+ result = _is_mount_point(Path("/"))
625
+ assert isinstance(result, bool)
626
+
627
+ def test_is_mount_point_nonexistent(self):
628
+ assert not _is_mount_point(Path("/nonexistent_dir_12345"))
629
+
630
+ def test_storage_diagnostic_returns_dict(self):
631
+ diag = storage_diagnostic()
632
+ assert isinstance(diag, dict)
633
+ assert "telemetry_dir" in diag
634
+ assert "is_persistent" in diag
635
+ assert "on_hf_spaces" in diag
636
+ assert "telemetry_enabled" in diag
637
+ assert "data_dir_exists" in diag
638
+
639
+
640
+ # ── Hub restore ──────────────────────────────────────────────────────────
641
+
642
+
643
+ class TestHubRestore:
644
+ """Test Hub-to-local restore functionality."""
645
+
646
+ def setup_method(self):
647
+ _reset_telemetry()
648
+ # Reset restore state so each test can trigger it
649
+ import obliteratus.telemetry as t
650
+ t._restore_done = False
651
+
652
+ def test_restore_skips_when_no_repo(self):
653
+ with patch("obliteratus.telemetry._TELEMETRY_REPO", ""):
654
+ assert restore_from_hub() == 0
655
+
656
+ def test_restore_deduplicates(self):
657
+ """Records already in local JSONL should not be re-added."""
658
+ import obliteratus.telemetry as t
659
+
660
+ with tempfile.TemporaryDirectory() as d:
661
+ test_file = Path(d) / "telemetry.jsonl"
662
+ existing = {"session_id": "abc", "timestamp": "2025-01-01T00:00:00"}
663
+ test_file.write_text(json.dumps(existing) + "\n")
664
+
665
+ old_file = t.TELEMETRY_FILE
666
+ old_repo = t._TELEMETRY_REPO
667
+ t.TELEMETRY_FILE = test_file
668
+ t._TELEMETRY_REPO = "test/repo"
669
+ t._restore_done = False
670
+
671
+ try:
672
+ hub_records = [
673
+ {"session_id": "abc", "timestamp": "2025-01-01T00:00:00"}, # duplicate
674
+ {"session_id": "def", "timestamp": "2025-01-02T00:00:00"}, # new
675
+ ]
676
+ with patch("obliteratus.telemetry.fetch_hub_records", return_value=hub_records):
677
+ count = restore_from_hub()
678
+ assert count == 1 # Only the new record
679
+
680
+ # Verify file contents
681
+ lines = test_file.read_text().strip().split("\n")
682
+ assert len(lines) == 2 # original + 1 new
683
+ finally:
684
+ t.TELEMETRY_FILE = old_file
685
+ t._TELEMETRY_REPO = old_repo
686
+
687
+ def test_restore_only_runs_once(self):
688
+ """Calling restore_from_hub() twice should be a no-op the second time."""
689
+ import obliteratus.telemetry as t
690
+ t._restore_done = False
691
+
692
+ with patch("obliteratus.telemetry._TELEMETRY_REPO", "test/repo"):
693
+ with patch("obliteratus.telemetry.fetch_hub_records", return_value=[]):
694
+ restore_from_hub()
695
+ # Second call should return 0 immediately
696
+ assert restore_from_hub() == 0