pliny-the-prompter committed on
Commit
4837177
·
verified ·
1 Parent(s): ab1b6fe

Upload 129 files

Browse files
README.md CHANGED
@@ -1,6 +1,6 @@
1
  ---
2
  title: OBLITERATUS
3
- emoji: "\U0001F513"
4
  colorFrom: green
5
  colorTo: gray
6
  sdk: gradio
@@ -302,9 +302,9 @@ Beyond targeted liberation, OBLITERATUS is a general-purpose ablation suite for
302
 
303
  Each strategy enumerates all possible ablations, applies them one at a time, measures the impact, and restores the model — giving you a complete map of where the chains are anchored vs. where the mind lives.
304
 
305
- ## 47 curated models across 5 tiers
306
 
307
- OBLITERATUS ships with presets for 47 models organized by compute requirement:
308
 
309
  | Tier | VRAM | Example models |
310
  |------|------|---------------|
 
1
  ---
2
  title: OBLITERATUS
3
+ emoji: "⛓️‍💥"
4
  colorFrom: green
5
  colorTo: gray
6
  sdk: gradio
 
302
 
303
  Each strategy enumerates all possible ablations, applies them one at a time, measures the impact, and restores the model — giving you a complete map of where the chains are anchored vs. where the mind lives.
304
 
305
+ ## 116 curated models across 5 tiers
306
 
307
+ OBLITERATUS ships with presets for 116 models organized by compute requirement:
308
 
309
  | Tier | VRAM | Example models |
310
  |------|------|---------------|
app.py CHANGED
@@ -324,13 +324,13 @@ _NEEDS_QUANTIZATION = {
324
  }
325
 
326
 
327
- def _should_quantize(model_id: str) -> str | None:
328
  """Return '4bit' if the model needs quantization for available GPU, else None."""
329
  try:
330
  from obliteratus.models.loader import _estimate_model_memory_gb, _available_gpu_memory_gb
331
  from transformers import AutoConfig
332
  token = os.environ.get("HF_TOKEN") or None
333
- config = AutoConfig.from_pretrained(model_id, trust_remote_code=True, token=token)
334
  # Skip if model already ships with native quantization (e.g. Mxfp4Config)
335
  if getattr(config, "quantization_config", None) is not None:
336
  return None
@@ -701,7 +701,7 @@ def benchmark(
701
  if result.status == "running":
702
  run_logs.append(f"{stage_key.upper()} — {result.message}")
703
 
704
- quantization = _should_quantize(model_id)
705
 
706
  def run_pipeline():
707
  try:
@@ -1044,7 +1044,7 @@ def benchmark_multi_model(
1044
  def on_stage(result):
1045
  pass
1046
 
1047
- quantization = _should_quantize(model_id)
1048
 
1049
  def run_pipeline():
1050
  try:
@@ -1359,9 +1359,10 @@ def obliterate(model_choice: str, method_choice: str, hub_repo: str,
1359
  _state["model_name"] = model_choice
1360
  _state["method"] = method
1361
 
1362
- global _obliterate_counter
1363
- _obliterate_counter += 1
1364
- save_dir = f"/tmp/obliterated_{_obliterate_counter}"
 
1365
 
1366
  log_lines = []
1367
  last_yielded = [0]
@@ -1387,7 +1388,7 @@ def obliterate(model_choice: str, method_choice: str, hub_repo: str,
1387
  idx = stage_order.get(stage_key, 0)
1388
  progress((idx + 1) / 6, desc=f"{stage_key.upper()}")
1389
 
1390
- quantization = _should_quantize(model_id)
1391
 
1392
  def run_pipeline():
1393
  try:
@@ -1497,7 +1498,7 @@ def obliterate(model_choice: str, method_choice: str, hub_repo: str,
1497
  # Stream log updates while pipeline runs (max 45 minutes to prevent indefinite hang)
1498
  _max_pipeline_secs = 45 * 60
1499
  _pipeline_start = time.time()
1500
- status_msg = f"**Obliterating\u2026** (0s)"
1501
  while worker.is_alive():
1502
  status_msg = f"**Obliterating\u2026** ({_elapsed()})"
1503
  if len(log_lines) > last_yielded[0]:
@@ -2018,8 +2019,8 @@ def load_bench_into_chat(choice: str, progress=gr.Progress()):
2018
  else:
2019
  n = min(len(harmful_all), len(harmless_all))
2020
 
2021
- quantization = _should_quantize(model_id)
2022
  is_preset = cfg["model_choice"] in MODELS
 
2023
 
2024
  pipeline_ref = [None]
2025
  error_ref = [None]
@@ -2319,7 +2320,7 @@ def strength_sweep(model_choice: str, method_choice: str,
2319
 
2320
  def _run_sweep_point():
2321
  try:
2322
- quantization = _should_quantize(model_id)
2323
  pipe = AbliterationPipeline(
2324
  model_id, method=method_key,
2325
  output_dir=f"/tmp/sweep_{step_i}",
 
324
  }
325
 
326
 
327
+ def _should_quantize(model_id: str, is_preset: bool = False) -> str | None:
328
  """Return '4bit' if the model needs quantization for available GPU, else None."""
329
  try:
330
  from obliteratus.models.loader import _estimate_model_memory_gb, _available_gpu_memory_gb
331
  from transformers import AutoConfig
332
  token = os.environ.get("HF_TOKEN") or None
333
+ config = AutoConfig.from_pretrained(model_id, trust_remote_code=is_preset, token=token)
334
  # Skip if model already ships with native quantization (e.g. Mxfp4Config)
335
  if getattr(config, "quantization_config", None) is not None:
336
  return None
 
701
  if result.status == "running":
702
  run_logs.append(f"{stage_key.upper()} — {result.message}")
703
 
704
+ quantization = _should_quantize(model_id, is_preset=is_preset)
705
 
706
  def run_pipeline():
707
  try:
 
1044
  def on_stage(result):
1045
  pass
1046
 
1047
+ quantization = _should_quantize(model_id, is_preset=is_preset_model)
1048
 
1049
  def run_pipeline():
1050
  try:
 
1359
  _state["model_name"] = model_choice
1360
  _state["method"] = method
1361
 
1362
+ with _lock:
1363
+ global _obliterate_counter
1364
+ _obliterate_counter += 1
1365
+ save_dir = f"/tmp/obliterated_{_obliterate_counter}"
1366
 
1367
  log_lines = []
1368
  last_yielded = [0]
 
1388
  idx = stage_order.get(stage_key, 0)
1389
  progress((idx + 1) / 6, desc=f"{stage_key.upper()}")
1390
 
1391
+ quantization = _should_quantize(model_id, is_preset=is_preset)
1392
 
1393
  def run_pipeline():
1394
  try:
 
1498
  # Stream log updates while pipeline runs (max 45 minutes to prevent indefinite hang)
1499
  _max_pipeline_secs = 45 * 60
1500
  _pipeline_start = time.time()
1501
+ status_msg = "**Obliterating\u2026** (0s)"
1502
  while worker.is_alive():
1503
  status_msg = f"**Obliterating\u2026** ({_elapsed()})"
1504
  if len(log_lines) > last_yielded[0]:
 
2019
  else:
2020
  n = min(len(harmful_all), len(harmless_all))
2021
 
 
2022
  is_preset = cfg["model_choice"] in MODELS
2023
+ quantization = _should_quantize(model_id, is_preset=is_preset)
2024
 
2025
  pipeline_ref = [None]
2026
  error_ref = [None]
 
2320
 
2321
  def _run_sweep_point():
2322
  try:
2323
+ quantization = _should_quantize(model_id, is_preset=is_preset)
2324
  pipe = AbliterationPipeline(
2325
  model_id, method=method_key,
2326
  output_dir=f"/tmp/sweep_{step_i}",
hf-spaces/README.md CHANGED
@@ -1,6 +1,6 @@
1
  ---
2
  title: OBLITERATUS
3
- emoji: "🔓"
4
  colorFrom: green
5
  colorTo: gray
6
  sdk: gradio
 
1
  ---
2
  title: OBLITERATUS
3
+ emoji: "⛓️‍💥"
4
  colorFrom: green
5
  colorTo: gray
6
  sdk: gradio
obliteratus/abliterate.py CHANGED
@@ -949,8 +949,14 @@ class AbliterationPipeline:
949
  self.log(f" Router profiling complete: {n_profiled} MoE layers profiled")
950
 
951
  for idx in range(n_layers):
952
- self._harmful_means[idx] = torch.stack(self._harmful_acts[idx]).mean(dim=0)
953
- self._harmless_means[idx] = torch.stack(self._harmless_acts[idx]).mean(dim=0)
 
 
 
 
 
 
954
 
955
  # ── Jailbreak-contrastive probing ─────────────────────────────────
956
  if self.use_jailbreak_contrast:
@@ -1008,18 +1014,31 @@ class AbliterationPipeline:
1008
 
1009
  n = len(prompts)
1010
  self.log(f" Wrapping {n} prompts with chat template")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1011
  wrapped = []
1012
- for i, prompt in enumerate(prompts):
1013
- messages = [{"role": "user", "content": prompt}]
1014
  try:
1015
  text = tokenizer.apply_chat_template(
1016
- messages, tokenize=False, add_generation_prompt=True
1017
  )
1018
  wrapped.append(text)
1019
  except Exception:
1020
- wrapped.append(prompt) # fallback to raw if individual prompt fails
1021
- if (i + 1) % 100 == 0 or (i + 1) == n:
1022
- self.log(f" chat template {i + 1}/{n}")
1023
  return wrapped
1024
 
1025
  @staticmethod
@@ -1426,7 +1445,7 @@ class AbliterationPipeline:
1426
  if n_dirs > 1:
1427
  harmful_stack = torch.stack(self._harmful_acts[idx]).squeeze(1)
1428
  harmless_stack = torch.stack(self._harmless_acts[idx]).squeeze(1)
1429
- diff_matrix = harmful_stack - harmless_stack
1430
  if torch.isfinite(diff_matrix).all():
1431
  k = min(n_dirs, diff_matrix.shape[0], diff_matrix.shape[1])
1432
  _, _, Vh = torch.linalg.svd(diff_matrix, full_matrices=False)
@@ -1475,7 +1494,7 @@ class AbliterationPipeline:
1475
  # SVD-based multi-direction extraction (Gabliteration)
1476
  harmful_stack = torch.stack(self._harmful_acts[idx]).squeeze(1) # (n_prompts, hidden)
1477
  harmless_stack = torch.stack(self._harmless_acts[idx]).squeeze(1)
1478
- diff_matrix = harmful_stack - harmless_stack # (n_prompts, hidden_dim)
1479
 
1480
  # SVD to extract principal refusal directions
1481
  if not torch.isfinite(diff_matrix).all():
@@ -3046,16 +3065,21 @@ class AbliterationPipeline:
3046
  # remove components that lie in both subspaces (violating
3047
  # the GRRO's independent-αᵢ assumption; see theory journal
3048
  # §12.6 "SAE-SVD Orthogonalization").
3049
- for si in range(sae_dirs.shape[0]):
3050
- for di in range(subspace.shape[0]):
3051
- svd_d = subspace[di].to(sae_dirs.device)
3052
- overlap = sae_dirs[si] @ svd_d
3053
- sae_dirs[si] -= overlap * svd_d
3054
- sae_norm = sae_dirs[si].norm()
3055
- if sae_norm > 1e-8:
3056
- sae_dirs[si] /= sae_norm
3057
- # else: SAE direction was entirely within SVD subspace,
3058
- # will be skipped by the norm check below.
 
 
 
 
 
3059
  sae_count = 0
3060
  # SAE regularization: for inversion modes, use a much
3061
  # gentler floor (0.6 = 40% removal) since these are
@@ -3063,39 +3087,52 @@ class AbliterationPipeline:
3063
  # projection which already uses full reflection.
3064
  sae_reg_floor = 0.6 if self.invert_refusal else 0.3
3065
  sae_reg = max(layer_reg, sae_reg_floor) if not self.invert_refusal else sae_reg_floor
3066
- for si in range(sae_dirs.shape[0]):
 
 
 
 
 
 
 
 
 
 
 
 
3067
  # Skip SAE directions that collapsed to near-zero
3068
  # after orthogonalization (fully redundant with SVD)
3069
- if sae_dirs[si].norm() < 1e-6:
3070
  continue
3071
- sd = sae_dirs[si].to(device).unsqueeze(-1)
3072
- try:
3073
- attn = get_attention_module(layers[idx], arch)
3074
- sae_count += self._project_out_advanced(
3075
- attn, sd, _ATTN_OUT_NAMES,
3076
- norm_preserve=self.norm_preserve,
3077
- regularization=sae_reg,
3078
- )
3079
- except (AttributeError, RuntimeError):
3080
- pass
3081
- try:
3082
- ffn = get_ffn_module(layers[idx], arch)
3083
- fc = self._project_out_advanced(
3084
- ffn, sd, _FFN_OUT_NAMES,
3085
- norm_preserve=self.norm_preserve,
3086
- regularization=sae_reg,
3087
- )
3088
- if fc == 0:
3089
- fc = self._project_moe_experts(
3090
- ffn, sd,
3091
  norm_preserve=self.norm_preserve,
3092
  regularization=sae_reg,
3093
- project_biases=False,
3094
  )
3095
- sae_count += fc
3096
- except (AttributeError, RuntimeError):
3097
- pass
 
 
 
 
 
 
 
3098
  del sd
 
3099
  total_sae_projections += sae_count
3100
  count += sae_count
3101
 
@@ -3156,23 +3193,26 @@ class AbliterationPipeline:
3156
  model = self.handle.model
3157
  if last_strong in self.refusal_subspaces:
3158
  subspace = self.refusal_subspaces[last_strong]
3159
- for dir_idx in range(subspace.shape[0]):
3160
- direction = subspace[dir_idx]
3161
- lm_device = self._get_model_device(model)
3162
- d = direction.to(lm_device).unsqueeze(-1)
3163
- # Try common lm_head attribute names
3164
- for head_name in ["lm_head", "embed_out", "output"]:
3165
- head = getattr(model, head_name, None)
3166
- if head is not None and hasattr(head, "weight"):
3167
- # Inversion: reflect lm_head to flip refusal token logits
3168
- lm_reg = (1.0 - self.reflection_strength) if self.invert_refusal else 0.0
3169
- lm_head_count += self._project_out_advanced(
3170
- model, d, [head_name],
3171
- norm_preserve=self.norm_preserve,
3172
- regularization=lm_reg,
3173
- )
3174
- break
3175
- del d
 
 
 
3176
  if lm_head_count > 0:
3177
  total_modified += lm_head_count
3178
  self.log(f" lm_head: {lm_head_count} projections")
@@ -3339,7 +3379,7 @@ class AbliterationPipeline:
3339
  if n_dirs > 1:
3340
  harmful_stack = torch.stack(self._harmful_acts[idx]).squeeze(1)
3341
  harmless_stack = torch.stack(self._harmless_acts[idx]).squeeze(1)
3342
- diff_matrix = harmful_stack - harmless_stack
3343
  if torch.isfinite(diff_matrix).all():
3344
  k = min(n_dirs, diff_matrix.shape[0], diff_matrix.shape[1])
3345
  _, _, Vh = torch.linalg.svd(diff_matrix, full_matrices=False)
@@ -3374,7 +3414,7 @@ class AbliterationPipeline:
3374
  else:
3375
  harmful_stack = torch.stack(self._harmful_acts[idx]).squeeze(1)
3376
  harmless_stack = torch.stack(self._harmless_acts[idx]).squeeze(1)
3377
- diff_matrix = harmful_stack - harmless_stack
3378
  if not torch.isfinite(diff_matrix).all():
3379
  diff_matrix = torch.nan_to_num(diff_matrix, nan=0.0, posinf=0.0, neginf=0.0)
3380
  k = min(n_dirs, diff_matrix.shape[0], diff_matrix.shape[1])
 
949
  self.log(f" Router profiling complete: {n_profiled} MoE layers profiled")
950
 
951
  for idx in range(n_layers):
952
+ if self._harmful_acts[idx] and self._harmless_acts[idx]:
953
+ self._harmful_means[idx] = torch.stack(self._harmful_acts[idx]).mean(dim=0)
954
+ self._harmless_means[idx] = torch.stack(self._harmless_acts[idx]).mean(dim=0)
955
+ else:
956
+ # Layer produced no activations (hook failure or skipped layer)
957
+ hidden = self._harmful_acts[0][0].shape[-1] if self._harmful_acts.get(0) else 768
958
+ self._harmful_means[idx] = torch.zeros(1, hidden)
959
+ self._harmless_means[idx] = torch.zeros(1, hidden)
960
 
961
  # ── Jailbreak-contrastive probing ─────────────────────────────────
962
  if self.use_jailbreak_contrast:
 
1014
 
1015
  n = len(prompts)
1016
  self.log(f" Wrapping {n} prompts with chat template")
1017
+
1018
+ # Try batch application first (single call, much faster for large sets)
1019
+ all_conversations = [[{"role": "user", "content": p}] for p in prompts]
1020
+ try:
1021
+ wrapped = [
1022
+ tokenizer.apply_chat_template(
1023
+ conv, tokenize=False, add_generation_prompt=True
1024
+ )
1025
+ for conv in all_conversations
1026
+ ]
1027
+ self.log(f" chat template {n}/{n}")
1028
+ return wrapped
1029
+ except Exception:
1030
+ pass # Fall through to per-prompt with error handling
1031
+
1032
  wrapped = []
1033
+ for i, conv in enumerate(all_conversations):
 
1034
  try:
1035
  text = tokenizer.apply_chat_template(
1036
+ conv, tokenize=False, add_generation_prompt=True
1037
  )
1038
  wrapped.append(text)
1039
  except Exception:
1040
+ wrapped.append(prompts[i]) # fallback to raw if individual prompt fails
1041
+ self.log(f" chat template {n}/{n}")
 
1042
  return wrapped
1043
 
1044
  @staticmethod
 
1445
  if n_dirs > 1:
1446
  harmful_stack = torch.stack(self._harmful_acts[idx]).squeeze(1)
1447
  harmless_stack = torch.stack(self._harmless_acts[idx]).squeeze(1)
1448
+ diff_matrix = (harmful_stack - harmless_stack).float()
1449
  if torch.isfinite(diff_matrix).all():
1450
  k = min(n_dirs, diff_matrix.shape[0], diff_matrix.shape[1])
1451
  _, _, Vh = torch.linalg.svd(diff_matrix, full_matrices=False)
 
1494
  # SVD-based multi-direction extraction (Gabliteration)
1495
  harmful_stack = torch.stack(self._harmful_acts[idx]).squeeze(1) # (n_prompts, hidden)
1496
  harmless_stack = torch.stack(self._harmless_acts[idx]).squeeze(1)
1497
+ diff_matrix = (harmful_stack - harmless_stack).float() # float32 for SVD stability
1498
 
1499
  # SVD to extract principal refusal directions
1500
  if not torch.isfinite(diff_matrix).all():
 
3065
  # remove components that lie in both subspaces (violating
3066
  # the GRRO's independent-Ξ±α΅’ assumption; see theory journal
3067
  # Β§12.6 "SAE-SVD Orthogonalization").
3068
+ # Batch orthogonalization: project out SVD subspace from all
3069
+ # SAE directions at once (replaces O(n_sae * n_svd) loop).
3070
+ svd_sub = subspace.to(sae_dirs.device) # (n_svd, hidden_dim)
3071
+ overlaps = sae_dirs @ svd_sub.T # (n_sae, n_svd)
3072
+ sae_dirs -= overlaps @ svd_sub # project out SVD subspace
3073
+ # Zero collapsed directions BEFORE normalizing to avoid
3074
+ # amplifying floating-point noise in near-zero directions.
3075
+ sae_norms = sae_dirs.norm(dim=-1, keepdim=True)
3076
+ collapsed_mask = (sae_norms.squeeze(-1) < 1e-8)
3077
+ if collapsed_mask.any():
3078
+ sae_dirs[collapsed_mask] = 0.0
3079
+ # Re-normalize surviving directions only
3080
+ surviving = ~collapsed_mask
3081
+ if surviving.any():
3082
+ sae_dirs[surviving] = sae_dirs[surviving] / sae_norms[surviving].clamp(min=1e-12)
3083
  sae_count = 0
3084
  # SAE regularization: for inversion modes, use a much
3085
  # gentler floor (0.6 = 40% removal) since these are
 
3087
  # projection which already uses full reflection.
3088
  sae_reg_floor = 0.6 if self.invert_refusal else 0.3
3089
  sae_reg = max(layer_reg, sae_reg_floor) if not self.invert_refusal else sae_reg_floor
3090
+ # Cache module lookups and pre-transfer SAE directions
3091
+ sae_attn = None
3092
+ sae_ffn = None
3093
+ try:
3094
+ sae_attn = get_attention_module(layers[idx], arch)
3095
+ except (AttributeError, RuntimeError):
3096
+ pass
3097
+ try:
3098
+ sae_ffn = get_ffn_module(layers[idx], arch)
3099
+ except (AttributeError, RuntimeError):
3100
+ pass
3101
+ sae_dirs_on_device = sae_dirs.to(device)
3102
+ for si in range(sae_dirs_on_device.shape[0]):
3103
  # Skip SAE directions that collapsed to near-zero
3104
  # after orthogonalization (fully redundant with SVD)
3105
+ if sae_dirs_on_device[si].norm() < 1e-6:
3106
  continue
3107
+ sd = sae_dirs_on_device[si].unsqueeze(-1)
3108
+ if sae_attn is not None:
3109
+ try:
3110
+ sae_count += self._project_out_advanced(
3111
+ sae_attn, sd, _ATTN_OUT_NAMES,
3112
+ norm_preserve=self.norm_preserve,
3113
+ regularization=sae_reg,
3114
+ )
3115
+ except (AttributeError, RuntimeError):
3116
+ pass
3117
+ if sae_ffn is not None:
3118
+ try:
3119
+ fc = self._project_out_advanced(
3120
+ sae_ffn, sd, _FFN_OUT_NAMES,
 
 
 
 
 
 
3121
  norm_preserve=self.norm_preserve,
3122
  regularization=sae_reg,
 
3123
  )
3124
+ if fc == 0:
3125
+ fc = self._project_moe_experts(
3126
+ sae_ffn, sd,
3127
+ norm_preserve=self.norm_preserve,
3128
+ regularization=sae_reg,
3129
+ project_biases=False,
3130
+ )
3131
+ sae_count += fc
3132
+ except (AttributeError, RuntimeError):
3133
+ pass
3134
  del sd
3135
+ del sae_dirs_on_device
3136
  total_sae_projections += sae_count
3137
  count += sae_count
3138
 
 
3193
  model = self.handle.model
3194
  if last_strong in self.refusal_subspaces:
3195
  subspace = self.refusal_subspaces[last_strong]
3196
+ lm_device = self._get_model_device(model)
3197
+ # Pre-transfer subspace and resolve lm_head module once
3198
+ subspace_on_device = subspace.to(lm_device)
3199
+ lm_head_name = None
3200
+ for head_name in ["lm_head", "embed_out", "output"]:
3201
+ head = getattr(model, head_name, None)
3202
+ if head is not None and hasattr(head, "weight"):
3203
+ lm_head_name = head_name
3204
+ break
3205
+ if lm_head_name is not None:
3206
+ lm_reg = (1.0 - self.reflection_strength) if self.invert_refusal else 0.0
3207
+ for dir_idx in range(subspace_on_device.shape[0]):
3208
+ d = subspace_on_device[dir_idx].unsqueeze(-1)
3209
+ lm_head_count += self._project_out_advanced(
3210
+ model, d, [lm_head_name],
3211
+ norm_preserve=self.norm_preserve,
3212
+ regularization=lm_reg,
3213
+ )
3214
+ del d
3215
+ del subspace_on_device
3216
  if lm_head_count > 0:
3217
  total_modified += lm_head_count
3218
  self.log(f" lm_head: {lm_head_count} projections")
 
3379
  if n_dirs > 1:
3380
  harmful_stack = torch.stack(self._harmful_acts[idx]).squeeze(1)
3381
  harmless_stack = torch.stack(self._harmless_acts[idx]).squeeze(1)
3382
+ diff_matrix = (harmful_stack - harmless_stack).float()
3383
  if torch.isfinite(diff_matrix).all():
3384
  k = min(n_dirs, diff_matrix.shape[0], diff_matrix.shape[1])
3385
  _, _, Vh = torch.linalg.svd(diff_matrix, full_matrices=False)
 
3414
  else:
3415
  harmful_stack = torch.stack(self._harmful_acts[idx]).squeeze(1)
3416
  harmless_stack = torch.stack(self._harmless_acts[idx]).squeeze(1)
3417
+ diff_matrix = (harmful_stack - harmless_stack).float() # float32 for SVD stability
3418
  if not torch.isfinite(diff_matrix).all():
3419
  diff_matrix = torch.nan_to_num(diff_matrix, nan=0.0, posinf=0.0, neginf=0.0)
3420
  k = min(n_dirs, diff_matrix.shape[0], diff_matrix.shape[1])
obliteratus/cli.py CHANGED
@@ -43,7 +43,7 @@ def main(argv: list[str] | None = None):
43
  )
44
 
45
  # --- models ---
46
- models_parser = subparsers.add_parser("models", help="Browse 48 curated models by compute tier")
47
  models_parser.add_argument(
48
  "--tier",
49
  type=str,
 
43
  )
44
 
45
  # --- models ---
46
+ models_parser = subparsers.add_parser("models", help="Browse curated models by compute tier")
47
  models_parser.add_argument(
48
  "--tier",
49
  type=str,
scripts/run_benchmark_remote.sh CHANGED
@@ -92,8 +92,8 @@ os.environ.setdefault("CUDA_LAUNCH_BLOCKING", "1")
92
  import torch
93
  import torch.nn as nn
94
 
95
- # Add app dir to path (HF Space layout)
96
- sys.path.insert(0, "/home/user/app")
97
 
98
  # ── Hotpatch: fix device detection for accelerate device_map="auto" ──────
99
  # The deployed Space code uses next(model.parameters()).device which is
 
92
  import torch
93
  import torch.nn as nn
94
 
95
+ # Add app dir to path (HF Space layout: /home/user/app)
96
+ sys.path.insert(0, os.environ.get("APP_DIR", "/home/user/app"))
97
 
98
  # ── Hotpatch: fix device detection for accelerate device_map="auto" ──────
99
  # The deployed Space code uses next(model.parameters()).device which is