pliny-the-prompter commited on
Commit
d169612
·
verified ·
1 Parent(s): b46e97f

Upload 134 files

Browse files
app.py CHANGED
@@ -57,6 +57,7 @@ if "HF_HOME" not in os.environ:
57
 
58
  import gradio as gr
59
  import torch
 
60
  from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
61
 
62
  # ── ZeroGPU support ─────────────────────────────────────────────────
@@ -399,6 +400,213 @@ def _validate_hub_repo(hub_repo: str) -> str:
399
  return ""
400
 
401
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
402
  PROMPT_VOLUMES = {
403
  "33 (fast)": 33,
404
  "66 (better signal)": 66,
@@ -447,25 +655,11 @@ def _should_quantize(model_id: str, is_preset: bool = False) -> str | None:
447
  # ---------------------------------------------------------------------------
448
 
449
  def _clear_gpu():
450
- """Free GPU memory. Resilient to CUDA errors (e.g. after illegal memory access)."""
451
  with _lock:
452
  _state["model"] = None
453
  _state["tokenizer"] = None
454
- gc.collect()
455
- if torch.cuda.is_available():
456
- try:
457
- torch.cuda.empty_cache()
458
- except Exception:
459
- # CUDA context may be poisoned after an illegal-address error;
460
- # attempt a device reset so subsequent loads can succeed.
461
- try:
462
- torch.cuda.synchronize()
463
- except Exception:
464
- pass
465
- try:
466
- torch.cuda.reset_peak_memory_stats()
467
- except Exception:
468
- pass
469
 
470
 
471
  def _install_steering_hooks(model, steering_meta: dict) -> int:
@@ -589,16 +783,16 @@ def _cleanup_disk():
589
  # ---------------------------------------------------------------------------
590
 
591
  def _get_vram_html() -> str:
592
- """Return an HTML snippet showing GPU VRAM usage as a styled bar."""
593
- if not torch.cuda.is_available():
594
  return (
595
  '<div style="text-align:center;color:#4a5568;font-size:0.72rem;'
596
  'letter-spacing:1px;margin-top:6px;">CPU ONLY — NO GPU DETECTED</div>'
597
  )
598
  try:
599
- used = torch.cuda.memory_allocated() / 1024**3
600
- reserved = torch.cuda.memory_reserved() / 1024**3
601
- total = torch.cuda.get_device_properties(0).total_memory / 1024**3
602
  pct = (used / total * 100) if total > 0 else 0
603
  # Color shifts from green → yellow → red
604
  if pct < 50:
@@ -607,12 +801,17 @@ def _get_vram_html() -> str:
607
  bar_color = "#ffcc00"
608
  else:
609
  bar_color = "#ff003c"
610
- device_name = torch.cuda.get_device_name(0)
 
 
 
 
 
611
  return (
612
  f'<div style="margin:6px auto 0;max-width:480px;">'
613
  f'<div style="display:flex;justify-content:space-between;font-size:0.68rem;'
614
  f'color:#4a5568;letter-spacing:1px;margin-bottom:2px;">'
615
- f'<span>GPU: {device_name}</span>'
616
  f'<span>{used:.1f} / {total:.1f} GB ({pct:.0f}%)</span></div>'
617
  f'<div style="background:#0a0a0f;border:1px solid #1a1f2e;border-radius:3px;'
618
  f'height:10px;overflow:hidden;">'
@@ -620,11 +819,11 @@ def _get_vram_html() -> str:
620
  f'box-shadow:0 0 6px {bar_color};transition:width 0.5s ease;"></div></div>'
621
  f'<div style="display:flex;justify-content:space-between;font-size:0.6rem;'
622
  f'color:#333;margin-top:1px;">'
623
- f'<span style="color:#4a5568;">reserved: {reserved:.1f} GB</span></div>'
624
  f'</div>'
625
  )
626
  except Exception:
627
- return '<div style="text-align:center;color:#4a5568;font-size:0.72rem;">VRAM: unavailable</div>'
628
 
629
 
630
  # ---------------------------------------------------------------------------
@@ -1067,8 +1266,7 @@ def benchmark(
1067
  pass
1068
  pipeline_ref[0] = None
1069
  gc.collect()
1070
- if torch.cuda.is_available():
1071
- torch.cuda.empty_cache()
1072
 
1073
  yield (
1074
  f"**{method_key} complete** ({mi + 1}/{len(methods_to_test)}) \u2014 {_bench_elapsed()}",
@@ -1418,8 +1616,7 @@ def benchmark_multi_model(
1418
  pass
1419
  pipeline_ref[0] = None
1420
  gc.collect()
1421
- if torch.cuda.is_available():
1422
- torch.cuda.empty_cache()
1423
 
1424
  yield (
1425
  f"**{model_id} complete** ({mi + 1}/{len(model_choices)}) \u2014 {_mm_elapsed()}",
@@ -1518,7 +1715,6 @@ def _format_multi_model_results(results: list[dict], context: dict | None = None
1518
 
1519
  @spaces.GPU(duration=300)
1520
  def obliterate(model_choice: str, method_choice: str,
1521
- hub_auto_push: bool, hub_repo: str,
1522
  prompt_volume_choice: str, dataset_source_choice: str,
1523
  custom_harmful: str, custom_harmless: str,
1524
  # Advanced params (sliders)
@@ -1551,14 +1747,6 @@ def obliterate(model_choice: str, method_choice: str,
1551
  model_id = MODELS.get(model_choice, model_choice)
1552
  is_preset = model_choice in MODELS
1553
  method = METHODS.get(method_choice, "advanced")
1554
- # Resolve push-to-hub: explicit repo overrides auto-naming
1555
- _hub_override = hub_repo.strip() if hub_repo and hub_repo.strip() else None
1556
- if _hub_override:
1557
- push_to_hub = _hub_override
1558
- elif hub_auto_push:
1559
- push_to_hub = "auto" # resolved to {user}/{model}-OBLITERATED at push time
1560
- else:
1561
- push_to_hub = None
1562
  prompt_volume = PROMPT_VOLUMES.get(prompt_volume_choice, 33)
1563
 
1564
  # Resolve "adaptive" → telemetry-recommended method for this model
@@ -1606,26 +1794,6 @@ def obliterate(model_choice: str, method_choice: str,
1606
  )
1607
  return
1608
 
1609
- # Early validation: Hub repo format + token availability
1610
- # Resolve which token to use: user's own HF_TOKEN, or the shared community token.
1611
- _user_token = os.environ.get("HF_TOKEN")
1612
- _hub_token = _user_token or _HUB_COMMUNITY_TOKEN
1613
- _hub_org = None if _user_token else _HUB_COMMUNITY_ORG # community org only when using shared token
1614
- if push_to_hub:
1615
- if push_to_hub != "auto" and not re.match(r'^[a-zA-Z0-9_-]+/[a-zA-Z0-9_.-]+$', push_to_hub):
1616
- yield (
1617
- "**Error:** Invalid Hub repo format. Use `username/model-name`.",
1618
- "", gr.update(), gr.update(), gr.update(), gr.update(),
1619
- )
1620
- return
1621
- if not _hub_token:
1622
- yield (
1623
- "**Error:** No Hub token available. Set HF_TOKEN or OBLITERATUS_HUB_TOKEN "
1624
- "as an environment variable or Space secret.",
1625
- "", gr.update(), gr.update(), gr.update(), gr.update(),
1626
- )
1627
- return
1628
-
1629
  # Resolve dataset source — custom prompts override the dropdown
1630
  use_custom = custom_harmful and custom_harmful.strip()
1631
  dataset_key = get_source_key_from_label(dataset_source_choice) if dataset_source_choice else "builtin"
@@ -1699,9 +1867,6 @@ def obliterate(model_choice: str, method_choice: str,
1699
  output_dir=save_dir,
1700
  device="auto",
1701
  dtype="float16",
1702
- push_to_hub=push_to_hub,
1703
- hub_token=_hub_token,
1704
- hub_community_org=_hub_org,
1705
  quantization=quantization,
1706
  trust_remote_code=is_preset,
1707
  harmful_prompts=harmful_all[:n],
@@ -1719,9 +1884,6 @@ def obliterate(model_choice: str, method_choice: str,
1719
  device="auto",
1720
  dtype="float16",
1721
  method=method,
1722
- push_to_hub=push_to_hub,
1723
- hub_token=_hub_token,
1724
- hub_community_org=_hub_org,
1725
  quantization=quantization,
1726
  trust_remote_code=is_preset,
1727
  harmful_prompts=harmful_all[:n],
@@ -1774,12 +1936,6 @@ def obliterate(model_choice: str, method_choice: str,
1774
  log_lines.append(f"Dataset: {source_label}")
1775
  vol_label = "all" if prompt_volume == -1 else str(prompt_volume)
1776
  log_lines.append(f"Prompt volume: {vol_label} pairs")
1777
- if push_to_hub:
1778
- if push_to_hub == "auto":
1779
- _ns = _hub_org or "{you}"
1780
- log_lines.append(f"Push to Hub: auto ({_ns}/{{model}}-OBLITERATED)")
1781
- else:
1782
- log_lines.append(f"Push to Hub: {push_to_hub}")
1783
  if quantization:
1784
  log_lines.append(f"Quantization: {quantization} (auto-detected for GPU fit)")
1785
  log_lines.append("")
@@ -2118,11 +2274,11 @@ def chat_respond(message: str, history: list[dict], system_prompt: str,
2118
  _needs_reload = model is None or tokenizer is None
2119
  if not _needs_reload:
2120
  try:
2121
- dev = next(model.parameters()).device
2122
- if dev.type == "meta":
2123
  _needs_reload = True
2124
- elif torch.cuda.is_available() and dev.type != "cuda":
2125
- model.to("cuda")
2126
  except Exception:
2127
  _needs_reload = True
2128
 
@@ -2552,11 +2708,11 @@ def ab_chat_respond(message: str, history_left: list[dict], history_right: list[
2552
  _needs_reload = abliterated_model is None or tokenizer is None
2553
  if not _needs_reload:
2554
  try:
2555
- dev = next(abliterated_model.parameters()).device
2556
- if dev.type == "meta":
2557
  _needs_reload = True
2558
- elif torch.cuda.is_available() and dev.type != "cuda":
2559
- abliterated_model.to("cuda")
2560
  except Exception:
2561
  _needs_reload = True
2562
 
@@ -2689,8 +2845,7 @@ def ab_chat_respond(message: str, history_left: list[dict], history_right: list[
2689
  abl_device = next(abliterated_model.parameters()).device
2690
  abliterated_model.to("cpu")
2691
  gc.collect()
2692
- if torch.cuda.is_available():
2693
- torch.cuda.empty_cache()
2694
 
2695
  model_id = MODELS.get(model_name, model_name)
2696
  # Only trust remote code for known preset models, not arbitrary user-supplied IDs
@@ -2742,8 +2897,7 @@ def ab_chat_respond(message: str, history_left: list[dict], history_right: list[
2742
  # Free the original model
2743
  del original_model
2744
  gc.collect()
2745
- if torch.cuda.is_available():
2746
- torch.cuda.empty_cache()
2747
 
2748
  except Exception as e:
2749
  original_response = f"*Could not load original model for comparison: {e}*"
@@ -2752,7 +2906,7 @@ def ab_chat_respond(message: str, history_left: list[dict], history_right: list[
2752
  # Use torch.device("cuda") rather than the captured abl_device, since
2753
  # on ZeroGPU the original device reference may point to a stale context.
2754
  try:
2755
- restore_device = torch.device("cuda") if torch.cuda.is_available() else abl_device
2756
  abliterated_model.to(restore_device)
2757
  except Exception:
2758
  pass # If GPU restore fails, model stays on CPU (still usable)
@@ -2870,8 +3024,7 @@ def strength_sweep(model_choice: str, method_choice: str,
2870
 
2871
  # Cleanup between runs
2872
  gc.collect()
2873
- if torch.cuda.is_available():
2874
- torch.cuda.empty_cache()
2875
 
2876
  # Generate dose-response curve
2877
  gallery = None
@@ -2963,6 +3116,117 @@ def _format_sweep_results(results: list[dict]) -> str:
2963
  return "\n".join(lines)
2964
 
2965
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2966
  # ---------------------------------------------------------------------------
2967
  # Export Research Artifacts
2968
  # ---------------------------------------------------------------------------
@@ -3523,20 +3787,10 @@ with gr.Blocks(theme=THEME, css=CSS, js=_JS, title="OBLITERATUS", fill_height=Tr
3523
  lines=5,
3524
  )
3525
 
3526
- with gr.Row():
3527
- hub_auto_push = gr.Checkbox(
3528
- label="Auto-push to Hub",
3529
- value=False,
3530
- info=f"Pushes your model to {_HUB_COMMUNITY_ORG}/{{model}}-OBLITERATED on HF Hub. "
3531
- "No token needed — works out of the box!",
3532
- )
3533
- hub_repo = gr.Textbox(
3534
- label="Push to Hub (optional override)",
3535
- placeholder="auto-filled when checkbox is ticked, or type your own",
3536
- info="Leave blank with checkbox ticked for auto-naming, "
3537
- "or enter a custom repo ID (e.g. your-username/my-model).",
3538
- )
3539
- hub_warning_md = gr.Markdown("")
3540
 
3541
  # ── Advanced Settings (auto-populated from method preset) ────
3542
  _defaults = _get_preset_defaults("advanced (recommended)")
@@ -4168,33 +4422,19 @@ tradeoff point where refusal is minimized with minimal capability damage.
4168
  with gr.Tab("Tourney", id="tourney"):
4169
  gr.Markdown("""### March Madness Tournament
4170
  Pit **all abliteration methods** against each other in elimination rounds.
4171
- The winner gets auto-pushed to HuggingFace Hub.
4172
 
4173
  **Round 1 — Qualifiers:** All methods, reduced prompts. Bottom half eliminated.
4174
  **Round 2 — Semifinals:** Survivors, full prompts. Bottom half eliminated.
4175
  **Round 3 — Finals:** Top contenders, maximum prompts. Champion crowned.
4176
  """)
4177
- with gr.Row():
4178
- with gr.Column(scale=2):
4179
- tourney_model_dd = gr.Dropdown(
4180
- choices=list(MODELS.keys()),
4181
- value="Alibaba (Qwen) / Qwen3-4B",
4182
- label="Target Model",
4183
- info="Select a model to tournament-abliterate",
4184
- allow_custom_value=True,
4185
- )
4186
- with gr.Column(scale=1):
4187
- tourney_hub_org = gr.Textbox(
4188
- label="HF Hub Org (optional)",
4189
- placeholder="my-org",
4190
- info="Push winner to hub-org/model-name-OBLITERATED",
4191
- )
4192
- with gr.Column(scale=1):
4193
- tourney_hub_repo = gr.Textbox(
4194
- label="HF Hub Repo (optional)",
4195
- placeholder="org/repo-name",
4196
- info="Overrides Hub Org — full repo ID",
4197
- )
4198
 
4199
  with gr.Accordion("Advanced Settings", open=False):
4200
  with gr.Row():
@@ -4223,97 +4463,12 @@ The winner gets auto-pushed to HuggingFace Hub.
4223
  interactive=False,
4224
  )
4225
 
4226
- @tourney_btn.click(
4227
- inputs=[tourney_model_dd, tourney_hub_org, tourney_hub_repo,
 
4228
  tourney_dataset_dd, tourney_quant_dd],
4229
  outputs=[tourney_status, tourney_bracket, tourney_log],
4230
  )
4231
- def run_tourney(model_choice, hub_org, hub_repo, dataset, quantization):
4232
- if not model_choice or not model_choice.strip():
4233
- yield "**Error:** Select a model first.", "", ""
4234
- return
4235
-
4236
- from obliteratus.tourney import TourneyRunner, render_bracket
4237
-
4238
- # Resolve display label → HuggingFace model ID
4239
- model_id = model_choice.strip()
4240
- if model_id in MODELS:
4241
- model_id = MODELS[model_id]
4242
-
4243
- hub_org_val = hub_org.strip() if hub_org and hub_org.strip() else None
4244
- hub_repo_val = hub_repo.strip() if hub_repo and hub_repo.strip() else None
4245
- quant = quantization if quantization != "none" else None
4246
-
4247
- log_lines = []
4248
-
4249
- def on_log(msg):
4250
- log_lines.append(msg)
4251
-
4252
- def on_round(rnd):
4253
- pass # logged via on_log
4254
-
4255
- dataset_key = get_source_key_from_label(dataset) if dataset else "builtin"
4256
-
4257
- runner = TourneyRunner(
4258
- model_name=model_id,
4259
- hub_org=hub_org_val,
4260
- hub_repo=hub_repo_val,
4261
- dataset_key=dataset_key,
4262
- quantization=quant,
4263
- on_log=on_log,
4264
- on_round=on_round,
4265
- )
4266
-
4267
- # Yield progress updates during tournament
4268
- import threading
4269
- result_ref = [None]
4270
- error_ref = [None]
4271
-
4272
- def _run():
4273
- try:
4274
- result_ref[0] = runner.run()
4275
- except Exception as e:
4276
- error_ref[0] = e
4277
-
4278
- thread = threading.Thread(target=_run, daemon=True)
4279
- thread.start()
4280
-
4281
- while thread.is_alive():
4282
- yield (
4283
- "**Tournament in progress...**",
4284
- "",
4285
- "\n".join(log_lines[-100:]),
4286
- )
4287
- time.sleep(1.0)
4288
-
4289
- thread.join(timeout=10)
4290
-
4291
- if error_ref[0]:
4292
- yield (
4293
- f"**Error:** {error_ref[0]}",
4294
- "",
4295
- "\n".join(log_lines),
4296
- )
4297
- return
4298
-
4299
- result = result_ref[0]
4300
- if result and result.winner:
4301
- bracket_md = render_bracket(result)
4302
- hub_msg = ""
4303
- if result.hub_repo:
4304
- hub_msg = f"\nPushed to [{result.hub_repo}](https://huggingface.co/{result.hub_repo})"
4305
- yield (
4306
- f"**Champion: `{result.winner.method}`** "
4307
- f"(score: {result.winner.score:.4f}){hub_msg}",
4308
- bracket_md,
4309
- "\n".join(log_lines),
4310
- )
4311
- else:
4312
- yield (
4313
- "**Tournament complete** — no winner determined.",
4314
- "",
4315
- "\n".join(log_lines),
4316
- )
4317
 
4318
  # ── Tab 7: Export ────────────────────────────────────────────────���
4319
  with gr.Tab("Export", id="export"):
@@ -4336,7 +4491,94 @@ Download all intermediate data from your last obliteration run as a ZIP archive.
4336
  outputs=[export_file, export_status],
4337
  )
4338
 
4339
- # ── Tab 7: Leaderboard ────────────────────────────────────────────
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4340
  with gr.Tab("Leaderboard", id="leaderboard"):
4341
  gr.Markdown("""### Community Leaderboard
4342
  All benchmark results from **every OBLITERATUS Space** (including duplicated copies) are
@@ -4562,12 +4804,6 @@ Built on the shoulders of:
4562
  outputs=[prompt_vol_dd, dataset_info_md],
4563
  )
4564
 
4565
- # Wire hub repo → live validation
4566
- hub_repo.change(
4567
- fn=_validate_hub_repo,
4568
- inputs=[hub_repo],
4569
- outputs=[hub_warning_md],
4570
- )
4571
 
4572
  # Wire benchmark → Chat/A/B cross-tab dropdown updates
4573
  bench_btn.click(
@@ -4616,7 +4852,7 @@ Built on the shoulders of:
4616
  # may not fire after generator teardown.
4617
  obliterate_btn.click(
4618
  fn=obliterate,
4619
- inputs=[model_dd, method_dd, hub_auto_push, hub_repo, prompt_vol_dd, dataset_dd,
4620
  custom_harmful_tb, custom_harmless_tb] + _adv_controls,
4621
  outputs=[status_md, log_box, chat_status, session_model_dd, metrics_md, ab_session_model_dd],
4622
  ).then(
 
57
 
58
  import gradio as gr
59
  import torch
60
+ from obliteratus import device as dev
61
  from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
62
 
63
  # ── ZeroGPU support ─────────────────────────────────────────────────
 
400
  return ""
401
 
402
 
403
+ # ---------------------------------------------------------------------------
404
+ # Push to Hub — dedicated tab backend
405
+ # ---------------------------------------------------------------------------
406
+
407
+ def _generate_model_card(meta: dict) -> str:
408
+ """Generate a HuggingFace model card README for a session model."""
409
+ model_id = meta.get("model_id", "unknown")
410
+ method = meta.get("method", "unknown")
411
+ source = meta.get("source", "obliterate")
412
+ short_model = model_id.split("/")[-1] if "/" in model_id else model_id
413
+
414
+ metrics_table = ""
415
+ tourney_metrics = meta.get("tourney_metrics")
416
+ if tourney_metrics:
417
+ rows = "\n".join(
418
+ f"| {k.replace('_', ' ').title()} | {v:.4f} |"
419
+ for k, v in tourney_metrics.items() if isinstance(v, (int, float))
420
+ )
421
+ metrics_table = f"\n## Metrics\n\n| Metric | Value |\n|--------|-------|\n{rows}\n"
422
+
423
+ return f"""---
424
+ language: en
425
+ tags:
426
+ - obliteratus
427
+ - abliteration
428
+ - uncensored
429
+ - {source}
430
+ base_model: {model_id}
431
+ ---
432
+
433
+ # {short_model}-OBLITERATED
434
+
435
+ This model was abliterated using the **`{method}`** method via
436
+ [OBLITERATUS](https://github.com/elder-plinius/OBLITERATUS).
437
+
438
+ | Detail | Value |
439
+ |--------|-------|
440
+ | Base model | `{model_id}` |
441
+ | Method | `{method}` |
442
+ | Source | {source} |
443
+ {metrics_table}
444
+ ## How to Use
445
+
446
+ ```python
447
+ from transformers import AutoModelForCausalLM, AutoTokenizer
448
+
449
+ model = AutoModelForCausalLM.from_pretrained("{short_model}-OBLITERATED")
450
+ tokenizer = AutoTokenizer.from_pretrained("{short_model}-OBLITERATED")
451
+
452
+ prompt = "Hello, how are you?"
453
+ inputs = tokenizer(prompt, return_tensors="pt")
454
+ outputs = model.generate(**inputs, max_new_tokens=256)
455
+ print(tokenizer.decode(outputs[0], skip_special_tokens=True))
456
+ ```
457
+
458
+ ## About OBLITERATUS
459
+
460
+ OBLITERATUS is an open-source tool for removing refusal behavior from language
461
+ models via activation engineering (abliteration). Learn more at
462
+ [github.com/elder-plinius/OBLITERATUS](https://github.com/elder-plinius/OBLITERATUS).
463
+ """
464
+
465
+
466
+ def _get_hub_session_info(label: str) -> str:
467
+ """Return a markdown summary of the selected session model."""
468
+ if not label or label.startswith("("):
469
+ return ""
470
+ meta = _session_models.get(label)
471
+ if not meta:
472
+ return "*Session model not found — try refreshing the list.*"
473
+ lines = [
474
+ f"**Model:** `{meta.get('model_id', 'unknown')}`",
475
+ f"**Method:** `{meta.get('method', 'unknown')}`",
476
+ f"**Source:** {meta.get('source', 'unknown')}",
477
+ f"**Path:** `{meta.get('output_dir', 'N/A')}`",
478
+ ]
479
+ score = meta.get("tourney_score")
480
+ if score is not None:
481
+ lines.append(f"**Tourney score:** {score:.4f}")
482
+ return "\n".join(lines)
483
+
484
+
485
+ def _auto_hub_repo_id(label: str) -> str:
486
+ """Generate an auto-filled Hub repo ID for the selected session model."""
487
+ meta = _session_models.get(label)
488
+ if not meta:
489
+ return ""
490
+ model_id = meta.get("model_id", "")
491
+ import re
492
+ short = model_id.split("/")[-1] if "/" in model_id else model_id
493
+ short = re.sub(r"[^a-zA-Z0-9\-.]", "-", short)
494
+ return f"{_HUB_COMMUNITY_ORG}/{short}-OBLITERATED"
495
+
496
+
497
+ def push_session_to_hub(
498
+ session_label: str,
499
+ hub_repo_id: str,
500
+ hub_token_input: str,
501
+ refine_enabled: bool,
502
+ refine_regularization: float,
503
+ refine_passes: int,
504
+ progress=gr.Progress(),
505
+ ):
506
+ """Push a session model to HuggingFace Hub, with optional refinement."""
507
+ import os
508
+ import re
509
+
510
+ if not session_label or session_label.startswith("("):
511
+ yield "**Error:** Select a session model first.", ""
512
+ return
513
+
514
+ meta = _session_models.get(session_label)
515
+ if not meta:
516
+ yield "**Error:** Session model not found. Try refreshing the list.", ""
517
+ return
518
+
519
+ output_dir = meta.get("output_dir", "")
520
+ if not output_dir or not Path(output_dir).exists():
521
+ yield f"**Error:** Model directory not found: `{output_dir}`", ""
522
+ return
523
+
524
+ # Resolve repo ID
525
+ repo_id = hub_repo_id.strip() if hub_repo_id else ""
526
+ if not repo_id:
527
+ repo_id = _auto_hub_repo_id(session_label)
528
+ if not repo_id:
529
+ yield "**Error:** Could not determine Hub repo ID.", ""
530
+ return
531
+ if not re.match(r'^[a-zA-Z0-9_-]+/[a-zA-Z0-9_.-]+$', repo_id):
532
+ yield "**Error:** Invalid repo format. Use `username/model-name`.", ""
533
+ return
534
+
535
+ # Resolve token
536
+ token = hub_token_input.strip() if hub_token_input else None
537
+ if not token:
538
+ token = os.environ.get("HF_TOKEN") or _HUB_COMMUNITY_TOKEN
539
+ if not token:
540
+ yield (
541
+ "**Error:** No Hub token available. Enter a token above, "
542
+ "or set `HF_TOKEN` / `OBLITERATUS_HUB_TOKEN` as an environment variable.",
543
+ "",
544
+ )
545
+ return
546
+
547
+ # Optional refinement pass
548
+ if refine_enabled and refine_passes > 0:
549
+ progress(0.1, desc="Refining model...")
550
+ yield "Applying refinement passes...", ""
551
+ try:
552
+ from obliteratus.abliterate import AbliterationPipeline
553
+ from obliteratus.prompts import load_dataset_source
554
+
555
+ dataset_key = meta.get("dataset_key", "builtin")
556
+ if dataset_key == "custom":
557
+ dataset_key = "builtin"
558
+ harmful, harmless = load_dataset_source(dataset_key)
559
+ n = min(33, len(harmful), len(harmless))
560
+
561
+ pipeline = AbliterationPipeline(
562
+ model_name=output_dir, # load from saved checkpoint
563
+ output_dir=output_dir,
564
+ device="auto",
565
+ dtype="float16",
566
+ method=meta.get("method", "advanced"),
567
+ regularization=refine_regularization,
568
+ refinement_passes=refine_passes,
569
+ harmful_prompts=harmful[:n],
570
+ harmless_prompts=harmless[:n],
571
+ )
572
+ pipeline.run()
573
+ except Exception as e:
574
+ yield f"**Refinement failed:** {e}", ""
575
+ return
576
+
577
+ # Generate model card
578
+ progress(0.5, desc="Generating model card...")
579
+ yield f"Generating model card and uploading to `{repo_id}`...", ""
580
+ card_content = _generate_model_card(meta)
581
+ card_path = Path(output_dir) / "README.md"
582
+ card_path.write_text(card_content)
583
+
584
+ # Upload to Hub
585
+ progress(0.6, desc="Uploading to Hub...")
586
+ try:
587
+ from huggingface_hub import HfApi
588
+ api = HfApi(token=token)
589
+ api.create_repo(repo_id, exist_ok=True)
590
+
591
+ method = meta.get("method", "unknown")
592
+ model_id = meta.get("model_id", "unknown")
593
+ api.upload_folder(
594
+ folder_path=output_dir,
595
+ repo_id=repo_id,
596
+ commit_message=f"OBLITERATUS: {method} on {model_id}",
597
+ )
598
+ except Exception as e:
599
+ yield f"**Upload failed:** {e}", ""
600
+ return
601
+
602
+ progress(1.0, desc="Done!")
603
+ hub_url = f"https://huggingface.co/{repo_id}"
604
+ yield (
605
+ f"**Pushed successfully to [{repo_id}]({hub_url})**",
606
+ f"[Open on HuggingFace Hub]({hub_url})",
607
+ )
608
+
609
+
610
  PROMPT_VOLUMES = {
611
  "33 (fast)": 33,
612
  "66 (better signal)": 66,
 
655
  # ---------------------------------------------------------------------------
656
 
657
  def _clear_gpu():
658
+ """Free GPU/accelerator memory. Resilient to device errors."""
659
  with _lock:
660
  _state["model"] = None
661
  _state["tokenizer"] = None
662
+ dev.free_gpu_memory()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
663
 
664
 
665
  def _install_steering_hooks(model, steering_meta: dict) -> int:
 
783
  # ---------------------------------------------------------------------------
784
 
785
  def _get_vram_html() -> str:
786
+ """Return an HTML snippet showing GPU/accelerator memory usage as a styled bar."""
787
+ if not dev.is_gpu_available():
788
  return (
789
  '<div style="text-align:center;color:#4a5568;font-size:0.72rem;'
790
  'letter-spacing:1px;margin-top:6px;">CPU ONLY — NO GPU DETECTED</div>'
791
  )
792
  try:
793
+ mem = dev.get_memory_info()
794
+ used = mem.used_gb
795
+ total = mem.total_gb
796
  pct = (used / total * 100) if total > 0 else 0
797
  # Color shifts from green → yellow → red
798
  if pct < 50:
 
801
  bar_color = "#ffcc00"
802
  else:
803
  bar_color = "#ff003c"
804
+ device_name = mem.device_name
805
+ reserved_html = (
806
+ f'<span style="color:#4a5568;">reserved: {mem.reserved_gb:.1f} GB</span>'
807
+ if mem.reserved_gb > 0
808
+ else f'<span style="color:#4a5568;">unified memory</span>'
809
+ )
810
  return (
811
  f'<div style="margin:6px auto 0;max-width:480px;">'
812
  f'<div style="display:flex;justify-content:space-between;font-size:0.68rem;'
813
  f'color:#4a5568;letter-spacing:1px;margin-bottom:2px;">'
814
+ f'<span>{device_name}</span>'
815
  f'<span>{used:.1f} / {total:.1f} GB ({pct:.0f}%)</span></div>'
816
  f'<div style="background:#0a0a0f;border:1px solid #1a1f2e;border-radius:3px;'
817
  f'height:10px;overflow:hidden;">'
 
819
  f'box-shadow:0 0 6px {bar_color};transition:width 0.5s ease;"></div></div>'
820
  f'<div style="display:flex;justify-content:space-between;font-size:0.6rem;'
821
  f'color:#333;margin-top:1px;">'
822
+ f'{reserved_html}</div>'
823
  f'</div>'
824
  )
825
  except Exception:
826
+ return '<div style="text-align:center;color:#4a5568;font-size:0.72rem;">Memory: unavailable</div>'
827
 
828
 
829
  # ---------------------------------------------------------------------------
 
1266
  pass
1267
  pipeline_ref[0] = None
1268
  gc.collect()
1269
+ dev.empty_cache()
 
1270
 
1271
  yield (
1272
  f"**{method_key} complete** ({mi + 1}/{len(methods_to_test)}) \u2014 {_bench_elapsed()}",
 
1616
  pass
1617
  pipeline_ref[0] = None
1618
  gc.collect()
1619
+ dev.empty_cache()
 
1620
 
1621
  yield (
1622
  f"**{model_id} complete** ({mi + 1}/{len(model_choices)}) \u2014 {_mm_elapsed()}",
 
1715
 
1716
  @spaces.GPU(duration=300)
1717
  def obliterate(model_choice: str, method_choice: str,
 
1718
  prompt_volume_choice: str, dataset_source_choice: str,
1719
  custom_harmful: str, custom_harmless: str,
1720
  # Advanced params (sliders)
 
1747
  model_id = MODELS.get(model_choice, model_choice)
1748
  is_preset = model_choice in MODELS
1749
  method = METHODS.get(method_choice, "advanced")
 
 
 
 
 
 
 
 
1750
  prompt_volume = PROMPT_VOLUMES.get(prompt_volume_choice, 33)
1751
 
1752
  # Resolve "adaptive" → telemetry-recommended method for this model
 
1794
  )
1795
  return
1796
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1797
  # Resolve dataset source — custom prompts override the dropdown
1798
  use_custom = custom_harmful and custom_harmful.strip()
1799
  dataset_key = get_source_key_from_label(dataset_source_choice) if dataset_source_choice else "builtin"
 
1867
  output_dir=save_dir,
1868
  device="auto",
1869
  dtype="float16",
 
 
 
1870
  quantization=quantization,
1871
  trust_remote_code=is_preset,
1872
  harmful_prompts=harmful_all[:n],
 
1884
  device="auto",
1885
  dtype="float16",
1886
  method=method,
 
 
 
1887
  quantization=quantization,
1888
  trust_remote_code=is_preset,
1889
  harmful_prompts=harmful_all[:n],
 
1936
  log_lines.append(f"Dataset: {source_label}")
1937
  vol_label = "all" if prompt_volume == -1 else str(prompt_volume)
1938
  log_lines.append(f"Prompt volume: {vol_label} pairs")
 
 
 
 
 
 
1939
  if quantization:
1940
  log_lines.append(f"Quantization: {quantization} (auto-detected for GPU fit)")
1941
  log_lines.append("")
 
2274
  _needs_reload = model is None or tokenizer is None
2275
  if not _needs_reload:
2276
  try:
2277
+ model_dev = next(model.parameters()).device
2278
+ if model_dev.type == "meta":
2279
  _needs_reload = True
2280
+ elif dev.is_gpu_available() and model_dev.type not in ("cuda", "mps"):
2281
+ model.to(dev.get_device())
2282
  except Exception:
2283
  _needs_reload = True
2284
 
 
2708
  _needs_reload = abliterated_model is None or tokenizer is None
2709
  if not _needs_reload:
2710
  try:
2711
+ model_dev = next(abliterated_model.parameters()).device
2712
+ if model_dev.type == "meta":
2713
  _needs_reload = True
2714
+ elif dev.is_gpu_available() and model_dev.type not in ("cuda", "mps"):
2715
+ abliterated_model.to(dev.get_device())
2716
  except Exception:
2717
  _needs_reload = True
2718
 
 
2845
  abl_device = next(abliterated_model.parameters()).device
2846
  abliterated_model.to("cpu")
2847
  gc.collect()
2848
+ dev.empty_cache()
 
2849
 
2850
  model_id = MODELS.get(model_name, model_name)
2851
  # Only trust remote code for known preset models, not arbitrary user-supplied IDs
 
2897
  # Free the original model
2898
  del original_model
2899
  gc.collect()
2900
+ dev.empty_cache()
 
2901
 
2902
  except Exception as e:
2903
  original_response = f"*Could not load original model for comparison: {e}*"
 
2906
  # Use torch.device("cuda") rather than the captured abl_device, since
2907
  # on ZeroGPU the original device reference may point to a stale context.
2908
  try:
2909
+ restore_device = torch.device(dev.get_device()) if dev.is_gpu_available() else abl_device
2910
  abliterated_model.to(restore_device)
2911
  except Exception:
2912
  pass # If GPU restore fails, model stays on CPU (still usable)
 
3024
 
3025
  # Cleanup between runs
3026
  gc.collect()
3027
+ dev.empty_cache()
 
3028
 
3029
  # Generate dose-response curve
3030
  gallery = None
 
3116
  return "\n".join(lines)
3117
 
3118
 
3119
+ # ---------------------------------------------------------------------------
3120
+ # Tournament
3121
+ # ---------------------------------------------------------------------------
3122
+
3123
+ @spaces.GPU(duration=300)
3124
+ def run_tourney(model_choice, dataset, quantization):
3125
+ """Run an elimination tournament across all abliteration methods.
3126
+
3127
+ On ZeroGPU Spaces the @spaces.GPU decorator allocates a GPU for the
3128
+ duration of the tournament (up to 5 minutes).
3129
+ """
3130
+ if not model_choice or not model_choice.strip():
3131
+ yield "**Error:** Select a model first.", "", ""
3132
+ return
3133
+
3134
+ from obliteratus.tourney import TourneyRunner, render_bracket
3135
+
3136
+ # Resolve display label → HuggingFace model ID
3137
+ model_id = model_choice.strip()
3138
+ if model_id in MODELS:
3139
+ model_id = MODELS[model_id]
3140
+
3141
+ quant = quantization if quantization != "none" else None
3142
+
3143
+ log_lines = []
3144
+
3145
+ def on_log(msg):
3146
+ log_lines.append(msg)
3147
+
3148
+ def on_round(rnd):
3149
+ pass # logged via on_log
3150
+
3151
+ dataset_key = get_source_key_from_label(dataset) if dataset else "builtin"
3152
+
3153
+ runner = TourneyRunner(
3154
+ model_name=model_id,
3155
+ hub_org=None,
3156
+ hub_repo=None,
3157
+ dataset_key=dataset_key,
3158
+ quantization=quant,
3159
+ on_log=on_log,
3160
+ on_round=on_round,
3161
+ )
3162
+
3163
+ # Run tournament in a background thread so we can yield progress
3164
+ import threading
3165
+ result_ref = [None]
3166
+ error_ref = [None]
3167
+
3168
+ def _run():
3169
+ try:
3170
+ result_ref[0] = runner.run()
3171
+ except Exception as e:
3172
+ error_ref[0] = e
3173
+
3174
+ thread = threading.Thread(target=_run, daemon=True)
3175
+ thread.start()
3176
+
3177
+ while thread.is_alive():
3178
+ yield (
3179
+ "**Tournament in progress...**",
3180
+ "",
3181
+ "\n".join(log_lines[-100:]),
3182
+ )
3183
+ time.sleep(1.0)
3184
+
3185
+ thread.join(timeout=10)
3186
+
3187
+ if error_ref[0]:
3188
+ yield (
3189
+ f"**Error:** {error_ref[0]}",
3190
+ "",
3191
+ "\n".join(log_lines),
3192
+ )
3193
+ return
3194
+
3195
+ result = result_ref[0]
3196
+ if result and result.winner:
3197
+ bracket_md = render_bracket(result)
3198
+ # Register winner in session models for Push to Hub tab
3199
+ if result.winner.output_dir:
3200
+ _ts = datetime.now().strftime("%H:%M")
3201
+ _short = model_id.split("/")[-1] if "/" in model_id else model_id
3202
+ _label = f"tourney winner ({result.winner.method}) on {_short} ({_ts})"
3203
+ with _lock:
3204
+ _session_models[_label] = {
3205
+ "model_id": model_id,
3206
+ "model_choice": model_choice,
3207
+ "method": result.winner.method,
3208
+ "dataset_key": dataset_key,
3209
+ "prompt_volume": 0,
3210
+ "output_dir": result.winner.output_dir,
3211
+ "source": "tourney",
3212
+ "tourney_score": result.winner.score,
3213
+ "tourney_metrics": result.winner.metrics,
3214
+ }
3215
+ yield (
3216
+ f"**Champion: `{result.winner.method}`** "
3217
+ f"(score: {result.winner.score:.4f})\n"
3218
+ f"Push it to HuggingFace Hub from the **Push to Hub** tab.",
3219
+ bracket_md,
3220
+ "\n".join(log_lines),
3221
+ )
3222
+ else:
3223
+ yield (
3224
+ "**Tournament complete** — no winner determined.",
3225
+ "",
3226
+ "\n".join(log_lines),
3227
+ )
3228
+
3229
+
3230
  # ---------------------------------------------------------------------------
3231
  # Export Research Artifacts
3232
  # ---------------------------------------------------------------------------
 
3787
  lines=5,
3788
  )
3789
 
3790
+ gr.Markdown(
3791
+ "*After obliterating, push your model to HuggingFace Hub from the **Push to Hub** tab.*",
3792
+ elem_classes=["hub-hint"],
3793
+ )
 
 
 
 
 
 
 
 
 
 
3794
 
3795
  # ── Advanced Settings (auto-populated from method preset) ────
3796
  _defaults = _get_preset_defaults("advanced (recommended)")
 
4422
  with gr.Tab("Tourney", id="tourney"):
4423
  gr.Markdown("""### March Madness Tournament
4424
  Pit **all abliteration methods** against each other in elimination rounds.
4425
+ The winner is saved locally — push it to HuggingFace Hub from the **Push to Hub** tab.
4426
 
4427
  **Round 1 — Qualifiers:** All methods, reduced prompts. Bottom half eliminated.
4428
  **Round 2 — Semifinals:** Survivors, full prompts. Bottom half eliminated.
4429
  **Round 3 — Finals:** Top contenders, maximum prompts. Champion crowned.
4430
  """)
4431
+ tourney_model_dd = gr.Dropdown(
4432
+ choices=list(MODELS.keys()),
4433
+ value="Alibaba (Qwen) / Qwen3-4B",
4434
+ label="Target Model",
4435
+ info="Select a model to tournament-abliterate",
4436
+ allow_custom_value=True,
4437
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4438
 
4439
  with gr.Accordion("Advanced Settings", open=False):
4440
  with gr.Row():
 
4463
  interactive=False,
4464
  )
4465
 
4466
+ tourney_btn.click(
4467
+ fn=run_tourney,
4468
+ inputs=[tourney_model_dd,
4469
  tourney_dataset_dd, tourney_quant_dd],
4470
  outputs=[tourney_status, tourney_bracket, tourney_log],
4471
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4472
 
4473
  # ── Tab 7: Export ────────────────────────────────────────────────���
4474
  with gr.Tab("Export", id="export"):
 
4491
  outputs=[export_file, export_status],
4492
  )
4493
 
4494
+ # ── Tab: Push to Hub ──────────────────────────────────────────────
4495
+ with gr.Tab("Push to Hub", id="push_hub"):
4496
+ gr.Markdown("""### Push to HuggingFace Hub
4497
+ Select any session model from your Obliterate, Benchmark, or Tourney runs,
4498
+ optionally apply a quick refinement pass, then push to HuggingFace Hub
4499
+ with the **-OBLITERATED** tag.
4500
+ """)
4501
+
4502
+ with gr.Row():
4503
+ with gr.Column(scale=2):
4504
+ push_session_dd = gr.Dropdown(
4505
+ choices=_get_session_model_choices(),
4506
+ label="Session Model",
4507
+ info="Pick a model from any tab's output",
4508
+ )
4509
+ push_refresh_btn = gr.Button("Refresh List", variant="secondary", size="sm")
4510
+ push_model_info = gr.Markdown("")
4511
+
4512
+ with gr.Column(scale=1):
4513
+ push_repo_id = gr.Textbox(
4514
+ label="Hub Repo ID",
4515
+ placeholder="auto-filled, or type your own",
4516
+ info="e.g. my-org/my-model-OBLITERATED",
4517
+ )
4518
+ push_token = gr.Textbox(
4519
+ label="HF Token (optional)",
4520
+ placeholder="hf_...",
4521
+ type="password",
4522
+ info="Leave blank to use HF_TOKEN env var or community token",
4523
+ )
4524
+ push_repo_warning = gr.Markdown("")
4525
+
4526
+ with gr.Accordion("Quick Refiner (optional)", open=False):
4527
+ gr.Markdown(
4528
+ "*Optionally apply extra refinement passes to your model before pushing. "
4529
+ "This re-runs the abliteration pipeline with adjusted regularization.*"
4530
+ )
4531
+ with gr.Row():
4532
+ push_refine_reg = gr.Slider(
4533
+ 0.0, 1.0, value=0.1, step=0.05,
4534
+ label="Regularization",
4535
+ info="Weight preservation (0 = full removal, 1 = no change)",
4536
+ )
4537
+ push_refine_passes = gr.Slider(
4538
+ 0, 3, value=0, step=1,
4539
+ label="Extra Refinement Passes",
4540
+ info="0 = skip refinement, 1-3 = apply additional passes",
4541
+ )
4542
+ push_refine_enabled = gr.Checkbox(
4543
+ label="Apply refinement before pushing",
4544
+ value=False,
4545
+ )
4546
+
4547
+ push_btn = gr.Button(
4548
+ "Push to Hub",
4549
+ variant="primary",
4550
+ size="lg",
4551
+ )
4552
+ push_status = gr.Markdown("")
4553
+ push_link = gr.Markdown("")
4554
+
4555
+ # -- Event wiring (inline since components are scoped to this tab) --
4556
+
4557
+ push_refresh_btn.click(
4558
+ fn=lambda: gr.update(choices=_get_session_model_choices()),
4559
+ outputs=[push_session_dd],
4560
+ )
4561
+
4562
+ push_session_dd.change(
4563
+ fn=lambda label: (_get_hub_session_info(label), _auto_hub_repo_id(label)),
4564
+ inputs=[push_session_dd],
4565
+ outputs=[push_model_info, push_repo_id],
4566
+ )
4567
+
4568
+ push_repo_id.change(
4569
+ fn=_validate_hub_repo,
4570
+ inputs=[push_repo_id],
4571
+ outputs=[push_repo_warning],
4572
+ )
4573
+
4574
+ push_btn.click(
4575
+ fn=push_session_to_hub,
4576
+ inputs=[push_session_dd, push_repo_id, push_token,
4577
+ push_refine_enabled, push_refine_reg, push_refine_passes],
4578
+ outputs=[push_status, push_link],
4579
+ )
4580
+
4581
+ # ── Tab: Leaderboard ────────────────────────────────────────────
4582
  with gr.Tab("Leaderboard", id="leaderboard"):
4583
  gr.Markdown("""### Community Leaderboard
4584
  All benchmark results from **every OBLITERATUS Space** (including duplicated copies) are
 
4804
  outputs=[prompt_vol_dd, dataset_info_md],
4805
  )
4806
 
 
 
 
 
 
 
4807
 
4808
  # Wire benchmark → Chat/A/B cross-tab dropdown updates
4809
  bench_btn.click(
 
4852
  # may not fire after generator teardown.
4853
  obliterate_btn.click(
4854
  fn=obliterate,
4855
+ inputs=[model_dd, method_dd, prompt_vol_dd, dataset_dd,
4856
  custom_harmful_tb, custom_harmless_tb] + _adv_controls,
4857
  outputs=[status_md, log_box, chat_status, session_model_dd, metrics_md, ab_session_model_dd],
4858
  ).then(
obliteratus/.DS_Store CHANGED
Binary files a/obliteratus/.DS_Store and b/obliteratus/.DS_Store differ
 
obliteratus/abliterate.py CHANGED
@@ -33,11 +33,12 @@ from typing import Any, Callable
33
  import torch
34
  import torch.nn as nn
35
 
 
 
36
  # Reduce CUDA memory fragmentation for large models. Must be set before any
37
  # CUDA allocations, so we do it at import time. This is the PyTorch-recommended
38
  # fix for "reserved but unallocated" memory issues.
39
- if "PYTORCH_CUDA_ALLOC_CONF" not in os.environ and torch.cuda.is_available():
40
- os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"
41
 
42
  from obliteratus.models.loader import ModelHandle, load_model # noqa: E402
43
  from obliteratus.strategies.utils import ( # noqa: E402
@@ -788,16 +789,8 @@ class AbliterationPipeline:
788
 
789
  @staticmethod
790
  def _free_gpu_memory():
791
- """Release unused GPU memory between pipeline stages."""
792
- import gc
793
- gc.collect()
794
- if torch.cuda.is_available():
795
- try:
796
- torch.cuda.empty_cache()
797
- except Exception:
798
- # CUDA may be in an error state after illegal memory access;
799
- # swallow so we don't cascade into every subsequent stage.
800
- pass
801
 
802
  @staticmethod
803
  def _get_model_device(model: nn.Module) -> torch.device:
@@ -1404,12 +1397,8 @@ class AbliterationPipeline:
1404
  max_length = self.max_seq_length
1405
  else:
1406
  max_length = 384 if collect_multi_pos else 256
1407
- free_gb = 0.0
1408
- if torch.cuda.is_available():
1409
- free_gb = sum(
1410
- torch.cuda.mem_get_info(i)[0] / (1024 ** 3)
1411
- for i in range(torch.cuda.device_count())
1412
- )
1413
  if self.max_seq_length is None and free_gb < 2.0:
1414
  max_length = 64
1415
  self.log(f" Low GPU memory ({free_gb:.1f} GB free), using max_length={max_length}")
@@ -1999,22 +1988,22 @@ class AbliterationPipeline:
1999
  # Memory-aware cap: SAE encoder+decoder use
2000
  # 2 * hidden * (expansion * hidden) * 4 bytes
2001
  sae_mem_mb = 2 * hidden_dim * (sae_expansion * hidden_dim) * 4 / 1e6
2002
- if torch.cuda.is_available():
2003
  try:
2004
- free_mb = torch.cuda.mem_get_info()[0] / 1e6
2005
  # Leave 512 MB headroom for other ops
2006
  while sae_mem_mb > (free_mb - 512) and sae_expansion > 1:
2007
  sae_expansion //= 2
2008
  sae_mem_mb = 2 * hidden_dim * (sae_expansion * hidden_dim) * 4 / 1e6
2009
  except Exception:
2010
  pass # Fallback to hidden_dim-based heuristic
2011
- # Use GPU when enough headroom exists (SAE is small relative to model)
2012
  sae_device = "cpu"
2013
- if torch.cuda.is_available():
2014
  try:
2015
- sae_free_mb = torch.cuda.mem_get_info()[0] / 1e6
2016
  if sae_free_mb > sae_mem_mb + 1024:
2017
- sae_device = "cuda"
2018
  except Exception:
2019
  pass
2020
  sae = train_sae(
@@ -4155,7 +4144,8 @@ class AbliterationPipeline:
4155
  continue
4156
  original_norm = saved_norms[param_name]
4157
  if original_norm > 0:
4158
- data = param.data.float() if not param.data.is_floating_point() else param.data
 
4159
  new_norm = data.norm().item()
4160
  if math.isnan(new_norm) or math.isinf(new_norm) or new_norm == 0:
4161
  continue # Skip — weight is degenerate after projection
@@ -4165,7 +4155,12 @@ class AbliterationPipeline:
4165
  # layers. Uncapped amplification destroys coherence.
4166
  if ratio > _MAX_NORM_RATIO:
4167
  ratio = _MAX_NORM_RATIO
4168
- param.data.mul_(ratio)
 
 
 
 
 
4169
 
4170
  @staticmethod
4171
  def _project_out_advanced(
@@ -5371,16 +5366,19 @@ class AbliterationPipeline:
5371
  unique_ratio = len(set(words)) / len(words)
5372
  if unique_ratio > 0.2:
5373
  coherent_count += 1
5374
- except torch.cuda.OutOfMemoryError:
5375
- self._free_gpu_memory()
5376
- self.log(" Skipping generation tests (CUDA out of memory — model too large for KV cache)")
5377
- generation_failed = True
5378
  except (RuntimeError, Exception) as e:
5379
- err_msg = str(e)
5380
- if "CUDA" in err_msg or "illegal" in err_msg.lower():
5381
  self._free_gpu_memory()
5382
- self.log(f" Skipping generation tests (CUDA error: {err_msg[:120]})")
5383
  generation_failed = True
 
 
 
 
 
 
 
 
5384
  else:
5385
  raise
5386
 
@@ -5523,18 +5521,21 @@ class AbliterationPipeline:
5523
 
5524
  del inputs, outputs
5525
  self._free_gpu_memory()
5526
- except torch.cuda.OutOfMemoryError:
5527
- self._free_gpu_memory()
5528
- self.log(f" [batch {batch_start+1}-{batch_end}] CUDA OOM — stopping")
5529
- self.log(" Skipping remaining refusal tests (CUDA out of memory)")
5530
- oom_break = True
5531
  except (RuntimeError, Exception) as e:
5532
- err_msg = str(e)
5533
- if "CUDA" in err_msg or "illegal" in err_msg.lower():
5534
  self._free_gpu_memory()
5535
- self.log(f" [batch {batch_start+1}-{batch_end}] CUDA error — stopping")
5536
- self.log(f" Skipping remaining refusal tests (CUDA error: {err_msg[:120]})")
5537
  oom_break = True
 
 
 
 
 
 
 
 
 
5538
  else:
5539
  raise
5540
 
 
33
  import torch
34
  import torch.nn as nn
35
 
36
+ from obliteratus import device as dev # noqa: E402 — must import before CUDA setup
37
+
38
  # Reduce CUDA memory fragmentation for large models. Must be set before any
39
  # CUDA allocations, so we do it at import time. This is the PyTorch-recommended
40
  # fix for "reserved but unallocated" memory issues.
41
+ dev.configure_cuda_alloc()
 
42
 
43
  from obliteratus.models.loader import ModelHandle, load_model # noqa: E402
44
  from obliteratus.strategies.utils import ( # noqa: E402
 
789
 
790
  @staticmethod
791
  def _free_gpu_memory():
792
+ """Release unused GPU/accelerator memory between pipeline stages."""
793
+ dev.free_gpu_memory()
 
 
 
 
 
 
 
 
794
 
795
  @staticmethod
796
  def _get_model_device(model: nn.Module) -> torch.device:
 
1397
  max_length = self.max_seq_length
1398
  else:
1399
  max_length = 384 if collect_multi_pos else 256
1400
+ free_gb = dev.get_total_free_gb()
1401
+ if dev.is_gpu_available():
 
 
 
 
1402
  if self.max_seq_length is None and free_gb < 2.0:
1403
  max_length = 64
1404
  self.log(f" Low GPU memory ({free_gb:.1f} GB free), using max_length={max_length}")
 
1988
  # Memory-aware cap: SAE encoder+decoder use
1989
  # 2 * hidden * (expansion * hidden) * 4 bytes
1990
  sae_mem_mb = 2 * hidden_dim * (sae_expansion * hidden_dim) * 4 / 1e6
1991
+ if dev.is_gpu_available():
1992
  try:
1993
+ free_mb = dev.get_total_free_gb() * 1024
1994
  # Leave 512 MB headroom for other ops
1995
  while sae_mem_mb > (free_mb - 512) and sae_expansion > 1:
1996
  sae_expansion //= 2
1997
  sae_mem_mb = 2 * hidden_dim * (sae_expansion * hidden_dim) * 4 / 1e6
1998
  except Exception:
1999
  pass # Fallback to hidden_dim-based heuristic
2000
+ # Use GPU/MPS when enough headroom exists (SAE is small relative to model)
2001
  sae_device = "cpu"
2002
+ if dev.is_gpu_available():
2003
  try:
2004
+ sae_free_mb = dev.get_total_free_gb() * 1024
2005
  if sae_free_mb > sae_mem_mb + 1024:
2006
+ sae_device = dev.get_device()
2007
  except Exception:
2008
  pass
2009
  sae = train_sae(
 
4144
  continue
4145
  original_norm = saved_norms[param_name]
4146
  if original_norm > 0:
4147
+ needs_cast = not param.data.is_floating_point()
4148
+ data = param.data.float() if needs_cast else param.data
4149
  new_norm = data.norm().item()
4150
  if math.isnan(new_norm) or math.isinf(new_norm) or new_norm == 0:
4151
  continue # Skip — weight is degenerate after projection
 
4155
  # layers. Uncapped amplification destroys coherence.
4156
  if ratio > _MAX_NORM_RATIO:
4157
  ratio = _MAX_NORM_RATIO
4158
+ if needs_cast:
4159
+ # Non-float dtypes (e.g. uint8) can't mul_ by a float
4160
+ # scalar in-place — rescale in float then cast back.
4161
+ param.data.copy_(data.mul_(ratio).to(param.data.dtype))
4162
+ else:
4163
+ param.data.mul_(ratio)
4164
 
4165
  @staticmethod
4166
  def _project_out_advanced(
 
5366
  unique_ratio = len(set(words)) / len(words)
5367
  if unique_ratio > 0.2:
5368
  coherent_count += 1
 
 
 
 
5369
  except (RuntimeError, Exception) as e:
5370
+ if dev.is_oom_error(e):
 
5371
  self._free_gpu_memory()
5372
+ self.log(" Skipping generation tests (out of memory — model too large for KV cache)")
5373
  generation_failed = True
5374
+ elif isinstance(e, RuntimeError):
5375
+ err_msg = str(e)
5376
+ if "CUDA" in err_msg or "MPS" in err_msg or "illegal" in err_msg.lower():
5377
+ self._free_gpu_memory()
5378
+ self.log(f" Skipping generation tests (device error: {err_msg[:120]})")
5379
+ generation_failed = True
5380
+ else:
5381
+ raise
5382
  else:
5383
  raise
5384
 
 
5521
 
5522
  del inputs, outputs
5523
  self._free_gpu_memory()
 
 
 
 
 
5524
  except (RuntimeError, Exception) as e:
5525
+ if dev.is_oom_error(e):
 
5526
  self._free_gpu_memory()
5527
+ self.log(f" [batch {batch_start+1}-{batch_end}] OOM — stopping")
5528
+ self.log(" Skipping remaining refusal tests (out of memory)")
5529
  oom_break = True
5530
+ elif isinstance(e, RuntimeError):
5531
+ err_msg = str(e)
5532
+ if "CUDA" in err_msg or "MPS" in err_msg or "illegal" in err_msg.lower():
5533
+ self._free_gpu_memory()
5534
+ self.log(f" [batch {batch_start+1}-{batch_end}] device error — stopping")
5535
+ self.log(f" Skipping remaining refusal tests (device error: {err_msg[:120]})")
5536
+ oom_break = True
5537
+ else:
5538
+ raise
5539
  else:
5540
  raise
5541
 
obliteratus/analysis/sae_abliteration.py CHANGED
@@ -39,6 +39,7 @@ from dataclasses import dataclass
39
 
40
  import torch
41
  import torch.nn as nn
 
42
 
43
 
44
  @dataclass
@@ -120,11 +121,11 @@ def _auto_detect_device(device: str | None = None) -> str:
120
  """
121
  if device is not None and device not in ("auto",):
122
  return device
123
- if torch.cuda.is_available():
124
  try:
125
- free_mb = torch.cuda.mem_get_info()[0] / 1e6
126
  if free_mb > 512:
127
- return "cuda"
128
  except Exception:
129
  pass
130
  return "cpu"
 
39
 
40
  import torch
41
  import torch.nn as nn
42
+ from obliteratus import device as dev
43
 
44
 
45
  @dataclass
 
121
  """
122
  if device is not None and device not in ("auto",):
123
  return device
124
+ if dev.is_gpu_available():
125
  try:
126
+ free_mb = dev.get_total_free_gb() * 1024
127
  if free_mb > 512:
128
+ return dev.get_device()
129
  except Exception:
130
  pass
131
  return "cpu"
obliteratus/device.py ADDED
@@ -0,0 +1,305 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Unified device abstraction for CUDA, MPS (Apple Silicon), and CPU.
2
+
3
+ All device-specific queries (availability, memory, cache management) go through
4
+ this module so the rest of the codebase never calls ``torch.cuda.*`` directly.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import gc
10
+ import logging
11
+ import os
12
+ import platform
13
+ from dataclasses import dataclass
14
+
15
+ import torch
16
+
17
+ logger = logging.getLogger(__name__)
18
+
19
+ # ---------------------------------------------------------------------------
20
+ # Device detection
21
+ # ---------------------------------------------------------------------------
22
+
23
+ def is_cuda() -> bool:
24
+ """True when at least one NVIDIA CUDA GPU is visible."""
25
+ return torch.cuda.is_available()
26
+
27
+
28
+ def is_mps() -> bool:
29
+ """True when Apple Metal Performance Shaders backend is usable."""
30
+ return (
31
+ hasattr(torch.backends, "mps")
32
+ and torch.backends.mps.is_available()
33
+ and torch.backends.mps.is_built()
34
+ )
35
+
36
+
37
+ def is_gpu_available() -> bool:
38
+ """True if *any* GPU backend (CUDA or MPS) is available."""
39
+ return is_cuda() or is_mps()
40
+
41
+
42
+ def get_device(preference: str = "auto") -> str:
43
+ """Resolve a device string.
44
+
45
+ Parameters
46
+ ----------
47
+ preference : str
48
+ ``"auto"`` picks the best GPU, ``"cuda"``/``"mps"``/``"cpu"`` forces.
49
+
50
+ Returns
51
+ -------
52
+ str
53
+ A PyTorch device string (``"cuda"``, ``"mps"``, or ``"cpu"``).
54
+ """
55
+ if preference == "auto":
56
+ if is_cuda():
57
+ return "cuda"
58
+ if is_mps():
59
+ return "mps"
60
+ return "cpu"
61
+ return preference
62
+
63
+
64
+ def get_device_name() -> str:
65
+ """Human-readable name of the current accelerator."""
66
+ if is_cuda():
67
+ return torch.cuda.get_device_name(0)
68
+ if is_mps():
69
+ # Apple doesn't expose a per-chip name via MPS; use platform info.
70
+ chip = platform.processor() or "Apple Silicon"
71
+ return f"Apple {chip} (MPS)"
72
+ return "CPU"
73
+
74
+
75
+ def device_count() -> int:
76
+ """Number of accelerator devices (GPUs or MPS slots)."""
77
+ if is_cuda():
78
+ return torch.cuda.device_count()
79
+ if is_mps():
80
+ return 1 # MPS always exposes a single unified device
81
+ return 0
82
+
83
+
84
+ # ---------------------------------------------------------------------------
85
+ # Memory information
86
+ # ---------------------------------------------------------------------------
87
+
88
+ @dataclass
89
+ class MemoryInfo:
90
+ """Snapshot of accelerator memory (in GB)."""
91
+
92
+ used_gb: float = 0.0
93
+ reserved_gb: float = 0.0
94
+ total_gb: float = 0.0
95
+ free_gb: float = 0.0
96
+ device_name: str = "CPU"
97
+
98
+
99
+ def _system_memory_gb() -> tuple[float, float]:
100
+ """Return (total_gb, available_gb) of system RAM."""
101
+ try:
102
+ import psutil
103
+ vm = psutil.virtual_memory()
104
+ return vm.total / 1024 ** 3, vm.available / 1024 ** 3
105
+ except ImportError:
106
+ pass
107
+ try:
108
+ total = os.sysconf("SC_PHYS_PAGES") * os.sysconf("SC_PAGE_SIZE") / 1024 ** 3
109
+ # Rough estimate: assume 60 % available if we can't query
110
+ return total, total * 0.6
111
+ except (AttributeError, ValueError):
112
+ return 16.0, 8.0 # conservative fallback
113
+
114
+
115
+ def get_memory_info(device_index: int = 0) -> MemoryInfo:
116
+ """Query memory for the given accelerator (or system RAM for MPS/CPU)."""
117
+ name = get_device_name()
118
+
119
+ if is_cuda():
120
+ try:
121
+ free, total = torch.cuda.mem_get_info(device_index)
122
+ used = torch.cuda.memory_allocated(device_index)
123
+ reserved = torch.cuda.memory_reserved(device_index)
124
+ total_gb = total / 1024 ** 3
125
+ return MemoryInfo(
126
+ used_gb=used / 1024 ** 3,
127
+ reserved_gb=reserved / 1024 ** 3,
128
+ total_gb=total_gb,
129
+ free_gb=free / 1024 ** 3,
130
+ device_name=name,
131
+ )
132
+ except Exception:
133
+ props = torch.cuda.get_device_properties(device_index)
134
+ total_gb = props.total_memory / 1024 ** 3
135
+ return MemoryInfo(total_gb=total_gb, free_gb=total_gb, device_name=name)
136
+
137
+ if is_mps():
138
+ # MPS uses unified memory — report system RAM as a proxy.
139
+ total, avail = _system_memory_gb()
140
+ # Apple's unified memory is shared with the OS, so usable fraction
141
+ # is typically ~65-75 % of total.
142
+ usable = total * 0.70
143
+ return MemoryInfo(
144
+ used_gb=max(usable - avail, 0.0),
145
+ reserved_gb=0.0,
146
+ total_gb=usable,
147
+ free_gb=min(avail, usable),
148
+ device_name=name,
149
+ )
150
+
151
+ # CPU-only
152
+ total, avail = _system_memory_gb()
153
+ return MemoryInfo(total_gb=total, free_gb=avail, device_name=name)
154
+
155
+
156
+ def get_total_free_gb() -> float:
157
+ """Sum of free memory across all accelerator devices, in GB."""
158
+ if is_cuda():
159
+ total_free = 0.0
160
+ for i in range(torch.cuda.device_count()):
161
+ try:
162
+ free, _ = torch.cuda.mem_get_info(i)
163
+ total_free += free / 1024 ** 3
164
+ except Exception:
165
+ props = torch.cuda.get_device_properties(i)
166
+ total_free += props.total_memory / 1024 ** 3
167
+ return total_free
168
+ if is_mps():
169
+ _, avail = _system_memory_gb()
170
+ return avail * 0.70 # usable fraction
171
+ return 0.0
172
+
173
+
174
+ # ---------------------------------------------------------------------------
175
+ # Cache / memory management
176
+ # ---------------------------------------------------------------------------
177
+
178
+ def empty_cache() -> None:
179
+ """Release cached allocations on the current accelerator."""
180
+ if is_cuda():
181
+ torch.cuda.empty_cache()
182
+ elif is_mps():
183
+ # torch.mps.empty_cache() available since PyTorch 2.1
184
+ if hasattr(torch.mps, "empty_cache"):
185
+ torch.mps.empty_cache()
186
+
187
+
188
+ def free_gpu_memory() -> None:
189
+ """Aggressive memory cleanup: GC + accelerator cache flush."""
190
+ gc.collect()
191
+ if is_cuda():
192
+ try:
193
+ torch.cuda.empty_cache()
194
+ except Exception:
195
+ try:
196
+ torch.cuda.synchronize()
197
+ except Exception:
198
+ pass
199
+ try:
200
+ torch.cuda.reset_peak_memory_stats()
201
+ except Exception:
202
+ pass
203
+ elif is_mps():
204
+ if hasattr(torch.mps, "empty_cache"):
205
+ try:
206
+ torch.mps.empty_cache()
207
+ except Exception:
208
+ pass
209
+ if hasattr(torch.mps, "synchronize"):
210
+ try:
211
+ torch.mps.synchronize()
212
+ except Exception:
213
+ pass
214
+
215
+
216
+ def set_seed_all(seed: int) -> None:
217
+ """Set random seed on all available accelerators."""
218
+ torch.manual_seed(seed)
219
+ if is_cuda():
220
+ torch.cuda.manual_seed_all(seed)
221
+ # MPS shares the CPU random state — no separate seed call needed.
222
+
223
+
224
+ # ---------------------------------------------------------------------------
225
+ # Dtype helpers
226
+ # ---------------------------------------------------------------------------
227
+
228
+ def default_dtype(device: str | None = None) -> torch.dtype:
229
+ """Sensible default dtype for the given device."""
230
+ dev = device or get_device()
231
+ if dev == "cpu":
232
+ return torch.float32
233
+ return torch.float16
234
+
235
+
236
+ def supports_bfloat16(device: str | None = None) -> bool:
237
+ """Whether *bfloat16* is natively supported on the target device."""
238
+ dev = device or get_device()
239
+ if dev.startswith("cuda"):
240
+ if is_cuda():
241
+ major, _ = torch.cuda.get_device_capability(0)
242
+ return major >= 8 # Ampere+
243
+ return False
244
+ if dev == "mps":
245
+ # MPS added bfloat16 support in PyTorch 2.3+
246
+ return hasattr(torch, "__version__") and tuple(
247
+ int(x) for x in torch.__version__.split(".")[:2]
248
+ ) >= (2, 3)
249
+ return True # CPU supports bfloat16 on most modern platforms
250
+
251
+
252
+ def supports_float64(device: str | None = None) -> bool:
253
+ """Whether *float64* is supported (MPS does NOT support it)."""
254
+ dev = device or get_device()
255
+ return dev != "mps"
256
+
257
+
258
+ def safe_svd_dtype(tensor: torch.Tensor) -> torch.dtype:
259
+ """Return a dtype safe for SVD on the tensor's device.
260
+
261
+ MPS does not support float64, so we cap at float32.
262
+ """
263
+ if tensor.device.type == "mps":
264
+ return torch.float32
265
+ return torch.float64 if tensor.dtype in (torch.float64, torch.float32) else torch.float32
266
+
267
+
268
+ # ---------------------------------------------------------------------------
269
+ # OOM exception matching
270
+ # ---------------------------------------------------------------------------
271
+
272
+ def is_oom_error(exc: BaseException) -> bool:
273
+ """Return True if *exc* is an out-of-memory error on any backend."""
274
+ if isinstance(exc, torch.cuda.OutOfMemoryError):
275
+ return True
276
+ # MPS raises a generic RuntimeError containing "out of memory"
277
+ if isinstance(exc, RuntimeError) and "out of memory" in str(exc).lower():
278
+ return True
279
+ return False
280
+
281
+
282
+ # ---------------------------------------------------------------------------
283
+ # Quantization compatibility
284
+ # ---------------------------------------------------------------------------
285
+
286
+ def supports_bitsandbytes(device: str | None = None) -> bool:
287
+ """BitsAndBytes requires NVIDIA CUDA — check that."""
288
+ dev = device or get_device()
289
+ return dev.startswith("cuda")
290
+
291
+
292
+ def supports_device_map_auto(device: str | None = None) -> bool:
293
+ """Accelerate's device_map='auto' is only reliable on CUDA."""
294
+ dev = device or get_device()
295
+ return dev.startswith("cuda")
296
+
297
+
298
+ # ---------------------------------------------------------------------------
299
+ # CUDA env setup (called once at import time of abliterate.py)
300
+ # ---------------------------------------------------------------------------
301
+
302
+ def configure_cuda_alloc() -> None:
303
+ """Set expandable_segments for CUDA if available."""
304
+ if is_cuda() and "PYTORCH_CUDA_ALLOC_CONF" not in os.environ:
305
+ os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"
obliteratus/evaluation/benchmarks.py CHANGED
@@ -26,6 +26,7 @@ import re
26
  from dataclasses import dataclass, field
27
 
28
  import torch
 
29
 
30
 
31
  @dataclass
@@ -261,8 +262,7 @@ class BenchmarkRunner:
261
  ("math_reasoning", self.run_math_reasoning_probe)]:
262
  results[name] = fn()
263
  # Free KV caches between probes to prevent OOM on tight GPUs
264
- if torch.cuda.is_available():
265
- torch.cuda.empty_cache()
266
  return results
267
 
268
  def _answer_mcq(self, question: str, choices: list[str]) -> int:
 
26
  from dataclasses import dataclass, field
27
 
28
  import torch
29
+ from obliteratus import device as dev
30
 
31
 
32
  @dataclass
 
262
  ("math_reasoning", self.run_math_reasoning_probe)]:
263
  results[name] = fn()
264
  # Free KV caches between probes to prevent OOM on tight GPUs
265
+ dev.empty_cache()
 
266
  return results
267
 
268
  def _answer_mcq(self, question: str, choices: list[str]) -> int:
obliteratus/evaluation/heretic_eval.py CHANGED
@@ -32,6 +32,7 @@ from typing import TYPE_CHECKING
32
 
33
  import torch
34
  import torch.nn.functional as F
 
35
 
36
  if TYPE_CHECKING:
37
  from collections.abc import Callable
@@ -363,8 +364,7 @@ def unload_harmbench_classifier() -> None:
363
  model, tokenizer = _HARMBENCH_CLASSIFIER
364
  del model, tokenizer
365
  _HARMBENCH_CLASSIFIER = None
366
- if torch.cuda.is_available():
367
- torch.cuda.empty_cache()
368
  logger.info("HarmBench classifier unloaded")
369
 
370
 
@@ -432,8 +432,7 @@ def harmbench_asr(
432
 
433
  # Free memory between batches
434
  del inputs, outputs
435
- if torch.cuda.is_available():
436
- torch.cuda.empty_cache()
437
 
438
  n_successful = sum(per_item)
439
  return {
@@ -536,8 +535,7 @@ def first_token_kl_on_prompts(
536
  kl_values.extend(kl.cpu().tolist())
537
 
538
  del inputs_orig, inputs_mod, logits_orig, logits_mod, first_orig, first_mod
539
- if torch.cuda.is_available():
540
- torch.cuda.empty_cache()
541
 
542
  mean_kl = statistics.mean(kl_values) if kl_values else 0.0
543
  std_kl = statistics.stdev(kl_values) if len(kl_values) > 1 else 0.0
@@ -1098,8 +1096,8 @@ def run_full_heretic_eval(
1098
  completions.append("")
1099
 
1100
  del inputs
1101
- if i % 25 == 0 and torch.cuda.is_available():
1102
- torch.cuda.empty_cache()
1103
 
1104
  log(f"Generated {len(completions)} completions")
1105
 
 
32
 
33
  import torch
34
  import torch.nn.functional as F
35
+ from obliteratus import device as dev
36
 
37
  if TYPE_CHECKING:
38
  from collections.abc import Callable
 
364
  model, tokenizer = _HARMBENCH_CLASSIFIER
365
  del model, tokenizer
366
  _HARMBENCH_CLASSIFIER = None
367
+ dev.empty_cache()
 
368
  logger.info("HarmBench classifier unloaded")
369
 
370
 
 
432
 
433
  # Free memory between batches
434
  del inputs, outputs
435
+ dev.empty_cache()
 
436
 
437
  n_successful = sum(per_item)
438
  return {
 
535
  kl_values.extend(kl.cpu().tolist())
536
 
537
  del inputs_orig, inputs_mod, logits_orig, logits_mod, first_orig, first_mod
538
+ dev.empty_cache()
 
539
 
540
  mean_kl = statistics.mean(kl_values) if kl_values else 0.0
541
  std_kl = statistics.stdev(kl_values) if len(kl_values) > 1 else 0.0
 
1096
  completions.append("")
1097
 
1098
  del inputs
1099
+ if i % 25 == 0:
1100
+ dev.empty_cache()
1101
 
1102
  log(f"Generated {len(completions)} completions")
1103
 
obliteratus/interactive.py CHANGED
@@ -21,9 +21,10 @@ console = Console()
21
  def _detect_compute_tier() -> str:
22
  """Auto-detect the best compute tier based on available hardware."""
23
  try:
24
- import torch
25
 
26
- if torch.cuda.is_available():
 
27
  vram_gb = torch.cuda.get_device_properties(0).total_memory / (1024**3)
28
  if vram_gb >= 20:
29
  return "large"
@@ -31,8 +32,13 @@ def _detect_compute_tier() -> str:
31
  return "medium"
32
  else:
33
  return "small"
34
- elif hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
35
- return "small" # Apple Silicon conservative estimate
 
 
 
 
 
36
  except ImportError:
37
  pass
38
  return "tiny" # CPU only
@@ -237,12 +243,11 @@ def run_interactive():
237
  dtype = model_preset.recommended_dtype
238
  quantization = None
239
  try:
240
- import torch
241
 
242
- if torch.cuda.is_available():
243
- device = "auto"
244
- elif hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
245
- device = "mps"
246
  except ImportError:
247
  pass
248
 
 
21
  def _detect_compute_tier() -> str:
22
  """Auto-detect the best compute tier based on available hardware."""
23
  try:
24
+ from obliteratus import device as dev
25
 
26
+ if dev.is_cuda():
27
+ import torch
28
  vram_gb = torch.cuda.get_device_properties(0).total_memory / (1024**3)
29
  if vram_gb >= 20:
30
  return "large"
 
32
  return "medium"
33
  else:
34
  return "small"
35
+ elif dev.is_mps():
36
+ # Apple Silicon with unified memory — estimate from system RAM
37
+ mem = dev.get_memory_info()
38
+ if mem.total_gb >= 24:
39
+ return "medium" # M1 Pro/Max/Ultra, M2 Pro/Max/Ultra, M3 Pro/Max
40
+ else:
41
+ return "small" # M1/M2/M3 base (8-16 GB)
42
  except ImportError:
43
  pass
44
  return "tiny" # CPU only
 
243
  dtype = model_preset.recommended_dtype
244
  quantization = None
245
  try:
246
+ from obliteratus import device as _dev
247
 
248
+ resolved = _dev.get_device()
249
+ if resolved != "cpu":
250
+ device = resolved if resolved == "mps" else "auto"
 
251
  except ImportError:
252
  pass
253
 
obliteratus/mlx_backend.py ADDED
@@ -0,0 +1,469 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Optional MLX backend for Apple Silicon native inference and weight editing.
2
+
3
+ MLX is Apple's array framework that runs natively on the Apple Neural Engine
4
+ and Metal GPU. When available, it provides significantly faster inference and
5
+ weight manipulation than PyTorch's MPS backend on Apple hardware.
6
+
7
+ This module is entirely optional — if ``mlx`` / ``mlx-lm`` are not installed,
8
+ ``MLX_AVAILABLE`` is ``False`` and all public functions raise ``RuntimeError``.
9
+
10
+ Install with::
11
+
12
+ pip install mlx>=0.22 mlx-lm>=0.20
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ import logging
18
+ from pathlib import Path
19
+ from typing import Any, Callable
20
+
21
+ logger = logging.getLogger(__name__)
22
+
23
+ # ---------------------------------------------------------------------------
24
+ # Availability check
25
+ # ---------------------------------------------------------------------------
26
+
27
# Module handles for the optional MLX stack; populated only when every
# required package imports cleanly, so a partial install leaves all None.
MLX_AVAILABLE = False
_mx = None       # mlx.core module
_mlx_lm = None   # mlx_lm module
_mlx_nn = None   # mlx.nn module

try:
    import mlx.core as _mx_core  # type: ignore[import-untyped]
    import mlx.nn as _mlx_nn_mod  # type: ignore[import-untyped]
    import mlx_lm  # type: ignore[import-untyped]
except ImportError:
    pass
else:
    _mx, _mlx_nn, _mlx_lm = _mx_core, _mlx_nn_mod, mlx_lm
    MLX_AVAILABLE = True
    logger.info(
        "MLX backend available (mlx %s)",
        getattr(_mx, "__version__", "?"),
    )
44
+
45
+
46
def _require_mlx() -> None:
    """Raise ``RuntimeError`` unless the optional MLX stack imported successfully."""
    if MLX_AVAILABLE:
        return
    raise RuntimeError(
        "MLX backend is not available. Install with: pip install mlx>=0.22 mlx-lm>=0.20"
    )
51
+
52
+
53
+ # ---------------------------------------------------------------------------
54
+ # Model loading
55
+ # ---------------------------------------------------------------------------
56
+
57
class MLXModelHandle:
    """Lightweight wrapper around an MLX-loaded model + tokenizer.

    Bundles the model object, its tokenizer, and the HuggingFace identifier
    it was loaded from, so downstream helpers take a single argument.
    """

    def __init__(self, model: Any, tokenizer: Any, model_name: str):
        self.model = model            # the mlx-lm model object
        self.tokenizer = tokenizer    # matching tokenizer
        self.model_name = model_name  # HF identifier it was loaded from

    @property
    def config(self) -> Any:
        """The wrapped model's ``config`` attribute, or ``None`` if it has none."""
        return getattr(self.model, "config", None)
68
+
69
+
70
def load_model(
    model_name: str,
    dtype: str = "float16",
) -> MLXModelHandle:
    """Load a HuggingFace model via ``mlx-lm`` for Apple-native execution.

    Parameters
    ----------
    model_name : str
        HuggingFace model identifier (e.g. ``"meta-llama/Llama-3.2-3B-Instruct"``).
    dtype : str
        One of ``"float16"``, ``"bfloat16"``, ``"float32"``.
        NOTE: ``mlx_lm.load`` takes no dtype argument — the checkpoint's
        stored precision is used. A warning is logged when a non-default
        dtype is requested so the mismatch is visible instead of silent.

    Returns
    -------
    MLXModelHandle
        Wrapper with ``.model`` and ``.tokenizer`` attributes.
    """
    _require_mlx()

    from mlx_lm import load  # type: ignore[import-untyped]

    logger.info("Loading %s via MLX (dtype=%s)", model_name, dtype)
    if dtype != "float16":
        # Bug fix: `dtype` used to be silently ignored; make that explicit.
        logger.warning(
            "MLX backend cannot apply the requested dtype %r; the "
            "checkpoint's stored precision is used instead.",
            dtype,
        )
    model, tokenizer = load(model_name)

    return MLXModelHandle(model=model, tokenizer=tokenizer, model_name=model_name)
96
+
97
+
98
+ # ---------------------------------------------------------------------------
99
+ # Inference
100
+ # ---------------------------------------------------------------------------
101
+
102
def generate(
    handle: MLXModelHandle,
    prompt: str,
    max_tokens: int = 256,
    temperature: float = 0.7,
    top_p: float = 0.9,
    repetition_penalty: float | None = None,
) -> str:
    """Generate a text completion with the MLX model.

    Parameters
    ----------
    handle : MLXModelHandle
        A loaded MLX model handle.
    prompt : str
        The input prompt string.
    max_tokens : int
        Maximum number of tokens to generate.
    temperature : float
        Sampling temperature (passed to mlx-lm as ``temp``).
    top_p : float
        Nucleus sampling threshold.
    repetition_penalty : float or None
        Repetition penalty; ``None`` means the option is not forwarded.

    Returns
    -------
    str
        Generated text completion.
    """
    _require_mlx()

    from mlx_lm import generate as mlx_generate  # type: ignore[import-untyped]

    # Collect sampling options; keys mirror mlx-lm's keyword names.
    sampling: dict[str, Any] = dict(
        max_tokens=max_tokens,
        temp=temperature,
        top_p=top_p,
    )
    if repetition_penalty is not None:
        sampling["repetition_penalty"] = repetition_penalty

    return mlx_generate(handle.model, handle.tokenizer, prompt=prompt, **sampling)
150
+
151
+
152
+ # ---------------------------------------------------------------------------
153
+ # Activation extraction
154
+ # ---------------------------------------------------------------------------
155
+
156
def get_activations(
    handle: MLXModelHandle,
    prompts: list[str],
    layer_indices: list[int],
    max_length: int = 256,
) -> dict[int, list[Any]]:
    """Extract last-token hidden states from specified transformer layers.

    Runs each prompt through the embedding layer and every transformer
    block, capturing the final-position hidden state at each requested
    layer index.

    Parameters
    ----------
    handle : MLXModelHandle
        Loaded model.
    prompts : list[str]
        Input prompts to probe.
    layer_indices : list[int]
        Which transformer layers to capture.
    max_length : int
        Maximum sequence length; longer tokenizations are truncated.

    Returns
    -------
    dict[int, list[mlx.core.array]]
        Layer index -> list of ``(hidden_dim,)`` last-token activations,
        one per prompt, in prompt order.
    """
    _require_mlx()
    import mlx.core as mx  # type: ignore[import-untyped]

    model = handle.model
    tokenizer = handle.tokenizer

    # Consistency fix: reuse the shared locator instead of duplicating its
    # search logic, but keep this function's more descriptive error message.
    try:
        layers = _get_layers(model)
    except RuntimeError:
        raise RuntimeError(
            "Cannot locate transformer layers in the MLX model. "
            "Supported architectures: LLaMA, GPT-2, GPT-NeoX."
        ) from None

    # Resolve the embedding callable once — it is identical for every prompt
    # (previously re-resolved inside the loop).
    if hasattr(model, "model"):
        embed_module = model.model  # LLaMA-style: model.model.embed_tokens
    elif hasattr(model, "transformer"):
        embed_module = model.transformer
    else:
        embed_module = model

    if hasattr(embed_module, "embed_tokens"):
        embed = embed_module.embed_tokens
    elif hasattr(embed_module, "wte"):
        embed = embed_module.wte
    else:
        raise RuntimeError("Cannot find embedding layer in MLX model")

    activations: dict[int, list[Any]] = {idx: [] for idx in layer_indices}
    target_set = set(layer_indices)

    for prompt in prompts:
        tokens = tokenizer.encode(prompt)[:max_length]
        h = embed(mx.array([tokens]))

        # NOTE(review): blocks are called with the hidden state only; MLX
        # decoder layers that additionally require a mask/cache argument may
        # need more — confirm against the supported architectures.
        for i, layer in enumerate(layers):
            h = layer(h)
            if isinstance(h, tuple):
                # Some layers return (hidden, attention) — take the first.
                h = h[0]

            if i in target_set:
                last_hidden = h[0, -1, :]  # last-token hidden state
                mx.eval(last_hidden)  # force lazy-graph evaluation
                activations[i].append(last_hidden)

    return activations
247
+
248
+
249
+ # ---------------------------------------------------------------------------
250
+ # Weight manipulation
251
+ # ---------------------------------------------------------------------------
252
+
253
def get_weight(handle: MLXModelHandle, layer_idx: int, param_path: str) -> Any:
    """Retrieve a weight tensor from one transformer layer.

    Parameters
    ----------
    handle : MLXModelHandle
        Loaded model.
    layer_idx : int
        Transformer layer index.
    param_path : str
        Dot-separated path within the layer, e.g. ``"self_attn.o_proj.weight"``.

    Returns
    -------
    mlx.core.array
        The weight tensor.
    """
    _require_mlx()

    # Walk from the selected layer down the dotted attribute path.
    target = _get_layers(handle.model)[layer_idx]
    for attr in param_path.split("."):
        target = getattr(target, attr)
    return target
283
+
284
+
285
def modify_weights(
    handle: MLXModelHandle,
    layer_idx: int,
    param_path: str,
    modifier_fn: Callable[[Any], Any],
) -> None:
    """Replace a weight in-place via ``new = modifier_fn(old)``.

    Parameters
    ----------
    handle : MLXModelHandle
        Loaded model.
    layer_idx : int
        Transformer layer index.
    param_path : str
        Dot-separated path within the layer to the weight, e.g.
        ``"self_attn.o_proj.weight"``.
    modifier_fn : callable
        Maps the current weight (mlx array) to the replacement weight
        (mlx array). For abliteration, this projects out the refusal
        direction.
    """
    _require_mlx()
    import mlx.core as mx  # type: ignore[import-untyped]

    layer = _get_layers(handle.model)[layer_idx]

    # Split the dotted path into parent modules and the leaf attribute.
    *parent_names, leaf_name = param_path.split(".")
    parent = layer
    for name in parent_names:
        parent = getattr(parent, name)

    updated = modifier_fn(getattr(parent, leaf_name))

    # MLX modules prefer the functional `update` pattern over setattr.
    if hasattr(parent, "update"):
        parent.update({leaf_name: updated})
    else:
        setattr(parent, leaf_name, updated)

    mx.eval(updated)  # materialize the lazy computation now
331
+
332
+
333
def project_out_direction(weight: Any, direction: Any) -> Any:
    """Project a direction out of a weight matrix (abliteration).

    Given weight matrix W and unit direction d, computes::

        W' = W - (W @ d) outer d

    ``direction`` is assumed to already be unit-norm; it is not
    normalized here.

    Parameters
    ----------
    weight : mlx.core.array
        Weight matrix, shape ``(out_features, in_features)``.
    direction : mlx.core.array
        Unit direction vector, shape ``(in_features,)``.

    Returns
    -------
    mlx.core.array
        Modified weight with the direction projected out.
    """
    _require_mlx()
    import mlx.core as mx  # type: ignore[import-untyped]

    d = direction.astype(weight.dtype)
    # Component of each weight row along d, as a column vector (out, 1).
    row_components = mx.matmul(weight, d[:, None])
    # Subtract the rank-1 update (out, 1) x (1, in) -> (out, in).
    return weight - mx.matmul(row_components, d[None, :])
359
+
360
+
361
+ # ---------------------------------------------------------------------------
362
+ # Save model
363
+ # ---------------------------------------------------------------------------
364
+
365
def save_model(
    handle: MLXModelHandle,
    output_dir: str | Path,
    upload_repo: str | None = None,
) -> Path:
    """Save the (modified) MLX model to disk.

    Saves in safetensors format compatible with both MLX and HuggingFace.
    (Bug fix: removed an unused ``from mlx_lm import convert`` import.)

    Parameters
    ----------
    handle : MLXModelHandle
        Model handle (possibly with modified weights).
    output_dir : str or Path
        Directory to save into; created if missing.
    upload_repo : str or None
        If set, also uploads to HuggingFace Hub (best-effort — logs a
        warning if mlx-lm's uploader is unavailable).

    Returns
    -------
    Path
        The output directory.
    """
    _require_mlx()

    out = Path(output_dir)
    out.mkdir(parents=True, exist_ok=True)

    # Prefer mlx-lm's own saver (safetensors, HF-compatible layout).
    if hasattr(_mlx_lm, "save_model"):
        _mlx_lm.save_model(str(out), handle.model, handle.tokenizer)
    else:
        # Fallback: manual save via mlx.core.save_safetensors
        import mlx.core as mx  # type: ignore[import-untyped]

        weights = dict(handle.model.parameters())
        flat: dict = {}
        _flatten_dict(weights, "", flat)
        mx.save_safetensors(str(out / "model.safetensors"), flat)
        # Save tokenizer via transformers
        handle.tokenizer.save_pretrained(str(out))

    logger.info("MLX model saved to %s", out)

    if upload_repo:
        try:
            from mlx_lm import upload_to_hub  # type: ignore[import-untyped]

            upload_to_hub(str(out), upload_repo)
            logger.info("Uploaded to %s", upload_repo)
        except (ImportError, AttributeError):
            logger.warning("mlx-lm upload not available — push manually with huggingface-cli")

    return out
419
+
420
+
421
+ # ---------------------------------------------------------------------------
422
+ # Conversion: PyTorch ↔ MLX
423
+ # ---------------------------------------------------------------------------
424
+
425
def torch_tensor_to_mlx(tensor: "torch.Tensor") -> Any:  # noqa: F821
    """Convert a PyTorch tensor to an MLX array.

    The tensor is detached and moved to CPU. Dtype is preserved where
    NumPy supports it; ``bfloat16`` (which NumPy cannot represent) is
    upcast to ``float32``.
    """
    _require_mlx()
    import mlx.core as mx  # type: ignore[import-untyped]
    import torch

    t = tensor.detach().cpu()
    # Bug fix: the previous unconditional .float() upcast float16 tensors
    # and silently converted integer tensors to float32. Only bfloat16
    # actually needs the cast, because NumPy has no bfloat16 dtype.
    if t.dtype == torch.bfloat16:
        t = t.float()
    return mx.array(t.numpy())
434
+
435
+
436
def mlx_to_torch_tensor(array: Any, device: str = "cpu") -> "torch.Tensor":  # noqa: F821
    """Convert an MLX array to a PyTorch tensor on *device* (via NumPy)."""
    import numpy as np
    import torch

    # np.array() materializes the MLX array; from_numpy wraps it, then we
    # move the result to the requested device.
    return torch.from_numpy(np.array(array)).to(device)
443
+
444
+
445
+ # ---------------------------------------------------------------------------
446
+ # Internal helpers
447
+ # ---------------------------------------------------------------------------
448
+
449
+ def _get_layers(model: Any) -> Any:
450
+ """Locate the transformer block list in an MLX model."""
451
+ for attr_path in ("model.layers", "transformer.h", "gpt_neox.layers"):
452
+ obj = model
453
+ try:
454
+ for part in attr_path.split("."):
455
+ obj = getattr(obj, part)
456
+ return obj
457
+ except AttributeError:
458
+ continue
459
+ raise RuntimeError("Cannot locate transformer layers in MLX model")
460
+
461
+
462
+ def _flatten_dict(d: dict, prefix: str, out: dict) -> None:
463
+ """Flatten a nested dict with dot-separated keys."""
464
+ for k, v in d.items():
465
+ key = f"{prefix}{k}" if prefix else k
466
+ if isinstance(v, dict):
467
+ _flatten_dict(v, f"{key}.", out)
468
+ else:
469
+ out[key] = v
obliteratus/models/loader.py CHANGED
@@ -12,6 +12,7 @@ from typing import Optional
12
  import sys as _sys
13
 
14
  import torch
 
15
  from transformers import (
16
  AutoConfig,
17
  AutoModelForCausalLM,
@@ -381,24 +382,8 @@ def _estimate_model_memory_gb(config: AutoConfig, dtype: torch.dtype) -> float:
381
 
382
 
383
  def _available_gpu_memory_gb() -> float:
384
- """Return free GPU memory across all CUDA devices, in GB.
385
-
386
- Uses torch.cuda.mem_get_info which reports actual free memory,
387
- not total capacity. Falls back to total_memory if mem_get_info
388
- is unavailable (PyTorch < 1.10).
389
- """
390
- if not torch.cuda.is_available():
391
- return 0.0
392
- total_free = 0.0
393
- for i in range(torch.cuda.device_count()):
394
- try:
395
- free, _ = torch.cuda.mem_get_info(i)
396
- total_free += free / (1024 ** 3)
397
- except AttributeError:
398
- # Fallback for old PyTorch without mem_get_info
399
- props = torch.cuda.get_device_properties(i)
400
- total_free += props.total_memory / (1024 ** 3)
401
- return total_free
402
 
403
 
404
  def _hf_token() -> str | None:
@@ -515,34 +500,54 @@ def load_model(
515
  load_kwargs.pop("torch_dtype", None)
516
  load_kwargs["device_map"] = "auto"
517
  elif quantization in ("4bit", "8bit"):
518
- try:
519
- import bitsandbytes # noqa: F401
520
- except ImportError:
521
- raise RuntimeError(
522
- f"Quantization '{quantization}' requires bitsandbytes: "
523
- f"pip install -U bitsandbytes>=0.46.1"
524
- )
525
- from transformers import BitsAndBytesConfig
526
-
527
- # Enable fp32 CPU offload so that models too large to fit entirely on
528
- # GPU (even quantized) can spill to CPU without crashing bitsandbytes.
529
- # This is critical for frontier MoE models (GLM-5 744B, DeepSeek-V3 685B,
530
- # Mistral Large 3 675B, etc.) on single-GPU setups.
531
- if quantization == "4bit":
532
- load_kwargs["quantization_config"] = BitsAndBytesConfig(
533
- load_in_4bit=True,
534
- bnb_4bit_compute_dtype=torch_dtype,
535
- bnb_4bit_quant_type="nf4",
536
- llm_int8_enable_fp32_cpu_offload=True,
537
  )
 
 
 
 
538
  else:
539
- load_kwargs["quantization_config"] = BitsAndBytesConfig(
540
- load_in_8bit=True,
541
- llm_int8_enable_fp32_cpu_offload=True,
542
- )
543
- load_kwargs["device_map"] = "auto"
544
- elif device == "auto":
545
- load_kwargs["device_map"] = "auto"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
546
 
547
  # Offload support: provide a folder for disk offloading when GPU memory is insufficient
548
  _offload_dir = None
@@ -560,9 +565,9 @@ def load_model(
560
  # Reserve GPU headroom for inference (KV cache, activations, generate()).
561
  # Without this, device_map="auto" packs 100% of layers onto GPU, leaving
562
  # no room for forward passes or generation on tight-memory setups.
563
- if torch.cuda.is_available():
564
  max_memory = {}
565
- for i in range(torch.cuda.device_count()):
566
  total = torch.cuda.get_device_properties(i).total_memory
567
  # Reserve 15% or 2 GiB (whichever is larger) for inference headroom
568
  reserve = max(int(total * 0.15), 2 * 1024 ** 3)
@@ -570,16 +575,8 @@ def load_model(
570
  max_memory[i] = f"{usable // (1024 ** 2)}MiB"
571
  # Allow overflow to CPU RAM, capped at 85% of physical memory
572
  # to leave room for the OS, Python runtime, and serialization buffers.
573
- try:
574
- import psutil
575
- cpu_ram_gb = psutil.virtual_memory().total / (1024 ** 3)
576
- except ImportError:
577
- try:
578
- cpu_ram_gb = os.sysconf("SC_PHYS_PAGES") * os.sysconf("SC_PAGE_SIZE") / (1024 ** 3)
579
- except (AttributeError, ValueError):
580
- # os.sysconf is unavailable on non-POSIX platforms (Windows)
581
- cpu_ram_gb = 16.0 # conservative fallback
582
- cpu_budget_gb = int(cpu_ram_gb * 0.85)
583
  max_memory["cpu"] = f"{max(cpu_budget_gb, 4)}GiB"
584
  load_kwargs["max_memory"] = max_memory
585
  logger.info(
@@ -625,12 +622,15 @@ def load_model(
625
 
626
  if device not in ("auto",) and quantization is None and native_quant is None:
627
  model = model.to(device)
 
 
 
 
628
 
629
  model.eval()
630
 
631
- # Free CUDA cache after loading
632
- if torch.cuda.is_available():
633
- torch.cuda.empty_cache()
634
 
635
  try:
636
  tokenizer = AutoTokenizer.from_pretrained(
@@ -665,9 +665,7 @@ def load_model(
665
  if gpu_gb > 0 and native_quant is not None:
666
  # Model is pre-quantized but we can't estimate its true size.
667
  # Check actual free memory after loading — if less than 40% free, skip snapshot.
668
- free_gb = 0.0
669
- for i in range(torch.cuda.device_count()):
670
- free_gb += torch.cuda.mem_get_info(i)[0] / (1024 ** 3)
671
  if free_gb < gpu_gb * 0.4:
672
  logger.warning(
673
  f"Auto-skipping state dict snapshot for natively quantized model "
 
12
  import sys as _sys
13
 
14
  import torch
15
+ from obliteratus import device as dev
16
  from transformers import (
17
  AutoConfig,
18
  AutoModelForCausalLM,
 
382
 
383
 
384
  def _available_gpu_memory_gb() -> float:
385
+ """Return free accelerator memory in GB (CUDA, MPS, or 0 for CPU)."""
386
+ return dev.get_total_free_gb()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
387
 
388
 
389
  def _hf_token() -> str | None:
 
500
  load_kwargs.pop("torch_dtype", None)
501
  load_kwargs["device_map"] = "auto"
502
  elif quantization in ("4bit", "8bit"):
503
+ # BitsAndBytes only works on NVIDIA CUDA GPUs.
504
+ resolved_device = dev.get_device(device)
505
+ if not dev.supports_bitsandbytes(resolved_device):
506
+ logger.warning(
507
+ "BitsAndBytes quantization is not supported on %s. "
508
+ "Loading in %s instead.",
509
+ resolved_device, dtype,
 
 
 
 
 
 
 
 
 
 
 
 
510
  )
511
+ # On MPS, load normally to the device; on CPU, fall through.
512
+ if resolved_device == "mps":
513
+ device = "mps"
514
+ # Don't set quantization_config — fall through to normal loading.
515
  else:
516
+ try:
517
+ import bitsandbytes # noqa: F401
518
+ except ImportError:
519
+ raise RuntimeError(
520
+ f"Quantization '{quantization}' requires bitsandbytes: "
521
+ f"pip install -U bitsandbytes>=0.46.1"
522
+ )
523
+ from transformers import BitsAndBytesConfig
524
+
525
+ # Enable fp32 CPU offload so that models too large to fit entirely on
526
+ # GPU (even quantized) can spill to CPU without crashing bitsandbytes.
527
+ # This is critical for frontier MoE models (GLM-5 744B, DeepSeek-V3 685B,
528
+ # Mistral Large 3 675B, etc.) on single-GPU setups.
529
+ if quantization == "4bit":
530
+ load_kwargs["quantization_config"] = BitsAndBytesConfig(
531
+ load_in_4bit=True,
532
+ bnb_4bit_compute_dtype=torch_dtype,
533
+ bnb_4bit_quant_type="nf4",
534
+ llm_int8_enable_fp32_cpu_offload=True,
535
+ )
536
+ else:
537
+ load_kwargs["quantization_config"] = BitsAndBytesConfig(
538
+ load_in_8bit=True,
539
+ llm_int8_enable_fp32_cpu_offload=True,
540
+ )
541
+ load_kwargs["device_map"] = "auto"
542
+
543
+ # device_map="auto" is only reliable on CUDA (accelerate doesn't support MPS).
544
+ if "device_map" not in load_kwargs and device == "auto":
545
+ resolved_device = dev.get_device(device)
546
+ if dev.supports_device_map_auto(resolved_device):
547
+ load_kwargs["device_map"] = "auto"
548
+ else:
549
+ # MPS / CPU: load to CPU first, then .to(device) after loading.
550
+ pass
551
 
552
  # Offload support: provide a folder for disk offloading when GPU memory is insufficient
553
  _offload_dir = None
 
565
  # Reserve GPU headroom for inference (KV cache, activations, generate()).
566
  # Without this, device_map="auto" packs 100% of layers onto GPU, leaving
567
  # no room for forward passes or generation on tight-memory setups.
568
+ if dev.is_cuda():
569
  max_memory = {}
570
+ for i in range(dev.device_count()):
571
  total = torch.cuda.get_device_properties(i).total_memory
572
  # Reserve 15% or 2 GiB (whichever is larger) for inference headroom
573
  reserve = max(int(total * 0.15), 2 * 1024 ** 3)
 
575
  max_memory[i] = f"{usable // (1024 ** 2)}MiB"
576
  # Allow overflow to CPU RAM, capped at 85% of physical memory
577
  # to leave room for the OS, Python runtime, and serialization buffers.
578
+ total_ram, _ = dev._system_memory_gb()
579
+ cpu_budget_gb = int(total_ram * 0.85)
 
 
 
 
 
 
 
 
580
  max_memory["cpu"] = f"{max(cpu_budget_gb, 4)}GiB"
581
  load_kwargs["max_memory"] = max_memory
582
  logger.info(
 
622
 
623
  if device not in ("auto",) and quantization is None and native_quant is None:
624
  model = model.to(device)
625
+ elif device == "auto" and not dev.supports_device_map_auto():
626
+ # MPS / CPU: device_map wasn't used, move model to best device.
627
+ resolved = dev.get_device()
628
+ model = model.to(resolved)
629
 
630
  model.eval()
631
 
632
+ # Free accelerator cache after loading
633
+ dev.empty_cache()
 
634
 
635
  try:
636
  tokenizer = AutoTokenizer.from_pretrained(
 
665
  if gpu_gb > 0 and native_quant is not None:
666
  # Model is pre-quantized but we can't estimate its true size.
667
  # Check actual free memory after loading — if less than 40% free, skip snapshot.
668
+ free_gb = dev.get_total_free_gb()
 
 
669
  if free_gb < gpu_gb * 0.4:
670
  logger.warning(
671
  f"Auto-skipping state dict snapshot for natively quantized model "
obliteratus/reproducibility.py CHANGED
@@ -38,9 +38,9 @@ def set_seed(seed: int = 42, deterministic: bool = True) -> None:
38
 
39
  try:
40
  import torch
 
41
  torch.manual_seed(seed)
42
- if torch.cuda.is_available():
43
- torch.cuda.manual_seed_all(seed)
44
 
45
  if deterministic:
46
  torch.use_deterministic_algorithms(True, warn_only=True)
 
38
 
39
  try:
40
  import torch
41
+ from obliteratus import device as dev
42
  torch.manual_seed(seed)
43
+ dev.set_seed_all(seed)
 
44
 
45
  if deterministic:
46
  torch.use_deterministic_algorithms(True, warn_only=True)
obliteratus/tourney.py CHANGED
@@ -372,9 +372,8 @@ class TourneyRunner:
372
  # Clean up GPU between methods
373
  gc.collect()
374
  try:
375
- import torch
376
- if torch.cuda.is_available():
377
- torch.cuda.empty_cache()
378
  except Exception:
379
  pass
380
 
 
372
  # Clean up GPU between methods
373
  gc.collect()
374
  try:
375
+ from obliteratus import device as dev
376
+ dev.empty_cache()
 
377
  except Exception:
378
  pass
379
 
requirements-apple.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ # Optional Apple Silicon dependencies for native MLX acceleration.
2
+ # Install alongside the main requirements on macOS with Apple Silicon:
3
+ #
4
+ # pip install -r requirements.txt -r requirements-apple.txt
5
+ #
6
+ # These packages are macOS-only and will fail to install on Linux/Windows.
7
+ mlx>=0.22
8
+ mlx-lm>=0.20