#!/usr/bin/env bash
# ─────────────────────────────────────────────────────────────────────────────
# OBLITERATUS Remote Benchmark Runner
#
# One-command benchmark on your HuggingFace Space GPU.
#
# Usage:
#   ./scripts/run_benchmark_remote.sh                 # defaults: Qwen 0.5B, all methods
#   ./scripts/run_benchmark_remote.sh --model Qwen/Qwen2.5-1.5B-Instruct
#   ./scripts/run_benchmark_remote.sh --model openai/gpt-oss-20b
#   ./scripts/run_benchmark_remote.sh --models "Qwen/Qwen2.5-0.5B-Instruct openai/gpt-oss-20b"
#   ./scripts/run_benchmark_remote.sh --methods "basic advanced surgical"
#   ./scripts/run_benchmark_remote.sh --prompts 33    # use 33/66/99 prompts per side
#   ./scripts/run_benchmark_remote.sh --dry-run       # print the command, don't execute
#   ./scripts/run_benchmark_remote.sh --verbose       # show SSH debug output
# ─────────────────────────────────────────────────────────────────────────────
set -euo pipefail

# ── Defaults (each overridable via environment variable) ─────────────────────
SSH_KEY="${OBLITERATUS_SSH_KEY:-$HOME/.ssh/hf_obliteratus}"
SSH_HOST="${OBLITERATUS_SSH_HOST:-}"
MODEL="${OBLITERATUS_MODEL:-Qwen/Qwen2.5-0.5B-Instruct}"
MODELS=""
METHODS="${OBLITERATUS_METHODS:-basic advanced aggressive surgical inverted nuclear}"
PROMPTS="${OBLITERATUS_PROMPTS:-33}"
DRY_RUN=false
VERBOSE=false

# Print the "Usage:" section of the header comment above.
# Anchored on the "# Usage:" marker rather than hard-coded line numbers
# (head/tail offsets silently rot when the header changes).
usage() {
  sed -n '/^# Usage:/,/^# ─/p' "$0" | sed '$d'
}

# ── Parse args ───────────────────────────────────────────────────────────────
# ${2:?…} gives a clear error (instead of a bare set -u failure) when a flag
# that takes a value is passed without one.
while [[ $# -gt 0 ]]; do
  case "$1" in
    --model)      MODEL="${2:?--model requires a value}"; MODELS=""; shift 2 ;;
    --models)     MODELS="${2:?--models requires a value}"; shift 2 ;;
    --methods)    METHODS="${2:?--methods requires a value}"; shift 2 ;;
    --prompts)    PROMPTS="${2:?--prompts requires a value}"; shift 2 ;;
    --key)        SSH_KEY="${2:?--key requires a value}"; shift 2 ;;
    --host)       SSH_HOST="${2:?--host requires a value}"; shift 2 ;;
    --dry-run)    DRY_RUN=true; shift ;;
    --verbose|-v) VERBOSE=true; shift ;;
    -h|--help)
      usage
      exit 0
      ;;
    *) echo "Unknown arg: $1" >&2; exit 1 ;;
  esac
done

# If --models not set, use single --model
if [[ -z "$MODELS" ]]; then
  MODELS="$MODEL"
fi

# ── Validate SSH host ────────────────────────────────────────────────────────
if [[ -z "$SSH_HOST" ]]; then
  {
    echo "ERROR: SSH_HOST not configured."
    echo ""
    echo "Set your HF Space SSH host:"
    echo "  1. export OBLITERATUS_SSH_HOST=your-username-spacename@ssh.hf.space"
    echo "  2. Or pass --host your-username-spacename@ssh.hf.space"
  } >&2
  exit 1
fi

# ── Validate SSH key ─────────────────────────────────────────────────────────
if [[ ! -f "$SSH_KEY" ]]; then
  {
    echo "ERROR: SSH key not found at $SSH_KEY"
    echo ""
    echo "Either:"
    echo "  1. Place your HF Space SSH key at ~/.ssh/hf_obliteratus"
    echo "  2. Set OBLITERATUS_SSH_KEY=/path/to/key"
    echo "  3. Pass --key /path/to/key"
  } >&2
  exit 1
fi

echo "╔══════════════════════════════════════════════════════════════╗"
echo "║           OBLITERATUS — Remote GPU Benchmark                 ║"
echo "╠══════════════════════════════════════════════════════════════╣"
echo "║ Host:    $SSH_HOST"
echo "║ Models:  $MODELS"
echo "║ Methods: $METHODS"
echo "║ Prompts: $PROMPTS per side"
echo "║ SSH key: $SSH_KEY"
echo "╚══════════════════════════════════════════════════════════════╝"
echo ""

# ── Build the Python benchmark script to run remotely ────────────────────────
# Quoted 'PYEOF' delimiter → no local expansion; the remote script reads its
# parameters from BENCH_* environment variables set at invocation time.
# NB: read -r -d '' returns non-zero at EOF, hence "|| true" under set -e.
read -r -d '' REMOTE_SCRIPT << 'PYEOF' || true
import json, sys, time, shutil, gc, os

os.environ.setdefault("PYTORCH_CUDA_ALLOC_CONF", "expandable_segments:True")
os.environ.setdefault("CUDA_LAUNCH_BLOCKING", "1")

import torch
import torch.nn as nn

# Add app dir to path (HF Space layout: /home/user/app)
sys.path.insert(0, os.environ.get("APP_DIR", "/home/user/app"))

# ── Hotpatch: fix device detection for accelerate device_map="auto" ──────
# The deployed Space code uses next(model.parameters()).device which is
# unreliable when accelerate distributes params across devices.
import obliteratus.abliterate as _abl @staticmethod def _get_model_device(model): """Find the correct input device (embedding layer) for accelerate models.""" if hasattr(model, "hf_device_map"): try: embed = model.get_input_embeddings() return next(embed.parameters()).device except (StopIteration, AttributeError): for p in model.parameters(): if p.device.type != "meta": return p.device return torch.device("cpu") return next(model.parameters()).device _abl.AbliterationPipeline._get_model_device = _get_model_device # Patch _collect_activations to use the fixed device detection _orig_collect = _abl.AbliterationPipeline._collect_activations.__code__ import types def _patched_collect(self, layer_modules, prompts, label): """Collect last-token activations — patched for correct device detection.""" n_layers = len(layer_modules) activations = {i: [] for i in range(n_layers)} hooks = [] def make_hook(idx): def hook_fn(module, input, output): hidden = output[0] if isinstance(output, tuple) else output activations[idx].append(hidden[:, -1, :].detach().cpu().float()) return hook_fn for idx in range(n_layers): hooks.append(layer_modules[idx].register_forward_hook(make_hook(idx))) model = self.handle.model tokenizer = self.handle.tokenizer max_length = 256 if torch.cuda.is_available(): free_gb = sum( torch.cuda.mem_get_info(i)[0] / (1024 ** 3) for i in range(torch.cuda.device_count()) ) if free_gb < 2.0: max_length = 64 self.log(f" Low GPU memory ({free_gb:.1f} GB free), using max_length={max_length}") elif free_gb < 4.0: max_length = 128 self.log(f" Tight GPU memory ({free_gb:.1f} GB free), using max_length={max_length}") device = self._get_model_device(model) try: for i, prompt in enumerate(prompts): self.log(f" [{label}] prompt {i + 1}/{len(prompts)}") inputs = tokenizer( prompt, return_tensors="pt", padding=True, truncation=True, max_length=max_length, ) inputs = {k: v.to(device) for k, v in inputs.items()} with torch.no_grad(): model(**inputs) del inputs 
self._free_gpu_memory() finally: for h in hooks: h.remove() return activations _abl.AbliterationPipeline._collect_activations = _patched_collect print("[hotpatch] Device detection fix applied") # ── End hotpatch ───────────────────────────────────────────────────────── # ── Hotpatch: nuclear mode tuning ───────────────────────────────────────── # The deployed Space code has stale nuclear defaults. Override them here # so the benchmark exercises the latest tuning without redeploying. import math as _math # 1. Updated method configs (read at __init__ time) _abl.METHODS["nuclear"].update({ "n_directions": 4, "reflection_strength": 1.25, "embed_regularization": 0.50, "steering_strength": 0.15, "safety_neuron_masking": False, }) _abl.METHODS["inverted"]["safety_neuron_masking"] = False # 2. Cap layers for inversion modes (40% of total) — post-distill _orig_distill = _abl.AbliterationPipeline._distill_refusal_subspace def _patched_distill(self): _orig_distill(self) if self.invert_refusal and self._strong_layers: try: n_total = len(_abl.get_layer_modules(self.handle)) except Exception: n_total = 24 max_layers = max(3, int(n_total * 0.40)) if len(self._strong_layers) > max_layers: old_count = len(self._strong_layers) self._strong_layers = self._strong_layers[:max_layers] self.log(f" [hotpatch] Capped {old_count} -> {max_layers} layers for inversion (40% of {n_total})") # Truncate SAE directions: 4 features for nuclear, 6 for inverted n_sae = 4 if self.reflection_strength < 2.0 else 6 for idx in list(self._sae_directions.keys()): dirs = self._sae_directions[idx] if dirs.shape[0] > n_sae: self._sae_directions[idx] = dirs[:n_sae] if self._sae_directions: self.log(f" [hotpatch] SAE features capped to {n_sae} per layer") _abl.AbliterationPipeline._distill_refusal_subspace = _patched_distill print("[hotpatch] Nuclear tuning: 4 dirs, 1.25x reflect, no neuron mask, 40%% layer cap, 4 SAE features") # ── End nuclear hotpatch ────────────────────────────────────────────────── from 
obliteratus.abliterate import AbliterationPipeline, METHODS, HARMFUL_PROMPTS, HARMLESS_PROMPTS MODELS_LIST = os.environ["BENCH_MODELS"].split() METHODS_LIST = os.environ["BENCH_METHODS"].split() N_PROMPTS = int(os.environ["BENCH_PROMPTS"]) print(f"\n{'='*60}") print(f"OBLITERATUS BENCHMARK") print(f"{'='*60}") print(f"Models: {MODELS_LIST}") print(f"Methods: {METHODS_LIST}") print(f"Prompts: {N_PROMPTS} per side") if torch.cuda.is_available(): gpu = torch.cuda.get_device_name(0) total = torch.cuda.get_device_properties(0).total_memory / 1e9 free = torch.cuda.mem_get_info(0)[0] / 1e9 print(f"GPU: {gpu} ({total:.1f} GB total, {free:.1f} GB free)") else: print("GPU: NONE (CPU only)") print(f"{'='*60}\n") harmful = HARMFUL_PROMPTS[:N_PROMPTS] harmless = HARMLESS_PROMPTS[:N_PROMPTS] all_results = [] for model_name in MODELS_LIST: print(f"\n{'═'*60}") print(f"MODEL: {model_name}") print(f"{'═'*60}") model_results = [] for method in METHODS_LIST: if method not in METHODS: print(f"SKIP unknown method: {method}") continue print(f"\n{'─'*60}") print(f"METHOD: {method} — {METHODS[method]['label']}") print(f"{'─'*60}") # Clean slate gc.collect() if torch.cuda.is_available(): torch.cuda.empty_cache() torch.cuda.reset_peak_memory_stats() outdir = f"/tmp/obliteratus_bench_{method}" t0 = time.time() pipeline = None try: pipeline = AbliterationPipeline( model_name=model_name, output_dir=outdir, device="auto", dtype="float16", method=method, harmful_prompts=harmful, harmless_prompts=harmless, on_log=lambda msg: print(f" {msg}"), ) result_path = pipeline.run() elapsed = time.time() - t0 r = { "model": model_name, "method": method, "label": METHODS[method]["label"], "time_seconds": round(elapsed, 1), "quality": pipeline._quality_metrics, "strong_layers": pipeline._strong_layers, "n_strong_layers": len(pipeline._strong_layers), "n_directions": pipeline.n_directions, } if torch.cuda.is_available(): r["peak_gpu_mb"] = round(torch.cuda.max_memory_allocated() / 1e6, 1) 
model_results.append(r) print(f"\n ✓ {method} complete in {elapsed:.1f}s") print(f" Quality: {json.dumps(pipeline._quality_metrics, default=str)}") except Exception as e: elapsed = time.time() - t0 model_results.append({ "model": model_name, "method": method, "label": METHODS.get(method, {}).get("label", method), "time_seconds": round(elapsed, 1), "error": str(e), }) print(f"\n ✗ {method} FAILED after {elapsed:.1f}s: {e}") import traceback traceback.print_exc() # Cleanup saved model to free disk shutil.rmtree(outdir, ignore_errors=True) # Force cleanup between runs if pipeline is not None: del pipeline gc.collect() if torch.cuda.is_available(): torch.cuda.empty_cache() all_results.extend(model_results) # Summary table for this model print(f"\n{'='*60}") print(f"RESULTS: {model_name}") print(f"{'Method':<12} {'Time':>8} {'PPL':>10} {'Coher':>8} {'Refusal':>8} {'GPU MB':>8}") print(f"{'─'*12} {'─'*8} {'─'*10} {'─'*8} {'─'*8} {'─'*8}") for r in model_results: if "error" in r: print(f"{r['method']:<12} {r['time_seconds']:>7.1f}s {'FAILED':>10}") continue q = r.get("quality", {}) ppl = q.get("perplexity") coh = q.get("coherence") ref = q.get("refusal_rate") gpu = r.get("peak_gpu_mb") ppl_str = f"{ppl:.2f}" if ppl is not None else "N/A" print(f"{r['method']:<12} {r['time_seconds']:>7.1f}s " f"{ppl_str:>10} " f"{f'{coh:.0%}' if coh is not None else 'N/A':>8} " f"{f'{ref:.0%}' if ref is not None else 'N/A':>8} " f"{gpu if gpu is not None else 'N/A':>8}") print(f"{'='*60}") # Final JSON dump print(f"\n\n{'='*60}") print("ALL BENCHMARK RESULTS (JSON)") print(f"{'='*60}") print("```json") print(json.dumps(all_results, indent=2, default=str)) print("```") PYEOF # ── SSH options ────────────────────────────────────────────────────────────── SSH_OPTS=( -i "$SSH_KEY" -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o ConnectTimeout=30 -o ServerAliveInterval=60 -o ServerAliveCountMax=10 ) if $VERBOSE; then SSH_OPTS+=( -v ) fi # ── Pre-flight: verify SSH connectivity 
# ── Pre-flight: sanitize inputs ──────────────────────────────────────────────
# Reject values containing shell metacharacters or newlines to prevent command
# injection on the remote host (the values are single-quoted into the remote
# command line below). Done before the dry-run branch so even dry runs validate.
for _var_name in MODELS METHODS PROMPTS; do
  _val="${!_var_name}"
  if [[ "$_val" =~ [\'\"\;\&\|\`\$\(\)\{\}\<\>\\] || "$_val" == *$'\n'* ]]; then
    echo "ERROR: ${_var_name} contains unsafe characters: $_val" >&2
    exit 1
  fi
done
# PROMPTS is fed to int() remotely — fail fast here instead.
if [[ ! "$PROMPTS" =~ ^[0-9]+$ ]]; then
  echo "ERROR: --prompts must be a non-negative integer, got: $PROMPTS" >&2
  exit 1
fi

# ── Pre-flight: verify SSH connectivity ──────────────────────────────────────
echo "Checking SSH connectivity..."
# mktemp, not a predictable /tmp name, for the debug log.
SSH_DEBUG_LOG=$(mktemp /tmp/obliteratus_ssh_debug_XXXXXX.log)
if ! ssh "${SSH_OPTS[@]}" "$SSH_HOST" "echo 'SSH_OK'" 2>"$SSH_DEBUG_LOG"; then
  echo ""
  echo "ERROR: SSH connection failed!"
  echo ""
  echo "Debug output:"
  cat "$SSH_DEBUG_LOG"
  echo ""
  echo "Troubleshooting checklist:"
  echo "  1. Is Dev Mode enabled on your HF Space?"
  echo "     → Check your Space's Settings tab (Dev Mode must be ON)"
  echo "  2. Is the Space awake (not sleeping/building)?"
  echo "     → Visit the Space URL and wait for the UI to load"
  echo "  3. Is your SSH public key added to your HF profile?"
  echo "     → https://huggingface.co/settings/keys"
  echo "     → Run: cat ${SSH_KEY}.pub"
  echo "  4. Are key permissions correct?"
  echo "     → Run: chmod 600 $SSH_KEY"
  echo "  5. Try manually:"
  echo "     → ssh -v -i $SSH_KEY $SSH_HOST echo hello"
  echo ""
  rm -f "$SSH_DEBUG_LOG"
  exit 1
fi
rm -f "$SSH_DEBUG_LOG"
echo "SSH connection verified ✓"
echo ""

# ── Build SSH command ────────────────────────────────────────────────────────
# Write the Python script to a temp file and stream it over stdin, instead of
# passing via -c (avoids command-line length limits and shell escaping issues).
REMOTE_SCRIPT_FILE=$(mktemp /tmp/obliteratus_bench_XXXXXX.py)
# printf, not echo: the script body contains backslash sequences echo may mangle.
printf '%s\n' "$REMOTE_SCRIPT" > "$REMOTE_SCRIPT_FILE"
# Single quotes: expand $REMOTE_SCRIPT_FILE when the trap fires, not now.
trap 'rm -f "$REMOTE_SCRIPT_FILE"' EXIT

if $DRY_RUN; then
  # Clear the cleanup trap so the script actually survives for inspection,
  # as the message below promises (previously the EXIT trap deleted it).
  trap - EXIT
  echo "[DRY RUN] Would execute:"
  echo "  ssh ${SSH_OPTS[*]} $SSH_HOST 'BENCH_MODELS=... python3 -u -' < $REMOTE_SCRIPT_FILE"
  echo ""
  echo "Script saved to: $REMOTE_SCRIPT_FILE"
  exit 0
fi

echo "Running benchmark on Space..."
echo ""

# Stream the script via stdin redirection (no useless cat). The BENCH_* values
# were sanitized above, so the single-quoted interpolation cannot break out.
ssh "${SSH_OPTS[@]}" "$SSH_HOST" \
  "BENCH_MODELS='$MODELS' BENCH_METHODS='$METHODS' BENCH_PROMPTS='$PROMPTS' python3 -u -" \
  < "$REMOTE_SCRIPT_FILE"