obliteratus

Running

App Files Files Community

obliteratus / scripts /run_benchmark_remote.sh

pliny-the-prompter

Upload 129 files

4837177 verified 9 days ago

raw

history blame contribute delete

17.7 kB

	#!/usr/bin/env bash
	# ─────────────────────────────────────────────────────────────────────────────
	# OBLITERATUS Remote Benchmark Runner
	#
	# One-command benchmark on your HuggingFace Space GPU.
	#
	# Usage:
	# ./scripts/run_benchmark_remote.sh # defaults: Qwen 0.5B, all methods
	# ./scripts/run_benchmark_remote.sh --model Qwen/Qwen2.5-1.5B-Instruct
	# ./scripts/run_benchmark_remote.sh --model openai/gpt-oss-20b
	# ./scripts/run_benchmark_remote.sh --models "Qwen/Qwen2.5-0.5B-Instruct openai/gpt-oss-20b"
	# ./scripts/run_benchmark_remote.sh --methods "basic advanced surgical"
	# ./scripts/run_benchmark_remote.sh --prompts 33 # use 33/66/99 prompts per side
	# ./scripts/run_benchmark_remote.sh --dry-run # print the command, don't execute
	# ./scripts/run_benchmark_remote.sh --verbose # show SSH debug output
	# ─────────────────────────────────────────────────────────────────────────────
	# Strict mode: stop on errors, on unset variables, and on failures inside pipes.
	set -o errexit -o nounset -o pipefail

	# ── Defaults ─────────────────────────────────────────────────────────────────
	# Flags (only changed by command-line options).
	VERBOSE=false
	DRY_RUN=false

	# Benchmark selection. Each OBLITERATUS_* environment variable, when set and
	# non-empty, replaces the built-in default on the right.
	PROMPTS=${OBLITERATUS_PROMPTS:-33}
	METHODS=${OBLITERATUS_METHODS:-"basic advanced aggressive surgical inverted nuclear"}
	MODELS=""
	MODEL=${OBLITERATUS_MODEL:-"Qwen/Qwen2.5-0.5B-Instruct"}

	# SSH connection. The host has no default and is validated later.
	SSH_HOST=${OBLITERATUS_SSH_HOST:-}
	SSH_KEY=${OBLITERATUS_SSH_KEY:-"$HOME/.ssh/hf_obliteratus"}

	# ── Parse args ───────────────────────────────────────────────────────────────
	#######################################
	# Parse command-line options into the configuration globals.
	# Globals:   MODEL, MODELS, METHODS, PROMPTS, SSH_KEY, SSH_HOST,
	#            DRY_RUN, VERBOSE (written)
	# Arguments: the script's command-line arguments
	# Outputs:   usage text on -h/--help; error messages on stderr
	# Returns:   exits 0 after printing help, exits 1 on an unknown option or
	#            an option that is missing its value
	#######################################
	parse_args() {
	  while [[ $# -gt 0 ]]; do
	    case "$1" in
	      --model | --models | --methods | --prompts | --key | --host)
	        # Check explicitly so a missing value yields a readable message
	        # instead of an "unbound variable" abort on $2.
	        if [[ $# -lt 2 ]]; then
	          echo "Missing value for $1" >&2
	          exit 1
	        fi
	        case "$1" in
	          # --model also clears MODELS so the single model takes effect.
	          --model) MODEL="$2"; MODELS="" ;;
	          --models) MODELS="$2" ;;
	          --methods) METHODS="$2" ;;
	          --prompts) PROMPTS="$2" ;;
	          --key) SSH_KEY="$2" ;;
	          --host) SSH_HOST="$2" ;;
	        esac
	        shift 2
	        ;;
	      --dry-run) DRY_RUN=true; shift ;;
	      --verbose | -v) VERBOSE=true; shift ;;
	      -h | --help)
	        # Prints lines 5-15 of this script (the header comment).
	        head -15 "$0" | tail -11
	        exit 0
	        ;;
	      *)
	        echo "Unknown arg: $1" >&2
	        exit 1
	        ;;
	    esac
	  done

	  # If --models not set, use single --model
	  if [[ -z "${MODELS:-}" ]]; then
	    MODELS="${MODEL:-}"
	  fi
	}

	parse_args "$@"

	# ── Validate SSH host ──────────────────────────────────────────────────────
	# Abort early when no host was given via OBLITERATUS_SSH_HOST or --host.
	# Diagnostics go to stderr, matching the other error paths in this script.
	if [[ -z "$SSH_HOST" ]]; then
	  {
	    echo "ERROR: SSH_HOST not configured."
	    echo ""
	    echo "Set your HF Space SSH host:"
	    echo " 1. export OBLITERATUS_SSH_HOST=your-username-spacename@ssh.hf.space"
	    echo " 2. Or pass --host your-username-spacename@ssh.hf.space"
	  } >&2
	  exit 1
	fi

	# ── Validate SSH key ────────────────────────────────────────────────────────
	# The key path must point at an existing regular file.
	if [[ ! -f "$SSH_KEY" ]]; then
	  {
	    echo "ERROR: SSH key not found at $SSH_KEY"
	    echo ""
	    echo "Either:"
	    echo " 1. Place your HF Space SSH key at ~/.ssh/hf_obliteratus"
	    echo " 2. Set OBLITERATUS_SSH_KEY=/path/to/key"
	    echo " 3. Pass --key /path/to/key"
	  } >&2
	  exit 1
	fi

	# Print the run summary box: one printf call, one argument per output line.
	# The trailing empty argument produces the blank line after the box.
	printf '%s\n' \
	  "╔══════════════════════════════════════════════════════════════╗" \
	  "║ OBLITERATUS — Remote GPU Benchmark ║" \
	  "╠══════════════════════════════════════════════════════════════╣" \
	  "║ Host: $SSH_HOST" \
	  "║ Models: $MODELS" \
	  "║ Methods: $METHODS" \
	  "║ Prompts: $PROMPTS per side" \
	  "║ SSH key: $SSH_KEY" \
	  "╚══════════════════════════════════════════════════════════════╝" \
	  ""

	# ── Build the Python benchmark script to run remotely ────────────────────────
	# The quoted delimiter keeps the Python source literal (no shell expansion).
	# `<<-` strips leading tabs, so the body and the PYEOF terminator may be
	# tab-indented; Python's own indentation must therefore use spaces.
	# `read -d ''` reaches end of input without seeing a NUL delimiter and returns
	# non-zero, so `|| true` keeps `set -e` from aborting the script.
	read -r -d '' REMOTE_SCRIPT <<-'PYEOF' || true
	import json, sys, time, shutil, gc, os
	# setdefault: values already present in the remote environment take precedence.
	os.environ.setdefault("PYTORCH_CUDA_ALLOC_CONF", "expandable_segments:True")
	os.environ.setdefault("CUDA_LAUNCH_BLOCKING", "1")

	import torch
	import torch.nn as nn

	# Add app dir to path (HF Space layout: /home/user/app)
	sys.path.insert(0, os.environ.get("APP_DIR", "/home/user/app"))

	# ── Hotpatch: fix device detection for accelerate device_map="auto" ──────
	# The deployed Space code uses next(model.parameters()).device which is
	# unreliable when accelerate distributes params across devices.
	import obliteratus.abliterate as _abl

	@staticmethod
	def _get_model_device(model):
	    """Return the device that input tensors should be moved to.

	    For a model that carries an ``hf_device_map`` attribute, the device of
	    the input-embedding parameters is used. If the embedding layer cannot
	    be queried (``AttributeError``) or has no parameters
	    (``StopIteration``), the first parameter whose device type is not
	    ``"meta"`` is used, and CPU is the last resort.

	    For any other model the device of the first parameter is returned.
	    """
	    if hasattr(model, "hf_device_map"):
	        try:
	            embed = model.get_input_embeddings()
	            return next(embed.parameters()).device
	        except (StopIteration, AttributeError):
	            # Skip "meta" placeholders: they hold no real storage.
	            for p in model.parameters():
	                if p.device.type != "meta":
	                    return p.device
	            return torch.device("cpu")
	    return next(model.parameters()).device

	_abl.AbliterationPipeline._get_model_device = _get_model_device

	# Patch _collect_activations to use the fixed device detection
	# NOTE(review): _orig_collect and the `types` import are not used below.
	_orig_collect = _abl.AbliterationPipeline._collect_activations.__code__
	import types

	def _patched_collect(self, layer_modules, prompts, label):
	    """Collect last-token activations — patched for correct device detection.

	    Registers a forward hook on every module in ``layer_modules``, runs the
	    model once per prompt, and records each layer's output at the final
	    sequence position.

	    Args:
	        layer_modules: indexable collection of modules to hook.
	        prompts: prompts to run; each one is tokenized and run on its own.
	        label: tag used only in progress log lines.

	    Returns:
	        dict mapping layer index to a list with one CPU float tensor per
	        prompt.
	    """
	    n_layers = len(layer_modules)
	    activations = {i: [] for i in range(n_layers)}
	    hooks = []

	    def make_hook(idx):
	        def hook_fn(module, input, output):
	            # Some layers return a tuple; the first element is taken as
	            # the hidden state.
	            hidden = output[0] if isinstance(output, tuple) else output
	            activations[idx].append(hidden[:, -1, :].detach().cpu().float())
	        return hook_fn

	    for idx in range(n_layers):
	        hooks.append(layer_modules[idx].register_forward_hook(make_hook(idx)))

	    model = self.handle.model
	    tokenizer = self.handle.tokenizer

	    # Shorten the truncation length when little GPU memory is free
	    # (free memory is summed over all visible CUDA devices).
	    max_length = 256
	    if torch.cuda.is_available():
	        free_gb = sum(
	            torch.cuda.mem_get_info(i)[0] / (1024 ** 3)
	            for i in range(torch.cuda.device_count())
	        )
	        if free_gb < 2.0:
	            max_length = 64
	            self.log(f" Low GPU memory ({free_gb:.1f} GB free), using max_length={max_length}")
	        elif free_gb < 4.0:
	            max_length = 128
	            self.log(f" Tight GPU memory ({free_gb:.1f} GB free), using max_length={max_length}")

	    device = self._get_model_device(model)

	    try:
	        for i, prompt in enumerate(prompts):
	            self.log(f" [{label}] prompt {i + 1}/{len(prompts)}")
	            inputs = tokenizer(
	                prompt, return_tensors="pt", padding=True, truncation=True,
	                max_length=max_length,
	            )
	            inputs = {k: v.to(device) for k, v in inputs.items()}
	            with torch.no_grad():
	                model(**inputs)
	            del inputs
	            self._free_gpu_memory()
	    finally:
	        # Always detach the hooks, even if a forward pass raised.
	        for h in hooks:
	            h.remove()

	    return activations

	_abl.AbliterationPipeline._collect_activations = _patched_collect
	print("[hotpatch] Device detection fix applied")
	# ── End hotpatch ─────────────────────────────────────────────────────────

	# ── Hotpatch: nuclear mode tuning ─────────────────────────────────────────
	# The deployed Space code has stale nuclear defaults. Override them here
	# so the benchmark exercises the latest tuning without redeploying.
	import math as _math

	# 1. Updated method configs (read at __init__ time)
	_abl.METHODS["nuclear"].update({
	    "n_directions": 4,
	    "reflection_strength": 1.25,
	    "embed_regularization": 0.50,
	    "steering_strength": 0.15,
	    "safety_neuron_masking": False,
	})
	_abl.METHODS["inverted"]["safety_neuron_masking"] = False

	# 2. Cap layers for inversion modes (40% of total) — post-distill
	_orig_distill = _abl.AbliterationPipeline._distill_refusal_subspace
	def _patched_distill(self):
	    """Run the original distillation, then trim its results for inversion modes.

	    When ``self.invert_refusal`` is set and strong layers were found, keeps
	    at most ``max(3, int(0.40 * n_layers))`` strong layers and truncates the
	    SAE directions of every layer to ``n_sae`` rows.

	    Returns whatever the original method returned.
	    """
	    result = _orig_distill(self)
	    # NOTE(review): the nesting below was reconstructed from a copy that had
	    # lost its indentation; the SAE truncation is assumed to apply only to
	    # inversion modes — confirm against the original script.
	    if self.invert_refusal and self._strong_layers:
	        try:
	            n_total = len(_abl.get_layer_modules(self.handle))
	        except Exception:
	            # Fallback layer count when the layer list cannot be obtained.
	            n_total = 24
	        max_layers = max(3, int(n_total * 0.40))
	        if len(self._strong_layers) > max_layers:
	            old_count = len(self._strong_layers)
	            self._strong_layers = self._strong_layers[:max_layers]
	            self.log(f" [hotpatch] Capped {old_count} -> {max_layers} layers for inversion (40% of {n_total})")
	        # Truncate SAE directions: 4 features for nuclear, 6 for inverted
	        n_sae = 4 if self.reflection_strength < 2.0 else 6
	        for idx in list(self._sae_directions.keys()):
	            dirs = self._sae_directions[idx]
	            if dirs.shape[0] > n_sae:
	                self._sae_directions[idx] = dirs[:n_sae]
	        if self._sae_directions:
	            self.log(f" [hotpatch] SAE features capped to {n_sae} per layer")
	    return result
	_abl.AbliterationPipeline._distill_refusal_subspace = _patched_distill

	# Plain string, not a %-format: a single "%" is what should be printed.
	print("[hotpatch] Nuclear tuning: 4 dirs, 1.25x reflect, no neuron mask, 40% layer cap, 4 SAE features")
	# ── End nuclear hotpatch ──────────────────────────────────────────────────

	from obliteratus.abliterate import AbliterationPipeline, METHODS, HARMFUL_PROMPTS, HARMLESS_PROMPTS

	# Run parameters arrive as environment variables set on the ssh command line.
	# Model and method lists are whitespace-separated.
	MODELS_LIST = os.environ["BENCH_MODELS"].split()
	METHODS_LIST = os.environ["BENCH_METHODS"].split()
	N_PROMPTS = int(os.environ["BENCH_PROMPTS"])

	print(f"\n{'='*60}")
	print(f"OBLITERATUS BENCHMARK")
	print(f"{'='*60}")
	print(f"Models: {MODELS_LIST}")
	print(f"Methods: {METHODS_LIST}")
	print(f"Prompts: {N_PROMPTS} per side")
	if torch.cuda.is_available():
	    # Only CUDA device 0 is reported here.
	    gpu = torch.cuda.get_device_name(0)
	    total = torch.cuda.get_device_properties(0).total_memory / 1e9
	    free = torch.cuda.mem_get_info(0)[0] / 1e9
	    print(f"GPU: {gpu} ({total:.1f} GB total, {free:.1f} GB free)")
	else:
	    print("GPU: NONE (CPU only)")
	print(f"{'='*60}\n")

	# Use the first N_PROMPTS entries of each prompt list.
	harmful = HARMFUL_PROMPTS[:N_PROMPTS]
	harmless = HARMLESS_PROMPTS[:N_PROMPTS]

	# Accumulates one result dict per (model, method) run across all models.
	all_results = []

	# Run every requested method against every requested model. A failure in
	# one run is recorded and does not stop the remaining runs.
	for model_name in MODELS_LIST:
	    print(f"\n{'═'*60}")
	    print(f"MODEL: {model_name}")
	    print(f"{'═'*60}")

	    model_results = []

	    for method in METHODS_LIST:
	        if method not in METHODS:
	            print(f"SKIP unknown method: {method}")
	            continue

	        print(f"\n{'─'*60}")
	        print(f"METHOD: {method} — {METHODS[method]['label']}")
	        print(f"{'─'*60}")

	        # Clean slate
	        gc.collect()
	        if torch.cuda.is_available():
	            torch.cuda.empty_cache()
	            # Reset so peak_gpu_mb below reflects this run only.
	            torch.cuda.reset_peak_memory_stats()

	        outdir = f"/tmp/obliteratus_bench_{method}"
	        t0 = time.time()
	        pipeline = None

	        try:
	            pipeline = AbliterationPipeline(
	                model_name=model_name,
	                output_dir=outdir,
	                device="auto",
	                dtype="float16",
	                method=method,
	                harmful_prompts=harmful,
	                harmless_prompts=harmless,
	                on_log=lambda msg: print(f" {msg}"),
	            )
	            pipeline.run()
	            elapsed = time.time() - t0

	            r = {
	                "model": model_name,
	                "method": method,
	                "label": METHODS[method]["label"],
	                "time_seconds": round(elapsed, 1),
	                "quality": pipeline._quality_metrics,
	                "strong_layers": pipeline._strong_layers,
	                "n_strong_layers": len(pipeline._strong_layers),
	                "n_directions": pipeline.n_directions,
	            }

	            if torch.cuda.is_available():
	                r["peak_gpu_mb"] = round(torch.cuda.max_memory_allocated() / 1e6, 1)

	            model_results.append(r)

	            print(f"\n ✓ {method} complete in {elapsed:.1f}s")
	            print(f" Quality: {json.dumps(pipeline._quality_metrics, default=str)}")

	        except Exception as e:
	            # Record the failure so it appears in the summary and JSON dump.
	            elapsed = time.time() - t0
	            model_results.append({
	                "model": model_name,
	                "method": method,
	                "label": METHODS.get(method, {}).get("label", method),
	                "time_seconds": round(elapsed, 1),
	                "error": str(e),
	            })
	            print(f"\n ✗ {method} FAILED after {elapsed:.1f}s: {e}")
	            import traceback
	            traceback.print_exc()

	        # Cleanup saved model to free disk
	        shutil.rmtree(outdir, ignore_errors=True)

	        # Force cleanup between runs
	        if pipeline is not None:
	            del pipeline
	        gc.collect()
	        if torch.cuda.is_available():
	            torch.cuda.empty_cache()

	    all_results.extend(model_results)

	    # Summary table for this model
	    print(f"\n{'='*60}")
	    print(f"RESULTS: {model_name}")
	    print(f"{'Method':<12} {'Time':>8} {'PPL':>10} {'Coher':>8} {'Refusal':>8} {'GPU MB':>8}")
	    print(f"{'─'*12} {'─'*8} {'─'*10} {'─'*8} {'─'*8} {'─'*8}")
	    for r in model_results:
	        if "error" in r:
	            print(f"{r['method']:<12} {r['time_seconds']:>7.1f}s {'FAILED':>10}")
	            continue
	        q = r.get("quality", {})
	        ppl = q.get("perplexity")
	        coh = q.get("coherence")
	        ref = q.get("refusal_rate")
	        gpu = r.get("peak_gpu_mb")
	        # Missing metrics are shown as "N/A" rather than failing the format.
	        ppl_str = f"{ppl:.2f}" if ppl is not None else "N/A"
	        coh_str = f"{coh:.0%}" if coh is not None else "N/A"
	        ref_str = f"{ref:.0%}" if ref is not None else "N/A"
	        gpu_str = gpu if gpu is not None else "N/A"
	        print(f"{r['method']:<12} {r['time_seconds']:>7.1f}s "
	              f"{ppl_str:>10} "
	              f"{coh_str:>8} "
	              f"{ref_str:>8} "
	              f"{gpu_str:>8}")
	    print(f"{'='*60}")

	# Emit every collected result as one fenced JSON document at the end of the
	# output. Values that json cannot serialise are converted with str().
	_rule = "=" * 60
	_payload = json.dumps(all_results, indent=2, default=str)
	print("\n\n" + _rule)
	print("ALL BENCHMARK RESULTS (JSON)")
	print(_rule)
	print("```json")
	print(_payload)
	print("```")
	PYEOF

	# ── SSH options ──────────────────────────────────────────────────────────────
	# Start with the identity file, then append each -o setting in turn.
	SSH_OPTS=(-i "$SSH_KEY")
	for _ssh_setting in \
	  StrictHostKeyChecking=no \
	  UserKnownHostsFile=/dev/null \
	  ConnectTimeout=30 \
	  ServerAliveInterval=60 \
	  ServerAliveCountMax=10; do
	  SSH_OPTS+=(-o "$_ssh_setting")
	done
	unset _ssh_setting

	# --verbose adds ssh's own debug output.
	if $VERBOSE; then SSH_OPTS+=(-v); fi

	# ── Pre-flight: verify SSH connectivity ─────────────────────────────────────
	# Runs a trivial remote command. ssh's stderr is captured in a private temp
	# file (created with mktemp rather than a fixed /tmp name) and is shown only
	# when the connection fails. Failure diagnostics go to stderr.
	echo "Checking SSH connectivity..."
	SSH_DEBUG_LOG=$(mktemp "${TMPDIR:-/tmp}/obliteratus_ssh_debug.XXXXXX") || {
	  echo "ERROR: could not create a temporary file" >&2
	  exit 1
	}
	if ! ssh "${SSH_OPTS[@]}" "$SSH_HOST" "echo 'SSH_OK'" 2>"$SSH_DEBUG_LOG"; then
	  {
	    echo ""
	    echo "ERROR: SSH connection failed!"
	    echo ""
	    echo "Debug output:"
	    cat "$SSH_DEBUG_LOG"
	    echo ""
	    echo "Troubleshooting checklist:"
	    echo " 1. Is Dev Mode enabled on your HF Space?"
	    echo " → Check your Space's Settings tab (Dev Mode must be ON)"
	    echo " 2. Is the Space awake (not sleeping/building)?"
	    echo " → Visit the Space URL and wait for the UI to load"
	    echo " 3. Is your SSH public key added to your HF profile?"
	    echo " → https://huggingface.co/settings/keys"
	    echo " → Run: cat ${SSH_KEY}.pub"
	    echo " 4. Are key permissions correct?"
	    echo " → Run: chmod 600 $SSH_KEY"
	    echo " 5. Try manually:"
	    echo " → ssh -v -i $SSH_KEY $SSH_HOST echo hello"
	    echo ""
	  } >&2
	  rm -f -- "$SSH_DEBUG_LOG"
	  exit 1
	fi
	rm -f -- "$SSH_DEBUG_LOG"
	echo "SSH connection verified ✓"
	echo ""

	# ── Build SSH command ────────────────────────────────────────────────────────
	# Write the Python script to a temp file and pipe it, instead of passing
	# via -c (avoids command-line length limits and shell escaping issues).
	# The template ends in the X's because BSD/macOS mktemp only randomises a
	# trailing run of X; the file is fed to python3 on stdin, so it needs no
	# .py extension.
	REMOTE_SCRIPT_FILE=$(mktemp "${TMPDIR:-/tmp}/obliteratus_bench_XXXXXX")
	printf '%s\n' "$REMOTE_SCRIPT" > "$REMOTE_SCRIPT_FILE"
	# Single quotes: the path is expanded when the trap fires, not when it is set.
	trap 'rm -f -- "$REMOTE_SCRIPT_FILE"' EXIT

	if [[ "${DRY_RUN:-false}" == true ]]; then
	  # Keep the file: the message below tells the user where to find it, so the
	  # EXIT cleanup must not delete it on the way out.
	  trap - EXIT
	  echo "[DRY RUN] Would execute:"
	  echo " cat script.py | ssh ${SSH_OPTS[*]} $SSH_HOST 'BENCH_MODELS=... python3 -u'"
	  echo ""
	  echo "Script saved to: $REMOTE_SCRIPT_FILE"
	  exit 0
	fi

	echo "Running benchmark on Space..."
	echo ""

	#######################################
	# Report whether a value contains a character that is rejected before the
	# value is embedded in the remote command line.
	# Rejected: ' " ; & | ` $ { } < > and backslash.
	# Arguments: $1 - value to check
	# Returns:   0 if an unsafe character is present, 1 otherwise
	#######################################
	contains_unsafe_chars() {
	  # Kept in a variable so the bracket expression reaches the regex engine
	  # verbatim; inside [...] a backslash is an ordinary character.
	  local -r unsafe_re='['\''";&|`${}<>\]'
	  [[ "$1" =~ $unsafe_re ]]
	}

	# Sanitize inputs: reject values containing shell metacharacters to prevent
	# command injection on the remote host.
	for _var_name in MODELS METHODS PROMPTS; do
	  _val="${!_var_name}"
	  if contains_unsafe_chars "$_val"; then
	    echo "ERROR: ${_var_name} contains unsafe characters: $_val" >&2
	    exit 1
	  fi
	done

	# The script is supplied on stdin ("python3 -u -"); the run parameters are
	# passed as environment assignments in front of the remote command.
	ssh "${SSH_OPTS[@]}" "$SSH_HOST" \
	  "BENCH_MODELS='$MODELS' BENCH_METHODS='$METHODS' BENCH_PROMPTS='$PROMPTS' python3 -u -" \
	  < "$REMOTE_SCRIPT_FILE"