File size: 18,057 Bytes
a4d593e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ae16715
a4d593e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ae16715
 
 
 
 
 
 
 
 
 
a4d593e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4837177
 
a4d593e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
102206c
 
 
 
 
 
 
a4d593e
102206c
 
a4d593e
102206c
a4d593e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ae16715
a4d593e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f6b07b4
 
 
 
 
 
 
 
 
 
a4d593e
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
#!/usr/bin/env bash
# ─────────────────────────────────────────────────────────────────────────────
# OBLITERATUS Remote Benchmark Runner
#
# One-command benchmark on your HuggingFace Space GPU.
#
# Usage:
#   ./scripts/run_benchmark_remote.sh                  # defaults: Qwen 0.5B, all methods
#   ./scripts/run_benchmark_remote.sh --model Qwen/Qwen2.5-1.5B-Instruct
#   ./scripts/run_benchmark_remote.sh --model openai/gpt-oss-20b
#   ./scripts/run_benchmark_remote.sh --models "Qwen/Qwen2.5-0.5B-Instruct openai/gpt-oss-20b"
#   ./scripts/run_benchmark_remote.sh --methods "basic advanced surgical"
#   ./scripts/run_benchmark_remote.sh --prompts 33     # use 33/66/99 prompts per side
#   ./scripts/run_benchmark_remote.sh --dry-run        # print the command, don't execute
#   ./scripts/run_benchmark_remote.sh --verbose        # show SSH debug output
# ─────────────────────────────────────────────────────────────────────────────
# Strict mode: stop on command failure, on unset variables, and on a failure
# in any stage of a pipeline.
set -o errexit -o nounset -o pipefail

# ── Defaults ─────────────────────────────────────────────────────────────────
# Connection settings; an OBLITERATUS_* environment variable, when set and
# non-empty, takes precedence over the built-in value.
SSH_HOST="${OBLITERATUS_SSH_HOST:-}"
SSH_KEY="${OBLITERATUS_SSH_KEY:-${HOME}/.ssh/hf_obliteratus}"

# Benchmark selection. MODELS stays empty unless --models is passed.
MODEL="${OBLITERATUS_MODEL:-Qwen/Qwen2.5-0.5B-Instruct}"
MODELS=""
METHODS="${OBLITERATUS_METHODS:-basic advanced aggressive surgical inverted nuclear}"
PROMPTS="${OBLITERATUS_PROMPTS:-33}"

# Behaviour switches, flipped by --verbose / --dry-run.
VERBOSE=false
DRY_RUN=false

# ── Parse args ───────────────────────────────────────────────────────────────
#######################################
# Abort with a clear message when a value-taking flag is the last argument.
# Without this check the script stops with bash's generic "unbound variable"
# error (set -u) instead of naming the offending flag.
# Arguments: $1 - flag name
#            $2 - number of arguments left, including the flag itself
# Outputs:   error message on stderr
# Returns:   0 when a value is present; exits 1 otherwise
#######################################
require_value() {
  if (( $2 < 2 )); then
    echo "ERROR: $1 requires a value" >&2
    exit 1
  fi
}

#######################################
# Parse command-line flags into the configuration globals.
# Globals:   MODEL, MODELS, METHODS, PROMPTS, SSH_KEY, SSH_HOST,
#            DRY_RUN, VERBOSE (written)
# Arguments: the script's command-line arguments
# Outputs:   usage text on stdout for -h/--help; errors on stderr
# Returns:   0 on success; exits 0 after --help, 1 on invalid input
#######################################
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      # --model picks a single model and discards any earlier --models list.
      --model)    require_value "$1" "$#"; MODEL="$2"; MODELS=""; shift 2 ;;
      --models)   require_value "$1" "$#"; MODELS="$2";           shift 2 ;;
      --methods)  require_value "$1" "$#"; METHODS="$2";          shift 2 ;;
      --prompts)  require_value "$1" "$#"; PROMPTS="$2";          shift 2 ;;
      --key)      require_value "$1" "$#"; SSH_KEY="$2";          shift 2 ;;
      --host)     require_value "$1" "$#"; SSH_HOST="$2";         shift 2 ;;
      --dry-run)  DRY_RUN=true;          shift ;;
      --verbose|-v) VERBOSE=true;        shift ;;
      -h|--help)
        # Prints lines 5-15 of this script, i.e. part of its header comment.
        head -15 "$0" | tail -11
        exit 0
        ;;
      *)
        echo "Unknown arg: $1" >&2; exit 1 ;;
    esac
  done
}

parse_args "$@"

# No --models list given (MODELS still empty): benchmark the single MODEL.
MODELS="${MODELS:-$MODEL}"

# ── Validate SSH host ──────────────────────────────────────────────────────
# Diagnostics are written to stderr, matching the other error paths in this
# script, so they never mix with benchmark output on stdout.
if [[ -z "$SSH_HOST" ]]; then
  {
    echo "ERROR: SSH_HOST not configured."
    echo ""
    echo "Set your HF Space SSH host:"
    echo "  1. export OBLITERATUS_SSH_HOST=your-username-spacename@ssh.hf.space"
    echo "  2. Or pass --host your-username-spacename@ssh.hf.space"
  } >&2
  exit 1
fi

# ── Validate SSH key ────────────────────────────────────────────────────────
# The key must be an existing regular file before any ssh call is attempted.
if [[ ! -f "$SSH_KEY" ]]; then
  {
    echo "ERROR: SSH key not found at $SSH_KEY"
    echo ""
    echo "Either:"
    echo "  1. Place your HF Space SSH key at ~/.ssh/hf_obliteratus"
    echo "  2. Set OBLITERATUS_SSH_KEY=/path/to/key"
    echo "  3. Pass --key /path/to/key"
  } >&2
  exit 1
fi

#######################################
# Print the run-configuration banner.
# The box-drawing characters and the dash are written as real UTF-8 glyphs;
# several of them were previously mis-encoded and printed as garbage.
# Globals:   SSH_HOST, MODELS, METHODS, PROMPTS, SSH_KEY (read)
# Outputs:   the banner on stdout, followed by one blank line
#######################################
print_banner() {
  echo "╔══════════════════════════════════════════════════════════════╗"
  echo "║          OBLITERATUS — Remote GPU Benchmark                 ║"
  echo "╠══════════════════════════════════════════════════════════════╣"
  # Value rows have no right-hand border because their width is not fixed.
  echo "║  Host:     $SSH_HOST"
  echo "║  Models:   $MODELS"
  echo "║  Methods:  $METHODS"
  echo "║  Prompts:  $PROMPTS per side"
  echo "║  SSH key:  $SSH_KEY"
  echo "╚══════════════════════════════════════════════════════════════╝"
  echo ""
}

print_banner

# ── Build the Python benchmark script to run remotely ────────────────────────
# The heredoc body (up to the PYEOF line) is stored verbatim in REMOTE_SCRIPT.
# The quoted 'PYEOF' delimiter disables shell expansion inside the body, so
# `$`, backticks and backslashes in the Python source stay literal.
# `read -d ''` reads until a NUL byte; the heredoc contains none, so read hits
# end of input and returns non-zero after filling the variable. `|| true`
# keeps `set -e` from aborting the script on that expected status.
read -r -d '' REMOTE_SCRIPT << 'PYEOF' || true
# Standard-library modules used by the benchmark script.
import json, sys, time, shutil, gc, os
# CUDA-related settings are placed in the environment before torch is imported.
# setdefault() leaves any value that is already present untouched.
os.environ.setdefault("PYTORCH_CUDA_ALLOC_CONF", "expandable_segments:True")
os.environ.setdefault("CUDA_LAUNCH_BLOCKING", "1")

import torch
import torch.nn as nn

# Add app dir to path (HF Space layout: /home/user/app)
# APP_DIR, when present in the environment, overrides that default location.
sys.path.insert(0, os.environ.get("APP_DIR", "/home/user/app"))

# ── Hotpatch: fix device detection for accelerate device_map="auto" ──────
# The deployed Space code uses next(model.parameters()).device which is
# unreliable when accelerate distributes params across devices.
import obliteratus.abliterate as _abl

# Wrapped as a staticmethod so that, once attached to the pipeline class below,
# it is called with only `model` (see self._get_model_device(model) in
# _patched_collect).
@staticmethod
def _get_model_device(model):
    """Find the correct input device (embedding layer) for accelerate models.

    Args:
        model: Loaded model object. When it has an ``hf_device_map``
            attribute, the device of its input-embedding parameters is used.

    Returns:
        With ``hf_device_map`` present: the device of the first parameter of
        ``model.get_input_embeddings()``; if that lookup raises
        ``StopIteration`` or ``AttributeError``, the device of the first
        parameter not on the "meta" device, or CPU when there is none.
        Without ``hf_device_map``: the device of the model's first parameter.
    """
    if hasattr(model, "hf_device_map"):
        try:
            embed = model.get_input_embeddings()
            return next(embed.parameters()).device
        except (StopIteration, AttributeError):
            # No usable embedding layer: fall back to the first parameter
            # that is not a "meta" placeholder.
            for p in model.parameters():
                if p.device.type != "meta":
                    return p.device
            return torch.device("cpu")
    return next(model.parameters()).device

# Install the replacement on the imported pipeline class.
_abl.AbliterationPipeline._get_model_device = _get_model_device

# Patch _collect_activations to use the fixed device detection
# NOTE(review): _orig_collect and the `types` import are not referenced again
# anywhere in this script as shown; they look like leftovers.
_orig_collect = _abl.AbliterationPipeline._collect_activations.__code__
import types

def _patched_collect(self, layer_modules, prompts, label):
    """Collect last-token activations — patched for correct device detection.

    Args:
        layer_modules: Indexable collection of modules; a forward hook is
            registered on each one.
        prompts: Sized iterable of prompts; each one is tokenized and run
            through the model in its own forward pass.
        label: Tag included in the per-prompt progress log line.

    Returns:
        Dict mapping layer index to the list of tensors captured by that
        layer's hook (moved to CPU and cast with ``.float()``).
    """
    n_layers = len(layer_modules)
    activations = {i: [] for i in range(n_layers)}
    hooks = []

    def make_hook(idx):
        # Factory so each hook closes over its own layer index.
        def hook_fn(module, input, output):
            # Some modules return a tuple; the first element is used then.
            hidden = output[0] if isinstance(output, tuple) else output
            # Keep only the last position along dim 1.
            # assumes hidden is (batch, seq, hidden) — TODO confirm
            activations[idx].append(hidden[:, -1, :].detach().cpu().float())
        return hook_fn

    for idx in range(n_layers):
        hooks.append(layer_modules[idx].register_forward_hook(make_hook(idx)))

    model = self.handle.model
    tokenizer = self.handle.tokenizer

    # Token limit per prompt; lowered below when free GPU memory is scarce.
    max_length = 256
    if torch.cuda.is_available():
        # Free memory summed over all visible CUDA devices, in GiB.
        free_gb = sum(
            torch.cuda.mem_get_info(i)[0] / (1024 ** 3)
            for i in range(torch.cuda.device_count())
        )
        # Scale thresholds by model size (baseline: 7B with hidden=4096, 32 layers)
        _h = self.handle.hidden_size if self.handle else 4096
        _l = n_layers if n_layers else 32
        _ms = (_h / 4096) * (_l / 32)
        # Floors keep the thresholds from vanishing for very small models.
        _tight = max(4.0 * _ms, 0.5)
        _low = max(2.0 * _ms, 0.25)
        if free_gb < _low:
            max_length = 64
            self.log(f"  Low GPU memory ({free_gb:.1f} GB free, threshold {_low:.1f} GB), using max_length={max_length}")
        elif free_gb < _tight:
            max_length = 128
            self.log(f"  Tight GPU memory ({free_gb:.1f} GB free, threshold {_tight:.1f} GB), using max_length={max_length}")

    # Inputs are moved to the device reported by the patched helper.
    device = self._get_model_device(model)

    try:
        for i, prompt in enumerate(prompts):
            self.log(f"  [{label}] prompt {i + 1}/{len(prompts)}")
            inputs = tokenizer(
                prompt, return_tensors="pt", padding=True, truncation=True,
                max_length=max_length,
            )
            inputs = {k: v.to(device) for k, v in inputs.items()}
            # The forward pass is run only for the hooks' side effects;
            # the model output itself is discarded.
            with torch.no_grad():
                model(**inputs)
            del inputs
            self._free_gpu_memory()
    finally:
        # Always detach the hooks, even if a forward pass raised.
        for h in hooks:
            h.remove()

    return activations

# Replace the pipeline method with the patched version.
_abl.AbliterationPipeline._collect_activations = _patched_collect
print("[hotpatch] Device detection fix applied")
# ── End hotpatch ─────────────────────────────────────────────────────────

# ── Hotpatch: nuclear mode tuning ─────────────────────────────────────────
# The deployed Space code has stale nuclear defaults.  Override them here
# so the benchmark exercises the latest tuning without redeploying.
# NOTE(review): _math is not referenced anywhere else in this script as shown.
import math as _math

# 1. Updated method configs (read at __init__ time)
# dict.update() overwrites only the listed keys of the "nuclear" entry;
# any other keys in that entry are left as they are.
_abl.METHODS["nuclear"].update({
    "n_directions": 4,
    "reflection_strength": 1.25,
    "embed_regularization": 0.50,
    "steering_strength": 0.15,
    "safety_neuron_masking": False,
})
# The "inverted" entry gets a single key changed.
_abl.METHODS["inverted"]["safety_neuron_masking"] = False

# 2. Cap layers for inversion modes (40% of total) — post-distill
# Keep a reference to the original method so the wrapper can delegate to it.
_orig_distill = _abl.AbliterationPipeline._distill_refusal_subspace
def _patched_distill(self):
    """Run the original distillation, then trim its results for inversion runs.

    Nothing is trimmed unless ``self.invert_refusal`` is truthy and
    ``self._strong_layers`` is non-empty. In that case ``_strong_layers`` is
    cut to at most max(3, 40% of the layer count) entries, and each entry of
    ``_sae_directions`` is cut to its first ``n_sae`` rows.
    """
    _orig_distill(self)
    if self.invert_refusal and self._strong_layers:
        try:
            n_total = len(_abl.get_layer_modules(self.handle))
        except Exception:
            # Layer count unavailable: fall back to a fixed count of 24.
            n_total = 24
        max_layers = max(3, int(n_total * 0.40))
        if len(self._strong_layers) > max_layers:
            old_count = len(self._strong_layers)
            # Keeps the first max_layers entries in their existing order.
            self._strong_layers = self._strong_layers[:max_layers]
            self.log(f"  [hotpatch] Capped {old_count} -> {max_layers} layers for inversion (40% of {n_total})")
        # Truncate SAE directions: 4 features for nuclear, 6 for inverted
        # The two modes are told apart by reflection_strength (< 2.0 gives 4).
        n_sae = 4 if self.reflection_strength < 2.0 else 6
        for idx in list(self._sae_directions.keys()):
            dirs = self._sae_directions[idx]
            if dirs.shape[0] > n_sae:
                self._sae_directions[idx] = dirs[:n_sae]
        # Logged whenever any SAE directions exist, whether or not any entry
        # was actually shortened.
        if self._sae_directions:
            self.log(f"  [hotpatch] SAE features capped to {n_sae} per layer")
# Install the wrapper in place of the original method.
_abl.AbliterationPipeline._distill_refusal_subspace = _patched_distill

# One-line summary of the nuclear-mode overrides. This is a plain string, not
# a %-format template, so the percent sign is written once ("40%"); a doubled
# "%%" would be printed literally.
print("[hotpatch] Nuclear tuning: 4 dirs, 1.25x reflect, no neuron mask, 40% layer cap, 4 SAE features")
# ── End nuclear hotpatch ──────────────────────────────────────────────────

from obliteratus.abliterate import AbliterationPipeline, METHODS, HARMFUL_PROMPTS, HARMLESS_PROMPTS

# Benchmark parameters arrive as environment variables set by the SSH wrapper.
# Model and method lists are whitespace-separated; a missing variable raises
# KeyError right here.
MODELS_LIST = os.environ["BENCH_MODELS"].split()
METHODS_LIST = os.environ["BENCH_METHODS"].split()
N_PROMPTS = int(os.environ["BENCH_PROMPTS"])

# Run header.
_header_rule = "=" * 60
print("\n" + _header_rule)
print("OBLITERATUS BENCHMARK")
print(_header_rule)
print(f"Models:   {MODELS_LIST}")
print(f"Methods:  {METHODS_LIST}")
print(f"Prompts:  {N_PROMPTS} per side")
if not torch.cuda.is_available():
    print("GPU:      NONE (CPU only)")
else:
    # Device 0 only: name, total memory and currently free memory (in 1e9 bytes).
    gpu = torch.cuda.get_device_name(0)
    total = torch.cuda.get_device_properties(0).total_memory / 1e9
    free = torch.cuda.mem_get_info(0)[0] / 1e9
    print(f"GPU:      {gpu} ({total:.1f} GB total, {free:.1f} GB free)")
print(_header_rule + "\n")

# The first N_PROMPTS entries of each prompt list are used for every run.
harmless = HARMLESS_PROMPTS[:N_PROMPTS]
harmful = HARMFUL_PROMPTS[:N_PROMPTS]

# One result dict per (model, method) run, accumulated across all models.
all_results = []

# Outer loop: models. Inner loop: methods. Each run builds a fresh pipeline,
# records timing/quality/peak GPU memory (or the error), then frees resources.
# The dash and the pass/fail marks below are real UTF-8 glyphs; they were
# previously mis-encoded and printed as garbage.
for model_name in MODELS_LIST:
    print(f"\n{'═'*60}")
    print(f"MODEL: {model_name}")
    print(f"{'═'*60}")

    model_results = []

    for method in METHODS_LIST:
        if method not in METHODS:
            print(f"SKIP unknown method: {method}")
            continue

        print(f"\n{'─'*60}")
        print(f"METHOD: {method} — {METHODS[method]['label']}")
        print(f"{'─'*60}")

        # Clean slate
        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
            # Reset so max_memory_allocated() below reflects this run only.
            torch.cuda.reset_peak_memory_stats()

        outdir = f"/tmp/obliteratus_bench_{method}"
        t0 = time.time()
        # Bound before the try so the cleanup below works even when the
        # constructor raises.
        pipeline = None

        try:
            pipeline = AbliterationPipeline(
                model_name=model_name,
                output_dir=outdir,
                device="auto",
                dtype="float16",
                method=method,
                harmful_prompts=harmful,
                harmless_prompts=harmless,
                on_log=lambda msg: print(f"  {msg}"),
            )
            result_path = pipeline.run()
            elapsed = time.time() - t0

            r = {
                "model": model_name,
                "method": method,
                "label": METHODS[method]["label"],
                "time_seconds": round(elapsed, 1),
                "quality": pipeline._quality_metrics,
                "strong_layers": pipeline._strong_layers,
                "n_strong_layers": len(pipeline._strong_layers),
                "n_directions": pipeline.n_directions,
            }

            if torch.cuda.is_available():
                r["peak_gpu_mb"] = round(torch.cuda.max_memory_allocated() / 1e6, 1)

            model_results.append(r)

            print(f"\n  ✓ {method} complete in {elapsed:.1f}s")
            print(f"    Quality: {json.dumps(pipeline._quality_metrics, default=str)}")

        except Exception as e:
            # A failed method is recorded and the benchmark moves on to the
            # next method instead of aborting.
            elapsed = time.time() - t0
            model_results.append({
                "model": model_name,
                "method": method,
                "label": METHODS.get(method, {}).get("label", method),
                "time_seconds": round(elapsed, 1),
                "error": str(e),
            })
            print(f"\n  ✗ {method} FAILED after {elapsed:.1f}s: {e}")
            import traceback
            traceback.print_exc()

        # Cleanup saved model to free disk
        shutil.rmtree(outdir, ignore_errors=True)

        # Force cleanup between runs
        if pipeline is not None:
            del pipeline
        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

    all_results.extend(model_results)

    # Summary table for this model
    print(f"\n{'='*60}")
    print(f"RESULTS: {model_name}")
    print(f"{'Method':<12} {'Time':>8} {'PPL':>10} {'Coher':>8} {'Refusal':>8} {'GPU MB':>8}")
    print(f"{'─'*12} {'─'*8} {'─'*10} {'─'*8} {'─'*8} {'─'*8}")
    for r in model_results:
        if "error" in r:
            print(f"{r['method']:<12} {r['time_seconds']:>7.1f}s {'FAILED':>10}")
            continue
        # Missing metrics are shown as N/A rather than failing the table.
        q = r.get("quality", {})
        ppl = q.get("perplexity")
        coh = q.get("coherence")
        ref = q.get("refusal_rate")
        gpu = r.get("peak_gpu_mb")
        ppl_str = f"{ppl:.2f}" if ppl is not None else "N/A"
        print(f"{r['method']:<12} {r['time_seconds']:>7.1f}s "
              f"{ppl_str:>10} "
              f"{f'{coh:.0%}' if coh is not None else 'N/A':>8} "
              f"{f'{ref:.0%}' if ref is not None else 'N/A':>8} "
              f"{gpu if gpu is not None else 'N/A':>8}")
    print(f"{'='*60}")

# Final JSON dump
# Emit every collected result as one fenced JSON block at the end of the log.
# default=str stringifies any value json cannot serialize natively.
_json_rule = "=" * 60
print("\n\n" + _json_rule, "ALL BENCHMARK RESULTS (JSON)", _json_rule, "```json", sep="\n")
print(json.dumps(all_results, indent=2, default=str))
print("```")
PYEOF

# ── SSH options ──────────────────────────────────────────────────────────────
# Shared by every ssh invocation below. Starts with the identity file, then
# appends one "-o <setting>" pair per entry.
# NOTE(review): the first two settings turn off host-key verification.
SSH_OPTS=(-i "$SSH_KEY")
for _ssh_setting in \
  StrictHostKeyChecking=no \
  UserKnownHostsFile=/dev/null \
  ConnectTimeout=30 \
  ServerAliveInterval=60 \
  ServerAliveCountMax=10; do
  SSH_OPTS+=(-o "$_ssh_setting")
done

# --verbose adds ssh's own debug output.
if $VERBOSE; then SSH_OPTS+=(-v); fi

# ── Pre-flight: verify SSH connectivity ─────────────────────────────────────
# Runs a trivial remote command before the real benchmark. ssh's stderr is
# captured in a private mktemp file (not a fixed, guessable /tmp path) and is
# shown only when the connection fails. Failure diagnostics go to stderr.
echo "Checking SSH connectivity..."
SSH_DEBUG_LOG=$(mktemp "${TMPDIR:-/tmp}/obliteratus_ssh_debug.XXXXXX")
if ! ssh "${SSH_OPTS[@]}" "$SSH_HOST" "echo 'SSH_OK'" 2>"$SSH_DEBUG_LOG"; then
  {
    echo ""
    echo "ERROR: SSH connection failed!"
    echo ""
    echo "Debug output:"
    cat "$SSH_DEBUG_LOG"
    echo ""
    echo "Troubleshooting checklist:"
    echo "  1. Is Dev Mode enabled on your HF Space?"
    echo "     → Check your Space's Settings tab (Dev Mode must be ON)"
    echo "  2. Is the Space awake (not sleeping/building)?"
    echo "     → Visit the Space URL and wait for the UI to load"
    echo "  3. Is your SSH public key added to your HF profile?"
    echo "     → https://huggingface.co/settings/keys"
    echo "     → Run: cat ${SSH_KEY}.pub"
    echo "  4. Are key permissions correct?"
    echo "     → Run: chmod 600 $SSH_KEY"
    echo "  5. Try manually:"
    echo "     → ssh -v -i $SSH_KEY $SSH_HOST echo hello"
    echo ""
  } >&2
  rm -f -- "$SSH_DEBUG_LOG"
  exit 1
fi
rm -f -- "$SSH_DEBUG_LOG"
echo "SSH connection verified ✓"
echo ""

# ── Build SSH command ────────────────────────────────────────────────────────
# Write the Python script to a temp file and pipe it, instead of passing
# via -c (avoids command-line length limits and shell escaping issues).

#######################################
# Write REMOTE_SCRIPT to a temp file and handle --dry-run.
# Globals:   REMOTE_SCRIPT, DRY_RUN, SSH_OPTS, SSH_HOST (read)
#            REMOTE_SCRIPT_FILE (written)
# Outputs:   the dry-run summary on stdout when DRY_RUN is true
# Returns:   0 in normal mode; exits 0 after a dry run
#######################################
stage_remote_script() {
  # The template ends in X's (no ".py" suffix) because BSD/macOS mktemp only
  # substitutes trailing X's. The file is fed to python3 on stdin, so its
  # extension does not matter.
  REMOTE_SCRIPT_FILE=$(mktemp "${TMPDIR:-/tmp}/obliteratus_bench_XXXXXX")
  printf '%s\n' "$REMOTE_SCRIPT" > "$REMOTE_SCRIPT_FILE"
  # Single quotes defer expansion until the trap fires, so the path is quoted
  # correctly at cleanup time.
  trap 'rm -f -- "$REMOTE_SCRIPT_FILE"' EXIT

  if [[ "$DRY_RUN" == true ]]; then
    echo "[DRY RUN] Would execute:"
    echo "  cat script.py | ssh ${SSH_OPTS[*]} $SSH_HOST 'BENCH_MODELS=... python3 -u'"
    echo ""
    echo "Script saved to: $REMOTE_SCRIPT_FILE"
    # The message above promises the file is available for inspection, so the
    # cleanup trap must not delete it when the script exits.
    trap - EXIT
    exit 0
  fi
}

stage_remote_script

printf '%s\n\n' "Running benchmark on Space..."

# The three values below are spliced into the remote command line inside
# single quotes, so any value containing a quote or another shell
# metacharacter is refused before ssh is invoked.
for _var_name in MODELS METHODS PROMPTS; do
  _val="${!_var_name}"
  [[ "$_val" =~ [\'\"\;\&\|\`\$\(\)\{\}\<\>\\] ]] || continue
  echo "ERROR: ${_var_name} contains unsafe characters: $_val" >&2
  exit 1
done

# python3 reads the benchmark program from stdin ("-"); -u keeps its output
# unbuffered. The settings travel as environment assignments in front of the
# remote command.
_remote_cmd="BENCH_MODELS='$MODELS' BENCH_METHODS='$METHODS' BENCH_PROMPTS='$PROMPTS' python3 -u -"
ssh "${SSH_OPTS[@]}" "$SSH_HOST" "$_remote_cmd" < "$REMOTE_SCRIPT_FILE"