README / ghidra-re-loop /exportDecomp.py
Nekochu's picture
Add skill ghidra-re-loop
ae4930d verified
Raw
History Blame Contribute Delete
5.78 kB
# Ghidra headless post-script: export decompiled C + (optionally) locate patch sites.
# Runs inside Ghidra's Jython under analyzeHeadless. No machine-specific paths baked in.
#
# Usage (via analyzeHeadless):
# -postScript exportDecomp.py <outdir> [<comma-separated file offsets, hex or dec>]
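# Offsets may also be supplied via the GHIDRA_PATCH_OFFSETS env var (same format);
# the env var is only consulted when no offsets argument is passed. ";" is accepted
# as a separator in addition to ",".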
# Examples of offsets: "0x024F74,0x0E740E" or "155508,947726"
import os
from ghidra.app.decompiler import DecompInterface
from ghidra.util.task import ConsoleTaskMonitor
# Script arguments as handed over by analyzeHeadless; args[0], when present,
# is the output directory.
args = getScriptArgs()
# Without an explicit output directory, write to ./ghidra_out.
outdir = args[0] if args and len(args) > 0 else os.path.join(os.getcwd(), "ghidra_out")
try:
    os.makedirs(outdir)
except OSError:
    # An already-existing directory is expected and fine. Anything else
    # (permissions, a regular file in the way, ...) is re-raised here so the
    # failure is reported at its cause instead of at the first open() below.
    if not os.path.isdir(outdir):
        raise
def _parse_offsets(s):
out = []
if not s:
return out
for tok in s.replace(";", ",").split(","):
tok = tok.strip()
if not tok:
continue
try:
out.append(int(tok, 16) if tok.lower().startswith("0x") else int(tok, 0))
except:
pass
return out
# Source of the patch-site file offsets: a second CLI argument takes priority;
# when there is none, the GHIDRA_PATCH_OFFSETS env var is read (default "").
if args and len(args) > 1:
    offs_src = args[1]
else:
    offs_src = os.environ.get("GHIDRA_PATCH_OFFSETS", "")
TARGET_OFFSETS = _parse_offsets(offs_src)
# Handles on the program under analysis that the rest of the script shares.
prog = currentProgram
fm = prog.getFunctionManager()
mem = prog.getMemory()
imageBase = prog.getImageBase()
monitor = ConsoleTaskMonitor()
# One decompiler interface is opened once and reused for every function.
dec = DecompInterface()
if not dec.openProgram(prog):
    # openProgram() reports failure through its return value; say so up front
    # instead of letting every later decompile fail without explanation.
    print("WARNING: decompiler could not open program: %s" % dec.getLastMessage())
# --- file offset -> virtual address (robust across Ghidra versions) ---
def off_to_addr(off):
    """Map a file offset to the address it is loaded at.

    Walks the source infos of every memory block and picks the first one whose
    file-byte range [start, start + length) contains `off`.

    off -- integer offset into the original file.

    Returns the matching address object, or None when no source info covers
    the offset.
    """
    for b in mem.getBlocks():
        # The bare excepts in this function are deliberate best-effort guards
        # around Ghidra API calls that may be missing or may throw on some
        # builds; a failing lookup just means "this entry does not match".
        try:
            sis = b.getSourceInfos()
        except:
            sis = []
        for si in sis:
            try:
                fb = si.getFileBytesOffset()
            except:
                try:
                    fb = si.getFileBytesOffsetFromVA()  # fallback name on some builds
                except:
                    fb = -1
            try:
                ln = si.getLength()
            except:
                ln = 0
            if fb is None or fb == -1 or not (fb <= off < fb + ln):
                continue
            # A block may be made of several source infos, so the offset must
            # be applied to the start of *this* source info; the block start
            # is only correct for the first one and is kept as a fallback.
            try:
                base = si.getMinAddress()
            except:
                base = b.getStart()
            return base.add(off - fb)
    return None
# --- virtual address -> file offset (for authoring a patch from a decompiler VA) ---
def addr_to_off(addr):
    """Return the file offset backing `addr`, or None when there is none.

    Asks the program memory for the address's source info and reads its file
    offset. None is returned when no source info exists, when the reported
    offset is None or -1, or when any of the lookups raises.
    """
    try:
        src = mem.getAddressSourceInfo(addr)
        if src is None:
            return None
        file_off = src.getFileOffset()
        if file_off is None or file_off == -1:
            return None
        return file_off
    except:
        # Best effort: any failure in the lookup is treated as "no offset".
        return None
# Resolve each requested file offset to an address and its containing
# function. patch_report collects one human-readable line per offset;
# target_funcs collects the distinct functions that were hit.
patch_report = []
target_funcs = set()
for file_off in TARGET_OFFSETS:
    va = off_to_addr(file_off)
    if va is not None:
        owner = fm.getFunctionContaining(va)
        if owner:
            owner_name = owner.getName()
            owner_entry = owner.getEntryPoint().toString()
            target_funcs.add(owner)
        else:
            owner_name = "(no function)"
            owner_entry = "-"
        patch_report.append(
            "file 0x%06X -> VA %s in func %s @ %s" % (file_off, va, owner_name, owner_entry)
        )
    else:
        patch_report.append("file 0x%06X -> (no VA mapping)" % file_off)

# The report file is only written when offsets were actually requested.
if TARGET_OFFSETS:
    report_path = os.path.join(outdir, "patch_sites.txt")
    with open(report_path, "w") as fh:
        fh.write("ImageBase: %s\n" % imageBase)
        fh.write("\n".join(patch_report) + "\n")
# --- decompile one function to C ---
def decompile(f, timeout=60):
    """Decompile one function and return its C text.

    f       -- the function to decompile.
    timeout -- per-function decompiler timeout passed to decompileFunction
               (default 60, the value previously hard-coded).

    Returns the decompiled C source on success. On failure a single-line C
    comment is returned instead, so callers can always write the result:
      "// decompile failed[: <decompiler message>]" when decompilation did not
      complete, or "// decompile error: <exception>" when a call raised.
    """
    import sys
    try:
        res = dec.decompileFunction(f, timeout, monitor)
        if res and res.decompileCompleted():
            return res.getDecompiledFunction().getC()
        # Keep the decompiler's own explanation (e.g. a timeout) when there is one.
        detail = res.getErrorMessage() if res else None
    except (KeyboardInterrupt, SystemExit):
        raise
    except:
        # Bare except on purpose, matching the other Ghidra API guards in this
        # script. NOTE(review): under Jython, Java exceptions are reportedly not
        # subclasses of Python's Exception, so `except Exception` could let a
        # decompiler-side failure abort the whole export -- confirm.
        return "// decompile error: %s\n" % " ".join(str(sys.exc_info()[1]).split())
    if detail:
        # Collapse whitespace so a multi-line message stays inside the // comment.
        return "// decompile failed: %s\n" % " ".join(str(detail).split())
    return "// decompile failed\n"
# --- dump the target patch-site functions first (most important), with VA+file offset banner ---
if target_funcs:
    # target_funcs is a set, so plain iteration would emit functions in hash
    # order. Sorting by entry offset writes the file in address order.
    ordered_targets = sorted(target_funcs, key=lambda fn: fn.getEntryPoint().getOffset())
    with open(os.path.join(outdir, "patch_functions.c"), "w") as fh:
        for f in ordered_targets:
            entry = f.getEntryPoint()
            fo = addr_to_off(entry)
            # "?" marks an entry point with no known file offset.
            fo_s = ("0x%X" % fo) if fo is not None else "?"
            fh.write("\n/* ===== FUNCTION %s @ VA %s (file %s) ===== */\n" % (f.getName(), entry, fo_s))
            fh.write(decompile(f))
            fh.write("\n")
# --- caller/callee counts (used to RANK targets: heavily-called helpers are
# often the gate; mirrors auto-re-agent's function picker ranking). ---
def caller_count(f):
    """Return how many functions call `f`; 0 when the lookup fails."""
    try:
        callers = f.getCallingFunctions(monitor)
        total = len(callers)
    except:
        # Best effort: a failed lookup counts as "no known callers".
        total = 0
    return total
def callee_count(f):
    """Return how many functions `f` calls; 0 when the lookup fails."""
    try:
        callees = f.getCalledFunctions(monitor)
        total = len(callees)
    except:
        # Best effort: a failed lookup counts as "no known callees".
        total = 0
    return total
# --- dump ALL functions + an index. functions.txt columns:
# entry-VA <tab> name <tab> callers=<n> <tab> callees=<n> [<tab> THUNK]
# Also emit by_callers.txt: non-thunk functions sorted by caller count desc. ---
funcs = list(fm.getFunctions(True))
ranked = []  # (callers, callees, entry VA string, name) per non-thunk function
count = 0    # number of functions written to all_decomp.c
# Both output files are held by `with` so they are flushed and closed even if
# the loop raises part-way through a large export.
with open(os.path.join(outdir, "functions.txt"), "w") as idx:
    with open(os.path.join(outdir, "all_decomp.c"), "w") as allc:
        for f in funcs:
            ep = f.getEntryPoint()
            nc = caller_count(f)
            if f.isThunk():
                # Thunks are indexed only: no callee lookup, no decompilation, no ranking.
                idx.write("%s\t%s\tcallers=%d\tcallees=0\tTHUNK\n" % (ep, f.getName(), nc))
                continue
            cc = callee_count(f)
            idx.write("%s\t%s\tcallers=%d\tcallees=%d\n" % (ep, f.getName(), nc, cc))
            ranked.append((nc, cc, ep.toString(), f.getName()))
            allc.write("\n/* ===== %s @ %s (callers=%d callees=%d) ===== */\n"
                       % (f.getName(), ep, nc, cc))
            allc.write(decompile(f))
            count += 1

# Tuples sort on (callers, callees, VA, name), so reverse order puts the most
# heavily called functions first, ties broken by callee count.
ranked.sort(reverse=True)
with open(os.path.join(outdir, "by_callers.txt"), "w") as rh:
    rh.write("# callers\tcallees\tVA\tname (rank targets from the top)\n")
    for nc, cc, va, nm in ranked:
        rh.write("%d\t%d\t%s\t%s\n" % (nc, cc, va, nm))

print("EXPORT DONE: %d functions decompiled -> %s" % (count, outdir))
print(" ranked candidates -> by_callers.txt")
for line in patch_report:
    print(" " + line)