# Residue from the hosting web page (not part of the program):
#   ejschwartz's picture / add log / e8ab2e3
import gradio as gr
import logging
import os
import shlex
import tempfile
import subprocess
import pyghidra
# Emit INFO-level (and above) log records from the root logger.
logging.basicConfig(level=logging.INFO)

# Initialise PyGhidra once, at import time.
pyghidra.start()

# Directory under the current user's home that is created at startup.
# os.path.expanduser("~") is used instead of os.getenv("HOME") so that an unset
# HOME does not silently produce the relative path "None/ghidra_project".
GHIDRA_PROJECT_DIR = os.path.join(os.path.expanduser("~"), "ghidra_project")
os.makedirs(GHIDRA_PROJECT_DIR, exist_ok=True)
def get_functions(file):
    """Collect the name and entry-point offset of every function in a binary.

    Args:
        file: Value handed straight to ``pyghidra.open_program`` (callers in
            this file pass the path of the uploaded binary).

    Returns:
        A list of ``(name, entry_offset)`` tuples, one per function yielded by
        the program's function manager via ``getFunctions(True)``.
    """
    collected = []
    with pyghidra.open_program(file) as flat_api:
        manager = flat_api.getCurrentProgram().getFunctionManager()
        for function in manager.getFunctions(True):
            name = function.getName()
            offset = function.getEntryPoint().getOffset()
            collected.append((name, offset))
    return collected
with gr.Blocks() as demo:
    # NOTE(review): the nesting of the widgets below was reconstructed from a
    # copy of this file whose indentation had been lost. In particular, confirm
    # that the results Row belongs inside the hidden Column and that the
    # Examples widget belongs at the top level of the Blocks.

    # Per-session data written by file_change_fn:
    #   {"file": <path of the uploaded binary or None>,
    #    "addrs": [(dropdown label, entry offset), ...]}
    state = gr.State()

    intro = gr.Markdown(
        "\n"
        "This is a space to experiment with GhidraFunctionCPPExporter, a Ghidra "
        "scripts that outputs *rich* C decompilations of functions in a binary. "
        "It notably includes declarations for variables, functions, and data "
        "types. To get started, upload an executable file below."
        "\n"
    )

    file_widget = gr.File(label="Executable file")

    # Hidden until a binary has been analysed (see file_change_fn).
    with gr.Column(visible=False) as col:
        gr.Markdown(
            "\n"
            "Great, you selected an executable! Now pick the function you would like\n"
            "to analyze.\n"
        )

        # The placeholder choice is replaced once a binary has been analysed.
        fun_dropdown = gr.Dropdown(
            label="Select a function", choices=["Woohoo!"], interactive=True
        )

        gr.Markdown("\nBelow you can find some information.\n")

        extra_args = gr.Textbox(
            label="Extra args to export.bash",
            placeholder="emit_type_definitions false",
            value="",
        )

        with gr.Row(visible=True) as result:
            disassembly = gr.Code(
                label="Disassembly",
                lines=20,
                max_lines=20,
            )
            original_decompile = gr.Code(
                language="c",
                label="Decompilation",
                lines=20,
                max_lines=20,
            )

    # Every entry of the "examples" directory next to this file is offered as
    # an example input. Sorted so the list order is stable across runs
    # (os.scandir yields entries in arbitrary order).
    example_widget = gr.Examples(
        examples=sorted(
            entry.path
            for entry in os.scandir(
                os.path.join(os.path.dirname(__file__), "examples")
            )
        ),
        inputs=file_widget,
        outputs=[state, disassembly, original_decompile],
    )

    @file_widget.change(inputs=file_widget, outputs=[intro, state, col, fun_dropdown])
    def file_change_fn(file, progress=gr.Progress()):
        """Analyse a newly selected binary and populate the function dropdown.

        Args:
            file: Value of the file widget, or None when it was cleared. Its
                ``name`` attribute is used as the path of the binary; a plain
                path string is accepted as well.
            progress: Gradio progress tracker. It is declared as a default
                parameter (as in function_change_fn) so that Gradio injects a
                tracked instance; an instance created inside the body is not
                connected to the running event.

        Returns:
            A dict of component updates: hides the column when no file is
            selected; otherwise shows it, fills the dropdown with
            ``(label, entry offset)`` choices and stores the binary path and
            the choices in the session state.

        Raises:
            gr.Error: If Ghidra fails to analyse the binary.
        """
        if file is None:
            return {col: gr.update(visible=False), state: {"file": None}}

        # Keep only the path: it is later handed to pyghidra and to a
        # subprocess argument list, both of which need a path rather than a
        # file wrapper object.
        binary_path = getattr(file, "name", file)

        try:
            progress(
                0,
                desc=f"Analyzing binary {os.path.basename(binary_path)} with Ghidra...",
            )
            fun_data = get_functions(binary_path)
            print(fun_data)
            # Dropdown choices are (label shown to the user, value) pairs.
            addrs = [
                (f"{name} ({hex(int(addr))})", int(addr))
                for name, addr in fun_data
            ]
            print(addrs)
        except Exception as e:
            raise gr.Error(f"Unable to analyze binary with Ghidra: {e}") from e

        return {
            col: gr.update(visible=True),
            fun_dropdown: gr.Dropdown(
                choices=addrs, value=addrs[0][1] if addrs else None
            ),
            state: {"file": binary_path, "addrs": addrs},
        }

    @fun_dropdown.change(inputs=[fun_dropdown, state, extra_args], outputs=[disassembly, original_decompile])
    @extra_args.submit(inputs=[fun_dropdown, state, extra_args], outputs=[disassembly, original_decompile])
    def function_change_fn(selected_fun, state, extra_args, progress=gr.Progress()):
        """Disassemble and decompile the selected function.

        Args:
            selected_fun: Entry offset (int) of the chosen function, or None
                when nothing is selected.
            state: Session dict written by file_change_fn, or None before any
                upload.
            extra_args: Extra command-line words for export.bash, split with
                ``shlex.split``.
            progress: Gradio progress tracker injected by Gradio.

        Returns:
            A dict of component updates carrying the disassembly listing and
            the contents of the exported ``file.c``. Both outputs are cleared
            when there is no selected function or no uploaded binary.

        Raises:
            gr.Error: If the extra arguments cannot be parsed, no function
                exists at the selected address, or export.bash exits with a
                non-zero status.
        """
        print("function_change_fn called with", selected_fun, state, extra_args)

        binary = state.get("file") if state else None
        if selected_fun is None or binary is None:
            # This handler also fires when the dropdown is reset to "no value"
            # or when extra args are submitted before an upload. There is
            # nothing to analyse then, so clear the outputs instead of failing
            # on hex(None) / state['file'].
            return {
                disassembly: gr.update(value=""),
                original_decompile: gr.update(value=""),
            }
        binary_path = getattr(binary, "name", binary)

        # extra_args is free-form user input; unbalanced quotes make
        # shlex.split raise ValueError.
        try:
            export_args = shlex.split(extra_args or "")
        except ValueError as e:
            raise gr.Error(f"Unable to parse extra args: {e}") from e

        address = hex(selected_fun)
        progress(0, desc=f"Running GhidraFunctionCPPExporter on {address}...")

        # Yeah, this is kind of dumb, but oh well: the program is opened again
        # here just to produce the disassembly listing.
        with pyghidra.open_program(binary_path) as flat_api:
            program = flat_api.getCurrentProgram()
            fm = program.getFunctionManager()
            func = fm.getFunctionAt(program.getAddressFactory().getAddress(address))
            if func is None:
                raise gr.Error(f"No function found at {address}")
            listing = program.getListing()
            disassembly_str = "\n".join(
                f"0x{i.getAddress().getOffset():x}: {i.toString()}"
                for i in listing.getInstructions(func.getBody(), True)
            )

        # The exporter writes into a throw-away directory; with base_name
        # "file" the result is read back from "file.c" below.
        with tempfile.TemporaryDirectory() as temp_dir:
            o = subprocess.run(
                [
                    "/code/GhidraFunctionCPPExporter/export.bash",
                    binary_path,
                    "base_name",
                    "file",
                    "address_set_str",
                    address,
                    "output_dir",
                    temp_dir,
                ]
                + export_args,
                shell=False,
                capture_output=True,
                encoding="utf8",
            )
            print(o.stdout)
            print(o.stderr)
            if o.returncode != 0:
                raise gr.Error(
                    f"Ghidra export failed with return code {o.returncode}: {o.stderr}"
                )
            with open(os.path.join(temp_dir, "file.c"), "r", encoding="utf8") as f:
                decompile_str = f.read()

        # Both outputs are gr.Code components, so send plain value updates
        # rather than instances of a different component class.
        return {
            disassembly: gr.update(value=disassembly_str),
            original_decompile: gr.update(value=decompile_str),
        }
# Enable Gradio's event queue, then start the web server.
demo.queue()
demo.launch(
    debug=True,
    show_error=True,  # surface handler exceptions in the browser UI
    server_port=7860,
    server_name="0.0.0.0",  # listen on all network interfaces
)