"""Gradio demo for GhidraFunctionCPPExporter.

Upload an executable, pick one of the functions Ghidra finds in it, and view
that function's disassembly next to the C output of the exporter script.
"""

import logging
import os
import shlex
import subprocess
import tempfile

import gradio as gr
import pyghidra

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

pyghidra.start()

# Created at startup; not referenced anywhere else in this file.
GHIDRA_PROJECT_DIR = f"{os.getenv('HOME')}/ghidra_project"
os.makedirs(GHIDRA_PROJECT_DIR, exist_ok=True)

# External exporter invoked once per selected function.
EXPORT_SCRIPT = "/code/GhidraFunctionCPPExporter/export.bash"
EXAMPLES_DIR = os.path.join(os.path.dirname(__file__), "examples")


def get_functions(file):
    """Return a list of ``(name, entry_offset)`` for every function in *file*.

    Args:
        file: Path of the binary, passed straight to ``pyghidra.open_program``.
    """
    with pyghidra.open_program(file) as flat_api:
        program = flat_api.getCurrentProgram()
        functions = program.getFunctionManager().getFunctions(True)
        # Build the list while the program is still open.
        return [(f.getName(), f.getEntryPoint().getOffset()) for f in functions]


def _get_disassembly(file, address):
    """Return the instructions of the function at *address*, one per line.

    Each line has the form ``0x<offset>: <instruction>``.

    Raises:
        gr.Error: If Ghidra has no function starting at *address*.
    """
    with pyghidra.open_program(file) as flat_api:
        program = flat_api.getCurrentProgram()
        entry = program.getAddressFactory().getAddress(hex(address))
        func = program.getFunctionManager().getFunctionAt(entry)
        if func is None:
            raise gr.Error(f"No function found at {hex(address)}")
        instructions = program.getListing().getInstructions(func.getBody(), True)
        return "\n".join(
            f"0x{insn.getAddress().getOffset():x}: {insn.toString()}"
            for insn in instructions
        )


def _example_paths():
    """Return the paths inside ``EXAMPLES_DIR`` in a stable (sorted) order."""
    with os.scandir(EXAMPLES_DIR) as entries:
        return sorted(entry.path for entry in entries)


with gr.Blocks() as demo:
    # Per-session dict: {"file": <path or None>, "addrs": [(label, offset), ...]}
    state = gr.State()

    intro = gr.Markdown(
        """
        This is a space to experiment with GhidraFunctionCPPExporter, a Ghidra
        script that outputs *rich* C decompilations of functions in a binary.
        It notably includes declarations for variables, functions, and data
        types. To get started, upload an executable file below.
        """
    )

    file_widget = gr.File(label="Executable file")

    # Hidden until a binary has been analyzed successfully.
    with gr.Column(visible=False) as col:
        gr.Markdown(
            """
            Great, you selected an executable! Now pick the function you would
            like to analyze.
            """
        )

        fun_dropdown = gr.Dropdown(
            label="Select a function", choices=["Woohoo!"], interactive=True
        )

        gr.Markdown(
            """
            Below you can find some information.
            """
        )

        extra_args = gr.Textbox(
            label="Extra args to export.bash",
            placeholder="emit_type_definitions false",
            value="",
        )

        with gr.Row(visible=True) as result:
            disassembly = gr.Code(
                label="Disassembly",
                lines=20,
                max_lines=20,
            )
            original_decompile = gr.Code(
                language="c",
                label="Decompilation",
                lines=20,
                max_lines=20,
            )

    example_widget = gr.Examples(
        examples=_example_paths(),
        inputs=file_widget,
        outputs=[state, disassembly, original_decompile],
    )

    @file_widget.change(inputs=file_widget, outputs=[intro, state, col, fun_dropdown])
    def file_change_fn(file, progress=gr.Progress()):
        """Analyze the uploaded binary and populate the function dropdown.

        ``progress`` must be a default-valued parameter: that is how Gradio
        injects a tracker that is actually shown in the UI.

        Raises:
            gr.Error: If Ghidra fails to analyze the binary.
        """
        if file is None:
            return {col: gr.update(visible=False), state: {"file": None}}

        # gr.File hands over either an object with ``.name`` or a path string;
        # keep only the plain path so pyghidra and subprocess both accept it.
        path = getattr(file, "name", file)
        progress(
            0,
            desc=f"Analyzing binary {os.path.basename(path)} with Ghidra...",
        )

        try:
            fun_data = get_functions(path)
            addrs = [
                (f"{name} ({hex(int(addr))})", int(addr)) for name, addr in fun_data
            ]
        except Exception as e:
            raise gr.Error(f"Unable to analyze binary with Ghidra: {e}") from e

        logger.info("Found %d functions in %s", len(addrs), path)
        logger.debug("Functions: %s", addrs)

        return {
            col: gr.update(visible=True),
            fun_dropdown: gr.Dropdown(
                choices=addrs, value=addrs[0][1] if addrs else None
            ),
            state: {"file": path, "addrs": addrs},
        }

    @fun_dropdown.change(
        inputs=[fun_dropdown, state, extra_args],
        outputs=[disassembly, original_decompile],
    )
    @extra_args.submit(
        inputs=[fun_dropdown, state, extra_args],
        outputs=[disassembly, original_decompile],
    )
    def function_change_fn(selected_fun, state, extra_args, progress=gr.Progress()):
        """Show disassembly and exported C code for the selected function.

        Args:
            selected_fun: Entry offset chosen in the dropdown, or ``None``.
            state: Session dict written by ``file_change_fn``, or ``None``.
            extra_args: Extra shell-style arguments appended to the exporter call.

        Raises:
            gr.Error: On unparsable extra args, an unknown function address,
                a non-zero exporter exit code, or a missing output file.
        """
        binary = state.get("file") if state else None
        logger.info(
            "function_change_fn called with %s %s %r", selected_fun, binary, extra_args
        )

        # Nothing to analyze yet (no upload, or a binary with no functions).
        if selected_fun is None or binary is None:
            return {
                disassembly: gr.update(value=""),
                original_decompile: gr.update(value=""),
            }

        try:
            # ``or ""``: shlex.split(None) would try to read from stdin.
            extra = shlex.split(extra_args or "")
        except ValueError as e:
            raise gr.Error(f"Unable to parse extra args: {e}") from e

        address = hex(selected_fun)
        progress(0, desc=f"Running GhidraFunctionCPPExporter on {address}...")

        # Yeah, this is kind of dumb, but oh well: the binary is opened here
        # with pyghidra just for the listing, and the exporter below is handed
        # the same file separately.
        disassembly_str = _get_disassembly(binary, selected_fun)

        with tempfile.TemporaryDirectory() as temp_dir:
            command = [
                EXPORT_SCRIPT,
                binary,
                "base_name",
                "file",
                "address_set_str",
                address,
                "output_dir",
                temp_dir,
                *extra,
            ]
            o = subprocess.run(
                command,
                shell=False,
                capture_output=True,
                encoding="utf8",
                errors="replace",
            )
            logger.info("export.bash stdout:\n%s", o.stdout)
            logger.info("export.bash stderr:\n%s", o.stderr)
            if o.returncode != 0:
                raise gr.Error(
                    f"Ghidra export failed with return code {o.returncode}: {o.stderr}"
                )

            # base_name "file" above is what makes the output land in file.c.
            output_path = os.path.join(temp_dir, "file.c")
            try:
                with open(output_path, encoding="utf8", errors="replace") as f:
                    decompile_str = f.read()
            except FileNotFoundError as e:
                raise gr.Error("Ghidra export did not produce file.c") from e

        return {
            disassembly: gr.update(value=disassembly_str),
            original_decompile: gr.update(value=decompile_str),
        }


demo.queue()
demo.launch(server_name="0.0.0.0", server_port=7860, show_error=True, debug=True)