ejschwartz committed on
Commit
a3de3ad
·
1 Parent(s): 054f33b

Try to use pyghidra to get function addresses

Browse files
Files changed (1) hide show
  1. main.py +26 -22
main.py CHANGED
@@ -5,32 +5,39 @@ import logging
5
  import os
6
  import tempfile
7
  import subprocess
 
 
8
 
9
  logging.basicConfig(level=logging.INFO)
10
  logging.info(f"CUDA available: {torch.cuda.is_available()}, CUDA version: {torch.version.cuda}")
11
 
 
 
 
12
  GHIDRA_PROJECT_DIR = f"{os.getenv('HOME')}/ghidra_project"
13
 
14
  os.makedirs(GHIDRA_PROJECT_DIR, exist_ok=True)
15
 
16
  def get_functions(file):
17
 
18
- with tempfile.TemporaryDirectory() as TEMP_DIR:
 
 
19
 
20
- o = subprocess.run(["/code/GhidraFunctionCPPExporter/export.bash", file, "output_dir", TEMP_DIR], shell=False, capture_output=True, encoding="utf8")
21
 
22
- print(o.stdout)
23
- print(o.stderr)
24
 
25
- if (o.returncode != 0):
26
- raise Exception(f"Ghidra export failed with return code {o.returncode}: {o.stderr}")
27
 
28
- # Show files in TEMP_DIR
29
- print("Files in TEMP_DIR:")
30
- for f in os.listdir(TEMP_DIR):
31
- print(f)
32
 
33
- return o.stdout
34
 
35
 
36
  # def decomp_create_prompt(input_data: str) -> str:
@@ -112,8 +119,6 @@ This is a space to experiment with our quantized 22B neural model for decompilat
112
  @file_widget.change(inputs=file_widget, outputs=[intro, state, col, fun_dropdown])
113
  def file_change_fn(file):
114
 
115
- print("file change fn called with file:", file)
116
-
117
  if file is None:
118
  return {col: gr.update(visible=False), state: {"file": None}}
119
  else:
@@ -125,16 +130,15 @@ This is a space to experiment with our quantized 22B neural model for decompilat
125
  desc=f"Analyzing binary {os.path.basename(file.name)} with Ghidra...",
126
  )
127
  fun_data = get_functions(file.name)
128
- # print(fun_data)
 
 
 
 
 
129
 
130
- #addrs = [
131
- # (f"{name} ({hex(int(addr))}; {numvars} vars)", int(addr))
132
- # for addr, (name, cf, numvars) in fun_data.items()
133
- #]
134
 
135
- #cfs = {name: cf for (name, cf, _numvars) in fun_data.values()}
136
- addrs = []
137
- cfs = {}
138
  except Exception as e:
139
  print("error...", e)
140
  raise gr.Error(f"Unable to analyze binary with Ghidra: {e}")
@@ -143,7 +147,7 @@ This is a space to experiment with our quantized 22B neural model for decompilat
143
  col: gr.update(visible=True),
144
  fun_dropdown: gr.Dropdown(choices=addrs, value=addrs[0][1] if addrs else None),
145
  state: {"file": file,
146
- "cfs": cfs},
147
  }
148
 
149
  #file_widget.change(
 
5
  import os
6
  import tempfile
7
  import subprocess
8
+ import pyghidra
9
+
10
 
11
  logging.basicConfig(level=logging.INFO)
12
  logging.info(f"CUDA available: {torch.cuda.is_available()}, CUDA version: {torch.version.cuda}")
13
 
14
+ print("Starting pyghidra")
15
+ pyghidra.start()
16
+
17
  GHIDRA_PROJECT_DIR = f"{os.getenv('HOME')}/ghidra_project"
18
 
19
  os.makedirs(GHIDRA_PROJECT_DIR, exist_ok=True)
20
 
21
  def get_functions(file):
22
 
23
+ with pyghidra.open_program(file) as flat_api:
24
+ program = flat_api.getCurrentProgram()
25
+ function_addrs = [(f.getName(), f.getAddress().toOffset()) for f in program.getFunctionManager().getFunctions(True)]
26
 
27
+ # o = subprocess.run(["/code/GhidraFunctionCPPExporter/export.bash", file, "output_dir", TEMP_DIR], shell=False, capture_output=True, encoding="utf8")
28
 
29
+ # print(o.stdout)
30
+ # print(o.stderr)
31
 
32
+ # if (o.returncode != 0):
33
+ # raise Exception(f"Ghidra export failed with return code {o.returncode}: {o.stderr}")
34
 
35
+ # # Show files in TEMP_DIR
36
+ # print("Files in TEMP_DIR:")
37
+ # for f in os.listdir(TEMP_DIR):
38
+ # print(f)
39
 
40
+ return function_addrs
41
 
42
 
43
  # def decomp_create_prompt(input_data: str) -> str:
 
119
  @file_widget.change(inputs=file_widget, outputs=[intro, state, col, fun_dropdown])
120
  def file_change_fn(file):
121
 
 
 
122
  if file is None:
123
  return {col: gr.update(visible=False), state: {"file": None}}
124
  else:
 
130
  desc=f"Analyzing binary {os.path.basename(file.name)} with Ghidra...",
131
  )
132
  fun_data = get_functions(file.name)
133
+ print(fun_data)
134
+
135
+ addrs = [
136
+ (f"{name} ({hex(int(addr))}", int(addr))
137
+ for name, addr in fun_data
138
+ ]
139
 
140
+ print(addrs)
 
 
 
141
 
 
 
 
142
  except Exception as e:
143
  print("error...", e)
144
  raise gr.Error(f"Unable to analyze binary with Ghidra: {e}")
 
147
  col: gr.update(visible=True),
148
  fun_dropdown: gr.Dropdown(choices=addrs, value=addrs[0][1] if addrs else None),
149
  state: {"file": file,
150
+ "addrs": addrs},
151
  }
152
 
153
  #file_widget.change(