ho22joshua committed on
Commit
ebeeeff
·
2 Parent(s): 407a49f 255148e

merging branch joshua with main

Browse files
root_gnn_dgl/configs/stats_all/ttH_CP_even_vs_odd_batch_size_2048.yaml ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Training_Name: ttH_CP_even_vs_odd_batch_size_2048
2
+ Training_Directory: trainings/stats_all/ttH_CP_even_vs_odd_batch_size_2048
3
+ Model:
4
+ module: models.GCN
5
+ class: Edge_Network
6
+ args:
7
+ hid_size: 64
8
+ in_size: 7
9
+ out_size: 1
10
+ n_layers: 4
11
+ n_proc_steps: 4
12
+ dropout: 0
13
+ Training:
14
+ epochs: 500
15
+ batch_size: 2048
16
+ learning_rate: 0.0001
17
+ gamma: 0.99
18
+ Datasets:
19
+ ttH_CP_even: &dataset_defn
20
+ module: root_gnn_base.dataset
21
+ class: LazyDataset
22
+ shuffle_chunks: 10
23
+ batch_size: 2048
24
+ padding_mode: NONE #one of STEPS, FIXED, or NONE
25
+ args: &dataset_args
26
+ name: ttH_CP_even
27
+ label: 0
28
+ # weight_var: weight
29
+ chunks: 10
30
+ buffer_size: 3
31
+ file_names: ttH_NLO.root
32
+ tree_name: output
33
+ fold_var: Number
34
+ raw_dir: /global/cfs/projectdirs/trn007/lbl_atlas/data/stats_all/
35
+ save_dir: /global/cfs/projectdirs/trn007/lbl_atlas/data/processed_graphs/stats_all/ttH_CP_even_vs_odd_batch_size_2048/
36
+ node_branch_names:
37
+ - [jet_pt, ele_pt, mu_pt, ph_pt, MET_met]
38
+ - [jet_eta, ele_eta, mu_eta, ph_eta, 0]
39
+ - [jet_phi, ele_phi, mu_phi, ph_phi, MET_phi]
40
+ - CALC_E
41
+ - [jet_btag, 0, 0, 0, 0]
42
+ - [0, ele_charge, mu_charge, 0, 0]
43
+ - NODE_TYPE
44
+ node_branch_types: [vector, vector, vector, vector, single]
45
+ node_feature_scales: [1e-1, 1, 1, 1e-1, 1, 1, 1]
46
+ folding:
47
+ n_folds: 4
48
+ test: [0]
49
+ # validation: 1
50
+ train: [1, 2, 3]
51
+ ttH_CP_odd:
52
+ <<: *dataset_defn
53
+ args:
54
+ <<: *dataset_args
55
+ name: ttH_CP_odd
56
+ label: 1
57
+ file_names: ttH_CPodd.root
root_gnn_dgl/configs/stats_all/ttH_CP_even_vs_odd_batch_size_4096.yaml ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Training_Name: ttH_CP_even_vs_odd_batch_size_4096
2
+ Training_Directory: trainings/stats_all/ttH_CP_even_vs_odd_batch_size_4096
3
+ Model:
4
+ module: models.GCN
5
+ class: Edge_Network
6
+ args:
7
+ hid_size: 64
8
+ in_size: 7
9
+ out_size: 1
10
+ n_layers: 4
11
+ n_proc_steps: 4
12
+ dropout: 0
13
+ Training:
14
+ epochs: 500
15
+ batch_size: 4096
16
+ learning_rate: 0.0001
17
+ gamma: 0.99
18
+ Datasets:
19
+ ttH_CP_even: &dataset_defn
20
+ module: root_gnn_base.dataset
21
+ class: LazyDataset
22
+ shuffle_chunks: 10
23
+ batch_size: 4096
24
+ padding_mode: NONE #one of STEPS, FIXED, or NONE
25
+ args: &dataset_args
26
+ name: ttH_CP_even
27
+ label: 0
28
+ # weight_var: weight
29
+ chunks: 10
30
+ buffer_size: 3
31
+ file_names: ttH_NLO.root
32
+ tree_name: output
33
+ fold_var: Number
34
+ raw_dir: /global/cfs/projectdirs/trn007/lbl_atlas/data/stats_all/
35
+ save_dir: /global/cfs/projectdirs/trn007/lbl_atlas/data/processed_graphs/stats_all/ttH_CP_even_vs_odd_batch_size_4096/
36
+ node_branch_names:
37
+ - [jet_pt, ele_pt, mu_pt, ph_pt, MET_met]
38
+ - [jet_eta, ele_eta, mu_eta, ph_eta, 0]
39
+ - [jet_phi, ele_phi, mu_phi, ph_phi, MET_phi]
40
+ - CALC_E
41
+ - [jet_btag, 0, 0, 0, 0]
42
+ - [0, ele_charge, mu_charge, 0, 0]
43
+ - NODE_TYPE
44
+ node_branch_types: [vector, vector, vector, vector, single]
45
+ node_feature_scales: [1e-1, 1, 1, 1e-1, 1, 1, 1]
46
+ folding:
47
+ n_folds: 4
48
+ test: [0]
49
+ # validation: 1
50
+ train: [1, 2, 3]
51
+ ttH_CP_odd:
52
+ <<: *dataset_defn
53
+ args:
54
+ <<: *dataset_args
55
+ name: ttH_CP_odd
56
+ label: 1
57
+ file_names: ttH_CPodd.root
root_gnn_dgl/configs/stats_all/ttH_CP_even_vs_odd_batch_size_8192.yaml ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Training_Name: ttH_CP_even_vs_odd_batch_size_8192
2
+ Training_Directory: trainings/stats_all/ttH_CP_even_vs_odd_batch_size_8192
3
+ Model:
4
+ module: models.GCN
5
+ class: Edge_Network
6
+ args:
7
+ hid_size: 64
8
+ in_size: 7
9
+ out_size: 1
10
+ n_layers: 4
11
+ n_proc_steps: 4
12
+ dropout: 0
13
+ Training:
14
+ epochs: 500
15
+ batch_size: 8192
16
+ learning_rate: 0.0001
17
+ gamma: 0.99
18
+ Datasets:
19
+ ttH_CP_even: &dataset_defn
20
+ module: root_gnn_base.dataset
21
+ class: LazyDataset
22
+ shuffle_chunks: 10
23
+ batch_size: 8192
24
+ padding_mode: NONE #one of STEPS, FIXED, or NONE
25
+ args: &dataset_args
26
+ name: ttH_CP_even
27
+ label: 0
28
+ # weight_var: weight
29
+ chunks: 10
30
+ buffer_size: 3
31
+ file_names: ttH_NLO.root
32
+ tree_name: output
33
+ fold_var: Number
34
+ raw_dir: /global/cfs/projectdirs/trn007/lbl_atlas/data/stats_all/
35
+ save_dir: /global/cfs/projectdirs/trn007/lbl_atlas/data/processed_graphs/stats_all/ttH_CP_even_vs_odd_batch_size_8192/
36
+ node_branch_names:
37
+ - [jet_pt, ele_pt, mu_pt, ph_pt, MET_met]
38
+ - [jet_eta, ele_eta, mu_eta, ph_eta, 0]
39
+ - [jet_phi, ele_phi, mu_phi, ph_phi, MET_phi]
40
+ - CALC_E
41
+ - [jet_btag, 0, 0, 0, 0]
42
+ - [0, ele_charge, mu_charge, 0, 0]
43
+ - NODE_TYPE
44
+ node_branch_types: [vector, vector, vector, vector, single]
45
+ node_feature_scales: [1e-1, 1, 1, 1e-1, 1, 1, 1]
46
+ folding:
47
+ n_folds: 4
48
+ test: [0]
49
+ # validation: 1
50
+ train: [1, 2, 3]
51
+ ttH_CP_odd:
52
+ <<: *dataset_defn
53
+ args:
54
+ <<: *dataset_args
55
+ name: ttH_CP_odd
56
+ label: 1
57
+ file_names: ttH_CPodd.root
root_gnn_dgl/jobs/prep_data/run_processing.py CHANGED
@@ -77,9 +77,12 @@ def main():
77
  configs = [
78
  # "configs/stats_100K/pretraining_multiclass.yaml",
79
  # "configs/stats_100K/ttH_CP_even_vs_odd.yaml",
80
- "configs/stats_all/pretraining_multiclass.yaml",
81
- "configs/stats_all/ttH_CP_even_vs_odd.yaml",
82
  # "configs/attention/ttH_CP_even_vs_odd.yaml",
 
 
 
83
  ]
84
 
85
  # Path to the bash script to be called
 
77
  configs = [
78
  # "configs/stats_100K/pretraining_multiclass.yaml",
79
  # "configs/stats_100K/ttH_CP_even_vs_odd.yaml",
80
+ # "configs/stats_all/pretraining_multiclass.yaml",
81
+ # "configs/stats_all/ttH_CP_even_vs_odd.yaml",
82
  # "configs/attention/ttH_CP_even_vs_odd.yaml",
83
+ "configs/stats_all/ttH_CP_even_vs_odd_batch_size_2048.yaml",
84
+ "configs/stats_all/ttH_CP_even_vs_odd_batch_size_4096.yaml",
85
+ "configs/stats_all/ttH_CP_even_vs_odd_batch_size_8192.yaml",
86
  ]
87
 
88
  # Path to the bash script to be called
root_gnn_dgl/jobs/training/singlegpu/run_job.sh CHANGED
@@ -5,11 +5,11 @@
5
  #SBATCH --mail-user=ho22joshua@berkeley.edu
6
  #SBATCH --mail-type=ALL
7
  #SBATCH -t 15:00:00
8
- #SBATCH -A atlas
9
- #SBATCH -o /global/cfs/projectdirs/atlas/joshua/root_gnn/root_gnn_dgl/jobs/slurm/%j.out # STDOUT
10
 
11
  ARGUEMENTS="$*"
12
 
13
  echo "Arguements: $ARGUEMENTS"
14
  echo "launching image"
15
- source launch_image.sh "--entrypoint /global/cfs/projectdirs/atlas/joshua/root_gnn/root_gnn_dgl/jobs/run_job_image.sh" $ARGUEMENTS
 
5
  #SBATCH --mail-user=ho22joshua@berkeley.edu
6
  #SBATCH --mail-type=ALL
7
  #SBATCH -t 15:00:00
8
+ #SBATCH -A trn007
9
+ #SBATCH -o /global/cfs/projectdirs/atlas/joshua/GNN4Colliders/root_gnn_dgl/jobs/slurm/%j.out # STDOUT
10
 
11
  ARGUEMENTS="$*"
12
 
13
  echo "Arguements: $ARGUEMENTS"
14
  echo "launching image"
15
+ source /global/homes/j/joshuaho/launch_image.sh "--entrypoint /global/cfs/projectdirs/atlas/joshua/GNN4Colliders/root_gnn_dgl/jobs/training/singlegpu/run_job_image.sh" $ARGUEMENTS
root_gnn_dgl/jobs/training/singlegpu/run_job_image.sh CHANGED
@@ -4,17 +4,8 @@ CONFIG=$1
4
  shift
5
  ARGUEMENTS="$*"
6
 
7
- DIRECTORY="/global/cfs/projectdirs/atlas/joshua/root_gnn/root_gnn_dgl/configs/model_configs/"
8
- BASE_COMMAND="/global/cfs/projectdirs/atlas/joshua/root_gnn/root_gnn_dgl/scripts/training_script.py $ARGUEMENTS --preshuffle --nocompile --lazy --config $DIRECTORY"
9
-
10
- echo "launched image"
11
- cd /global/cfs/projectdirs/atlas/joshua/root_gnn/root_gnn_dgl/
12
-
13
- COMMAND="$BASE_COMMAND$CONFIG"
14
-
15
- eval "$(conda shell.bash hook)"
16
- conda init bash
17
- conda activate /opt/conda/envs/dgl
18
 
19
  echo "Running my script now"
20
  echo $COMMAND
 
4
  shift
5
  ARGUEMENTS="$*"
6
 
7
+ DIRECTORY="/global/cfs/projectdirs/atlas/joshua/GNN4Colliders/root_gnn_dgl/"
8
+ COMMAND="$DIRECTORY"scripts/training_script.py $ARGUEMENTS --preshuffle --nocompile --lazy --config $DIRECTORY$CONFIG
 
 
 
 
 
 
 
 
 
9
 
10
  echo "Running my script now"
11
  echo $COMMAND
root_gnn_dgl/jobs/training/singlegpu/submit.sh CHANGED
@@ -3,7 +3,10 @@ date
3
  DIRECTORY="/global/cfs/projectdirs/atlas/joshua/root_gnn/root_gnn_dgl/configs/model_configs/"
4
 
5
  configs=(
6
- "run_3_ttH/v05/sb_yukawa_cp_abs_weights.yaml --abs"
 
 
 
7
  )
8
 
9
  counter=0
 
3
  DIRECTORY="/global/cfs/projectdirs/atlas/joshua/root_gnn/root_gnn_dgl/configs/model_configs/"
4
 
5
  configs=(
6
+ "configs/stats_all/ttH_CP_even_vs_odd.yaml"
7
+ "configs/stats_all/ttH_CP_even_vs_odd_batch_size_2048.yaml"
8
+ "configs/stats_all/ttH_CP_even_vs_odd_batch_size_4096.yaml"
9
+ "configs/stats_all/ttH_CP_even_vs_odd_batch_size_8192.yaml"
10
  )
11
 
12
  counter=0
root_gnn_dgl/profile.sh CHANGED
@@ -1,35 +1,35 @@
1
  nsys profile \
2
- -o /pscratch/sd/j/joshuaho/my_profile_report_1_gpu_batch_size_1028 \
3
  --capture-range=cudaProfilerApi \
4
- --capture-range-end=stop-shutdown \
5
  --force-overwrite true \
6
  --trace=nvtx \
7
  --cudabacktrace=all \
8
- python scripts/training_script.py --config configs/stats_100K/ttH_CP_even_vs_odd.yaml --preshuffle --nocompile --lazy --restart --profile
9
 
10
  nsys profile \
11
- -o /pscratch/sd/j/joshuaho/my_profile_report_1_gpu_batch_size_2048 \
12
  --capture-range=cudaProfilerApi \
13
- --capture-range-end=stop-shutdown \
14
  --force-overwrite true \
15
  --trace=nvtx \
16
  --cudabacktrace=all \
17
- python scripts/training_script.py --config configs/stats_100K/ttH_CP_even_vs_odd_batch_size_2048.yaml --preshuffle --nocompile --lazy --restart --profile
18
 
19
  nsys profile \
20
- -o /pscratch/sd/j/joshuaho/my_profile_report_1_gpu_batch_size_4096 \
21
  --capture-range=cudaProfilerApi \
22
- --capture-range-end=stop-shutdown \
23
- --force-overwrite true \
24
  --trace=nvtx \
25
  --cudabacktrace=all \
26
- python scripts/training_script.py --config configs/stats_100K/ttH_CP_even_vs_odd_batch_size_4096.yaml --preshuffle --nocompile --lazy --restart --profile
27
 
28
  nsys profile \
29
- -o /pscratch/sd/j/joshuaho/my_profile_report_1_gpu_batch_size_8192 \
30
  --capture-range=cudaProfilerApi \
31
- --capture-range-end=stop-shutdown \
32
  --force-overwrite true \
33
  --trace=nvtx \
34
  --cudabacktrace=all \
35
- python scripts/training_script.py --config configs/stats_100K/ttH_CP_even_vs_odd_batch_size_8192.yaml --preshuffle --nocompile --lazy --restart --profile
 
1
  nsys profile \
2
+ -o /pscratch/sd/j/joshuaho/full_stats_profile_1_gpu_batch_size_1028 \
3
  --capture-range=cudaProfilerApi \
4
+ --duration=100 \
5
  --force-overwrite true \
6
  --trace=nvtx \
7
  --cudabacktrace=all \
8
+ python scripts/training_script.py --config configs/stats_all/ttH_CP_even_vs_odd.yaml --preshuffle --nocompile --lazy --restart --profile
9
 
10
  nsys profile \
11
+ -o /pscratch/sd/j/joshuaho/full_stats_profile_1_gpu_batch_size_2048 \
12
  --capture-range=cudaProfilerApi \
13
+ --duration=100 \
14
  --force-overwrite true \
15
  --trace=nvtx \
16
  --cudabacktrace=all \
17
+ python scripts/training_script.py --config configs/stats_all/ttH_CP_even_vs_odd_batch_size_2048.yaml --preshuffle --nocompile --lazy --restart --profile
18
 
19
  nsys profile \
20
+ -o /pscratch/sd/j/joshuaho/full_stats_profile_1_gpu_batch_size_4096 \
21
  --capture-range=cudaProfilerApi \
22
+ --duration=100 \
23
+ --force-overwrite=true \
24
  --trace=nvtx \
25
  --cudabacktrace=all \
26
+ python scripts/training_script.py --config configs/stats_all/ttH_CP_even_vs_odd_batch_size_4096.yaml --preshuffle --nocompile --lazy --restart --profile
27
 
28
  nsys profile \
29
+ -o /pscratch/sd/j/joshuaho/full_stats_profile_1_gpu_batch_size_8192 \
30
  --capture-range=cudaProfilerApi \
31
+ --duration=100 \
32
  --force-overwrite true \
33
  --trace=nvtx \
34
  --cudabacktrace=all \
35
+ python scripts/training_script.py --config configs/stats_all/ttH_CP_even_vs_odd_batch_size_8192.yaml --preshuffle --nocompile --lazy --restart --profile
root_gnn_dgl/root_gnn_base/dataset.py CHANGED
@@ -1,6 +1,7 @@
1
  from dgl.data import DGLDataset
2
  import dgl
3
- import ROOT
 
4
  import torch
5
  import os
6
  import glob
@@ -14,7 +15,7 @@ def node_features_from_tree(ch, node_branch_names, node_branch_types, node_featu
14
  if node_type == 'single':
15
  lengths.append(1)
16
  elif node_type == 'vector':
17
- lengths.append(len(getattr(ch, branch)))
18
  else:
19
  print('Unknown node branch type: {}'.format(node_type))
20
  features = []
@@ -38,16 +39,14 @@ def node_features_from_tree(ch, node_branch_names, node_branch_types, node_featu
38
  this_type_ends_at = sum(lengths[:itype+1])
39
  feat.extend(features[0][this_type_starts_at:this_type_ends_at]*torch.cosh(features[1][this_type_starts_at:this_type_ends_at]))
40
  elif node_type == 'single':
41
- feat.append(getattr(ch, branch))
42
  elif node_type == 'vector':
43
- feat.extend(getattr(ch, branch))
44
  itype += 1
45
  features.append(torch.tensor(feat))
46
  return torch.stack(features, dim=1) * node_feature_scales, lengths
47
 
48
  def full_connected_graph(n_nodes, self_loops=True):
49
- senders = []
50
- receivers = []
51
  senders = np.arange(n_nodes*n_nodes) // n_nodes
52
  receivers = np.arange(n_nodes*n_nodes) % n_nodes
53
  if not self_loops and n_nodes > 1:
@@ -59,19 +58,18 @@ def full_connected_graph(n_nodes, self_loops=True):
59
  def check_selection(ch, selection):
60
  var, cut, op = selection
61
  if op == '>':
62
- return getattr(ch, var) > cut
63
  elif op == '<':
64
- return getattr(ch, var) < cut
65
  elif op == '==':
66
- return getattr(ch, var) == cut
67
-
68
  def check_selections(ch, selections):
69
  for selection in selections:
70
  if not check_selection(ch, selection):
71
  return False
72
  return True
73
 
74
- #Base dataset class for making graphs from ROOT ntuples.
75
  class RootDataset(DGLDataset):
76
  def __init__(self, name=None, raw_dir=None, save_dir=None, label=1, file_names = '*.root', node_branch_names=None, node_branch_types=None, node_feature_scales=None,
77
  selections=[], save=True, tree_name = 'nominal_Loose', fold_var = 'eventNumber', weight_var = None, chunks = 1, process_chunks = None, global_features = [], tracking_info = [], **kwargs):
@@ -88,7 +86,7 @@ class RootDataset(DGLDataset):
88
  self.fold_var = fold_var
89
  self.tracking_info = tracking_info
90
  self.tracking_info.insert(0, fold_var)
91
- if weight_var == None:
92
  weight_var = 1
93
  self.tracking_info.insert(1, weight_var)
94
  self.global_features = global_features
@@ -116,7 +114,7 @@ class RootDataset(DGLDataset):
116
  branches.append(feat)
117
  for selection in self.selections:
118
  branches.append(selection[0])
119
- return branches
120
 
121
  def make_graph(self, ch):
122
  t1 = time.time()
@@ -129,7 +127,7 @@ class RootDataset(DGLDataset):
129
  self.times[0] += t2 - t1
130
  self.times[1] += t3 - t2
131
  return g
132
-
133
  def process(self):
134
  times = [0, 0, 0]
135
  oldtime = time.time()
@@ -139,21 +137,21 @@ class RootDataset(DGLDataset):
139
  self.files = []
140
  for file_name in self.file_names:
141
  self.files.extend(glob.glob(os.path.join(self.raw_dir, file_name)))
142
- self.chain = ROOT.TChain(self.tree_name)
143
 
144
- if len(self.files) == 0:
145
- print('No files found in {}'.format(os.path.join(self.raw_dir, self.file_names)))
146
  for file in self.files:
147
- utils.set_timeout(60*2)
148
- self.chain.Add(file)
149
- utils.unset_timeout()
150
- branches = self.get_list_of_branches()
151
- self.chain.SetBranchStatus('*', 0)
152
- for branch in branches:
153
- self.chain.SetBranchStatus(branch, 1)
154
  newtime = time.time()
155
  times[0] += newtime - oldtime
156
- chunks = np.array_split(np.arange(self.chain.GetEntries()), self.chunks)
157
  chunks = [chunk for i, chunk in enumerate(chunks) if i in self.process_chunks]
158
 
159
  self.graph_chunks = []
@@ -170,28 +168,28 @@ class RootDataset(DGLDataset):
170
  globals = []
171
  for ientry in chunk:
172
  if (ientry % 10000 == 0):
173
- print('Processing event {}/{}'.format(ientry, self.chain.GetEntries()), flush=True)
174
- self.chain.GetEntry(ientry)
175
  passed = True
176
  for selection in self.selections:
177
- if not check_selection(self.chain, selection):
178
  passed = False
179
  continue
180
  oldtime = newtime
181
  newtime = time.time()
182
  times[1] += newtime - oldtime
183
  if passed:
184
- graphs.append(self.make_graph(self.chain))
185
- labels.append( self.label )
186
  tracking.append(torch.zeros(len(self.tracking_info), dtype=torch.double))
187
  globals.append(torch.zeros(len(self.global_features)))
188
  for i_ti, tr_branch in enumerate(self.tracking_info):
189
  if isinstance(tr_branch, str):
190
- tracking[-1][i_ti] = getattr(self.chain, tr_branch)
191
  else:
192
  tracking[-1][i_ti] = tr_branch
193
  for i_gl, gl_branch in enumerate(self.global_features):
194
- globals[-1][i_gl] = getattr(self.chain, gl_branch)
195
  oldtime = newtime
196
  newtime = time.time()
197
  times[2] += newtime - oldtime
@@ -215,17 +213,8 @@ class RootDataset(DGLDataset):
215
  self.graphs = graphs
216
  self.save()
217
  return
218
- self.graphs = self.graph_chunks[0]
219
- for chunk in self.graph_chunks[1:]:
220
- self.graphs += chunk
221
- self.labels = torch.cat(self.label_chunks)
222
- self.tracking = torch.cat(self.tracking_chunks)
223
- self.global_features = torch.cat(self.global_chunks)
224
- print('Time spent: Creating TChain: {}s, Getting Entries and Selection: {}s, Graph Creation: {}s'.format(*times))
225
- print('Time spent in node_features_from_tree: {}s, full_connected_graph: {}s'.format(*self.times))
226
-
227
  def save(self):
228
- """save the graph list and the labels"""
229
  if not self.save_to_disk:
230
  return
231
  graph_path = os.path.join(self.save_dir, self.name + '.bin')
@@ -244,7 +233,7 @@ class RootDataset(DGLDataset):
244
  graph_path = os.path.join(self.save_dir, self.name + '.bin')
245
  print(f'Saving dataset to {os.path.join(self.save_dir, self.name + f"_{self.process_chunks[chunk_id]}.bin")}')
246
  dgl.save_graphs(str(graph_path).replace('.bin', f'_{self.process_chunks[chunk_id]}.bin'), graphs, {'labels': labels, 'tracking': tracking, 'global': globals})
247
-
248
  def has_cache(self):
249
  print(f'Checking for cache of {self.name}')
250
  if not self.save_to_disk:
@@ -293,7 +282,7 @@ class RootDataset(DGLDataset):
293
 
294
  def __len__(self):
295
  return len(self.graphs)
296
-
297
  #Dataset with edge features added (deta, dphi, dR)
298
  class EdgeDataset(RootDataset):
299
  def make_graph(self, ch):
 
1
  from dgl.data import DGLDataset
2
  import dgl
3
+ import uproot
4
+ import awkward as ak
5
  import torch
6
  import os
7
  import glob
 
15
  if node_type == 'single':
16
  lengths.append(1)
17
  elif node_type == 'vector':
18
+ lengths.append(len(ch[branch]))
19
  else:
20
  print('Unknown node branch type: {}'.format(node_type))
21
  features = []
 
39
  this_type_ends_at = sum(lengths[:itype+1])
40
  feat.extend(features[0][this_type_starts_at:this_type_ends_at]*torch.cosh(features[1][this_type_starts_at:this_type_ends_at]))
41
  elif node_type == 'single':
42
+ feat.append(ch[branch])
43
  elif node_type == 'vector':
44
+ feat.extend(ch[branch])
45
  itype += 1
46
  features.append(torch.tensor(feat))
47
  return torch.stack(features, dim=1) * node_feature_scales, lengths
48
 
49
  def full_connected_graph(n_nodes, self_loops=True):
 
 
50
  senders = np.arange(n_nodes*n_nodes) // n_nodes
51
  receivers = np.arange(n_nodes*n_nodes) % n_nodes
52
  if not self_loops and n_nodes > 1:
 
58
  def check_selection(ch, selection):
59
  var, cut, op = selection
60
  if op == '>':
61
+ return ch[var] > cut
62
  elif op == '<':
63
+ return ch[var] < cut
64
  elif op == '==':
65
+ return ch[var] == cut
66
+
67
  def check_selections(ch, selections):
68
  for selection in selections:
69
  if not check_selection(ch, selection):
70
  return False
71
  return True
72
 
 
73
  class RootDataset(DGLDataset):
74
  def __init__(self, name=None, raw_dir=None, save_dir=None, label=1, file_names = '*.root', node_branch_names=None, node_branch_types=None, node_feature_scales=None,
75
  selections=[], save=True, tree_name = 'nominal_Loose', fold_var = 'eventNumber', weight_var = None, chunks = 1, process_chunks = None, global_features = [], tracking_info = [], **kwargs):
 
86
  self.fold_var = fold_var
87
  self.tracking_info = tracking_info
88
  self.tracking_info.insert(0, fold_var)
89
+ if weight_var is None:
90
  weight_var = 1
91
  self.tracking_info.insert(1, weight_var)
92
  self.global_features = global_features
 
114
  branches.append(feat)
115
  for selection in self.selections:
116
  branches.append(selection[0])
117
+ return list(set(branches)) # Remove duplicates
118
 
119
  def make_graph(self, ch):
120
  t1 = time.time()
 
127
  self.times[0] += t2 - t1
128
  self.times[1] += t3 - t2
129
  return g
130
+
131
  def process(self):
132
  times = [0, 0, 0]
133
  oldtime = time.time()
 
137
  self.files = []
138
  for file_name in self.file_names:
139
  self.files.extend(glob.glob(os.path.join(self.raw_dir, file_name)))
140
+ branches = self.get_list_of_branches()
141
 
142
+ # Read all files and concatenate arrays
143
+ arrays = []
144
  for file in self.files:
145
+ with uproot.open(file) as f:
146
+ arrays.append(f[self.tree_name].arrays(branches, library="ak"))
147
+ if len(arrays) == 0:
148
+ print('No files found in {}'.format(os.path.join(self.raw_dir, self.file_names)))
149
+ return
150
+ data = ak.concatenate(arrays, axis=0)
151
+ n_entries = len(data[branches[0]])
152
  newtime = time.time()
153
  times[0] += newtime - oldtime
154
+ chunks = np.array_split(np.arange(n_entries), self.chunks)
155
  chunks = [chunk for i, chunk in enumerate(chunks) if i in self.process_chunks]
156
 
157
  self.graph_chunks = []
 
168
  globals = []
169
  for ientry in chunk:
170
  if (ientry % 10000 == 0):
171
+ print('Processing event {}/{}'.format(ientry, n_entries), flush=True)
172
+ ch = {b: data[b][ientry] for b in branches}
173
  passed = True
174
  for selection in self.selections:
175
+ if not check_selection(ch, selection):
176
  passed = False
177
  continue
178
  oldtime = newtime
179
  newtime = time.time()
180
  times[1] += newtime - oldtime
181
  if passed:
182
+ graphs.append(self.make_graph(ch))
183
+ labels.append(self.label)
184
  tracking.append(torch.zeros(len(self.tracking_info), dtype=torch.double))
185
  globals.append(torch.zeros(len(self.global_features)))
186
  for i_ti, tr_branch in enumerate(self.tracking_info):
187
  if isinstance(tr_branch, str):
188
+ tracking[-1][i_ti] = ch[tr_branch]
189
  else:
190
  tracking[-1][i_ti] = tr_branch
191
  for i_gl, gl_branch in enumerate(self.global_features):
192
+ globals[-1][i_gl] = ch[gl_branch]
193
  oldtime = newtime
194
  newtime = time.time()
195
  times[2] += newtime - oldtime
 
213
  self.graphs = graphs
214
  self.save()
215
  return
216
+
 
 
 
 
 
 
 
 
217
  def save(self):
 
218
  if not self.save_to_disk:
219
  return
220
  graph_path = os.path.join(self.save_dir, self.name + '.bin')
 
233
  graph_path = os.path.join(self.save_dir, self.name + '.bin')
234
  print(f'Saving dataset to {os.path.join(self.save_dir, self.name + f"_{self.process_chunks[chunk_id]}.bin")}')
235
  dgl.save_graphs(str(graph_path).replace('.bin', f'_{self.process_chunks[chunk_id]}.bin'), graphs, {'labels': labels, 'tracking': tracking, 'global': globals})
236
+
237
  def has_cache(self):
238
  print(f'Checking for cache of {self.name}')
239
  if not self.save_to_disk:
 
282
 
283
  def __len__(self):
284
  return len(self.graphs)
285
+
286
  #Dataset with edge features added (deta, dphi, dR)
287
  class EdgeDataset(RootDataset):
288
  def make_graph(self, ch):
root_gnn_dgl/scripts/inference.py CHANGED
@@ -135,7 +135,6 @@ def main():
135
 
136
  import time
137
  start = time.time()
138
- import ROOT
139
  import torch
140
  from array import array
141
  import numpy as np
@@ -247,55 +246,41 @@ def main():
247
  all_labels[branch] = labels
248
  all_tracking[branch] = tracking_info
249
 
250
- if args.write:
251
- from ROOT import std
252
- # Open the original ROOT file
253
- infile = ROOT.TFile.Open(args.target)
254
- tree = infile.Get(dset_config['args']['tree_name'])
255
 
256
- # Create the destination directory if it doesn't exist
257
- os.makedirs(os.path.split(args.destination)[0], exist_ok=True)
 
258
 
259
- # Create a new ROOT file to write the modified tree
260
- outfile = ROOT.TFile.Open(args.destination, 'RECREATE')
 
261
 
262
- # Clone the original tree structure
263
- outtree = tree.CloneTree(0)
264
 
265
- # Create branches for all scores
266
- branch_vectors = {}
 
267
  for branch, scores in all_scores.items():
268
- if isinstance(scores[0], (list, tuple, np.ndarray)) and len(scores[0]) > 1:
269
- # Create a new branch for vectors
270
- branch_vectors[branch] = std.vector('float')()
271
- outtree.Branch(branch, branch_vectors[branch])
272
- else:
273
- # Create a new branch for single floats
274
- branch_vectors[branch] = array('f', [0])
275
- outtree.Branch(branch, branch_vectors[branch], f'{branch}/F')
276
-
277
- # Fill the tree
278
- for i in range(tree.GetEntries()):
279
- tree.GetEntry(i)
280
-
281
- for branch, scores in all_scores.items():
282
- branch_data = branch_vectors[branch]
283
- if isinstance(branch_data, array): # Check if it's a single float array
284
- branch_data[0] = float(scores[i])
285
- else: # Assume it's a std::vector<float>
286
- branch_data.clear()
287
- for value in scores[i]:
288
- branch_data.push_back(float(value))
289
-
290
- outtree.Fill()
291
-
292
- # Write the modified tree to the new file
293
- print(f'Writing to file {args.destination}')
294
- print(f'Input entries: {tree.GetEntries()}, Output entries: {outtree.GetEntries()}')
295
- print(f'Wrote scores to {args.branch_name}')
296
- outtree.Write()
297
- outfile.Close()
298
- infile.Close()
299
  else:
300
  os.makedirs(os.path.split(args.destination)[0], exist_ok=True)
301
  np.savez(args.destination, scores=all_scores, labels=all_labels, tracking_info=all_tracking)
 
135
 
136
  import time
137
  start = time.time()
 
138
  import torch
139
  from array import array
140
  import numpy as np
 
246
  all_labels[branch] = labels
247
  all_tracking[branch] = tracking_info
248
 
 
 
 
 
 
249
 
250
+ if args.write:
251
+ import uproot
252
+ import awkward as ak
253
 
254
+ # Open the original ROOT file and get the tree
255
+ infile = uproot.open(args.target)
256
+ tree = infile[dset_config['args']['tree_name']]
257
 
258
+ # Read the original tree as an awkward array
259
+ original_data = tree.arrays(library="ak")
260
 
261
+ # Prepare new branches as dicts of arrays
262
+ new_branches = {}
263
+ n_entries = len(original_data)
264
  for branch, scores in all_scores.items():
265
+ # Ensure the scores array is the right length
266
+ scores = np.asarray(scores)
267
+ if scores.shape[0] != n_entries:
268
+ raise ValueError(f"Branch '{branch}' has {scores.shape[0]} entries, but tree has {n_entries}")
269
+ new_branches[branch] = scores
270
+
271
+ # Merge all arrays (original + new branches)
272
+ # Convert awkward to dict of numpy arrays for uproot
273
+ out_dict = {k: np.asarray(v) for k, v in ak.to_numpy(original_data).items()}
274
+ out_dict.update(new_branches)
275
+
276
+ # Write to new ROOT file
277
+ os.makedirs(os.path.split(args.destination)[0], exist_ok=True)
278
+ with uproot.recreate(args.destination) as outfile:
279
+ outfile.mktree(dset_config['args']['tree_name'], {k: v.dtype for k, v in out_dict.items()})
280
+ outfile[dset_config['args']['tree_name']].extend(out_dict)
281
+
282
+ print(f"Wrote new ROOT file {args.destination} with new branches {list(new_branches.keys())}")
283
+
 
 
 
 
 
 
 
 
 
 
 
 
284
  else:
285
  os.makedirs(os.path.split(args.destination)[0], exist_ok=True)
286
  np.savez(args.destination, scores=all_scores, labels=all_labels, tracking_info=all_tracking)
root_gnn_dgl/scripts/training_script.py CHANGED
@@ -264,13 +264,9 @@ def train(train_loaders, test_loaders, model, device, config, args, rank):
264
  start = time.time()
265
  run = start
266
  if (args.profile):
267
- if (epoch == 2):
268
  torch.cuda.cudart().cudaProfilerStart()
269
- if (epoch == 3):
270
- print("Done profiling")
271
- torch.cuda.cudart().cudaProfilerStop()
272
  torch.cuda.nvtx.range_push("Epoch Start")
273
- print("executed push")
274
 
275
  if (args.multigpu or args.multinode):
276
  dist.barrier()
@@ -608,7 +604,6 @@ def train(train_loaders, test_loaders, model, device, config, args, rank):
608
 
609
  if (args.profile):
610
  torch.cuda.nvtx.range_pop() # pop epoch
611
- print("executed pop")
612
 
613
  print(f"Load: {cumulative_times[0]:.4f} s")
614
  print(f"Batch: {cumulative_times[1]:.4f} s")
 
264
  start = time.time()
265
  run = start
266
  if (args.profile):
267
+ if (epoch == 0):
268
  torch.cuda.cudart().cudaProfilerStart()
 
 
 
269
  torch.cuda.nvtx.range_push("Epoch Start")
 
270
 
271
  if (args.multigpu or args.multinode):
272
  dist.barrier()
 
604
 
605
  if (args.profile):
606
  torch.cuda.nvtx.range_pop() # pop epoch
 
607
 
608
  print(f"Load: {cumulative_times[0]:.4f} s")
609
  print(f"Batch: {cumulative_times[1]:.4f} s")
root_gnn_dgl/setup/Dockerfile ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM nvcr.io/nvidia/dgl:25.05-py3
2
+
3
+ WORKDIR /global/cfs/projectdirs/atlas/joshua/GNN4Colliders
4
+
5
+ LABEL maintainer.name="Joshua Ho"
6
+ LABEL maintainer.email="ho22joshua@berkeley.edu"
7
+
8
+ ENV LANG=C.UTF-8
9
+
10
+ # Install system dependencies: vim, OpenMPI, and build tools
11
+ RUN apt-get update -qq \
12
+ && apt-get install -y --no-install-recommends \
13
+ wget lsb-release gnupg software-properties-common \
14
+ vim \
15
+ g++-11 gcc-11 libstdc++-11-dev \
16
+ openmpi-bin openmpi-common libopenmpi-dev \
17
+ && rm -rf /var/lib/apt/lists/*
18
+
19
+ # Install Python packages: mpi4py and jupyter
20
+ RUN pip install --no-cache-dir mpi4py jupyter uproot
21
+
22
+ # (Optional) Expose Jupyter port
23
+ EXPOSE 8888
24
+
25
+
root_gnn_dgl/setup/build_image.sh ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ tag=$1
2
+ echo $tag
3
+ podman-hpc build -t joshuaho/pytorch:$tag --platform linux/amd64 .
4
+ podman-hpc migrate joshuaho/pytorch:$tag