ho22joshua committed on
Commit
ebeeeff
·
2 Parent(s): 407a49f 255148e

merging branch joshua with main

Browse files
root_gnn_dgl/configs/stats_all/ttH_CP_even_vs_odd_batch_size_2048.yaml ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Training_Name: ttH_CP_even_vs_odd_batch_size_2048
2
+ Training_Directory: trainings/stats_all/ttH_CP_even_vs_odd_batch_size_2048
3
+ Model:
4
+ module: models.GCN
5
+ class: Edge_Network
6
+ args:
7
+ hid_size: 64
8
+ in_size: 7
9
+ out_size: 1
10
+ n_layers: 4
11
+ n_proc_steps: 4
12
+ dropout: 0
13
+ Training:
14
+ epochs: 500
15
+ batch_size: 2048
16
+ learning_rate: 0.0001
17
+ gamma: 0.99
18
+ Datasets:
19
+ ttH_CP_even: &dataset_defn
20
+ module: root_gnn_base.dataset
21
+ class: LazyDataset
22
+ shuffle_chunks: 10
23
+ batch_size: 2048
24
+ padding_mode: NONE #one of STEPS, FIXED, or NONE
25
+ args: &dataset_args
26
+ name: ttH_CP_even
27
+ label: 0
28
+ # weight_var: weight
29
+ chunks: 10
30
+ buffer_size: 3
31
+ file_names: ttH_NLO.root
32
+ tree_name: output
33
+ fold_var: Number
34
+ raw_dir: /global/cfs/projectdirs/trn007/lbl_atlas/data/stats_all/
35
+ save_dir: /global/cfs/projectdirs/trn007/lbl_atlas/data/processed_graphs/stats_all/ttH_CP_even_vs_odd_batch_size_2048/
36
+ node_branch_names:
37
+ - [jet_pt, ele_pt, mu_pt, ph_pt, MET_met]
38
+ - [jet_eta, ele_eta, mu_eta, ph_eta, 0]
39
+ - [jet_phi, ele_phi, mu_phi, ph_phi, MET_phi]
40
+ - CALC_E
41
+ - [jet_btag, 0, 0, 0, 0]
42
+ - [0, ele_charge, mu_charge, 0, 0]
43
+ - NODE_TYPE
44
+ node_branch_types: [vector, vector, vector, vector, single]
45
+ node_feature_scales: [1e-1, 1, 1, 1e-1, 1, 1, 1]
46
+ folding:
47
+ n_folds: 4
48
+ test: [0]
49
+ # validation: 1
50
+ train: [1, 2, 3]
51
+ ttH_CP_odd:
52
+ <<: *dataset_defn
53
+ args:
54
+ <<: *dataset_args
55
+ name: ttH_CP_odd
56
+ label: 1
57
+ file_names: ttH_CPodd.root
root_gnn_dgl/configs/stats_all/ttH_CP_even_vs_odd_batch_size_4096.yaml ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Training_Name: ttH_CP_even_vs_odd_batch_size_4096
2
+ Training_Directory: trainings/stats_all/ttH_CP_even_vs_odd_batch_size_4096
3
+ Model:
4
+ module: models.GCN
5
+ class: Edge_Network
6
+ args:
7
+ hid_size: 64
8
+ in_size: 7
9
+ out_size: 1
10
+ n_layers: 4
11
+ n_proc_steps: 4
12
+ dropout: 0
13
+ Training:
14
+ epochs: 500
15
+ batch_size: 4096
16
+ learning_rate: 0.0001
17
+ gamma: 0.99
18
+ Datasets:
19
+ ttH_CP_even: &dataset_defn
20
+ module: root_gnn_base.dataset
21
+ class: LazyDataset
22
+ shuffle_chunks: 10
23
+ batch_size: 4096
24
+ padding_mode: NONE #one of STEPS, FIXED, or NONE
25
+ args: &dataset_args
26
+ name: ttH_CP_even
27
+ label: 0
28
+ # weight_var: weight
29
+ chunks: 10
30
+ buffer_size: 3
31
+ file_names: ttH_NLO.root
32
+ tree_name: output
33
+ fold_var: Number
34
+ raw_dir: /global/cfs/projectdirs/trn007/lbl_atlas/data/stats_all/
35
+ save_dir: /global/cfs/projectdirs/trn007/lbl_atlas/data/processed_graphs/stats_all/ttH_CP_even_vs_odd_batch_size_4096/
36
+ node_branch_names:
37
+ - [jet_pt, ele_pt, mu_pt, ph_pt, MET_met]
38
+ - [jet_eta, ele_eta, mu_eta, ph_eta, 0]
39
+ - [jet_phi, ele_phi, mu_phi, ph_phi, MET_phi]
40
+ - CALC_E
41
+ - [jet_btag, 0, 0, 0, 0]
42
+ - [0, ele_charge, mu_charge, 0, 0]
43
+ - NODE_TYPE
44
+ node_branch_types: [vector, vector, vector, vector, single]
45
+ node_feature_scales: [1e-1, 1, 1, 1e-1, 1, 1, 1]
46
+ folding:
47
+ n_folds: 4
48
+ test: [0]
49
+ # validation: 1
50
+ train: [1, 2, 3]
51
+ ttH_CP_odd:
52
+ <<: *dataset_defn
53
+ args:
54
+ <<: *dataset_args
55
+ name: ttH_CP_odd
56
+ label: 1
57
+ file_names: ttH_CPodd.root
root_gnn_dgl/configs/stats_all/ttH_CP_even_vs_odd_batch_size_8192.yaml ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Training_Name: ttH_CP_even_vs_odd_batch_size_8192
2
+ Training_Directory: trainings/stats_all/ttH_CP_even_vs_odd_batch_size_8192
3
+ Model:
4
+ module: models.GCN
5
+ class: Edge_Network
6
+ args:
7
+ hid_size: 64
8
+ in_size: 7
9
+ out_size: 1
10
+ n_layers: 4
11
+ n_proc_steps: 4
12
+ dropout: 0
13
+ Training:
14
+ epochs: 500
15
+ batch_size: 8192
16
+ learning_rate: 0.0001
17
+ gamma: 0.99
18
+ Datasets:
19
+ ttH_CP_even: &dataset_defn
20
+ module: root_gnn_base.dataset
21
+ class: LazyDataset
22
+ shuffle_chunks: 10
23
+ batch_size: 8192
24
+ padding_mode: NONE #one of STEPS, FIXED, or NONE
25
+ args: &dataset_args
26
+ name: ttH_CP_even
27
+ label: 0
28
+ # weight_var: weight
29
+ chunks: 10
30
+ buffer_size: 3
31
+ file_names: ttH_NLO.root
32
+ tree_name: output
33
+ fold_var: Number
34
+ raw_dir: /global/cfs/projectdirs/trn007/lbl_atlas/data/stats_all/
35
+ save_dir: /global/cfs/projectdirs/trn007/lbl_atlas/data/processed_graphs/stats_all/ttH_CP_even_vs_odd_batch_size_8192/
36
+ node_branch_names:
37
+ - [jet_pt, ele_pt, mu_pt, ph_pt, MET_met]
38
+ - [jet_eta, ele_eta, mu_eta, ph_eta, 0]
39
+ - [jet_phi, ele_phi, mu_phi, ph_phi, MET_phi]
40
+ - CALC_E
41
+ - [jet_btag, 0, 0, 0, 0]
42
+ - [0, ele_charge, mu_charge, 0, 0]
43
+ - NODE_TYPE
44
+ node_branch_types: [vector, vector, vector, vector, single]
45
+ node_feature_scales: [1e-1, 1, 1, 1e-1, 1, 1, 1]
46
+ folding:
47
+ n_folds: 4
48
+ test: [0]
49
+ # validation: 1
50
+ train: [1, 2, 3]
51
+ ttH_CP_odd:
52
+ <<: *dataset_defn
53
+ args:
54
+ <<: *dataset_args
55
+ name: ttH_CP_odd
56
+ label: 1
57
+ file_names: ttH_CPodd.root
root_gnn_dgl/jobs/prep_data/run_processing.py CHANGED
@@ -77,9 +77,12 @@ def main():
77
  configs = [
78
  # "configs/stats_100K/pretraining_multiclass.yaml",
79
  # "configs/stats_100K/ttH_CP_even_vs_odd.yaml",
80
- "configs/stats_all/pretraining_multiclass.yaml",
81
- "configs/stats_all/ttH_CP_even_vs_odd.yaml",
82
  # "configs/attention/ttH_CP_even_vs_odd.yaml",
 
 
 
83
  ]
84
 
85
  # Path to the bash script to be called
 
77
  configs = [
78
  # "configs/stats_100K/pretraining_multiclass.yaml",
79
  # "configs/stats_100K/ttH_CP_even_vs_odd.yaml",
80
+ # "configs/stats_all/pretraining_multiclass.yaml",
81
+ # "configs/stats_all/ttH_CP_even_vs_odd.yaml",
82
  # "configs/attention/ttH_CP_even_vs_odd.yaml",
83
+ "configs/stats_all/ttH_CP_even_vs_odd_batch_size_2048.yaml",
84
+ "configs/stats_all/ttH_CP_even_vs_odd_batch_size_4096.yaml",
85
+ "configs/stats_all/ttH_CP_even_vs_odd_batch_size_8192.yaml",
86
  ]
87
 
88
  # Path to the bash script to be called
root_gnn_dgl/jobs/training/singlegpu/run_job.sh CHANGED
@@ -5,11 +5,11 @@
5
  #SBATCH --mail-user=ho22joshua@berkeley.edu
6
  #SBATCH --mail-type=ALL
7
  #SBATCH -t 15:00:00
8
- #SBATCH -A atlas
9
- #SBATCH -o /global/cfs/projectdirs/atlas/joshua/root_gnn/root_gnn_dgl/jobs/slurm/%j.out # STDOUT
10
 
11
  ARGUEMENTS="$*"
12
 
13
  echo "Arguements: $ARGUEMENTS"
14
  echo "launching image"
15
- source launch_image.sh "--entrypoint /global/cfs/projectdirs/atlas/joshua/root_gnn/root_gnn_dgl/jobs/run_job_image.sh" $ARGUEMENTS
 
5
  #SBATCH --mail-user=ho22joshua@berkeley.edu
6
  #SBATCH --mail-type=ALL
7
  #SBATCH -t 15:00:00
8
+ #SBATCH -A trn007
9
+ #SBATCH -o /global/cfs/projectdirs/atlas/joshua/GNN4Colliders/root_gnn_dgl/jobs/slurm/%j.out # STDOUT
10
 
11
  ARGUEMENTS="$*"
12
 
13
  echo "Arguements: $ARGUEMENTS"
14
  echo "launching image"
15
+ source /global/homes/j/joshuaho/launch_image.sh "--entrypoint /global/cfs/projectdirs/atlas/joshua/GNN4Colliders/root_gnn_dgl/jobs/training/singlegpu/run_job_image.sh" $ARGUEMENTS
root_gnn_dgl/jobs/training/singlegpu/run_job_image.sh CHANGED
@@ -4,17 +4,8 @@ CONFIG=$1
4
  shift
5
  ARGUEMENTS="$*"
6
 
7
- DIRECTORY="/global/cfs/projectdirs/atlas/joshua/root_gnn/root_gnn_dgl/configs/model_configs/"
8
- BASE_COMMAND="/global/cfs/projectdirs/atlas/joshua/root_gnn/root_gnn_dgl/scripts/training_script.py $ARGUEMENTS --preshuffle --nocompile --lazy --config $DIRECTORY"
9
-
10
- echo "launched image"
11
- cd /global/cfs/projectdirs/atlas/joshua/root_gnn/root_gnn_dgl/
12
-
13
- COMMAND="$BASE_COMMAND$CONFIG"
14
-
15
- eval "$(conda shell.bash hook)"
16
- conda init bash
17
- conda activate /opt/conda/envs/dgl
18
 
19
  echo "Running my script now"
20
  echo $COMMAND
 
4
  shift
5
  ARGUEMENTS="$*"
6
 
7
+ DIRECTORY="/global/cfs/projectdirs/atlas/joshua/GNN4Colliders/root_gnn_dgl/"
8
+ COMMAND="$DIRECTORY"scripts/training_script.py $ARGUEMENTS --preshuffle --nocompile --lazy --config $DIRECTORY$CONFIG
 
 
 
 
 
 
 
 
 
9
 
10
  echo "Running my script now"
11
  echo $COMMAND
root_gnn_dgl/jobs/training/singlegpu/submit.sh CHANGED
@@ -3,7 +3,10 @@ date
3
  DIRECTORY="/global/cfs/projectdirs/atlas/joshua/root_gnn/root_gnn_dgl/configs/model_configs/"
4
 
5
  configs=(
6
- "run_3_ttH/v05/sb_yukawa_cp_abs_weights.yaml --abs"
 
 
 
7
  )
8
 
9
  counter=0
 
3
  DIRECTORY="/global/cfs/projectdirs/atlas/joshua/root_gnn/root_gnn_dgl/configs/model_configs/"
4
 
5
  configs=(
6
+ "configs/stats_all/ttH_CP_even_vs_odd.yaml"
7
+ "configs/stats_all/ttH_CP_even_vs_odd_batch_size_2048.yaml"
8
+ "configs/stats_all/ttH_CP_even_vs_odd_batch_size_4096.yaml"
9
+ "configs/stats_all/ttH_CP_even_vs_odd_batch_size_8192.yaml"
10
  )
11
 
12
  counter=0
root_gnn_dgl/profile.sh CHANGED
@@ -1,35 +1,35 @@
1
  nsys profile \
2
- -o /pscratch/sd/j/joshuaho/my_profile_report_1_gpu_batch_size_1028 \
3
  --capture-range=cudaProfilerApi \
4
- --capture-range-end=stop-shutdown \
5
  --force-overwrite true \
6
  --trace=nvtx \
7
  --cudabacktrace=all \
8
- python scripts/training_script.py --config configs/stats_100K/ttH_CP_even_vs_odd.yaml --preshuffle --nocompile --lazy --restart --profile
9
 
10
  nsys profile \
11
- -o /pscratch/sd/j/joshuaho/my_profile_report_1_gpu_batch_size_2048 \
12
  --capture-range=cudaProfilerApi \
13
- --capture-range-end=stop-shutdown \
14
  --force-overwrite true \
15
  --trace=nvtx \
16
  --cudabacktrace=all \
17
- python scripts/training_script.py --config configs/stats_100K/ttH_CP_even_vs_odd_batch_size_2048.yaml --preshuffle --nocompile --lazy --restart --profile
18
 
19
  nsys profile \
20
- -o /pscratch/sd/j/joshuaho/my_profile_report_1_gpu_batch_size_4096 \
21
  --capture-range=cudaProfilerApi \
22
- --capture-range-end=stop-shutdown \
23
- --force-overwrite true \
24
  --trace=nvtx \
25
  --cudabacktrace=all \
26
- python scripts/training_script.py --config configs/stats_100K/ttH_CP_even_vs_odd_batch_size_4096.yaml --preshuffle --nocompile --lazy --restart --profile
27
 
28
  nsys profile \
29
- -o /pscratch/sd/j/joshuaho/my_profile_report_1_gpu_batch_size_8192 \
30
  --capture-range=cudaProfilerApi \
31
- --capture-range-end=stop-shutdown \
32
  --force-overwrite true \
33
  --trace=nvtx \
34
  --cudabacktrace=all \
35
- python scripts/training_script.py --config configs/stats_100K/ttH_CP_even_vs_odd_batch_size_8192.yaml --preshuffle --nocompile --lazy --restart --profile
 
1
  nsys profile \
2
+ -o /pscratch/sd/j/joshuaho/full_stats_profile_1_gpu_batch_size_1028 \
3
  --capture-range=cudaProfilerApi \
4
+ --duration=100 \
5
  --force-overwrite true \
6
  --trace=nvtx \
7
  --cudabacktrace=all \
8
+ python scripts/training_script.py --config configs/stats_all/ttH_CP_even_vs_odd.yaml --preshuffle --nocompile --lazy --restart --profile
9
 
10
  nsys profile \
11
+ -o /pscratch/sd/j/joshuaho/full_stats_profile_1_gpu_batch_size_2048 \
12
  --capture-range=cudaProfilerApi \
13
+ --duration=100 \
14
  --force-overwrite true \
15
  --trace=nvtx \
16
  --cudabacktrace=all \
17
+ python scripts/training_script.py --config configs/stats_all/ttH_CP_even_vs_odd_batch_size_2048.yaml --preshuffle --nocompile --lazy --restart --profile
18
 
19
  nsys profile \
20
+ -o /pscratch/sd/j/joshuaho/full_stats_profile_1_gpu_batch_size_4096 \
21
  --capture-range=cudaProfilerApi \
22
+ --duration=100 \
23
+ --force-overwrite=true \
24
  --trace=nvtx \
25
  --cudabacktrace=all \
26
+ python scripts/training_script.py --config configs/stats_all/ttH_CP_even_vs_odd_batch_size_4096.yaml --preshuffle --nocompile --lazy --restart --profile
27
 
28
  nsys profile \
29
+ -o /pscratch/sd/j/joshuaho/full_stats_profile_1_gpu_batch_size_8192 \
30
  --capture-range=cudaProfilerApi \
31
+ --duration=100 \
32
  --force-overwrite true \
33
  --trace=nvtx \
34
  --cudabacktrace=all \
35
+ python scripts/training_script.py --config configs/stats_all/ttH_CP_even_vs_odd_batch_size_8192.yaml --preshuffle --nocompile --lazy --restart --profile
root_gnn_dgl/root_gnn_base/dataset.py CHANGED
@@ -1,6 +1,7 @@
1
  from dgl.data import DGLDataset
2
  import dgl
3
- import ROOT
 
4
  import torch
5
  import os
6
  import glob
@@ -14,7 +15,7 @@ def node_features_from_tree(ch, node_branch_names, node_branch_types, node_featu
14
  if node_type == 'single':
15
  lengths.append(1)
16
  elif node_type == 'vector':
17
- lengths.append(len(getattr(ch, branch)))
18
  else:
19
  print('Unknown node branch type: {}'.format(node_type))
20
  features = []
@@ -38,16 +39,14 @@ def node_features_from_tree(ch, node_branch_names, node_branch_types, node_featu
38
  this_type_ends_at = sum(lengths[:itype+1])
39
  feat.extend(features[0][this_type_starts_at:this_type_ends_at]*torch.cosh(features[1][this_type_starts_at:this_type_ends_at]))
40
  elif node_type == 'single':
41
- feat.append(getattr(ch, branch))
42
  elif node_type == 'vector':
43
- feat.extend(getattr(ch, branch))
44
  itype += 1
45
  features.append(torch.tensor(feat))
46
  return torch.stack(features, dim=1) * node_feature_scales, lengths
47
 
48
  def full_connected_graph(n_nodes, self_loops=True):
49
- senders = []
50
- receivers = []
51
  senders = np.arange(n_nodes*n_nodes) // n_nodes
52
  receivers = np.arange(n_nodes*n_nodes) % n_nodes
53
  if not self_loops and n_nodes > 1:
@@ -59,19 +58,18 @@ def full_connected_graph(n_nodes, self_loops=True):
59
  def check_selection(ch, selection):
60
  var, cut, op = selection
61
  if op == '>':
62
- return getattr(ch, var) > cut
63
  elif op == '<':
64
- return getattr(ch, var) < cut
65
  elif op == '==':
66
- return getattr(ch, var) == cut
67
-
68
  def check_selections(ch, selections):
69
  for selection in selections:
70
  if not check_selection(ch, selection):
71
  return False
72
  return True
73
 
74
- #Base dataset class for making graphs from ROOT ntuples.
75
  class RootDataset(DGLDataset):
76
  def __init__(self, name=None, raw_dir=None, save_dir=None, label=1, file_names = '*.root', node_branch_names=None, node_branch_types=None, node_feature_scales=None,
77
  selections=[], save=True, tree_name = 'nominal_Loose', fold_var = 'eventNumber', weight_var = None, chunks = 1, process_chunks = None, global_features = [], tracking_info = [], **kwargs):
@@ -88,7 +86,7 @@ class RootDataset(DGLDataset):
88
  self.fold_var = fold_var
89
  self.tracking_info = tracking_info
90
  self.tracking_info.insert(0, fold_var)
91
- if weight_var == None:
92
  weight_var = 1
93
  self.tracking_info.insert(1, weight_var)
94
  self.global_features = global_features
@@ -116,7 +114,7 @@ class RootDataset(DGLDataset):
116
  branches.append(feat)
117
  for selection in self.selections:
118
  branches.append(selection[0])
119
- return branches
120
 
121
  def make_graph(self, ch):
122
  t1 = time.time()
@@ -129,7 +127,7 @@ class RootDataset(DGLDataset):
129
  self.times[0] += t2 - t1
130
  self.times[1] += t3 - t2
131
  return g
132
-
133
  def process(self):
134
  times = [0, 0, 0]
135
  oldtime = time.time()
@@ -139,21 +137,21 @@ class RootDataset(DGLDataset):
139
  self.files = []
140
  for file_name in self.file_names:
141
  self.files.extend(glob.glob(os.path.join(self.raw_dir, file_name)))
142
- self.chain = ROOT.TChain(self.tree_name)
143
 
144
- if len(self.files) == 0:
145
- print('No files found in {}'.format(os.path.join(self.raw_dir, self.file_names)))
146
  for file in self.files:
147
- utils.set_timeout(60*2)
148
- self.chain.Add(file)
149
- utils.unset_timeout()
150
- branches = self.get_list_of_branches()
151
- self.chain.SetBranchStatus('*', 0)
152
- for branch in branches:
153
- self.chain.SetBranchStatus(branch, 1)
154
  newtime = time.time()
155
  times[0] += newtime - oldtime
156
- chunks = np.array_split(np.arange(self.chain.GetEntries()), self.chunks)
157
  chunks = [chunk for i, chunk in enumerate(chunks) if i in self.process_chunks]
158
 
159
  self.graph_chunks = []
@@ -170,28 +168,28 @@ class RootDataset(DGLDataset):
170
  globals = []
171
  for ientry in chunk:
172
  if (ientry % 10000 == 0):
173
- print('Processing event {}/{}'.format(ientry, self.chain.GetEntries()), flush=True)
174
- self.chain.GetEntry(ientry)
175
  passed = True
176
  for selection in self.selections:
177
- if not check_selection(self.chain, selection):
178
  passed = False
179
  continue
180
  oldtime = newtime
181
  newtime = time.time()
182
  times[1] += newtime - oldtime
183
  if passed:
184
- graphs.append(self.make_graph(self.chain))
185
- labels.append( self.label )
186
  tracking.append(torch.zeros(len(self.tracking_info), dtype=torch.double))
187
  globals.append(torch.zeros(len(self.global_features)))
188
  for i_ti, tr_branch in enumerate(self.tracking_info):
189
  if isinstance(tr_branch, str):
190
- tracking[-1][i_ti] = getattr(self.chain, tr_branch)
191
  else:
192
  tracking[-1][i_ti] = tr_branch
193
  for i_gl, gl_branch in enumerate(self.global_features):
194
- globals[-1][i_gl] = getattr(self.chain, gl_branch)
195
  oldtime = newtime
196
  newtime = time.time()
197
  times[2] += newtime - oldtime
@@ -215,17 +213,8 @@ class RootDataset(DGLDataset):
215
  self.graphs = graphs
216
  self.save()
217
  return
218
- self.graphs = self.graph_chunks[0]
219
- for chunk in self.graph_chunks[1:]:
220
- self.graphs += chunk
221
- self.labels = torch.cat(self.label_chunks)
222
- self.tracking = torch.cat(self.tracking_chunks)
223
- self.global_features = torch.cat(self.global_chunks)
224
- print('Time spent: Creating TChain: {}s, Getting Entries and Selection: {}s, Graph Creation: {}s'.format(*times))
225
- print('Time spent in node_features_from_tree: {}s, full_connected_graph: {}s'.format(*self.times))
226
-
227
  def save(self):
228
- """save the graph list and the labels"""
229
  if not self.save_to_disk:
230
  return
231
  graph_path = os.path.join(self.save_dir, self.name + '.bin')
@@ -244,7 +233,7 @@ class RootDataset(DGLDataset):
244
  graph_path = os.path.join(self.save_dir, self.name + '.bin')
245
  print(f'Saving dataset to {os.path.join(self.save_dir, self.name + f"_{self.process_chunks[chunk_id]}.bin")}')
246
  dgl.save_graphs(str(graph_path).replace('.bin', f'_{self.process_chunks[chunk_id]}.bin'), graphs, {'labels': labels, 'tracking': tracking, 'global': globals})
247
-
248
  def has_cache(self):
249
  print(f'Checking for cache of {self.name}')
250
  if not self.save_to_disk:
@@ -293,7 +282,7 @@ class RootDataset(DGLDataset):
293
 
294
  def __len__(self):
295
  return len(self.graphs)
296
-
297
  #Dataset with edge features added (deta, dphi, dR)
298
  class EdgeDataset(RootDataset):
299
  def make_graph(self, ch):
 
1
  from dgl.data import DGLDataset
2
  import dgl
3
+ import uproot
4
+ import awkward as ak
5
  import torch
6
  import os
7
  import glob
 
15
  if node_type == 'single':
16
  lengths.append(1)
17
  elif node_type == 'vector':
18
+ lengths.append(len(ch[branch]))
19
  else:
20
  print('Unknown node branch type: {}'.format(node_type))
21
  features = []
 
39
  this_type_ends_at = sum(lengths[:itype+1])
40
  feat.extend(features[0][this_type_starts_at:this_type_ends_at]*torch.cosh(features[1][this_type_starts_at:this_type_ends_at]))
41
  elif node_type == 'single':
42
+ feat.append(ch[branch])
43
  elif node_type == 'vector':
44
+ feat.extend(ch[branch])
45
  itype += 1
46
  features.append(torch.tensor(feat))
47
  return torch.stack(features, dim=1) * node_feature_scales, lengths
48
 
49
  def full_connected_graph(n_nodes, self_loops=True):
 
 
50
  senders = np.arange(n_nodes*n_nodes) // n_nodes
51
  receivers = np.arange(n_nodes*n_nodes) % n_nodes
52
  if not self_loops and n_nodes > 1:
 
58
  def check_selection(ch, selection):
59
  var, cut, op = selection
60
  if op == '>':
61
+ return ch[var] > cut
62
  elif op == '<':
63
+ return ch[var] < cut
64
  elif op == '==':
65
+ return ch[var] == cut
66
+
67
  def check_selections(ch, selections):
68
  for selection in selections:
69
  if not check_selection(ch, selection):
70
  return False
71
  return True
72
 
 
73
  class RootDataset(DGLDataset):
74
  def __init__(self, name=None, raw_dir=None, save_dir=None, label=1, file_names = '*.root', node_branch_names=None, node_branch_types=None, node_feature_scales=None,
75
  selections=[], save=True, tree_name = 'nominal_Loose', fold_var = 'eventNumber', weight_var = None, chunks = 1, process_chunks = None, global_features = [], tracking_info = [], **kwargs):
 
86
  self.fold_var = fold_var
87
  self.tracking_info = tracking_info
88
  self.tracking_info.insert(0, fold_var)
89
+ if weight_var is None:
90
  weight_var = 1
91
  self.tracking_info.insert(1, weight_var)
92
  self.global_features = global_features
 
114
  branches.append(feat)
115
  for selection in self.selections:
116
  branches.append(selection[0])
117
+ return list(set(branches)) # Remove duplicates
118
 
119
  def make_graph(self, ch):
120
  t1 = time.time()
 
127
  self.times[0] += t2 - t1
128
  self.times[1] += t3 - t2
129
  return g
130
+
131
  def process(self):
132
  times = [0, 0, 0]
133
  oldtime = time.time()
 
137
  self.files = []
138
  for file_name in self.file_names:
139
  self.files.extend(glob.glob(os.path.join(self.raw_dir, file_name)))
140
+ branches = self.get_list_of_branches()
141
 
142
+ # Read all files and concatenate arrays
143
+ arrays = []
144
  for file in self.files:
145
+ with uproot.open(file) as f:
146
+ arrays.append(f[self.tree_name].arrays(branches, library="ak"))
147
+ if len(arrays) == 0:
148
+ print('No files found in {}'.format(os.path.join(self.raw_dir, self.file_names)))
149
+ return
150
+ data = ak.concatenate(arrays, axis=0)
151
+ n_entries = len(data[branches[0]])
152
  newtime = time.time()
153
  times[0] += newtime - oldtime
154
+ chunks = np.array_split(np.arange(n_entries), self.chunks)
155
  chunks = [chunk for i, chunk in enumerate(chunks) if i in self.process_chunks]
156
 
157
  self.graph_chunks = []
 
168
  globals = []
169
  for ientry in chunk:
170
  if (ientry % 10000 == 0):
171
+ print('Processing event {}/{}'.format(ientry, n_entries), flush=True)
172
+ ch = {b: data[b][ientry] for b in branches}
173
  passed = True
174
  for selection in self.selections:
175
+ if not check_selection(ch, selection):
176
  passed = False
177
  continue
178
  oldtime = newtime
179
  newtime = time.time()
180
  times[1] += newtime - oldtime
181
  if passed:
182
+ graphs.append(self.make_graph(ch))
183
+ labels.append(self.label)
184
  tracking.append(torch.zeros(len(self.tracking_info), dtype=torch.double))
185
  globals.append(torch.zeros(len(self.global_features)))
186
  for i_ti, tr_branch in enumerate(self.tracking_info):
187
  if isinstance(tr_branch, str):
188
+ tracking[-1][i_ti] = ch[tr_branch]
189
  else:
190
  tracking[-1][i_ti] = tr_branch
191
  for i_gl, gl_branch in enumerate(self.global_features):
192
+ globals[-1][i_gl] = ch[gl_branch]
193
  oldtime = newtime
194
  newtime = time.time()
195
  times[2] += newtime - oldtime
 
213
  self.graphs = graphs
214
  self.save()
215
  return
216
+
 
 
 
 
 
 
 
 
217
  def save(self):
 
218
  if not self.save_to_disk:
219
  return
220
  graph_path = os.path.join(self.save_dir, self.name + '.bin')
 
233
  graph_path = os.path.join(self.save_dir, self.name + '.bin')
234
  print(f'Saving dataset to {os.path.join(self.save_dir, self.name + f"_{self.process_chunks[chunk_id]}.bin")}')
235
  dgl.save_graphs(str(graph_path).replace('.bin', f'_{self.process_chunks[chunk_id]}.bin'), graphs, {'labels': labels, 'tracking': tracking, 'global': globals})
236
+
237
  def has_cache(self):
238
  print(f'Checking for cache of {self.name}')
239
  if not self.save_to_disk:
 
282
 
283
  def __len__(self):
284
  return len(self.graphs)
285
+
286
  #Dataset with edge features added (deta, dphi, dR)
287
  class EdgeDataset(RootDataset):
288
  def make_graph(self, ch):
root_gnn_dgl/scripts/inference.py CHANGED
@@ -135,7 +135,6 @@ def main():
135
 
136
  import time
137
  start = time.time()
138
- import ROOT
139
  import torch
140
  from array import array
141
  import numpy as np
@@ -247,55 +246,41 @@ def main():
247
  all_labels[branch] = labels
248
  all_tracking[branch] = tracking_info
249
 
250
- if args.write:
251
- from ROOT import std
252
- # Open the original ROOT file
253
- infile = ROOT.TFile.Open(args.target)
254
- tree = infile.Get(dset_config['args']['tree_name'])
255
 
256
- # Create the destination directory if it doesn't exist
257
- os.makedirs(os.path.split(args.destination)[0], exist_ok=True)
 
258
 
259
- # Create a new ROOT file to write the modified tree
260
- outfile = ROOT.TFile.Open(args.destination, 'RECREATE')
 
261
 
262
- # Clone the original tree structure
263
- outtree = tree.CloneTree(0)
264
 
265
- # Create branches for all scores
266
- branch_vectors = {}
 
267
  for branch, scores in all_scores.items():
268
- if isinstance(scores[0], (list, tuple, np.ndarray)) and len(scores[0]) > 1:
269
- # Create a new branch for vectors
270
- branch_vectors[branch] = std.vector('float')()
271
- outtree.Branch(branch, branch_vectors[branch])
272
- else:
273
- # Create a new branch for single floats
274
- branch_vectors[branch] = array('f', [0])
275
- outtree.Branch(branch, branch_vectors[branch], f'{branch}/F')
276
-
277
- # Fill the tree
278
- for i in range(tree.GetEntries()):
279
- tree.GetEntry(i)
280
-
281
- for branch, scores in all_scores.items():
282
- branch_data = branch_vectors[branch]
283
- if isinstance(branch_data, array): # Check if it's a single float array
284
- branch_data[0] = float(scores[i])
285
- else: # Assume it's a std::vector<float>
286
- branch_data.clear()
287
- for value in scores[i]:
288
- branch_data.push_back(float(value))
289
-
290
- outtree.Fill()
291
-
292
- # Write the modified tree to the new file
293
- print(f'Writing to file {args.destination}')
294
- print(f'Input entries: {tree.GetEntries()}, Output entries: {outtree.GetEntries()}')
295
- print(f'Wrote scores to {args.branch_name}')
296
- outtree.Write()
297
- outfile.Close()
298
- infile.Close()
299
  else:
300
  os.makedirs(os.path.split(args.destination)[0], exist_ok=True)
301
  np.savez(args.destination, scores=all_scores, labels=all_labels, tracking_info=all_tracking)
 
135
 
136
  import time
137
  start = time.time()
 
138
  import torch
139
  from array import array
140
  import numpy as np
 
246
  all_labels[branch] = labels
247
  all_tracking[branch] = tracking_info
248
 
 
 
 
 
 
249
 
250
+ if args.write:
251
+ import uproot
252
+ import awkward as ak
253
 
254
+ # Open the original ROOT file and get the tree
255
+ infile = uproot.open(args.target)
256
+ tree = infile[dset_config['args']['tree_name']]
257
 
258
+ # Read the original tree as an awkward array
259
+ original_data = tree.arrays(library="ak")
260
 
261
+ # Prepare new branches as dicts of arrays
262
+ new_branches = {}
263
+ n_entries = len(original_data)
264
  for branch, scores in all_scores.items():
265
+ # Ensure the scores array is the right length
266
+ scores = np.asarray(scores)
267
+ if scores.shape[0] != n_entries:
268
+ raise ValueError(f"Branch '{branch}' has {scores.shape[0]} entries, but tree has {n_entries}")
269
+ new_branches[branch] = scores
270
+
271
+ # Merge all arrays (original + new branches)
272
+ # Convert awkward to dict of numpy arrays for uproot
273
+ out_dict = {k: np.asarray(v) for k, v in ak.to_numpy(original_data).items()}
274
+ out_dict.update(new_branches)
275
+
276
+ # Write to new ROOT file
277
+ os.makedirs(os.path.split(args.destination)[0], exist_ok=True)
278
+ with uproot.recreate(args.destination) as outfile:
279
+ outfile.mktree(dset_config['args']['tree_name'], {k: v.dtype for k, v in out_dict.items()})
280
+ outfile[dset_config['args']['tree_name']].extend(out_dict)
281
+
282
+ print(f"Wrote new ROOT file {args.destination} with new branches {list(new_branches.keys())}")
283
+
 
 
 
 
 
 
 
 
 
 
 
 
284
  else:
285
  os.makedirs(os.path.split(args.destination)[0], exist_ok=True)
286
  np.savez(args.destination, scores=all_scores, labels=all_labels, tracking_info=all_tracking)
root_gnn_dgl/scripts/training_script.py CHANGED
@@ -264,13 +264,9 @@ def train(train_loaders, test_loaders, model, device, config, args, rank):
264
  start = time.time()
265
  run = start
266
  if (args.profile):
267
- if (epoch == 2):
268
  torch.cuda.cudart().cudaProfilerStart()
269
- if (epoch == 3):
270
- print("Done profiling")
271
- torch.cuda.cudart().cudaProfilerStop()
272
  torch.cuda.nvtx.range_push("Epoch Start")
273
- print("executed push")
274
 
275
  if (args.multigpu or args.multinode):
276
  dist.barrier()
@@ -608,7 +604,6 @@ def train(train_loaders, test_loaders, model, device, config, args, rank):
608
 
609
  if (args.profile):
610
  torch.cuda.nvtx.range_pop() # pop epoch
611
- print("executed pop")
612
 
613
  print(f"Load: {cumulative_times[0]:.4f} s")
614
  print(f"Batch: {cumulative_times[1]:.4f} s")
 
264
  start = time.time()
265
  run = start
266
  if (args.profile):
267
+ if (epoch == 0):
268
  torch.cuda.cudart().cudaProfilerStart()
 
 
 
269
  torch.cuda.nvtx.range_push("Epoch Start")
 
270
 
271
  if (args.multigpu or args.multinode):
272
  dist.barrier()
 
604
 
605
  if (args.profile):
606
  torch.cuda.nvtx.range_pop() # pop epoch
 
607
 
608
  print(f"Load: {cumulative_times[0]:.4f} s")
609
  print(f"Batch: {cumulative_times[1]:.4f} s")
root_gnn_dgl/setup/Dockerfile ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM nvcr.io/nvidia/dgl:25.05-py3
2
+
3
+ WORKDIR /global/cfs/projectdirs/atlas/joshua/GNN4Colliders
4
+
5
+ LABEL maintainer.name="Joshua Ho"
6
+ LABEL maintainer.email="ho22joshua@berkeley.edu"
7
+
8
+ ENV LANG=C.UTF-8
9
+
10
+ # Install system dependencies: vim, OpenMPI, and build tools
11
+ RUN apt-get update -qq \
12
+ && apt-get install -y --no-install-recommends \
13
+ wget lsb-release gnupg software-properties-common \
14
+ vim \
15
+ g++-11 gcc-11 libstdc++-11-dev \
16
+ openmpi-bin openmpi-common libopenmpi-dev \
17
+ && rm -rf /var/lib/apt/lists/*
18
+
19
+ # Install Python packages: mpi4py and jupyter
20
+ RUN pip install --no-cache-dir mpi4py jupyter uproot
21
+
22
+ # (Optional) Expose Jupyter port
23
+ EXPOSE 8888
24
+
25
+
root_gnn_dgl/setup/build_image.sh ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ tag=$1
2
+ echo $tag
3
+ podman-hpc build -t joshuaho/pytorch:$tag --platform linux/amd64 .
4
+ podman-hpc migrate joshuaho/pytorch:$tag