Merging branch joshua with main

Files changed:
- root_gnn_dgl/configs/stats_all/ttH_CP_even_vs_odd_batch_size_2048.yaml +57 -0
- root_gnn_dgl/configs/stats_all/ttH_CP_even_vs_odd_batch_size_4096.yaml +57 -0
- root_gnn_dgl/configs/stats_all/ttH_CP_even_vs_odd_batch_size_8192.yaml +57 -0
- root_gnn_dgl/jobs/prep_data/run_processing.py +5 -2
- root_gnn_dgl/jobs/training/singlegpu/run_job.sh +3 -3
- root_gnn_dgl/jobs/training/singlegpu/run_job_image.sh +2 -11
- root_gnn_dgl/jobs/training/singlegpu/submit.sh +4 -1
- root_gnn_dgl/profile.sh +13 -13
- root_gnn_dgl/root_gnn_base/dataset.py +33 -44
- root_gnn_dgl/scripts/inference.py +30 -45
- root_gnn_dgl/scripts/training_script.py +1 -6
- root_gnn_dgl/setup/Dockerfile +25 -0
- root_gnn_dgl/setup/build_image.sh +4 -0
root_gnn_dgl/configs/stats_all/ttH_CP_even_vs_odd_batch_size_2048.yaml
ADDED
@@ -0,0 +1,57 @@
+Training_Name: ttH_CP_even_vs_odd_batch_size_2048
+Training_Directory: trainings/stats_all/ttH_CP_even_vs_odd_batch_size_2048
+Model:
+  module: models.GCN
+  class: Edge_Network
+  args:
+    hid_size: 64
+    in_size: 7
+    out_size: 1
+    n_layers: 4
+    n_proc_steps: 4
+    dropout: 0
+Training:
+  epochs: 500
+  batch_size: 2048
+  learning_rate: 0.0001
+  gamma: 0.99
+Datasets:
+  ttH_CP_even: &dataset_defn
+    module: root_gnn_base.dataset
+    class: LazyDataset
+    shuffle_chunks: 10
+    batch_size: 2048
+    padding_mode: NONE #one of STEPS, FIXED, or NONE
+    args: &dataset_args
+      name: ttH_CP_even
+      label: 0
+      # weight_var: weight
+      chunks: 10
+      buffer_size: 3
+      file_names: ttH_NLO.root
+      tree_name: output
+      fold_var: Number
+      raw_dir: /global/cfs/projectdirs/trn007/lbl_atlas/data/stats_all/
+      save_dir: /global/cfs/projectdirs/trn007/lbl_atlas/data/processed_graphs/stats_all/ttH_CP_even_vs_odd_batch_size_2048/
+      node_branch_names:
+        - [jet_pt, ele_pt, mu_pt, ph_pt, MET_met]
+        - [jet_eta, ele_eta, mu_eta, ph_eta, 0]
+        - [jet_phi, ele_phi, mu_phi, ph_phi, MET_phi]
+        - CALC_E
+        - [jet_btag, 0, 0, 0, 0]
+        - [0, ele_charge, mu_charge, 0, 0]
+        - NODE_TYPE
+      node_branch_types: [vector, vector, vector, vector, single]
+      node_feature_scales: [1e-1, 1, 1, 1e-1, 1, 1, 1]
+      folding:
+        n_folds: 4
+        test: [0]
+        # validation: 1
+        train: [1, 2, 3]
+  ttH_CP_odd:
+    <<: *dataset_defn
+    args:
+      <<: *dataset_args
+      name: ttH_CP_odd
+      label: 1
+      file_names: ttH_CPodd.root
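The ttH_CP_odd entry above reuses the ttH_CP_even definition through standard YAML anchors and merge keys: "<<: *dataset_defn" copies every top-level field and "<<: *dataset_args" copies the nested args, with only name, label, and file_names overridden. A minimal sketch (not repo code) of how this resolves, assuming PyYAML, which implements YAML 1.1 merge keys in safe_load:

import yaml

snippet = """
Datasets:
  ttH_CP_even: &dataset_defn
    class: LazyDataset
    batch_size: 2048
    args: &dataset_args
      name: ttH_CP_even
      label: 0
      file_names: ttH_NLO.root
  ttH_CP_odd:
    <<: *dataset_defn
    args:
      <<: *dataset_args
      name: ttH_CP_odd
      label: 1
      file_names: ttH_CPodd.root
"""

odd = yaml.safe_load(snippet)["Datasets"]["ttH_CP_odd"]
assert odd["class"] == "LazyDataset"                  # inherited via <<: *dataset_defn
assert odd["batch_size"] == 2048                      # inherited via <<: *dataset_defn
assert odd["args"]["label"] == 1                      # overridden locally
assert odd["args"]["file_names"] == "ttH_CPodd.root"  # overridden locally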
root_gnn_dgl/configs/stats_all/ttH_CP_even_vs_odd_batch_size_4096.yaml
ADDED
@@ -0,0 +1,57 @@
+Training_Name: ttH_CP_even_vs_odd_batch_size_4096
+Training_Directory: trainings/stats_all/ttH_CP_even_vs_odd_batch_size_4096
+Model:
+  module: models.GCN
+  class: Edge_Network
+  args:
+    hid_size: 64
+    in_size: 7
+    out_size: 1
+    n_layers: 4
+    n_proc_steps: 4
+    dropout: 0
+Training:
+  epochs: 500
+  batch_size: 4096
+  learning_rate: 0.0001
+  gamma: 0.99
+Datasets:
+  ttH_CP_even: &dataset_defn
+    module: root_gnn_base.dataset
+    class: LazyDataset
+    shuffle_chunks: 10
+    batch_size: 4096
+    padding_mode: NONE #one of STEPS, FIXED, or NONE
+    args: &dataset_args
+      name: ttH_CP_even
+      label: 0
+      # weight_var: weight
+      chunks: 10
+      buffer_size: 3
+      file_names: ttH_NLO.root
+      tree_name: output
+      fold_var: Number
+      raw_dir: /global/cfs/projectdirs/trn007/lbl_atlas/data/stats_all/
+      save_dir: /global/cfs/projectdirs/trn007/lbl_atlas/data/processed_graphs/stats_all/ttH_CP_even_vs_odd_batch_size_4096/
+      node_branch_names:
+        - [jet_pt, ele_pt, mu_pt, ph_pt, MET_met]
+        - [jet_eta, ele_eta, mu_eta, ph_eta, 0]
+        - [jet_phi, ele_phi, mu_phi, ph_phi, MET_phi]
+        - CALC_E
+        - [jet_btag, 0, 0, 0, 0]
+        - [0, ele_charge, mu_charge, 0, 0]
+        - NODE_TYPE
+      node_branch_types: [vector, vector, vector, vector, single]
+      node_feature_scales: [1e-1, 1, 1, 1e-1, 1, 1, 1]
+      folding:
+        n_folds: 4
+        test: [0]
+        # validation: 1
+        train: [1, 2, 3]
+  ttH_CP_odd:
+    <<: *dataset_defn
+    args:
+      <<: *dataset_args
+      name: ttH_CP_odd
+      label: 1
+      file_names: ttH_CPodd.root
root_gnn_dgl/configs/stats_all/ttH_CP_even_vs_odd_batch_size_8192.yaml
ADDED
@@ -0,0 +1,57 @@
+Training_Name: ttH_CP_even_vs_odd_batch_size_8192
+Training_Directory: trainings/stats_all/ttH_CP_even_vs_odd_batch_size_8192
+Model:
+  module: models.GCN
+  class: Edge_Network
+  args:
+    hid_size: 64
+    in_size: 7
+    out_size: 1
+    n_layers: 4
+    n_proc_steps: 4
+    dropout: 0
+Training:
+  epochs: 500
+  batch_size: 8192
+  learning_rate: 0.0001
+  gamma: 0.99
+Datasets:
+  ttH_CP_even: &dataset_defn
+    module: root_gnn_base.dataset
+    class: LazyDataset
+    shuffle_chunks: 10
+    batch_size: 8192
+    padding_mode: NONE #one of STEPS, FIXED, or NONE
+    args: &dataset_args
+      name: ttH_CP_even
+      label: 0
+      # weight_var: weight
+      chunks: 10
+      buffer_size: 3
+      file_names: ttH_NLO.root
+      tree_name: output
+      fold_var: Number
+      raw_dir: /global/cfs/projectdirs/trn007/lbl_atlas/data/stats_all/
+      save_dir: /global/cfs/projectdirs/trn007/lbl_atlas/data/processed_graphs/stats_all/ttH_CP_even_vs_odd_batch_size_8192/
+      node_branch_names:
+        - [jet_pt, ele_pt, mu_pt, ph_pt, MET_met]
+        - [jet_eta, ele_eta, mu_eta, ph_eta, 0]
+        - [jet_phi, ele_phi, mu_phi, ph_phi, MET_phi]
+        - CALC_E
+        - [jet_btag, 0, 0, 0, 0]
+        - [0, ele_charge, mu_charge, 0, 0]
+        - NODE_TYPE
+      node_branch_types: [vector, vector, vector, vector, single]
+      node_feature_scales: [1e-1, 1, 1, 1e-1, 1, 1, 1]
+      folding:
+        n_folds: 4
+        test: [0]
+        # validation: 1
+        train: [1, 2, 3]
+  ttH_CP_odd:
+    <<: *dataset_defn
+    args:
+      <<: *dataset_args
+      name: ttH_CP_odd
+      label: 1
+      file_names: ttH_CPodd.root
root_gnn_dgl/jobs/prep_data/run_processing.py
CHANGED
@@ -77,9 +77,12 @@ def main():
     configs = [
         # "configs/stats_100K/pretraining_multiclass.yaml",
         # "configs/stats_100K/ttH_CP_even_vs_odd.yaml",
-        "configs/stats_all/pretraining_multiclass.yaml",
-        "configs/stats_all/ttH_CP_even_vs_odd.yaml",
+        # "configs/stats_all/pretraining_multiclass.yaml",
+        # "configs/stats_all/ttH_CP_even_vs_odd.yaml",
         # "configs/attention/ttH_CP_even_vs_odd.yaml",
+        "configs/stats_all/ttH_CP_even_vs_odd_batch_size_2048.yaml",
+        "configs/stats_all/ttH_CP_even_vs_odd_batch_size_4096.yaml",
+        "configs/stats_all/ttH_CP_even_vs_odd_batch_size_8192.yaml",
     ]

     # Path to the bash script to be called
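For context, run_processing.py iterates over this configs list and invokes a bash script once per entry (the "Path to the bash script to be called" comment above); the loop itself is outside this diff. A hypothetical sketch of that dispatch pattern, with jobs/prep_data/process.sh standing in for the real script path:

import subprocess

configs = [
    "configs/stats_all/ttH_CP_even_vs_odd_batch_size_2048.yaml",
    "configs/stats_all/ttH_CP_even_vs_odd_batch_size_4096.yaml",
    "configs/stats_all/ttH_CP_even_vs_odd_batch_size_8192.yaml",
]

for config in configs:
    # One graph-preparation job per config; check=True stops on the first failure.
    # NOTE: the script path below is a placeholder, not taken from the repo.
    subprocess.run(["bash", "jobs/prep_data/process.sh", config], check=True)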
root_gnn_dgl/jobs/training/singlegpu/run_job.sh
CHANGED
@@ -5,11 +5,11 @@
 #SBATCH --mail-user=ho22joshua@berkeley.edu
 #SBATCH --mail-type=ALL
 #SBATCH -t 15:00:00
-#SBATCH -A
-#SBATCH -o /global/cfs/projectdirs/atlas/joshua/
+#SBATCH -A trn007
+#SBATCH -o /global/cfs/projectdirs/atlas/joshua/GNN4Colliders/root_gnn_dgl/jobs/slurm/%j.out # STDOUT

 ARGUEMENTS="$*"

 echo "Arguements: $ARGUEMENTS"
 echo "launching image"
-source launch_image.sh "--entrypoint /global/cfs/projectdirs/atlas/joshua/
+source /global/homes/j/joshuaho/launch_image.sh "--entrypoint /global/cfs/projectdirs/atlas/joshua/GNN4Colliders/root_gnn_dgl/jobs/training/singlegpu/run_job_image.sh" $ARGUEMENTS
root_gnn_dgl/jobs/training/singlegpu/run_job_image.sh
CHANGED
@@ -4,17 +4,8 @@ CONFIG=$1
 shift
 ARGUEMENTS="$*"

-DIRECTORY="/global/cfs/projectdirs/atlas/joshua/
-
-
-echo "launched image"
-cd /global/cfs/projectdirs/atlas/joshua/root_gnn/root_gnn_dgl/
-
-COMMAND="$BASE_COMMAND$CONFIG"
-
-eval "$(conda shell.bash hook)"
-conda init bash
-conda activate /opt/conda/envs/dgl
+DIRECTORY="/global/cfs/projectdirs/atlas/joshua/GNN4Colliders/root_gnn_dgl/"
+COMMAND="$DIRECTORY"scripts/training_script.py $ARGUEMENTS --preshuffle --nocompile --lazy --config $DIRECTORY$CONFIG

 echo "Running my script now"
 echo $COMMAND
root_gnn_dgl/jobs/training/singlegpu/submit.sh
CHANGED
@@ -3,7 +3,10 @@ date
 DIRECTORY="/global/cfs/projectdirs/atlas/joshua/root_gnn/root_gnn_dgl/configs/model_configs/"

 configs=(
-    "
+    "configs/stats_all/ttH_CP_even_vs_odd.yaml"
+    "configs/stats_all/ttH_CP_even_vs_odd_batch_size_2048.yaml"
+    "configs/stats_all/ttH_CP_even_vs_odd_batch_size_4096.yaml"
+    "configs/stats_all/ttH_CP_even_vs_odd_batch_size_8192.yaml"
 )

 counter=0
root_gnn_dgl/profile.sh
CHANGED
@@ -1,35 +1,35 @@
 nsys profile \
--o /pscratch/sd/j/joshuaho/
+-o /pscratch/sd/j/joshuaho/full_stats_profile_1_gpu_batch_size_1028 \
 --capture-range=cudaProfilerApi \
---
+--duration=100 \
 --force-overwrite true \
 --trace=nvtx \
 --cudabacktrace=all \
-python scripts/training_script.py --config configs/
+python scripts/training_script.py --config configs/stats_all/ttH_CP_even_vs_odd.yaml --preshuffle --nocompile --lazy --restart --profile

 nsys profile \
--o /pscratch/sd/j/joshuaho/
+-o /pscratch/sd/j/joshuaho/full_stats_profile_1_gpu_batch_size_2048 \
 --capture-range=cudaProfilerApi \
---
+--duration=100 \
 --force-overwrite true \
 --trace=nvtx \
 --cudabacktrace=all \
-python scripts/training_script.py --config configs/
+python scripts/training_script.py --config configs/stats_all/ttH_CP_even_vs_odd_batch_size_2048.yaml --preshuffle --nocompile --lazy --restart --profile

 nsys profile \
--o /pscratch/sd/j/joshuaho/
+-o /pscratch/sd/j/joshuaho/full_stats_profile_1_gpu_batch_size_4096 \
 --capture-range=cudaProfilerApi \
---
---force-overwrite
+--duration=100 \
+--force-overwrite=true \
 --trace=nvtx \
 --cudabacktrace=all \
-python scripts/training_script.py --config configs/
+python scripts/training_script.py --config configs/stats_all/ttH_CP_even_vs_odd_batch_size_4096.yaml --preshuffle --nocompile --lazy --restart --profile

 nsys profile \
--o /pscratch/sd/j/joshuaho/
+-o /pscratch/sd/j/joshuaho/full_stats_profile_1_gpu_batch_size_8192 \
 --capture-range=cudaProfilerApi \
---
+--duration=100 \
 --force-overwrite true \
 --trace=nvtx \
 --cudabacktrace=all \
-python scripts/training_script.py --config configs/
+python scripts/training_script.py --config configs/stats_all/ttH_CP_even_vs_odd_batch_size_8192.yaml --preshuffle --nocompile --lazy --restart --profile
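These nsys invocations rely on --capture-range=cudaProfilerApi, so recording only happens between cudaProfilerStart() and cudaProfilerStop() (or until --duration expires). The application-side half of that contract lives in training_script.py's --profile path (see its diff below); a minimal sketch of the hooks, assuming PyTorch:

import torch

def run_epoch(epoch, profile=False):
    if profile and epoch == 0:
        torch.cuda.cudart().cudaProfilerStart()  # nsys begins capturing here
    torch.cuda.nvtx.range_push("Epoch Start")    # named range, visible with --trace=nvtx
    # ... forward, backward, optimizer step ...
    torch.cuda.nvtx.range_pop()                  # close the epoch range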
root_gnn_dgl/root_gnn_base/dataset.py
CHANGED
@@ -1,6 +1,7 @@
 from dgl.data import DGLDataset
 import dgl
-import
+import uproot
+import awkward as ak
 import torch
 import os
 import glob
@@ -14,7 +15,7 @@ def node_features_from_tree(ch, node_branch_names, node_branch_types, node_feature_scales):
         if node_type == 'single':
             lengths.append(1)
         elif node_type == 'vector':
-            lengths.append(len(
+            lengths.append(len(ch[branch]))
         else:
             print('Unknown node branch type: {}'.format(node_type))
     features = []
@@ -38,16 +39,14 @@ def node_features_from_tree(ch, node_branch_names, node_branch_types, node_feature_scales):
             this_type_ends_at = sum(lengths[:itype+1])
             feat.extend(features[0][this_type_starts_at:this_type_ends_at]*torch.cosh(features[1][this_type_starts_at:this_type_ends_at]))
         elif node_type == 'single':
-            feat.append(
+            feat.append(ch[branch])
         elif node_type == 'vector':
-            feat.extend(
+            feat.extend(ch[branch])
         itype += 1
     features.append(torch.tensor(feat))
     return torch.stack(features, dim=1) * node_feature_scales, lengths

 def full_connected_graph(n_nodes, self_loops=True):
-    senders = []
-    receivers = []
     senders = np.arange(n_nodes*n_nodes) // n_nodes
     receivers = np.arange(n_nodes*n_nodes) % n_nodes
     if not self_loops and n_nodes > 1:
@@ -59,19 +58,18 @@ def full_connected_graph(n_nodes, self_loops=True):
 def check_selection(ch, selection):
     var, cut, op = selection
     if op == '>':
-        return
+        return ch[var] > cut
     elif op == '<':
-        return
+        return ch[var] < cut
     elif op == '==':
-        return
-
+        return ch[var] == cut
+
 def check_selections(ch, selections):
     for selection in selections:
         if not check_selection(ch, selection):
             return False
     return True

-#Base dataset class for making graphs from ROOT ntuples.
 class RootDataset(DGLDataset):
     def __init__(self, name=None, raw_dir=None, save_dir=None, label=1, file_names = '*.root', node_branch_names=None, node_branch_types=None, node_feature_scales=None,
                  selections=[], save=True, tree_name = 'nominal_Loose', fold_var = 'eventNumber', weight_var = None, chunks = 1, process_chunks = None, global_features = [], tracking_info = [], **kwargs):
@@ -88,7 +86,7 @@ class RootDataset(DGLDataset):
         self.fold_var = fold_var
         self.tracking_info = tracking_info
         self.tracking_info.insert(0, fold_var)
-        if weight_var
+        if weight_var is None:
             weight_var = 1
         self.tracking_info.insert(1, weight_var)
         self.global_features = global_features
@@ -116,7 +114,7 @@ class RootDataset(DGLDataset):
             branches.append(feat)
         for selection in self.selections:
             branches.append(selection[0])
-        return branches
+        return list(set(branches)) # Remove duplicates

     def make_graph(self, ch):
         t1 = time.time()
@@ -129,7 +127,7 @@ class RootDataset(DGLDataset):
         self.times[0] += t2 - t1
         self.times[1] += t3 - t2
         return g
-
+
     def process(self):
         times = [0, 0, 0]
         oldtime = time.time()
@@ -139,21 +137,21 @@ class RootDataset(DGLDataset):
         self.files = []
         for file_name in self.file_names:
             self.files.extend(glob.glob(os.path.join(self.raw_dir, file_name)))
-
+        branches = self.get_list_of_branches()

-
-
+        # Read all files and concatenate arrays
+        arrays = []
         for file in self.files:
-
-
-
-
-
-
-
+            with uproot.open(file) as f:
+                arrays.append(f[self.tree_name].arrays(branches, library="ak"))
+        if len(arrays) == 0:
+            print('No files found in {}'.format(os.path.join(self.raw_dir, self.file_names)))
+            return
+        data = ak.concatenate(arrays, axis=0)
+        n_entries = len(data[branches[0]])
         newtime = time.time()
         times[0] += newtime - oldtime
-        chunks = np.array_split(np.arange(
+        chunks = np.array_split(np.arange(n_entries), self.chunks)
         chunks = [chunk for i, chunk in enumerate(chunks) if i in self.process_chunks]

         self.graph_chunks = []
@@ -170,28 +168,28 @@ class RootDataset(DGLDataset):
             globals = []
             for ientry in chunk:
                 if (ientry % 10000 == 0):
-                    print('Processing event {}/{}'.format(ientry,
-
+                    print('Processing event {}/{}'.format(ientry, n_entries), flush=True)
+                ch = {b: data[b][ientry] for b in branches}
                 passed = True
                 for selection in self.selections:
-                    if not check_selection(
+                    if not check_selection(ch, selection):
                         passed = False
                         continue
                 oldtime = newtime
                 newtime = time.time()
                 times[1] += newtime - oldtime
                 if passed:
-                    graphs.append(self.make_graph(
-                    labels.append(
+                    graphs.append(self.make_graph(ch))
+                    labels.append(self.label)
                     tracking.append(torch.zeros(len(self.tracking_info), dtype=torch.double))
                     globals.append(torch.zeros(len(self.global_features)))
                     for i_ti, tr_branch in enumerate(self.tracking_info):
                         if isinstance(tr_branch, str):
-                            tracking[-1][i_ti] =
+                            tracking[-1][i_ti] = ch[tr_branch]
                         else:
                             tracking[-1][i_ti] = tr_branch
                     for i_gl, gl_branch in enumerate(self.global_features):
-                        globals[-1][i_gl] =
+                        globals[-1][i_gl] = ch[gl_branch]
                 oldtime = newtime
                 newtime = time.time()
                 times[2] += newtime - oldtime
@@ -215,17 +213,8 @@ class RootDataset(DGLDataset):
             self.graphs = graphs
             self.save()
             return
-
-        for chunk in self.graph_chunks[1:]:
-            self.graphs += chunk
-        self.labels = torch.cat(self.label_chunks)
-        self.tracking = torch.cat(self.tracking_chunks)
-        self.global_features = torch.cat(self.global_chunks)
-        print('Time spent: Creating TChain: {}s, Getting Entries and Selection: {}s, Graph Creation: {}s'.format(*times))
-        print('Time spent in node_features_from_tree: {}s, full_connected_graph: {}s'.format(*self.times))
-
+
     def save(self):
-        """save the graph list and the labels"""
         if not self.save_to_disk:
             return
         graph_path = os.path.join(self.save_dir, self.name + '.bin')
@@ -244,7 +233,7 @@ class RootDataset(DGLDataset):
         graph_path = os.path.join(self.save_dir, self.name + '.bin')
         print(f'Saving dataset to {os.path.join(self.save_dir, self.name + f"_{self.process_chunks[chunk_id]}.bin")}')
         dgl.save_graphs(str(graph_path).replace('.bin', f'_{self.process_chunks[chunk_id]}.bin'), graphs, {'labels': labels, 'tracking': tracking, 'global': globals})
-
+
     def has_cache(self):
         print(f'Checking for cache of {self.name}')
         if not self.save_to_disk:
@@ -293,7 +282,7 @@ class RootDataset(DGLDataset):

     def __len__(self):
         return len(self.graphs)
-
+
 #Dataset with edge features added (deta, dphi, dR)
 class EdgeDataset(RootDataset):
     def make_graph(self, ch):
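The core of this change swaps PyROOT TChain iteration for a bulk uproot read: all requested branches are loaded into awkward arrays once, and each event is then viewed as a plain dict, which is what make_graph(ch) and check_selection(ch, ...) receive. A standalone sketch of that access pattern; the file, tree, and branch names are taken from the configs above and act as placeholders here:

import uproot
import awkward as ak

branches = ["jet_pt", "jet_eta", "jet_phi", "MET_met"]
arrays = []
for path in ["ttH_NLO.root"]:  # placeholder file list
    with uproot.open(path) as f:
        arrays.append(f["output"].arrays(branches, library="ak"))
data = ak.concatenate(arrays, axis=0)

# One event becomes a dict of branch -> value, matching the ch argument:
ch = {b: data[b][0] for b in branches}
print(len(ch["jet_pt"]))  # e.g. the number of jets in the first event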
root_gnn_dgl/scripts/inference.py
CHANGED
@@ -135,7 +135,6 @@ def main():

     import time
     start = time.time()
-    import ROOT
     import torch
     from array import array
     import numpy as np
@@ -247,55 +246,41 @@ def main():
         all_labels[branch] = labels
         all_tracking[branch] = tracking_info

-    if args.write:
-        from ROOT import std
-        # Open the original ROOT file
-        infile = ROOT.TFile.Open(args.target)
-        tree = infile.Get(dset_config['args']['tree_name'])
-
-
-
-        #
-
-
-        #
-
-
-        #
-
+
+    if args.write:
+        import uproot
+        import awkward as ak
+
+        # Open the original ROOT file and get the tree
+        infile = uproot.open(args.target)
+        tree = infile[dset_config['args']['tree_name']]
+
+        # Read the original tree as an awkward array
+        original_data = tree.arrays(library="ak")
+
+        # Prepare new branches as dicts of arrays
+        new_branches = {}
+        n_entries = len(original_data)
         for branch, scores in all_scores.items():
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-            for value in scores[i]:
-                branch_data.push_back(float(value))
-
-            outtree.Fill()
-
-        # Write the modified tree to the new file
-        print(f'Writing to file {args.destination}')
-        print(f'Input entries: {tree.GetEntries()}, Output entries: {outtree.GetEntries()}')
-        print(f'Wrote scores to {args.branch_name}')
-        outtree.Write()
-        outfile.Close()
-        infile.Close()
+            # Ensure the scores array is the right length
+            scores = np.asarray(scores)
+            if scores.shape[0] != n_entries:
+                raise ValueError(f"Branch '{branch}' has {scores.shape[0]} entries, but tree has {n_entries}")
+            new_branches[branch] = scores
+
+        # Merge all arrays (original + new branches)
+        # Convert awkward to dict of numpy arrays for uproot
+        out_dict = {k: np.asarray(v) for k, v in ak.to_numpy(original_data).items()}
+        out_dict.update(new_branches)
+
+        # Write to new ROOT file
+        os.makedirs(os.path.split(args.destination)[0], exist_ok=True)
+        with uproot.recreate(args.destination) as outfile:
+            outfile.mktree(dset_config['args']['tree_name'], {k: v.dtype for k, v in out_dict.items()})
+            outfile[dset_config['args']['tree_name']].extend(out_dict)
+
+        print(f"Wrote new ROOT file {args.destination} with new branches {list(new_branches.keys())}")
+
     else:
         os.makedirs(os.path.split(args.destination)[0], exist_ok=True)
         np.savez(args.destination, scores=all_scores, labels=all_labels, tracking_info=all_tracking)
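The rewritten write path uses uproot's writing API instead of PyROOT: mktree declares the output branches from numpy dtypes, and extend appends the data in one go (this assumes flat, non-jagged branches). A self-contained sketch with placeholder branch names:

import numpy as np
import uproot

out_dict = {
    "eventNumber": np.arange(100, dtype=np.int64),
    "gnn_score": np.random.default_rng(0).random(100),
}

with uproot.recreate("scores.root") as outfile:
    # Declare the tree schema from the array dtypes, then fill it.
    outfile.mktree("output", {k: v.dtype for k, v in out_dict.items()})
    outfile["output"].extend(out_dict)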
root_gnn_dgl/scripts/training_script.py
CHANGED
@@ -264,13 +264,9 @@ def train(train_loaders, test_loaders, model, device, config, args, rank):
         start = time.time()
         run = start
         if (args.profile):
-            if (epoch ==
+            if (epoch == 0):
                 torch.cuda.cudart().cudaProfilerStart()
-            if (epoch == 3):
-                print("Done profiling")
-                torch.cuda.cudart().cudaProfilerStop()
             torch.cuda.nvtx.range_push("Epoch Start")
-            print("executed push")

         if (args.multigpu or args.multinode):
             dist.barrier()
@@ -608,7 +604,6 @@ def train(train_loaders, test_loaders, model, device, config, args, rank):

         if (args.profile):
             torch.cuda.nvtx.range_pop() # pop epoch
-            print("executed pop")

         print(f"Load: {cumulative_times[0]:.4f} s")
         print(f"Batch: {cumulative_times[1]:.4f} s")
root_gnn_dgl/setup/Dockerfile
ADDED
@@ -0,0 +1,25 @@
+FROM nvcr.io/nvidia/dgl:25.05-py3
+
+WORKDIR /global/cfs/projectdirs/atlas/joshua/GNN4Colliders
+
+LABEL maintainer.name="Joshua Ho"
+LABEL maintainer.email="ho22joshua@berkeley.edu"
+
+ENV LANG=C.UTF-8
+
+# Install system dependencies: vim, OpenMPI, and build tools
+RUN apt-get update -qq \
+    && apt-get install -y --no-install-recommends \
+        wget lsb-release gnupg software-properties-common \
+        vim \
+        g++-11 gcc-11 libstdc++-11-dev \
+        openmpi-bin openmpi-common libopenmpi-dev \
+    && rm -rf /var/lib/apt/lists/*
+
+# Install Python packages: mpi4py and jupyter
+RUN pip install --no-cache-dir mpi4py jupyter uproot
+
+# (Optional) Expose Jupyter port
+EXPOSE 8888
+
+
root_gnn_dgl/setup/build_image.sh
ADDED
@@ -0,0 +1,4 @@
+tag=$1
+echo $tag
+podman-hpc build -t joshuaho/pytorch:$tag --platform linux/amd64 .
+podman-hpc migrate joshuaho/pytorch:$tag