added model and training scripts

Browse files

Files changed (16) hide show

LICENSE +21 -0
models/GCN.py +1944 -0
models/__pycache__/GCN.cpython-38.pyc +0 -0
models/__pycache__/loss.cpython-38.pyc +0 -0
models/loss.py +311 -0
root_gnn_base/batched_dataset.py +190 -0
root_gnn_base/custom_scheduler.py +565 -0
root_gnn_base/dataset.py +685 -0
root_gnn_base/photon_ID_dataset.py +44 -0
root_gnn_base/similarity.py +158 -0
root_gnn_base/uproot_dataset.py +54 -0
root_gnn_base/utils.py +307 -0
scripts/find_free_port.py +12 -0
scripts/inference.py +289 -0
scripts/prep_data.py +43 -0
scripts/training_script.py +755 -0

LICENSE ADDED Viewed

	@@ -0,0 +1,21 @@

+MIT License
+Copyright (c) 2025 JO5HO4
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

models/GCN.py ADDED Viewed

	@@ -0,0 +1,1944 @@

+import dgl
+import dgl.nn as dglnn
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import sys
+import os
+file_path = os.getcwd()
+sys.path.append(file_path)
+import root_gnn_base.dataset as datasets
+from root_gnn_base import utils
+import gc
+def Make_SLP(in_size, out_size, activation = nn.ReLU, dropout = 0):
+    layers = []
+    layers.append(nn.Linear(in_size, out_size))
+    layers.append(activation())
+    layers.append(nn.Dropout(dropout))
+    return layers
+def Make_MLP(in_size, hid_size, out_size, n_layers, activation = nn.ReLU, dropout = 0):
+    layers = []
+    if n_layers > 1:
+        layers += Make_SLP(in_size, hid_size, activation, dropout)
+        for i in range(n_layers-2):
+            layers += Make_SLP(hid_size, hid_size, activation, dropout)
+        layers += Make_SLP(hid_size, out_size, activation, dropout)
+    else:
+        layers += Make_SLP(in_size, out_size, activation, dropout)
+    layers.append(torch.nn.LayerNorm(out_size))
+    return nn.Sequential(*layers)
+class MLP(nn.Module):
+    def __init__(self, in_size, hid_size, out_size, n_layers, activation = nn.ReLU, dropout = 0, **kwargs):
+        super().__init__()
+        print(f'Unused args while creating MLP: {kwargs}')
+        self.layers = Make_MLP(in_size, hid_size, hid_size, n_layers-1, activation, dropout)
+        self.linear = nn.Linear(hid_size, out_size)
+    def forward(self, x):
+        return self.linear(self.layers(x))
+def broadcast_global_to_nodes(g, globals):
+    boundaries = g.batch_num_nodes()
+    return torch.repeat_interleave(globals, boundaries, dim=0)
+def broadcast_global_to_edges(g, globals):
+    boundaries = g.batch_num_edges()
+    return torch.repeat_interleave(globals, boundaries, dim=0)
+def copy_v(edges):
+    return {'m_v': edges.dst['h']}
+def partial_reset(model : nn.Module):
+    in_size = len(model.classify.weight[0])
+    out_size = len(model.classify.weight)
+    device = next(model.classify.parameters()).device
+    torch.manual_seed(2)
+    model.classify = nn.Linear(in_size, out_size)
+    model.classify.to(device)
+    print(model.classify.weight)
+def print_model(model: nn.Module):
+    print(model)
+def print_mlp(layer):
+    for l in layer.children():
+        if isinstance(l, nn.Linear):
+            print(l.state_dict())
+        else:
+            print(l)
+# This function returns a model with the whole GNN completely reset
+def full_reset(model : nn.Module):
+    mlp_list = [model.node_encoder, model.edge_encoder, model.global_encoder,
+                model.node_update, model.edge_update, model.global_update,
+                model.global_decoder]
+    for mlp in mlp_list:
+        for layer in mlp.children():
+            if hasattr(layer, 'reset_parameters'):
+                layer.reset_parameters()
+    partial_reset(model)
+class GCN(nn.Module):
+    def __init__(self, in_size, hid_size, out_size, n_layers, **kwargs):
+        super().__init__()
+        print(f'Unused args while creating GCN: {kwargs}')
+        self.n_layers = n_layers
+        self.layers = nn.ModuleList()
+        # two-layer GCN
+        self.layers.extend(
+            [nn.Linear(in_size, hid_size),] +
+            [nn.Linear(hid_size, hid_size) for i in range(n_layers)] +
+            [dglnn.GraphConv(hid_size, hid_size) for i in range(n_layers)] +
+            [nn.Linear(hid_size, hid_size) for i in range(n_layers)]
+        )
+        self.classify = nn.Linear(hid_size, out_size)
+        #self.dropout = nn.Dropout(0.05)
+    def forward(self, g):
+        h = g.ndata['features']
+        for i, layer in enumerate(self.layers):
+            if i >= self.n_layers + 1 and i < self.n_layers * 2 + 1:
+                h = layer(g, h)
+            else:
+                h = layer(h)
+            h = F.relu(h)
+        with g.local_scope():
+            g.ndata['h'] = h
+            # Calculate graph representation by average readout.
+            hg = dgl.mean_nodes(g, 'h')
+            return self.classify(hg)
+class GCN_global(nn.Module):
+    def __init__(self, in_size, hid_size=4, out_size=1, n_layers=1, dropout=0, **kwargs):
+        super().__init__()
+        print(f'Unused args while creating GCN: {kwargs}')
+        self.n_layers = n_layers
+        #encoder
+        self.node_encoder = Make_MLP(in_size, hid_size, hid_size, n_layers, dropout=dropout)
+        self.global_encoder = Make_MLP(1, hid_size, hid_size, n_layers, dropout=dropout)
+        #GCN
+        self.node_update = Make_MLP(hid_size, hid_size, hid_size, n_layers, dropout=dropout)
+        self.global_update = Make_MLP(2*hid_size, hid_size, hid_size, n_layers, dropout=dropout)
+        self.conv = dglnn.GraphConv(hid_size, hid_size)
+        #decoder
+        self.global_decoder = Make_MLP(hid_size, hid_size, hid_size, n_layers, dropout=dropout)
+        self.classify = nn.Linear(hid_size, out_size)
+    def forward(self, g):
+        h = self.node_encoder(g.ndata['features'])
+        h_global = self.global_encoder(g.batch_num_nodes()[:, None].to(torch.float))
+        for i in range(self.n_layers):
+            h = self.node_update(h)
+            h = self.conv(g, h)
+            g.ndata['h'] = h
+            h_global = self.global_update(torch.cat((h_global, dgl.mean_nodes(g, 'h')), dim = 1))
+        h_global = self.global_decoder(h_global)
+        return self.classify(h_global)
+class GCN_global_2way(nn.Module):
+    def __init__(self, in_size, hid_size=4, out_size=1, n_layers=1, dropout=0, **kwargs):
+        super().__init__()
+        print(f'Unused args while creating GCN: {kwargs}')
+        self.n_layers = n_layers
+        #encoder
+        self.node_encoder = Make_MLP(in_size, hid_size, hid_size, n_layers, dropout=dropout)
+        self.global_encoder = Make_MLP(1, hid_size, hid_size, n_layers, dropout=dropout)
+        #GCN
+        self.node_update = Make_MLP(2*hid_size, hid_size, hid_size, n_layers, dropout=dropout)
+        self.global_update = Make_MLP(2*hid_size, hid_size, hid_size, n_layers, dropout=dropout)
+        self.conv = dglnn.GraphConv(hid_size, hid_size)
+        #decoder
+        self.global_decoder = Make_MLP(hid_size, hid_size, hid_size, n_layers, dropout=dropout)
+        self.classify = nn.Linear(hid_size, out_size)
+    def forward(self, g):
+        h = self.node_encoder(g.ndata['features'])
+        h_global = self.global_encoder(g.batch_num_nodes()[:, None].to(torch.float))
+        for i in range(self.n_layers):
+            h = self.node_update(torch.cat((h, broadcast_global_to_nodes(g, h_global)), dim = 1))
+            h = self.conv(g, h)
+            g.ndata['h'] = h
+            h_global = self.global_update(torch.cat((h_global, dgl.mean_nodes(g, 'h')), dim = 1))
+        h_global = self.global_decoder(h_global)
+        return self.classify(h_global)
+class Edge_Network(nn.Module):
+    def __init__(self, sample_graph, sample_global, hid_size, out_size, n_layers, n_proc_steps, dropout=0, **kwargs):
+        super().__init__()
+        print(f'Unused args while creating GCN: {kwargs}')
+        self.n_layers = n_layers
+        self.n_proc_steps = n_proc_steps
+        self.layers = nn.ModuleList()
+        if (len(sample_global) == 0):
+            self.has_global = False
+        else:
+            self.has_global = sample_global.shape[1] != 0
+        gl_size = sample_global.shape[1] if self.has_global else 1
+        #encoder
+        self.node_encoder = Make_MLP(sample_graph.ndata['features'].shape[1], hid_size, hid_size, n_layers, dropout=dropout)
+        self.edge_encoder = Make_MLP(sample_graph.edata['features'].shape[1], hid_size, hid_size, n_layers, dropout=dropout)
+        self.global_encoder = Make_MLP(gl_size, hid_size, hid_size, n_layers, dropout=dropout)
+        #GNN
+        self.node_update = Make_MLP(3*hid_size, hid_size, hid_size, n_layers, dropout=dropout)
+        self.edge_update = Make_MLP(4*hid_size, hid_size, hid_size, n_layers, dropout=dropout)
+        self.global_update = Make_MLP(3*hid_size, hid_size, hid_size, n_layers, dropout=dropout)
+        #decoder
+        self.global_decoder = Make_MLP(hid_size, hid_size, hid_size, n_layers, dropout=dropout)
+        self.classify = nn.Linear(hid_size, out_size)
+    def forward(self, g, global_feats):
+        h = self.node_encoder(g.ndata['features'])
+        e = self.edge_encoder(g.edata['features'])
+        g.ndata['h'] = h
+        g.edata['e'] = e
+        if not self.has_global:
+            global_feats = g.batch_num_nodes()[:, None].to(torch.float)
+        batch_num_nodes = None
+        sum_weights = None
+        if "w" in g.ndata:
+            batch_indices = g.batch_num_nodes()
+            # Find non-zero rows (non-padded nodes)
+            non_padded_nodes_mask = torch.any(g.ndata['features'] != 0, dim=1)
+            # Split the mask according to the batch indices
+            batch_num_nodes = []
+            start_idx = 0
+            for num_nodes in batch_indices:
+                end_idx = start_idx + num_nodes
+                non_padded_count = non_padded_nodes_mask[start_idx:end_idx].sum().item()
+                batch_num_nodes.append(non_padded_count)
+                start_idx = end_idx
+            batch_num_nodes = torch.tensor(batch_num_nodes, device = g.ndata['features'].device)
+            sum_weights = batch_num_nodes[:, None].repeat(1, 64)
+            global_feats = batch_num_nodes[:, None].to(torch.float)
+        h_global = self.global_encoder(global_feats)
+        for i in range(self.n_proc_steps):
+            g.apply_edges(dgl.function.copy_u('h', 'm_u'))
+            g.apply_edges(copy_v)
+            g.edata['e'] = self.edge_update(torch.cat((g.edata['e'], g.edata['m_u'], g.edata['m_v'], broadcast_global_to_edges(g, h_global)), dim = 1))
+            g.update_all(dgl.function.copy_e('e', 'm'), dgl.function.sum('m', 'h_e'))
+            g.ndata['h'] = self.node_update(torch.cat((g.ndata['h'], g.ndata['h_e'], broadcast_global_to_nodes(g, h_global)), dim = 1))
+            if "w" in g.ndata:
+                mean_nodes = dgl.sum_nodes(g, 'h', 'w') / sum_weights
+                h_global = self.global_update(torch.cat((h_global, mean_nodes, dgl.mean_edges(g, 'e')), dim = 1))
+            else:
+                h_global = self.global_update(torch.cat((h_global, dgl.mean_nodes(g, 'h'), dgl.mean_edges(g, 'e')), dim = 1))
+        h_global = self.global_decoder(h_global)
+        return self.classify(h_global)
+    def representation(self, g, global_feats):
+        h = self.node_encoder(g.ndata['features'])
+        e = self.edge_encoder(g.edata['features'])
+        g.ndata['h'] = h
+        g.edata['e'] = e
+        if not self.has_global:
+            global_feats = g.batch_num_nodes()[:, None].to(torch.float)
+        batch_num_nodes = None
+        sum_weights = None
+        if "w" in g.ndata:
+            batch_indices = g.batch_num_nodes()
+            # Find non-zero rows (non-padded nodes)
+            non_padded_nodes_mask = torch.any(g.ndata['features'] != 0, dim=1)
+            # Split the mask according to the batch indices
+            batch_num_nodes = []
+            start_idx = 0
+            for num_nodes in batch_indices:
+                end_idx = start_idx + num_nodes
+                non_padded_count = non_padded_nodes_mask[start_idx:end_idx].sum().item()
+                batch_num_nodes.append(non_padded_count)
+                start_idx = end_idx
+            batch_num_nodes = torch.tensor(batch_num_nodes, device = g.ndata['features'].device)
+            sum_weights = batch_num_nodes[:, None].repeat(1, 64)
+            global_feats = batch_num_nodes[:, None].to(torch.float)
+        h_global = self.global_encoder(global_feats)
+        for i in range(self.n_proc_steps):
+            g.apply_edges(dgl.function.copy_u('h', 'm_u'))
+            g.apply_edges(copy_v)
+            g.edata['e'] = self.edge_update(torch.cat((g.edata['e'], g.edata['m_u'], g.edata['m_v'], broadcast_global_to_edges(g, h_global)), dim = 1))
+            g.update_all(dgl.function.copy_e('e', 'm'), dgl.function.sum('m', 'h_e'))
+            g.ndata['h'] = self.node_update(torch.cat((g.ndata['h'], g.ndata['h_e'], broadcast_global_to_nodes(g, h_global)), dim = 1))
+            if "w" in g.ndata:
+                mean_nodes = dgl.sum_nodes(g, 'h', 'w') / sum_weights
+                h_global = self.global_update(torch.cat((h_global, mean_nodes, dgl.mean_edges(g, 'e')), dim = 1))
+            else:
+                h_global = self.global_update(torch.cat((h_global, dgl.mean_nodes(g, 'h'), dgl.mean_edges(g, 'e')), dim = 1))
+        before_global_decoder = h_global
+        after_global_decoder = self.global_decoder(before_global_decoder)
+        after_classify = self.classify(after_global_decoder)
+        return before_global_decoder, after_global_decoder, after_classify
+    def __str__(self):
+        layer_names = ["node_encoder", "edge_encoder", "global_encoder",
+                  "node_update", "edge_update", "global_update", "global_decoder"]
+        layers = [self.node_encoder, self.edge_encoder, self.global_encoder,
+                  self.node_update, self.edge_update, self.global_update, self.global_decoder]
+        for i in range(len(layers)):
+            print(layer_names[i])
+            for layer in layers[i].children():
+                if isinstance(layer, nn.Linear):
+                    print(layer.state_dict())
+        print("classify")
+        print(self.classify.weight)
+        return ""
+class Transferred_Learning(nn.Module):
+    def __init__(self, pretraining_path, pretraining_model, sample_graph, sample_global, hid_size, out_size, n_layers, n_proc_steps, dropout=0, **kwargs):
+        super().__init__()
+        print(f'Unused args while creating GCN: {kwargs}')
+        self.n_layers = n_layers
+        self.n_proc_steps = n_proc_steps
+        self.layers = nn.ModuleList()
+        if (len(sample_global) == 0):
+            self.has_global = False
+        else:
+            self.has_global = sample_global.shape[1] != 0
+        gl_size = sample_global.shape[1] if self.has_global else 1
+        self.pretrained_model = utils.buildFromConfig(pretraining_model, {'sample_graph': sample_graph, 'sample_global': sample_global})
+        checkpoint = torch.load(pretraining_path)
+        self.pretrained_model.load_state_dict(checkpoint['model_state_dict'])
+        pretrained_layers = list(self.pretrained_model.children())
+        pretrained_layers = pretrained_layers[:-1]
+        self.pretrained_model = nn.Sequential(*pretrained_layers)
+        # Freeze Weights
+        for param in self.pretrained_model.parameters():
+            param.requires_grad = False  # Freeze all layers
+        self.global_decoder = Make_MLP(pretraining_model['args']['hid_size'], hid_size, hid_size, n_layers, dropout=dropout)
+        self.classify = nn.Linear(hid_size, out_size)
+    def TL_node_encoder(self, x):
+        for layer in self.pretrained_model[1]:
+            x = layer(x)
+        return x
+    def TL_edge_encoder(self, x):
+        for layer in self.pretrained_model[2]:
+            x = layer(x)
+        return x
+    def TL_global_encoder(self, x):
+        for layer in self.pretrained_model[3]:
+            x = layer(x)
+        return x
+    def TL_node_update(self, x):
+        for layer in self.pretrained_model[4]:
+            x = layer(x)
+        return x
+    def TL_edge_update(self, x):
+        for layer in self.pretrained_model[5]:
+            x = layer(x)
+        return x
+    def TL_global_update(self, x):
+        for layer in self.pretrained_model[6]:
+            x = layer(x)
+        return x
+    def TL_global_decoder(self, x):
+        for layer in self.pretrained_model[7]:
+            x = layer(x)
+        return x
+    def forward(self, g, global_feats):
+        h = self.TL_node_encoder(g.ndata['features'])
+        e = self.TL_edge_encoder(g.edata['features'])
+        g.ndata['h'] = h
+        g.edata['e'] = e
+        if not self.has_global:
+            global_feats = g.batch_num_nodes()[:, None].to(torch.float)
+        h_global = self.TL_global_encoder(global_feats)
+        for i in range(self.n_proc_steps):
+            g.apply_edges(dgl.function.copy_u('h', 'm_u'))
+            g.apply_edges(copy_v)
+            g.edata['e'] = self.TL_edge_update(torch.cat((g.edata['e'], g.edata['m_u'], g.edata['m_v'], broadcast_global_to_edges(g, h_global)), dim = 1))
+            g.update_all(dgl.function.copy_e('e', 'm'), dgl.function.sum('m', 'h_e'))
+            g.ndata['h'] = self.TL_node_update(torch.cat((g.ndata['h'], g.ndata['h_e'], broadcast_global_to_nodes(g, h_global)), dim = 1))
+            h_global = self.TL_global_update(torch.cat((h_global, dgl.mean_nodes(g, 'h'), dgl.mean_edges(g, 'e')), dim = 1))
+        h_global = self.TL_global_decoder(h_global)
+        return self.classify(self.global_decoder(h_global))
+class Transferred_Learning_Graph(nn.Module):
+    def __init__(self, pretraining_path, pretraining_model, sample_graph, sample_global, hid_size, out_size, n_layers, n_proc_steps, additional_proc_steps=1, dropout=0, **kwargs):
+        super().__init__()
+        print(f'Unused args while creating GCN: {kwargs}')
+        self.n_layers = n_layers
+        self.n_proc_steps = n_proc_steps
+        self.layers = nn.ModuleList()
+        if (len(sample_global) == 0):
+            self.has_global = False
+        else:
+            self.has_global = sample_global.shape[1] != 0
+        gl_size = sample_global.shape[1] if self.has_global else 1
+        self.pretrained_model = utils.buildFromConfig(pretraining_model, {'sample_graph': sample_graph, 'sample_global': sample_global})
+        checkpoint = torch.load(pretraining_path)
+        self.pretrained_model.load_state_dict(checkpoint['model_state_dict'])
+        pretrained_layers = list(self.pretrained_model.children())
+        pretrained_layers = pretrained_layers[:-1]
+        self.pretrained_model = nn.Sequential(*pretrained_layers)
+        self.additional_proc_steps = additional_proc_steps
+        # Freeze Weights
+        for param in self.pretrained_model.parameters():
+            param.requires_grad = False  # Freeze all layers
+        #GNN
+        self.node_update = Make_MLP(3*hid_size, hid_size, hid_size, n_layers, dropout=dropout)
+        self.edge_update = Make_MLP(4*hid_size, hid_size, hid_size, n_layers, dropout=dropout)
+        self.global_update = Make_MLP(3*hid_size, hid_size, hid_size, n_layers, dropout=dropout)
+        #decoder
+        self.global_decoder = Make_MLP(hid_size, hid_size, hid_size, n_layers, dropout=dropout)
+        self.classify = nn.Linear(hid_size, out_size)
+    def TL_node_encoder(self, x):
+        for layer in self.pretrained_model[1]:
+            x = layer(x)
+        return x
+    def TL_edge_encoder(self, x):
+        for layer in self.pretrained_model[2]:
+            x = layer(x)
+        return x
+    def TL_global_encoder(self, x):
+        for layer in self.pretrained_model[3]:
+            x = layer(x)
+        return x
+    def TL_node_update(self, x):
+        for layer in self.pretrained_model[4]:
+            x = layer(x)
+        return x
+    def TL_edge_update(self, x):
+        for layer in self.pretrained_model[5]:
+            x = layer(x)
+        return x
+    def TL_global_update(self, x):
+        for layer in self.pretrained_model[6]:
+            x = layer(x)
+        return x
+    def forward(self, g, global_feats):
+        h = self.TL_node_encoder(g.ndata['features'])
+        e = self.TL_edge_encoder(g.edata['features'])
+        g.ndata['h'] = h
+        g.edata['e'] = e
+        if not self.has_global:
+            global_feats = g.batch_num_nodes()[:, None].to(torch.float)
+        h_global = self.TL_global_encoder(global_feats)
+        for i in range(self.n_proc_steps):
+            g.apply_edges(dgl.function.copy_u('h', 'm_u'))
+            g.apply_edges(copy_v)
+            g.edata['e'] = self.TL_edge_update(torch.cat((g.edata['e'], g.edata['m_u'], g.edata['m_v'], broadcast_global_to_edges(g, h_global)), dim = 1))
+            g.update_all(dgl.function.copy_e('e', 'm'), dgl.function.sum('m', 'h_e'))
+            g.ndata['h'] = self.TL_node_update(torch.cat((g.ndata['h'], g.ndata['h_e'], broadcast_global_to_nodes(g, h_global)), dim = 1))
+            h_global = self.TL_global_update(torch.cat((h_global, dgl.mean_nodes(g, 'h'), dgl.mean_edges(g, 'e')), dim = 1))
+        for j in range(self.additional_proc_steps):
+            g.apply_edges(dgl.function.copy_u('h', 'm_u'))
+            g.apply_edges(copy_v)
+            g.edata['e'] = self.edge_update(torch.cat((g.edata['e'], g.edata['m_u'], g.edata['m_v'], broadcast_global_to_edges(g, h_global)), dim = 1))
+            g.update_all(dgl.function.copy_e('e', 'm'), dgl.function.sum('m', 'h_e'))
+            g.ndata['h'] = self.node_update(torch.cat((g.ndata['h'], g.ndata['h_e'], broadcast_global_to_nodes(g, h_global)), dim = 1))
+            h_global = self.global_update(torch.cat((h_global, dgl.mean_nodes(g, 'h'), dgl.mean_edges(g, 'e')), dim = 1))
+        h_global = self.global_decoder(h_global)
+        return self.classify(h_global)
+class Transferred_Learning_Parallel(nn.Module):
+    def __init__(self, pretraining_path, pretraining_model, sample_graph, sample_global, hid_size, out_size, n_layers, n_proc_steps, dropout=0, **kwargs):
+        super().__init__()
+        print(f'Unused args while creating GCN: {kwargs}')
+        self.n_layers = n_layers
+        self.n_proc_steps = n_proc_steps
+        self.layers = nn.ModuleList()
+        self.has_global = sample_global.shape[1] != 0
+        gl_size = sample_global.shape[1] if self.has_global else 1
+        self.pretrained_model = utils.buildFromConfig(pretraining_model, {'sample_graph': sample_graph, 'sample_global': sample_global})
+        checkpoint = torch.load(pretraining_path)
+        self.pretrained_model.load_state_dict(checkpoint['model_state_dict'])
+        pretrained_layers = list(self.pretrained_model.children())
+        pretrained_layers = pretrained_layers[:-1]
+        self.pretrained_model = nn.Sequential(*pretrained_layers)
+        # Freeze Weights
+        for param in self.pretrained_model.parameters():
+            param.requires_grad = False  # Freeze all layers
+        #encoder
+        self.node_encoder = Make_MLP(sample_graph.ndata['features'].shape[1], hid_size, hid_size, n_layers, dropout=dropout)
+        self.edge_encoder = Make_MLP(sample_graph.edata['features'].shape[1], hid_size, hid_size, n_layers, dropout=dropout)
+        self.global_encoder = Make_MLP(gl_size, hid_size, hid_size, n_layers, dropout=dropout)
+        #GNN
+        self.node_update = Make_MLP(3*hid_size, hid_size, hid_size, n_layers, dropout=dropout)
+        self.edge_update = Make_MLP(4*hid_size, hid_size, hid_size, n_layers, dropout=dropout)
+        self.global_update = Make_MLP(3*hid_size, hid_size, hid_size, n_layers, dropout=dropout)
+        #decoder
+        self.global_decoder = Make_MLP(hid_size, hid_size, hid_size, n_layers, dropout=dropout)
+        self.classify = nn.Linear(hid_size + pretraining_model['args']['hid_size'], out_size)
+    def TL_node_encoder(self, x):
+        for layer in self.pretrained_model[1]:
+            x = layer(x)
+        return x
+    def TL_edge_encoder(self, x):
+        for layer in self.pretrained_model[2]:
+            x = layer(x)
+        return x
+    def TL_global_encoder(self, x):
+        for layer in self.pretrained_model[3]:
+            x = layer(x)
+        return x
+    def TL_node_update(self, x):
+        for layer in self.pretrained_model[4]:
+            x = layer(x)
+        return x
+    def TL_edge_update(self, x):
+        for layer in self.pretrained_model[5]:
+            x = layer(x)
+        return x
+    def TL_global_update(self, x):
+        for layer in self.pretrained_model[6]:
+            x = layer(x)
+        return x
+    def TL_global_decoder(self, x):
+        for layer in self.pretrained_model[7]:
+            x = layer(x)
+        return x
+    def Pretrained_Output(self, g):
+        h = self.TL_node_encoder(g.ndata['features'])
+        e = self.TL_edge_encoder(g.edata['features'])
+        g.ndata['h'] = h
+        g.edata['e'] = e
+        if not self.has_global:
+            global_feats = g.batch_num_nodes()[:, None].to(torch.float)
+        h_global = self.TL_global_encoder(global_feats)
+        for i in range(self.n_proc_steps):
+            g.apply_edges(dgl.function.copy_u('h', 'm_u'))
+            g.apply_edges(copy_v)
+            g.edata['e'] = self.TL_edge_update(torch.cat((g.edata['e'], g.edata['m_u'], g.edata['m_v'], broadcast_global_to_edges(g, h_global)), dim = 1))
+            g.update_all(dgl.function.copy_e('e', 'm'), dgl.function.sum('m', 'h_e'))
+            g.ndata['h'] = self.TL_node_update(torch.cat((g.ndata['h'], g.ndata['h_e'], broadcast_global_to_nodes(g, h_global)), dim = 1))
+            h_global = self.TL_global_update(torch.cat((h_global, dgl.mean_nodes(g, 'h'), dgl.mean_edges(g, 'e')), dim = 1))
+        h_global = self.TL_global_decoder(h_global)
+        return h_global
+    def forward(self, g, global_feats):
+        pretrained_global = self.Pretrained_Output(g.clone())
+        h = self.node_encoder(g.ndata['features'])
+        e = self.edge_encoder(g.edata['features'])
+        g.ndata['h'] = h
+        g.edata['e'] = e
+        if not self.has_global:
+            global_feats = g.batch_num_nodes()[:, None].to(torch.float)
+        h_global = self.global_encoder(global_feats)
+        for i in range(self.n_proc_steps):
+            g.apply_edges(dgl.function.copy_u('h', 'm_u'))
+            g.apply_edges(copy_v)
+            g.edata['e'] = self.edge_update(torch.cat((g.edata['e'], g.edata['m_u'], g.edata['m_v'], broadcast_global_to_edges(g, h_global)), dim = 1))
+            g.update_all(dgl.function.copy_e('e', 'm'), dgl.function.sum('m', 'h_e'))
+            g.ndata['h'] = self.node_update(torch.cat((g.ndata['h'], g.ndata['h_e'], broadcast_global_to_nodes(g, h_global)), dim = 1))
+            h_global = self.global_update(torch.cat((h_global, dgl.mean_nodes(g, 'h'), dgl.mean_edges(g, 'e')), dim = 1))
+        h_global = self.global_decoder(h_global)
+        return self.classify(torch.cat((pretrained_global, h_global), dim = 1))
+class Transferred_Learning_Sequential(nn.Module):
+    def __init__(self, pretraining_path, pretraining_model, sample_graph, sample_global, hid_size, out_size, n_layers, n_proc_steps, dropout=0, **kwargs):
+        super().__init__()
+        print(f'Unused args while creating GCN: {kwargs}')
+        self.n_layers = n_layers
+        self.n_proc_steps = n_proc_steps
+        self.layers = nn.ModuleList()
+        self.has_global = sample_global.shape[1] != 0
+        gl_size = sample_global.shape[1] if self.has_global else 1
+        self.pretrained_model = utils.buildFromConfig(pretraining_model, {'sample_graph': sample_graph, 'sample_global': sample_global})
+        checkpoint = torch.load(pretraining_path)
+        self.pretrained_model.load_state_dict(checkpoint['model_state_dict'])
+        pretrained_layers = list(self.pretrained_model.children())
+        pretrained_layers = pretrained_layers[:-1]
+        self.pretrained_model = nn.Sequential(*pretrained_layers)
+        # Freeze Weights
+        for param in self.pretrained_model.parameters():
+            param.requires_grad = False  # Freeze all layers
+        #encoder
+        self.mlp = Make_MLP(pretraining_model['args']['hid_size'], hid_size, hid_size, n_layers, dropout=dropout)
+        self.classify = nn.Linear(hid_size, out_size)
+    def TL_node_encoder(self, x):
+        for layer in self.pretrained_model[1]:
+            x = layer(x)
+        return x
+    def TL_edge_encoder(self, x):
+        for layer in self.pretrained_model[2]:
+            x = layer(x)
+        return x
+    def TL_global_encoder(self, x):
+        for layer in self.pretrained_model[3]:
+            x = layer(x)
+        return x
+    def TL_node_update(self, x):
+        for layer in self.pretrained_model[4]:
+            x = layer(x)
+        return x
+    def TL_edge_update(self, x):
+        for layer in self.pretrained_model[5]:
+            x = layer(x)
+        return x
+    def TL_global_update(self, x):
+        for layer in self.pretrained_model[6]:
+            x = layer(x)
+        return x
+    def TL_global_decoder(self, x):
+        for layer in self.pretrained_model[7]:
+            x = layer(x)
+        return x
+    def Pretrained_Output(self, g):
+        h = self.TL_node_encoder(g.ndata['features'])
+        e = self.TL_edge_encoder(g.edata['features'])
+        g.ndata['h'] = h
+        g.edata['e'] = e
+        if not self.has_global:
+            global_feats = g.batch_num_nodes()[:, None].to(torch.float)
+        h_global = self.TL_global_encoder(global_feats)
+        for i in range(self.n_proc_steps):
+            g.apply_edges(dgl.function.copy_u('h', 'm_u'))
+            g.apply_edges(copy_v)
+            g.edata['e'] = self.TL_edge_update(torch.cat((g.edata['e'], g.edata['m_u'], g.edata['m_v'], broadcast_global_to_edges(g, h_global)), dim = 1))
+            g.update_all(dgl.function.copy_e('e', 'm'), dgl.function.sum('m', 'h_e'))
+            g.ndata['h'] = self.TL_node_update(torch.cat((g.ndata['h'], g.ndata['h_e'], broadcast_global_to_nodes(g, h_global)), dim = 1))
+            h_global = self.TL_global_update(torch.cat((h_global, dgl.mean_nodes(g, 'h'), dgl.mean_edges(g, 'e')), dim = 1))
+        h_global = self.TL_global_decoder(h_global)
+        return h_global
+    def forward(self, g, global_feats):
+        pretrained_global = self.Pretrained_Output(g.clone())
+        global_features = self.mlp(pretrained_global)
+        return self.classify(global_features)
+class Transferred_Learning_Message_Passing(nn.Module):
+    def __init__(self, pretraining_path, pretraining_model, sample_graph, sample_global, hid_size, out_size, n_layers, n_proc_steps, dropout=0, **kwargs):
+        super().__init__()
+        print(f'Unused args while creating GCN: {kwargs}')
+        self.n_layers = n_layers
+        self.n_proc_steps = n_proc_steps
+        self.layers = nn.ModuleList()
+        self.has_global = sample_global.shape[1] != 0
+        gl_size = sample_global.shape[1] if self.has_global else 1
+        self.pretrained_model = utils.buildFromConfig(pretraining_model, {'sample_graph': sample_graph, 'sample_global': sample_global})
+        checkpoint = torch.load(pretraining_path)
+        self.pretrained_model.load_state_dict(checkpoint['model_state_dict'])
+        pretrained_layers = list(self.pretrained_model.children())
+        pretrained_layers = pretrained_layers[:-1]
+        self.pretrained_model = nn.Sequential(*pretrained_layers)
+        # Freeze Weights
+        for param in self.pretrained_model.parameters():
+            param.requires_grad = False  # Freeze all layers
+        #encoder
+        self.mlp = Make_MLP(pretraining_model['args']['hid_size']*pretraining_model['args']['n_proc_steps'], hid_size, hid_size, n_layers, dropout=dropout)
+        self.classify = nn.Linear(hid_size, out_size)
+    def TL_node_encoder(self, x):
+        for layer in self.pretrained_model[1]:
+            x = layer(x)
+        return x
+    def TL_edge_encoder(self, x):
+        for layer in self.pretrained_model[2]:
+            x = layer(x)
+        return x
+    def TL_global_encoder(self, x):
+        for layer in self.pretrained_model[3]:
+            x = layer(x)
+        return x
+    def TL_node_update(self, x):
+        for layer in self.pretrained_model[4]:
+            x = layer(x)
+        return x
+    def TL_edge_update(self, x):
+        for layer in self.pretrained_model[5]:
+            x = layer(x)
+        return x
+    def TL_global_update(self, x):
+        for layer in self.pretrained_model[6]:
+            x = layer(x)
+        return x
+    def TL_global_decoder(self, x):
+        for layer in self.pretrained_model[7]:
+            x = layer(x)
+        return x
+    def Pretrained_Output(self, g):
+        message_passing = None
+        h = self.TL_node_encoder(g.ndata['features'])
+        e = self.TL_edge_encoder(g.edata['features'])
+        g.ndata['h'] = h
+        g.edata['e'] = e
+        if not self.has_global:
+            global_feats = g.batch_num_nodes()[:, None].to(torch.float)
+        h_global = self.TL_global_encoder(global_feats)
+        for i in range(self.n_proc_steps):
+            g.apply_edges(dgl.function.copy_u('h', 'm_u'))
+            g.apply_edges(copy_v)
+            g.edata['e'] = self.TL_edge_update(torch.cat((g.edata['e'], g.edata['m_u'], g.edata['m_v'], broadcast_global_to_edges(g, h_global)), dim = 1))
+            g.update_all(dgl.function.copy_e('e', 'm'), dgl.function.sum('m', 'h_e'))
+            g.ndata['h'] = self.TL_node_update(torch.cat((g.ndata['h'], g.ndata['h_e'], broadcast_global_to_nodes(g, h_global)), dim = 1))
+            h_global = self.TL_global_update(torch.cat((h_global, dgl.mean_nodes(g, 'h'), dgl.mean_edges(g, 'e')), dim = 1))
+            if (message_passing is None):
+                message_passing = h_global.clone()
+            else:
+                message_passing = torch.cat((message_passing, h_global.clone()), dim=1)
+        h_global = self.TL_global_decoder(h_global)
+        return message_passing
+    def forward(self, g, global_feats):
+        pretrained_global = self.Pretrained_Output(g.clone())
+        #print(f"message_passing layers have size = {pretrained_global.shape}")
+        #print(pretrained_global)
+        global_features = self.mlp(pretrained_global)
+        return self.classify(global_features)
+class Transferred_Learning_Message_Passing_Parallel(nn.Module):
+    def __init__(self, pretraining_path, pretraining_model, sample_graph, sample_global, hid_size, out_size, n_layers, n_proc_steps, dropout=0, **kwargs):
+        super().__init__()
+        print(f'Unused args while creating GCN: {kwargs}')
+        self.n_layers = n_layers
+        self.n_proc_steps = n_proc_steps
+        self.layers = nn.ModuleList()
+        self.has_global = sample_global.shape[1] != 0
+        gl_size = sample_global.shape[1] if self.has_global else 1
+        self.pretrained_model = utils.buildFromConfig(pretraining_model, {'sample_graph': sample_graph, 'sample_global': sample_global})
+        checkpoint = torch.load(pretraining_path)
+        self.pretrained_model.load_state_dict(checkpoint['model_state_dict'])
+        pretrained_layers = list(self.pretrained_model.children())
+        pretrained_layers = pretrained_layers[:-1]
+        self.pretrained_model = nn.Sequential(*pretrained_layers)
+        #encoder
+        self.node_encoder = Make_MLP(sample_graph.ndata['features'].shape[1], hid_size, hid_size, n_layers, dropout=dropout)
+        self.edge_encoder = Make_MLP(sample_graph.edata['features'].shape[1], hid_size, hid_size, n_layers, dropout=dropout)
+        self.global_encoder = Make_MLP(gl_size, hid_size, hid_size, n_layers, dropout=dropout)
+        #GNN
+        self.node_update = Make_MLP(3*hid_size, hid_size, hid_size, n_layers, dropout=dropout)
+        self.edge_update = Make_MLP(4*hid_size, hid_size, hid_size, n_layers, dropout=dropout)
+        self.global_update = Make_MLP(3*hid_size, hid_size, hid_size, n_layers, dropout=dropout)
+        #decoder
+        self.global_decoder = Make_MLP(hid_size, hid_size, hid_size, n_layers, dropout=dropout)
+        # Freeze Weights
+        for param in self.pretrained_model.parameters():
+            param.requires_grad = False  # Freeze all layers
+        self.classify = nn.Linear(pretraining_model['args']['hid_size']*pretraining_model['args']['n_proc_steps'] + hid_size, out_size)
+    def TL_node_encoder(self, x):
+        for layer in self.pretrained_model[1]:
+            x = layer(x)
+        return x
+    def TL_edge_encoder(self, x):
+        for layer in self.pretrained_model[2]:
+            x = layer(x)
+        return x
+    def TL_global_encoder(self, x):
+        for layer in self.pretrained_model[3]:
+            x = layer(x)
+        return x
+    def TL_node_update(self, x):
+        for layer in self.pretrained_model[4]:
+            x = layer(x)
+        return x
+    def TL_edge_update(self, x):
+        for layer in self.pretrained_model[5]:
+            x = layer(x)
+        return x
+    def TL_global_update(self, x):
+        for layer in self.pretrained_model[6]:
+            x = layer(x)
+        return x
+    def TL_global_decoder(self, x):
+        for layer in self.pretrained_model[7]:
+            x = layer(x)
+        return x
+    def Pretrained_Output(self, g):
+        message_passing = None
+        h = self.TL_node_encoder(g.ndata['features'])
+        e = self.TL_edge_encoder(g.edata['features'])
+        g.ndata['h'] = h
+        g.edata['e'] = e
+        if not self.has_global:
+            global_feats = g.batch_num_nodes()[:, None].to(torch.float)
+        h_global = self.TL_global_encoder(global_feats)
+        for i in range(self.n_proc_steps):
+            g.apply_edges(dgl.function.copy_u('h', 'm_u'))
+            g.apply_edges(copy_v)
+            g.edata['e'] = self.TL_edge_update(torch.cat((g.edata['e'], g.edata['m_u'], g.edata['m_v'], broadcast_global_to_edges(g, h_global)), dim = 1))
+            g.update_all(dgl.function.copy_e('e', 'm'), dgl.function.sum('m', 'h_e'))
+            g.ndata['h'] = self.TL_node_update(torch.cat((g.ndata['h'], g.ndata['h_e'], broadcast_global_to_nodes(g, h_global)), dim = 1))
+            h_global = self.TL_global_update(torch.cat((h_global, dgl.mean_nodes(g, 'h'), dgl.mean_edges(g, 'e')), dim = 1))
+            if (message_passing is None):
+                message_passing = h_global.clone()
+            else:
+                message_passing = torch.cat((message_passing, h_global.clone()), dim=1)
+        h_global = self.TL_global_decoder(h_global)
+        return message_passing
+    def forward(self, g, global_feats):
+        pretrained_message = self.Pretrained_Output(g.clone())
+        h = self.node_encoder(g.ndata['features'])
+        e = self.edge_encoder(g.edata['features'])
+        g.ndata['h'] = h
+        g.edata['e'] = e
+        if not self.has_global:
+            global_feats = g.batch_num_nodes()[:, None].to(torch.float)
+        h_global = self.global_encoder(global_feats)
+        for i in range(self.n_proc_steps):
+            g.apply_edges(dgl.function.copy_u('h', 'm_u'))
+            g.apply_edges(copy_v)
+            g.edata['e'] = self.edge_update(torch.cat((g.edata['e'], g.edata['m_u'], g.edata['m_v'], broadcast_global_to_edges(g, h_global)), dim = 1))
+            g.update_all(dgl.function.copy_e('e', 'm'), dgl.function.sum('m', 'h_e'))
+            g.ndata['h'] = self.node_update(torch.cat((g.ndata['h'], g.ndata['h_e'], broadcast_global_to_nodes(g, h_global)), dim = 1))
+            h_global = self.global_update(torch.cat((h_global, dgl.mean_nodes(g, 'h'), dgl.mean_edges(g, 'e')), dim = 1))
+        h_global = self.global_decoder(h_global)
+        return self.classify(torch.cat((pretrained_message, h_global), dim = 1))
+class Transferred_Learning_Finetuning(nn.Module):
+    def __init__(self, pretraining_path, pretraining_model, sample_graph, sample_global, hid_size, out_size, n_layers, n_proc_steps, dropout=0, frozen_pretraining=False, **kwargs):
+        super().__init__()
+        print(f'Unused args while creating GCN: {kwargs}')
+        self.n_layers = n_layers
+        self.n_proc_steps = n_proc_steps
+        self.layers = nn.ModuleList()
+        if (len(sample_global) == 0):
+            self.has_global = False
+        else:
+            self.has_global = sample_global.shape[1] != 0
+        gl_size = sample_global.shape[1] if self.has_global else 1
+        self.pretrained_model = utils.buildFromConfig(pretraining_model, {'sample_graph': sample_graph, 'sample_global': sample_global})
+        checkpoint = torch.load(pretraining_path)
+        self.pretrained_model.load_state_dict(checkpoint['model_state_dict'])
+        pretrained_layers = list(self.pretrained_model.children())
+        pretrained_layers = pretrained_layers[:-1]
+        self.pretrained_model = nn.Sequential(*pretrained_layers)
+        print(f"Freeze Pretraining = {frozen_pretraining}")
+        if (frozen_pretraining):
+            for param in self.pretrained_model.parameters():
+                param.requires_grad = False  # Freeze all layers
+            for param in self.pretrained_model[7]:
+                param.requires_grad = True
+        torch.manual_seed(2)
+        self.classify = nn.Linear(pretraining_model['args']['hid_size'], out_size)
+    def TL_node_encoder(self, x):
+        for layer in self.pretrained_model[1]:
+            x = layer(x)
+        return x
+    def TL_edge_encoder(self, x):
+        for layer in self.pretrained_model[2]:
+            x = layer(x)
+        return x
+    def TL_global_encoder(self, x):
+        for layer in self.pretrained_model[3]:
+            x = layer(x)
+        return x
+    def TL_node_update(self, x):
+        for layer in self.pretrained_model[4]:
+            x = layer(x)
+        return x
+    def TL_edge_update(self, x):
+        for layer in self.pretrained_model[5]:
+            x = layer(x)
+        return x
+    def TL_global_update(self, x):
+        for layer in self.pretrained_model[6]:
+            x = layer(x)
+        return x
+    def TL_global_decoder(self, x):
+        for layer in self.pretrained_model[7]:
+            x = layer(x)
+        return x
+    def Pretrained_Output(self, g):
+        h = self.TL_node_encoder(g.ndata['features'])
+        e = self.TL_edge_encoder(g.edata['features'])
+        g.ndata['h'] = h
+        g.edata['e'] = e
+        if not self.has_global:
+            global_feats = g.batch_num_nodes()[:, None].to(torch.float)
+        h_global = self.TL_global_encoder(global_feats)
+        for i in range(self.n_proc_steps):
+            g.apply_edges(dgl.function.copy_u('h', 'm_u'))
+            g.apply_edges(copy_v)
+            g.edata['e'] = self.TL_edge_update(torch.cat((g.edata['e'], g.edata['m_u'], g.edata['m_v'], broadcast_global_to_edges(g, h_global)), dim = 1))
+            g.update_all(dgl.function.copy_e('e', 'm'), dgl.function.sum('m', 'h_e'))
+            g.ndata['h'] = self.TL_node_update(torch.cat((g.ndata['h'], g.ndata['h_e'], broadcast_global_to_nodes(g, h_global)), dim = 1))
+            h_global = self.TL_global_update(torch.cat((h_global, dgl.mean_nodes(g, 'h'), dgl.mean_edges(g, 'e')), dim = 1))
+        h_global = self.TL_global_decoder(h_global)
+        return h_global
+    def forward(self, g, global_feats):
+        h_global = self.Pretrained_Output(g.clone())
+        return self.classify(h_global)
+    def representation(self, g, global_feats):
+        h = self.TL_node_encoder(g.ndata['features'])
+        e = self.TL_edge_encoder(g.edata['features'])
+        g.ndata['h'] = h
+        g.edata['e'] = e
+        if not self.has_global:
+            global_feats = g.batch_num_nodes()[:, None].to(torch.float)
+        h_global = self.TL_global_encoder(global_feats)
+        for i in range(self.n_proc_steps):
+            g.apply_edges(dgl.function.copy_u('h', 'm_u'))
+            g.apply_edges(copy_v)
+            g.edata['e'] = self.TL_edge_update(torch.cat((g.edata['e'], g.edata['m_u'], g.edata['m_v'], broadcast_global_to_edges(g, h_global)), dim = 1))
+            g.update_all(dgl.function.copy_e('e', 'm'), dgl.function.sum('m', 'h_e'))
+            g.ndata['h'] = self.TL_node_update(torch.cat((g.ndata['h'], g.ndata['h_e'], broadcast_global_to_nodes(g, h_global)), dim = 1))
+            h_global = self.TL_global_update(torch.cat((h_global, dgl.mean_nodes(g, 'h'), dgl.mean_edges(g, 'e')), dim = 1))
+        before_global_decoder = h_global
+        after_global_decoder = self.TL_global_decoder(before_global_decoder)
+        after_classify = self.classify(after_global_decoder)
+        return before_global_decoder, after_global_decoder, after_classify
+    def __str__(self):
+        layer_names = ["node_encoder", "edge_encoder", "global_encoder",
+                  "node_update", "edge_update", "global_update", "global_decoder"]
+        layers = [self.pretrained_model[1], self.pretrained_model[2], self.pretrained_model[3],
+                  self.pretrained_model[4], self.pretrained_model[5], self.pretrained_model[6],
+                  self.pretrained_model[7]]
+        for i in range(len(layers)):
+            print(layer_names[i])
+            for layer in layers[i].children():
+                if isinstance(layer, nn.Linear):
+                    print(layer.state_dict())
+        print("classify")
+        print(self.classify.weight)
+        return ""
+class Transferred_Learning_Parallel_Finetuning(nn.Module):
+    def __init__(self, pretraining_path, pretraining_model, sample_graph, sample_global, hid_size, out_size, n_layers, n_proc_steps, dropout=0, learning_rate=0.0001, **kwargs):
+        super().__init__()
+        print(f'Unused args while creating GCN: {kwargs}')
+        self.learning_rate = learning_rate
+        self.parallel_params = []
+        self.finetuning_params = []
+        self.n_layers = n_layers
+        self.n_proc_steps = n_proc_steps
+        self.layers = nn.ModuleList()
+        self.has_global = sample_global.shape[1] != 0
+        gl_size = sample_global.shape[1] if self.has_global else 1
+        self.pretrained_model = utils.buildFromConfig(pretraining_model, {'sample_graph': sample_graph, 'sample_global': sample_global})
+        checkpoint = torch.load(pretraining_path)
+        self.pretrained_model.load_state_dict(checkpoint['model_state_dict'])
+        pretrained_layers = list(self.pretrained_model.children())
+        pretrained_layers = pretrained_layers[:-1]
+        self.pretrained_model = nn.Sequential(*pretrained_layers)
+        self.finetuning_params.append(self.pretrained_model)
+        #encoder
+        self.node_encoder = Make_MLP(sample_graph.ndata['features'].shape[1], hid_size, hid_size, n_layers, dropout=dropout)
+        self.edge_encoder = Make_MLP(sample_graph.edata['features'].shape[1], hid_size, hid_size, n_layers, dropout=dropout)
+        self.global_encoder = Make_MLP(gl_size, hid_size, hid_size, n_layers, dropout=dropout)
+        #GNN
+        self.node_update = Make_MLP(3*hid_size, hid_size, hid_size, n_layers, dropout=dropout)
+        self.edge_update = Make_MLP(4*hid_size, hid_size, hid_size, n_layers, dropout=dropout)
+        self.global_update = Make_MLP(3*hid_size, hid_size, hid_size, n_layers, dropout=dropout)
+        #decoder
+        self.global_decoder = Make_MLP(hid_size, hid_size, hid_size, n_layers, dropout=dropout)
+        self.classify = nn.Linear(hid_size + pretraining_model['args']['hid_size'], out_size)
+        self.parallel_params.append(self.node_encoder)
+        self.parallel_params.append(self.edge_encoder)
+        self.parallel_params.append(self.global_encoder)
+        self.parallel_params.append(self.node_update)
+        self.parallel_params.append(self.edge_update)
+        self.parallel_params.append(self.global_update)
+        self.parallel_params.append(self.global_decoder)
+        self.parallel_params.append(self.classify)
+    def TL_node_encoder(self, x):
+        for layer in self.pretrained_model[1]:
+            x = layer(x)
+        return x
+    def TL_edge_encoder(self, x):
+        for layer in self.pretrained_model[2]:
+            x = layer(x)
+        return x
+    def TL_global_encoder(self, x):
+        for layer in self.pretrained_model[3]:
+            x = layer(x)
+        return x
+    def TL_node_update(self, x):
+        for layer in self.pretrained_model[4]:
+            x = layer(x)
+        return x
+    def TL_edge_update(self, x):
+        for layer in self.pretrained_model[5]:
+            x = layer(x)
+        return x
+    def TL_global_update(self, x):
+        for layer in self.pretrained_model[6]:
+            x = layer(x)
+        return x
+    def TL_global_decoder(self, x):
+        for layer in self.pretrained_model[7]:
+            x = layer(x)
+        return x
+    def Pretrained_Output(self, g):
+        h = self.TL_node_encoder(g.ndata['features'])
+        e = self.TL_edge_encoder(g.edata['features'])
+        g.ndata['h'] = h
+        g.edata['e'] = e
+        if not self.has_global:
+            global_feats = g.batch_num_nodes()[:, None].to(torch.float)
+        h_global = self.TL_global_encoder(global_feats)
+        for i in range(self.n_proc_steps):
+            g.apply_edges(dgl.function.copy_u('h', 'm_u'))
+            g.apply_edges(copy_v)
+            g.edata['e'] = self.TL_edge_update(torch.cat((g.edata['e'], g.edata['m_u'], g.edata['m_v'], broadcast_global_to_edges(g, h_global)), dim = 1))
+            g.update_all(dgl.function.copy_e('e', 'm'), dgl.function.sum('m', 'h_e'))
+            g.ndata['h'] = self.TL_node_update(torch.cat((g.ndata['h'], g.ndata['h_e'], broadcast_global_to_nodes(g, h_global)), dim = 1))
+            h_global = self.TL_global_update(torch.cat((h_global, dgl.mean_nodes(g, 'h'), dgl.mean_edges(g, 'e')), dim = 1))
+        h_global = self.TL_global_decoder(h_global)
+        return h_global
+    def forward(self, g, global_feats):
+        pretrained_global = self.Pretrained_Output(g.clone())
+        h = self.node_encoder(g.ndata['features'])
+        e = self.edge_encoder(g.edata['features'])
+        g.ndata['h'] = h
+        g.edata['e'] = e
+        if not self.has_global:
+            global_feats = g.batch_num_nodes()[:, None].to(torch.float)
+        h_global = self.global_encoder(global_feats)
+        for i in range(self.n_proc_steps):
+            g.apply_edges(dgl.function.copy_u('h', 'm_u'))
+            g.apply_edges(copy_v)
+            g.edata['e'] = self.edge_update(torch.cat((g.edata['e'], g.edata['m_u'], g.edata['m_v'], broadcast_global_to_edges(g, h_global)), dim = 1))
+            g.update_all(dgl.function.copy_e('e', 'm'), dgl.function.sum('m', 'h_e'))
+            g.ndata['h'] = self.node_update(torch.cat((g.ndata['h'], g.ndata['h_e'], broadcast_global_to_nodes(g, h_global)), dim = 1))
+            h_global = self.global_update(torch.cat((h_global, dgl.mean_nodes(g, 'h'), dgl.mean_edges(g, 'e')), dim = 1))
+        h_global = self.global_decoder(h_global)
+        return self.classify(torch.cat((pretrained_global, h_global), dim = 1))
+    def parameters(self, recurse: bool = True):
+        params = []
+        for model_section in self.parallel_params:
+            if (type(self.learning_rate) == dict and self.learning_rate["trainable_lr"]):
+                params.append({'params': model_section.parameters(), 'lr': self.learning_rate["trainable_lr"]})
+            else:
+                params.append({'params': model_section.parameters(), 'lr': 0.0001})
+        for model_section in self.finetuning_params:
+            if (type(self.learning_rate) == dict and self.learning_rate["finetuning_lr"]):
+                params.append({'params': model_section.parameters(), 'lr': self.learning_rate["finetuning_lr"]})
+            else:
+                params.append({'params': model_section.parameters(), 'lr': 0.0001})
+        return params
+class Attention(nn.Module):
+    def __init__(self, sample_graph, sample_global, hid_size, out_size, n_layers, n_proc_steps, dropout=0, num_heads = 1, **kwargs):
+        super().__init__()
+        print(f'Unused args while creating GCN: {kwargs}')
+        self.n_layers = n_layers
+        self.n_proc_steps = n_proc_steps
+        self.layers = nn.ModuleList()
+        self.has_global = sample_global.shape[1] != 0
+        gl_size = sample_global.shape[1] if self.has_global else 1
+        #encoder
+        self.node_encoder = Make_MLP(sample_graph.ndata['features'].shape[1], hid_size, hid_size, n_layers, dropout=dropout)
+        self.global_encoder = Make_MLP(gl_size, hid_size, hid_size, n_layers, dropout=dropout)
+        #GNN
+        self.node_update = Make_MLP(2*hid_size, hid_size, hid_size, n_layers, dropout=dropout)
+        self.global_update = Make_MLP(2*hid_size, hid_size, hid_size, n_layers, dropout=dropout)
+        #decoder
+        self.global_decoder = Make_MLP(hid_size, hid_size, hid_size, n_layers, dropout=dropout)
+        self.classify = nn.Linear(hid_size, out_size)
+        #attention
+        self.multihead_attn = nn.MultiheadAttention(hid_size, num_heads, dropout=dropout, batch_first=True)
+        self.queries = nn.Linear(hid_size, hid_size)
+        self.keys = nn.Linear(hid_size, hid_size)
+        self.values = nn.Linear(hid_size, hid_size)
+    def forward(self, g, global_feats):
+        h = self.node_encoder(g.ndata['features'])
+        g.ndata['h'] = h
+        if not self.has_global:
+            global_feats = g.batch_num_nodes()[:, None].to(torch.float)
+        batch_num_nodes = None
+        sum_weights = None
+        if "w" in g.ndata:
+            batch_indices = g.batch_num_nodes()
+            # Find non-zero rows (non-padded nodes)
+            non_padded_nodes_mask = torch.any(g.ndata['features'] != 0, dim=1)
+            # Split the mask according to the batch indices
+            batch_num_nodes = []
+            start_idx = 0
+            for num_nodes in batch_indices:
+                end_idx = start_idx + num_nodes
+                non_padded_count = non_padded_nodes_mask[start_idx:end_idx].sum().item()
+                batch_num_nodes.append(non_padded_count)
+                start_idx = end_idx
+            batch_num_nodes = torch.tensor(batch_num_nodes, device = g.ndata['features'].device)
+            sum_weights = batch_num_nodes[:, None].repeat(1, 64)
+            global_feats = batch_num_nodes[:, None].to(torch.float)
+        h_global = self.global_encoder(global_feats)
+        h_original_shape = h.shape
+        num_graphs = len(dgl.unbatch(g))
+        num_nodes = g.batch_num_nodes()[0].item()
+        padding_mask = g.ndata['padding_mask'] > 0
+        padding_mask = torch.reshape(padding_mask, (num_graphs, num_nodes))
+        h = g.ndata['h']
+        query = self.queries(h)
+        key = self.keys(h)
+        value = self.values(h)
+        query = torch.reshape(query, (num_graphs, num_nodes, h_original_shape[1]))
+        key = torch.reshape(key, (num_graphs, num_nodes, h_original_shape[1]))
+        value = torch.reshape(value, (num_graphs, num_nodes, h_original_shape[1]))
+        h, _ = self.multihead_attn(query, key, value, key_padding_mask=padding_mask)
+        h = torch.reshape(h, h_original_shape)
+        h = self.node_update(torch.cat((h, broadcast_global_to_nodes(g, h_global)), dim = 1))
+        g.ndata['h'] = h
+        mean_nodes = dgl.sum_nodes(g, 'h', 'w') / sum_weights
+        h_global = self.global_update(torch.cat((h_global, mean_nodes), dim = 1))
+        h_global = self.global_decoder(h_global)
+        return self.classify(h_global)
+class Attention_Edge_Network(nn.Module):
+    def __init__(self, sample_graph, sample_global, hid_size, out_size, n_layers, n_proc_steps, dropout=0, num_heads = 1, **kwargs):
+        super().__init__()
+        print(f'Unused args while creating GCN: {kwargs}')
+        self.n_layers = n_layers
+        self.n_proc_steps = n_proc_steps
+        self.layers = nn.ModuleList()
+        self.has_global = sample_global.shape[1] != 0
+        gl_size = sample_global.shape[1] if self.has_global else 1
+        #encoder
+        self.node_encoder = Make_MLP(sample_graph.ndata['features'].shape[1], hid_size, hid_size, n_layers, dropout=dropout)
+        self.edge_encoder = Make_MLP(sample_graph.edata['features'].shape[1], hid_size, hid_size, n_layers, dropout=dropout)
+        self.global_encoder = Make_MLP(gl_size, hid_size, hid_size, n_layers, dropout=dropout)
+        #GNN
+        self.node_update = Make_MLP(3*hid_size, hid_size, hid_size, n_layers, dropout=dropout)
+        self.edge_update = Make_MLP(4*hid_size, hid_size, hid_size, n_layers, dropout=dropout)
+        self.global_update = Make_MLP(3*hid_size, hid_size, hid_size, n_layers, dropout=dropout)
+        #decoder
+        self.global_decoder = Make_MLP(hid_size, hid_size, hid_size, n_layers, dropout=dropout)
+        self.classify = nn.Linear(hid_size, out_size)
+        #attention
+        self.multihead_attn = nn.MultiheadAttention(hid_size, num_heads, dropout=dropout, batch_first=True)
+        self.queries = nn.Linear(hid_size, hid_size)
+        self.keys = nn.Linear(hid_size, hid_size)
+        self.values = nn.Linear(hid_size, hid_size)
+    def forward(self, g, global_feats):
+        h = self.node_encoder(g.ndata['features'])
+        e = self.edge_encoder(g.edata['features'])
+        g.ndata['h'] = h
+        g.edata['e'] = e
+        if not self.has_global:
+            global_feats = g.batch_num_nodes()[:, None].to(torch.float)
+        h_global = self.global_encoder(global_feats)
+        h = g.ndata['h']
+        h_original_shape = h.shape
+        num_graphs = len(dgl.unbatch(g))
+        num_nodes = g.batch_num_nodes()[0].item()
+        padding_mask = g.ndata['padding_mask'] > 0
+        padding_mask = torch.reshape(padding_mask, (num_graphs, num_nodes))
+        for i in range(self.n_proc_steps):
+            h = g.ndata['h']
+            query = self.queries(h)
+            key = self.keys(h)
+            value = self.values(h)
+            query = torch.reshape(query, (num_graphs, num_nodes, h_original_shape[1]))
+            key = torch.reshape(key, (num_graphs, num_nodes, h_original_shape[1]))
+            value = torch.reshape(value, (num_graphs, num_nodes, h_original_shape[1]))
+            h, _ = self.multihead_attn(query, key, value, key_padding_mask=padding_mask)
+            h = torch.reshape(h, h_original_shape)
+            g.apply_edges(dgl.function.copy_u('h', 'm_u'))
+            g.apply_edges(copy_v)
+            g.edata['e'] = self.edge_update(torch.cat((g.edata['e'], g.edata['m_u'], g.edata['m_v'], broadcast_global_to_edges(g, h_global)), dim = 1))
+            g.update_all(dgl.function.copy_e('e', 'm'), dgl.function.sum('m', 'h_e'))
+            g.ndata['h'] = self.node_update(torch.cat((g.ndata['h'], g.ndata['h_e'], broadcast_global_to_nodes(g, h_global)), dim = 1))
+            h_global = self.global_update(torch.cat((h_global, dgl.mean_nodes(g, 'h', 'w'), dgl.mean_edges(g, 'e')), dim = 1))
+        h_global = self.global_decoder(h_global)
+        return self.classify(h_global)
+class Attention_Unbatched(nn.Module):
+    def __init__(self, sample_graph, sample_global, hid_size, out_size, n_layers, n_proc_steps, dropout=0, num_heads = 1, **kwargs):
+        super().__init__()
+        print(f'Unused args while creating GCN: {kwargs}')
+        self.n_layers = n_layers
+        self.n_proc_steps = n_proc_steps
+        self.layers = nn.ModuleList()
+        self.has_global = sample_global.shape[1] != 0
+        gl_size = sample_global.shape[1] if self.has_global else 1
+        #encoder
+        self.node_encoder = Make_MLP(sample_graph.ndata['features'].shape[1], hid_size, hid_size, n_layers, dropout=dropout)
+        self.edge_encoder = Make_MLP(sample_graph.edata['features'].shape[1], hid_size, hid_size, n_layers, dropout=dropout)
+        self.global_encoder = Make_MLP(gl_size, hid_size, hid_size, n_layers, dropout=dropout)
+        #GNN
+        self.node_update = Make_MLP(3*hid_size, hid_size, hid_size, n_layers, dropout=dropout)
+        self.edge_update = Make_MLP(4*hid_size, hid_size, hid_size, n_layers, dropout=dropout)
+        self.global_update = Make_MLP(3*hid_size, hid_size, hid_size, n_layers, dropout=dropout)
+        #decoder
+        self.global_decoder = Make_MLP(hid_size, hid_size, hid_size, n_layers, dropout=dropout)
+        self.classify = nn.Linear(hid_size, out_size)
+        #attention
+        self.multihead_attn = nn.MultiheadAttention(hid_size, 1, dropout=dropout)
+        self.queries = nn.Linear(hid_size, hid_size)
+        self.keys = nn.Linear(hid_size, hid_size)
+        self.values = nn.Linear(hid_size, hid_size)
+    def forward(self, g, global_feats):
+        h = self.node_encoder(g.ndata['features'])
+        e = self.edge_encoder(g.edata['features'])
+        g.ndata['h'] = h
+        g.edata['e'] = e
+        if not self.has_global:
+            global_feats = g.batch_num_nodes()[:, None].to(torch.float)
+        h_global = self.global_encoder(global_feats)
+        for i in range(self.n_proc_steps):
+            unbatched_g = dgl.unbatch(g)
+            for graph in unbatched_g:
+                h = graph.ndata['h']
+                h, _ = self.multihead_attn(self.queries(h), self.keys(h), self.values(h))
+                graph.ndata['h'] = h
+            g = dgl.batch(unbatched_g)
+            g.apply_edges(dgl.function.copy_u('h', 'm_u'))
+            g.apply_edges(copy_v)
+            g.edata['e'] = self.edge_update(torch.cat((g.edata['e'], g.edata['m_u'], g.edata['m_v'], broadcast_global_to_edges(g, h_global)), dim = 1))
+            g.update_all(dgl.function.copy_e('e', 'm'), dgl.function.sum('m', 'h_e'))
+            g.ndata['h'] = self.node_update(torch.cat((g.ndata['h'], g.ndata['h_e'], broadcast_global_to_nodes(g, h_global)), dim = 1))
+            h_global = self.global_update(torch.cat((h_global, dgl.mean_nodes(g, 'h'), dgl.mean_edges(g, 'e')), dim = 1))
+        h_global = self.global_decoder(h_global)
+        return self.classify(h_global)
+class Transferred_Learning_Attention(nn.Module):
+    def __init__(self, pretraining_path, pretraining_model, sample_graph, sample_global, hid_size, out_size, n_layers, n_proc_steps, num_heads, dropout=0, learning_rate=0.0001, **kwargs):
+        super().__init__()
+        print(f'Unused args while creating GCN: {kwargs}')
+        self.n_layers = n_layers
+        self.n_proc_steps = n_proc_steps
+        self.layers = nn.ModuleList()
+        self.has_global = sample_global.shape[1] != 0
+        gl_size = sample_global.shape[1] if self.has_global else 1
+        self.learning_rate = learning_rate
+        self.pretraining_params = []
+        self.attention_params = []
+        self.pretrained_model = utils.buildFromConfig(pretraining_model, {'sample_graph': sample_graph, 'sample_global': sample_global})
+        checkpoint = torch.load(pretraining_path)
+        self.pretrained_model.load_state_dict(checkpoint['model_state_dict'])
+        pretrained_layers = list(self.pretrained_model.children())
+        pretrained_layers = pretrained_layers[:-1]
+        self.pretrained_model = nn.Sequential(*pretrained_layers)
+        self.pretraining_params.append(self.pretrained_model[1])
+        self.pretraining_params.append(self.pretrained_model[3])
+        self.pretraining_params.append(self.pretrained_model[7])
+        #attention
+        self.multihead_attn = nn.MultiheadAttention(hid_size, num_heads, dropout=dropout, batch_first=True)
+        self.queries = nn.Linear(hid_size, hid_size)
+        self.keys = nn.Linear(hid_size, hid_size)
+        self.values = nn.Linear(hid_size, hid_size)
+        self.node_update = Make_MLP(2*hid_size, hid_size, hid_size, n_layers, dropout=dropout)
+        self.global_update = Make_MLP(2*hid_size, hid_size, hid_size, n_layers, dropout=dropout)
+        self.classify = nn.Linear(pretraining_model['args']['hid_size'], out_size)
+        self.attention_params.append(self.multihead_attn)
+        self.attention_params.append(self.queries)
+        self.attention_params.append(self.keys)
+        self.attention_params.append(self.values)
+        self.attention_params.append(self.classify)
+        self.attention_params.append(self.node_update)
+        self.attention_params.append(self.global_update)
+    def TL_node_encoder(self, x):
+        for layer in self.pretrained_model[1]:
+            x = layer(x)
+        return x
+    def TL_global_encoder(self, x):
+        for layer in self.pretrained_model[3]:
+            x = layer(x)
+        return x
+    def TL_global_decoder(self, x):
+        for layer in self.pretrained_model[7]:
+            x = layer(x)
+        return x
+    def forward(self, g, global_feats):
+        h = self.TL_node_encoder(g.ndata['features'])
+        g.ndata['h'] = h
+        if not self.has_global:
+            global_feats = g.batch_num_nodes()[:, None].to(torch.float)
+        batch_num_nodes = None
+        sum_weights = None
+        if "w" in g.ndata:
+            batch_indices = g.batch_num_nodes()
+            # Find non-zero rows (non-padded nodes)
+            non_padded_nodes_mask = torch.any(g.ndata['features'] != 0, dim=1)
+            # Split the mask according to the batch indices
+            batch_num_nodes = []
+            start_idx = 0
+            for num_nodes in batch_indices:
+                end_idx = start_idx + num_nodes
+                non_padded_count = non_padded_nodes_mask[start_idx:end_idx].sum().item()
+                batch_num_nodes.append(non_padded_count)
+                start_idx = end_idx
+            batch_num_nodes = torch.tensor(batch_num_nodes, device = g.ndata['features'].device)
+            sum_weights = batch_num_nodes[:, None].repeat(1, 64)
+            global_feats = batch_num_nodes[:, None].to(torch.float)
+        h_global = self.TL_global_encoder(global_feats)
+        h_original_shape = h.shape
+        num_graphs = len(dgl.unbatch(g))
+        num_nodes = g.batch_num_nodes()[0].item()
+        padding_mask = g.ndata['padding_mask'] > 0
+        padding_mask = torch.reshape(padding_mask, (num_graphs, num_nodes))
+        h = g.ndata['h']
+        query = self.queries(h)
+        key = self.keys(h)
+        value = self.values(h)
+        query = torch.reshape(query, (num_graphs, num_nodes, h_original_shape[1]))
+        key = torch.reshape(key, (num_graphs, num_nodes, h_original_shape[1]))
+        value = torch.reshape(value, (num_graphs, num_nodes, h_original_shape[1]))
+        h, _ = self.multihead_attn(query, key, value, key_padding_mask=padding_mask)
+        h = torch.reshape(h, h_original_shape)
+        h = self.node_update(torch.cat((h, broadcast_global_to_nodes(g, h_global)), dim = 1))
+        g.ndata['h'] = h
+        mean_nodes = dgl.sum_nodes(g, 'h', 'w') / sum_weights
+        h_global = self.global_update(torch.cat((h_global, mean_nodes), dim = 1))
+        h_global = self.TL_global_decoder(h_global)
+        return self.classify(h_global)
+    def parameters(self, recurse: bool = True):
+        params = []
+        for model_section in self.pretraining_params:
+            if (type(self.learning_rate) == dict and self.learning_rate["pretraining_lr"]):
+                params.append({'params': model_section.parameters(), 'lr': self.learning_rate["pretraining_lr"]})
+            else:
+                params.append({'params': model_section.parameters(), 'lr': 0.0001})
+        for model_section in self.attention_params:
+            if (type(self.learning_rate) == dict and self.learning_rate["attention_lr"]):
+                params.append({'params': model_section.parameters(), 'lr': self.learning_rate["attention_lr"]})
+            else:
+                params.append({'params': model_section.parameters(), 'lr': 0.0001})
+        return params
+class Multimodel_Transferred_Learning(nn.Module):
+    def __init__(self, pretraining_path, pretraining_model, sample_graph, sample_global, hid_size, out_size, n_layers, n_proc_steps, dropout=0, frozen_pretraining=True, learning_rate=None, **kwargs):
+        super().__init__()
+        print(f'Unused args while creating GCN: {kwargs}')
+        self.n_layers = n_layers
+        self.n_proc_steps = n_proc_steps
+        self.layers = nn.ModuleList()
+        self.has_global = sample_global.shape[1] != 0
+        gl_size = sample_global.shape[1] if self.has_global else 1
+        self.learning_rate = learning_rate
+        input_size = 0
+        self.pretraining_params = []
+        self.model_params = []
+        self.pretrained_models = []
+        for model, path in zip(pretraining_model, pretraining_path):
+            input_size += model['args']['hid_size']
+            model = utils.buildFromConfig(model, {'sample_graph': sample_graph, 'sample_global': sample_global})
+            checkpoint = torch.load(path)['model_state_dict']
+            new_state_dict = {}
+            for k, v in checkpoint.items():
+                new_key = k.replace('module.', '')
+                new_state_dict[new_key] = v
+            model.load_state_dict(new_state_dict)
+            pretrained_layers = list(model.children())
+            pretrained_layers = pretrained_layers[:-1]
+            model = nn.Sequential(*pretrained_layers)
+            # Freeze Weights
+            print(f"Freeze Pretraining = {frozen_pretraining}")
+            if (frozen_pretraining):
+                for param in model.parameters():
+                    param.requires_grad = False  # Freeze all layers
+            self.pretraining_params.append(model)
+            self.pretrained_models.append(model)
+        print(f"len(pretrained_models) = {len(self.pretrained_models)}")
+        print(f"input size = {input_size}")
+        self.final_mlp = Make_MLP(input_size, hid_size, hid_size, n_layers, dropout=dropout)
+        self.classify = nn.Linear(hid_size, out_size)
+        self.model_params.append(self.final_mlp)
+        self.model_params.append(self.classify)
+    def TL_node_encoder(self, x, model_idx):
+        try:
+            for layer in self.pretrained_models[model_idx][1]:
+                x = layer(x)
+            return x
+        except (NotImplementedError, IndexError):
+            for layer in self.pretrained_models[model_idx][1][1]:
+                x = layer(x)
+            return x
+    def TL_edge_encoder(self, x, model_idx):
+        try:
+            for layer in self.pretrained_models[model_idx][2]:
+                x = layer(x)
+            return x
+        except (NotImplementedError, IndexError):
+            for layer in self.pretrained_models[model_idx][1][2]:
+                x = layer(x)
+            return x
+    def TL_global_encoder(self, x, model_idx):
+        try:
+            for layer in self.pretrained_models[model_idx][3]:
+                x = layer(x)
+            return x
+        except (NotImplementedError, IndexError):
+            for layer in self.pretrained_models[model_idx][1][3]:
+                x = layer(x)
+            return x
+    def TL_node_update(self, x, model_idx):
+        try:
+            for layer in self.pretrained_models[model_idx][4]:
+                x = layer(x)
+            return x
+        except (NotImplementedError, IndexError):
+            for layer in self.pretrained_models[model_idx][1][4]:
+                x = layer(x)
+            return x
+    def TL_edge_update(self, x, model_idx):
+        try:
+            for layer in self.pretrained_models[model_idx][5]:
+                x = layer(x)
+            return x
+        except (NotImplementedError, IndexError):
+            for layer in self.pretrained_models[model_idx][1][5]:
+                x = layer(x)
+            return x
+    def TL_global_update(self, x, model_idx):
+        try:
+            for layer in self.pretrained_models[model_idx][6]:
+                x = layer(x)
+            return x
+        except (NotImplementedError, IndexError):
+            for layer in self.pretrained_models[model_idx][1][6]:
+                x = layer(x)
+            return x
+    def TL_global_decoder(self, x, model_idx):
+        try:
+            for layer in self.pretrained_models[model_idx][7]:
+                x = layer(x)
+            return x
+        except (NotImplementedError, IndexError):
+            for layer in self.pretrained_models[model_idx][1][7]:
+                x = layer(x)
+            return x
+    def Pretrained_Output(self, g, model_idx):
+        h = self.TL_node_encoder(g.ndata['features'], model_idx)
+        e = self.TL_edge_encoder(g.edata['features'], model_idx)
+        g.ndata['h'] = h
+        g.edata['e'] = e
+        if not self.has_global:
+            global_feats = g.batch_num_nodes()[:, None].to(torch.float)
+        h_global = self.TL_global_encoder(global_feats, model_idx)
+        for i in range(self.n_proc_steps):
+            g.apply_edges(dgl.function.copy_u('h', 'm_u'))
+            g.apply_edges(copy_v)
+            g.edata['e'] = self.TL_edge_update(torch.cat((g.edata['e'], g.edata['m_u'], g.edata['m_v'], broadcast_global_to_edges(g, h_global)), dim = 1), model_idx)
+            g.update_all(dgl.function.copy_e('e', 'm'), dgl.function.sum('m', 'h_e'))
+            g.ndata['h'] = self.TL_node_update(torch.cat((g.ndata['h'], g.ndata['h_e'], broadcast_global_to_nodes(g, h_global)), dim = 1), model_idx)
+            h_global = self.TL_global_update(torch.cat((h_global, dgl.mean_nodes(g, 'h'), dgl.mean_edges(g, 'e')), dim = 1), model_idx)
+        # h_global = self.TL_global_decoder(h_global, model_idx)
+        return h_global
+    def forward(self, g, global_feats):
+        h_global = []
+        for i in range(len(self.pretrained_models)):
+            h_global.append(self.Pretrained_Output(g.clone(), i))
+        h_global = torch.concatenate(h_global, dim=1)
+        return self.classify(self.final_mlp(h_global))
+    def to(self, device):
+        for i in range(len(self.pretrained_models)):
+            self.pretrained_models[i].to(device)
+        self.classify.to(device)
+        self.final_mlp.to(device)
+        return self
+    def parameters(self, recurse: bool = True):
+        params = []
+        for model_section in self.pretraining_params:
+            if (type(self.learning_rate) == dict and self.learning_rate["pretraining_lr"]):
+                params.append({'params': model_section.parameters(), 'lr': self.learning_rate["pretraining_lr"]})
+            else:
+                params.append({'params': model_section.parameters(), 'lr': 0.00001})
+        for model_section in self.model_params:
+            if (type(self.learning_rate) == dict and self.learning_rate["model_lr"]):
+                params.append({'params': model_section.parameters(), 'lr': self.learning_rate["model_lr"]})
+            else:
+                params.append({'params': model_section.parameters(), 'lr': 0.0001})
+        return params
+class MultiModel(nn.Module):
+    def __init__(self, pretraining_path, pretraining_model, sample_graph, sample_global, hid_size, out_size, n_layers, n_proc_steps, dropout=0, frozen_pretraining=True, learning_rate=None, **kwargs):
+        super().__init__()
+        print(f'Unused args while creating GCN: {kwargs}')
+        self.n_layers = n_layers
+        self.n_proc_steps = n_proc_steps
+        self.layers = nn.ModuleList()
+        self.has_global = sample_global.shape[1] != 0
+        gl_size = sample_global.shape[1] if self.has_global else 1
+        self.learning_rate = learning_rate
+        input_size = 0
+        self.model_params = []
+        self.pretraining_params = []
+        self.pretrained_models = []
+        for model, path in zip(pretraining_model, pretraining_path):
+            input_size += model['args']['hid_size']
+            model = utils.buildFromConfig(model, {'sample_graph': sample_graph, 'sample_global': sample_global})
+            checkpoint = torch.load(path)['model_state_dict']
+            new_state_dict = {}
+            for k, v in checkpoint.items():
+                new_key = k.replace('module.', '')
+                new_state_dict[new_key] = v
+            model.load_state_dict(new_state_dict)
+            pretrained_layers = list(model.children())
+            pretrained_layers = pretrained_layers[:-1]
+            model = nn.Sequential(*pretrained_layers)
+            # Freeze Weights
+            print(f"Freeze Pretraining = {frozen_pretraining}")
+            if (frozen_pretraining):
+                for param in model.parameters():
+                    param.requires_grad = False  # Freeze all layers
+            self.pretraining_params.append(model)
+            self.pretrained_models.append(model)
+        print(f"len(pretrained_models) = {len(self.pretrained_models)}")
+        print(f"input size = {input_size}")
+        #encoder
+        self.node_encoder = Make_MLP(sample_graph.ndata['features'].shape[1], hid_size, hid_size, n_layers, dropout=dropout)
+        self.edge_encoder = Make_MLP(sample_graph.edata['features'].shape[1], hid_size, hid_size, n_layers, dropout=dropout)
+        self.global_encoder = Make_MLP(gl_size, hid_size, hid_size, n_layers, dropout=dropout)
+        #GNN
+        self.node_update = Make_MLP(3*hid_size, hid_size, hid_size, n_layers, dropout=dropout)
+        self.edge_update = Make_MLP(4*hid_size, hid_size, hid_size, n_layers, dropout=dropout)
+        self.global_update = Make_MLP(3*hid_size, hid_size, hid_size, n_layers, dropout=dropout)
+        self.final_mlp = Make_MLP(input_size + hid_size, hid_size, hid_size, n_layers, dropout=dropout)
+        self.classify = nn.Linear(hid_size, out_size)
+        self.model_params.append(self.final_mlp)
+        self.model_params.append(self.classify)
+    def TL_node_encoder(self, x, model_idx):
+        try:
+            for layer in self.pretrained_models[model_idx][1]:
+                x = layer(x)
+            return x
+        except (NotImplementedError, IndexError):
+            for layer in self.pretrained_models[model_idx][1][1]:
+                x = layer(x)
+            return x
+    def TL_edge_encoder(self, x, model_idx):
+        try:
+            for layer in self.pretrained_models[model_idx][2]:
+                x = layer(x)
+            return x
+        except (NotImplementedError, IndexError):
+            for layer in self.pretrained_models[model_idx][1][2]:
+                x = layer(x)
+            return x
+    def TL_global_encoder(self, x, model_idx):
+        try:
+            for layer in self.pretrained_models[model_idx][3]:
+                x = layer(x)
+            return x
+        except (NotImplementedError, IndexError):
+            for layer in self.pretrained_models[model_idx][1][3]:
+                x = layer(x)
+            return x
+    def TL_node_update(self, x, model_idx):
+        try:
+            for layer in self.pretrained_models[model_idx][4]:
+                x = layer(x)
+            return x
+        except (NotImplementedError, IndexError):
+            for layer in self.pretrained_models[model_idx][1][4]:
+                x = layer(x)
+            return x
+    def TL_edge_update(self, x, model_idx):
+        try:
+            for layer in self.pretrained_models[model_idx][5]:
+                x = layer(x)
+            return x
+        except (NotImplementedError, IndexError):
+            for layer in self.pretrained_models[model_idx][1][5]:
+                x = layer(x)
+            return x
+    def TL_global_update(self, x, model_idx):
+        try:
+            for layer in self.pretrained_models[model_idx][6]:
+                x = layer(x)
+            return x
+        except (NotImplementedError, IndexError):
+            for layer in self.pretrained_models[model_idx][1][6]:
+                x = layer(x)
+            return x
+    def TL_global_decoder(self, x, model_idx):
+        try:
+            for layer in self.pretrained_models[model_idx][7]:
+                x = layer(x)
+            return x
+        except (NotImplementedError, IndexError):
+            for layer in self.pretrained_models[model_idx][1][7]:
+                x = layer(x)
+            return x
+    def Pretrained_Output(self, g, model_idx):
+        h = self.TL_node_encoder(g.ndata['features'], model_idx)
+        e = self.TL_edge_encoder(g.edata['features'], model_idx)
+        g.ndata['h'] = h
+        g.edata['e'] = e
+        if not self.has_global:
+            global_feats = g.batch_num_nodes()[:, None].to(torch.float)
+        h_global = self.TL_global_encoder(global_feats, model_idx)
+        for i in range(self.n_proc_steps):
+            g.apply_edges(dgl.function.copy_u('h', 'm_u'))
+            g.apply_edges(copy_v)
+            g.edata['e'] = self.TL_edge_update(torch.cat((g.edata['e'], g.edata['m_u'], g.edata['m_v'], broadcast_global_to_edges(g, h_global)), dim = 1), model_idx)
+            g.update_all(dgl.function.copy_e('e', 'm'), dgl.function.sum('m', 'h_e'))
+            g.ndata['h'] = self.TL_node_update(torch.cat((g.ndata['h'], g.ndata['h_e'], broadcast_global_to_nodes(g, h_global)), dim = 1), model_idx)
+            h_global = self.TL_global_update(torch.cat((h_global, dgl.mean_nodes(g, 'h'), dgl.mean_edges(g, 'e')), dim = 1), model_idx)
+        # h_global = self.TL_global_decoder(h_global, model_idx)
+        return h_global
+    def forward(self, g, global_feats):
+        h = self.node_encoder(g.ndata['features'])
+        e = self.edge_encoder(g.edata['features'])
+        g.ndata['h'] = h
+        g.edata['e'] = e
+        if not self.has_global:
+            global_feats = g.batch_num_nodes()[:, None].to(torch.float)
+        h_global = self.global_encoder(global_feats)
+        for i in range(self.n_proc_steps):
+            g.apply_edges(dgl.function.copy_u('h', 'm_u'))
+            g.apply_edges(copy_v)
+            g.edata['e'] = self.edge_update(torch.cat((g.edata['e'], g.edata['m_u'], g.edata['m_v'], broadcast_global_to_edges(g, h_global)), dim = 1))
+            g.update_all(dgl.function.copy_e('e', 'm'), dgl.function.sum('m', 'h_e'))
+            g.ndata['h'] = self.node_update(torch.cat((g.ndata['h'], g.ndata['h_e'], broadcast_global_to_nodes(g, h_global)), dim = 1))
+            h_global = self.global_update(torch.cat((h_global, dgl.mean_nodes(g, 'h'), dgl.mean_edges(g, 'e')), dim = 1))
+        h_global = [h_global]
+        for i in range(len(self.pretrained_models)):
+            h_global.append(self.Pretrained_Output(g.clone(), i))
+        h_global = torch.concatenate(h_global, dim=1)
+        return self.classify(self.final_mlp(h_global))
+    def to(self, device):
+        for i in range(len(self.pretrained_models)):
+            self.pretrained_models[i].to(device)
+        self.classify.to(device)
+        self.final_mlp.to(device)
+        self.node_encoder.to(device)
+        self.edge_encoder.to(device)
+        self.global_encoder.to(device)
+        self.node_update.to(device)
+        self.edge_update.to(device)
+        self.global_update.to(device)
+        return self
+    def parameters(self, recurse: bool = True):
+        params = []
+        for i, model_section in enumerate(self.pretraining_params):
+            if (type(self.learning_rate) == dict and self.learning_rate["pretraining_lr"]):
+                print(f"Pretraining LR = {self.learning_rate['pretraining_lr'][i]}")
+                params.append({'params': model_section.parameters(), 'lr': self.learning_rate["pretraining_lr"][i]})
+            else:
+                print(f"Pretraining LR = 0.00001")
+                params.append({'params': model_section.parameters(), 'lr': 0.00001})
+        for model_section in self.model_params:
+            if (type(self.learning_rate) == dict and self.learning_rate["model_lr"]):
+                print(f"Model LR = {self.learning_rate['model_lr']}")
+                params.append({'params': model_section.parameters(), 'lr': self.learning_rate["model_lr"]})
+            else:
+                print(f"Model LR = 0.0001")
+                params.append({'params': model_section.parameters(), 'lr': 0.0001})
+        return params
+class Clustering(nn.Module):
+    def __init__(self, sample_graph, sample_global, hid_size, out_size, n_layers, n_proc_steps, dropout=0, **kwargs):
+        super().__init__()
+        print(f'Unused args while creating GCN: {kwargs}')
+        self.n_layers = n_layers
+        self.n_proc_steps = n_proc_steps
+        self.layers = nn.ModuleList()
+        if (len(sample_global) == 0):
+            self.has_global = False
+        else:
+            self.has_global = sample_global.shape[1] != 0
+        gl_size = sample_global.shape[1] if self.has_global else 1
+        #encoder
+        self.node_encoder = Make_MLP(sample_graph.ndata['features'].shape[1], hid_size, hid_size, n_layers, dropout=dropout)
+        self.edge_encoder = Make_MLP(sample_graph.edata['features'].shape[1], hid_size, hid_size, n_layers, dropout=dropout)
+        self.global_encoder = Make_MLP(gl_size, hid_size, hid_size, n_layers, dropout=dropout)
+        #GNN
+        self.node_update = Make_MLP(3*hid_size, hid_size, hid_size, n_layers, dropout=dropout)
+        self.edge_update = Make_MLP(4*hid_size, hid_size, hid_size, n_layers, dropout=dropout)
+        self.global_update = Make_MLP(3*hid_size, hid_size, hid_size, n_layers, dropout=dropout)
+        #decoder
+        self.global_decoder = Make_MLP(hid_size, hid_size, out_size, n_layers, dropout=dropout)
+    def model_forward(self, g, global_feats, features = 'features'):
+        h = self.node_encoder(g.ndata[features])
+        e = self.edge_encoder(g.edata[features])
+        g.ndata['h'] = h
+        g.edata['e'] = e
+        if not self.has_global:
+            global_feats = g.batch_num_nodes()[:, None].to(torch.float)
+        batch_num_nodes = None
+        sum_weights = None
+        if "w" in g.ndata:
+            batch_indices = g.batch_num_nodes()
+            # Find non-zero rows (non-padded nodes)
+            non_padded_nodes_mask = torch.any(g.ndata[features] != 0, dim=1)
+            # Split the mask according to the batch indices
+            batch_num_nodes = []
+            start_idx = 0
+            for num_nodes in batch_indices:
+                end_idx = start_idx + num_nodes
+                non_padded_count = non_padded_nodes_mask[start_idx:end_idx].sum().item()
+                batch_num_nodes.append(non_padded_count)
+                start_idx = end_idx
+            batch_num_nodes = torch.tensor(batch_num_nodes, device = g.ndata[features].device)
+            sum_weights = batch_num_nodes[:, None].repeat(1, 64)
+            global_feats = batch_num_nodes[:, None].to(torch.float)
+        h_global = self.global_encoder(global_feats)
+        for i in range(self.n_proc_steps):
+            g.apply_edges(dgl.function.copy_u('h', 'm_u'))
+            g.apply_edges(copy_v)
+            g.edata['e'] = self.edge_update(torch.cat((g.edata['e'], g.edata['m_u'], g.edata['m_v'], broadcast_global_to_edges(g, h_global)), dim = 1))
+            g.update_all(dgl.function.copy_e('e', 'm'), dgl.function.sum('m', 'h_e'))
+            g.ndata['h'] = self.node_update(torch.cat((g.ndata['h'], g.ndata['h_e'], broadcast_global_to_nodes(g, h_global)), dim = 1))
+            if "w" in g.ndata:
+                mean_nodes = dgl.sum_nodes(g, 'h', 'w') / sum_weights
+                h_global = self.global_update(torch.cat((h_global, mean_nodes, dgl.mean_edges(g, 'e')), dim = 1))
+            else:
+                h_global = self.global_update(torch.cat((h_global, dgl.mean_nodes(g, 'h'), dgl.mean_edges(g, 'e')), dim = 1))
+        h_global = self.global_decoder(h_global)
+        return h_global
+    def forward(self, g, global_feats):
+        h_global = self.model_forward(g, global_feats, 'features')
+        h_global_augmented = self.model_forward(g, global_feats, 'augmented_features')
+        return torch.cat((h_global, h_global_augmented), dim=1)
+    def representation(self, g, global_feats):
+        h_global = self.model_forward(g, global_feats, 'features')
+        h_global_augmented = self.model_forward(g, global_feats, 'augmented_features')
+        return h_global, h_global_augmented, torch.cat((h_global, h_global_augmented), dim=1)
+    def __str__(self):
+        layer_names = ["node_encoder", "edge_encoder", "global_encoder",
+                  "node_update", "edge_update", "global_update", "global_decoder"]
+        layers = [self.node_encoder, self.edge_encoder, self.global_encoder,
+                  self.node_update, self.edge_update, self.global_update, self.global_decoder]
+        for i in range(len(layers)):
+            print(layer_names[i])
+            for layer in layers[i].children():
+                if isinstance(layer, nn.Linear):
+                    print(layer.state_dict())
+        print("classify")
+        print(self.classify.weight)
+        return ""

models/__pycache__/GCN.cpython-38.pyc ADDED Viewed

Binary file (57 kB). View file

models/__pycache__/loss.cpython-38.pyc ADDED Viewed

Binary file (11.4 kB). View file

models/loss.py ADDED Viewed

	@@ -0,0 +1,311 @@

+from torch import nn
+import torch
+from root_gnn_base import utils
+import numpy as np
+class MaskedLoss():
+    def __init__(self, mask = []):
+        self.mask = mask
+    def make_mask(self, targets):
+        mask = torch.ones_like(targets[:,0])
+        for m in self.mask:
+            if m['op'] == 'eq':
+                mask[targets[:,m['idx']] == m['val']] = 0
+            elif m['op'] == 'gt':
+                mask[targets[:,m['idx']] > m['val']] = 0
+            elif m['op'] == 'lt':
+                mask[targets[:,m['idx']] < m['val']] = 0
+            elif m['op'] == 'ge':
+                mask[targets[:,m['idx']] >= m['val']] = 0
+            elif m['op'] == 'le':
+                mask[targets[:,m['idx']] <= m['val']] = 0
+            elif m['op'] == 'ne':
+                mask[targets[:,m['idx']] != m['val']] = 0
+            else:
+                raise ValueError(f'Unknown mask op {m["op"]}')
+        return mask == 1
+class MaskedL1Loss(MaskedLoss):
+    def __init__(self, mask = [], index = 0):
+        super().__init__(mask)
+        self.index = index
+        self.loss = nn.L1Loss()
+    def __call__(self, logits, targets):
+        mask = self.make_mask(targets)
+        return self.loss(logits[mask], targets[mask][:,self.index])
+class BCEWithLogitsLoss():
+    def __init__(self, weight=None, reduction='mean'):
+        self.loss = nn.BCEWithLogitsLoss(weight=weight, reduction=reduction)
+    def __call__(self, logits, targets):
+        return self.loss(logits[:,0], targets.float())
+class MultiScore():
+    def __init__(self, scores):
+        self. score_fcns = []
+        self.start_idx = []
+        self.end_idx = []
+        for score in scores:
+            self.score_fcns.append(utils.buildFromConfig(score))
+            self.start_idx.append(score['start_idx'])
+            self.end_idx.append(score['end_idx'])
+    def __call__(self, last_layer):
+        scores = []
+        for i in range(len(self.score_fcns)):
+            scores.append(self.score_fcns[i](last_layer[:, self.start_idx[i]:self.end_idx[i]]))
+        return torch.cat(scores, dim=1)
+class MultiLoss():
+    def __init__(self, losses):
+        self.loss_fcns = []
+        self.label_start_idx = []
+        self.label_end_idx = []
+        self.output_start_idx = []
+        self.output_end_idx = []
+        self.weights = []
+        self.label_types = []
+        for loss in losses:
+            self.loss_fcns.append(utils.buildFromConfig(loss))
+            self.label_start_idx.append(loss['label_start_idx'])
+            self.label_end_idx.append(loss['label_end_idx'])
+            self.output_start_idx.append(loss['output_start_idx'])
+            self.output_end_idx.append(loss['output_end_idx'])
+            self.weights.append(loss.get('weight', 1.0))
+            self.label_types.append(loss.get('label_type', 'float'))
+    def __call__(self, logits, targets):
+        loss = 0
+        # print(logits.shape, targets.shape)
+        for i in range(len(self.loss_fcns)):
+            if self.label_types[i] == 'int':
+                # print('loss', i, self.label_start_idx[i], self.label_end_idx[i], self.output_start_idx[i], self.output_end_idx[i])
+                # print(logits[:, self.output_start_idx[i]:self.output_end_idx[i]].shape, targets[:, self.label_start_idx[i]].shape)
+                loss += self.weights[i] * self.loss_fcns[i](logits[:, self.output_start_idx[i]:self.output_end_idx[i]], targets[:, self.label_start_idx[i]].to(int))
+            elif self.label_end_idx[i] - self.label_start_idx[i] == 1:
+                loss += self.weights[i] * self.loss_fcns[i](logits[:, self.output_start_idx[i]:self.output_end_idx[i]], targets[:, self.label_start_idx[i]])
+            else:
+                # print('loos', i, self.label_start_idx[i], self.label_end_idx[i], self.output_start_idx[i], self.output_end_idx[i])
+                # print(logits[:, self.output_start_idx[i]:self.output_end_idx[i]].shape, targets[:, self.label_start_idx[i]:self.label_end_idx[i]].shape)
+                loss += self.weights[i] * self.loss_fcns[i](logits[:, self.output_start_idx[i]:self.output_end_idx[i]], targets[:, self.label_start_idx[i]:self.label_end_idx[i]])
+        return loss
+class AdvLoss():
+    def __init__(self, loss, adv_loss, adv_weight=1.0):
+        self.loss_fcn = utils.buildFromConfig(loss)
+        self.adv_loss_fcn = utils.buildFromConfig(adv_loss)
+        self.adv_weight = adv_weight
+    def __call__(self, logits, targets):
+        mask = targets[:,0] == 0
+        loss = self.loss_fcn(logits[:,0], targets[:,0])
+        adv_loss = self.adv_loss_fcn(logits[mask][:,1], targets[mask])
+        return loss - self.adv_weight * adv_loss
+class MassWindowAdvLoss(AdvLoss):
+    def __call__(self, logits, targets):
+        mask = (targets[:,0] == 0) & (targets[:,1] > 5) & (targets[:,1] < 25)
+        print(mask, mask.shape, mask.sum())
+        loss = self.loss_fcn(logits[:,0], targets[:,0])
+        print(loss)
+        adv_loss = self.adv_loss_fcn(logits[mask][:,1], targets[mask][:,1])
+        print(adv_loss)
+        return loss - self.adv_weight * adv_loss
class KDELoss(MaskedLoss):
    """Gaussian-kernel loss coupling one target column with the first logit column.

    NOTE(review): the three kernel sums look like a kernel-density estimate of
    the integrated squared difference between the joint density of
    (target, score) and the product of their marginals -- confirm the intended
    derivation before relying on this description.

    Args:
        mask: Forwarded to ``MaskedLoss.__init__``; ``None`` means an empty list.
        index: Column of ``targets`` used as the first variable.
    """
    def __init__(self, mask = None, index = 0):
        self.index = index
        # A fresh list per instance avoids sharing one mutable default object.
        super().__init__([] if mask is None else mask)
    def __call__(self, logits, targets):
        """Evaluate the loss on the rows selected by ``self.make_mask(targets)``."""
        mask = self.make_mask(targets)
        logits = logits[mask]
        targets = targets[mask][:,self.index]
        N = logits.shape[0]
        # Both variables are rescaled by a root-mean-square; the scores use the
        # RMS over *all* selected logit columns, not only column 0.
        masses = targets / torch.sqrt(torch.mean(targets**2))
        scores = logits[:,0]  / torch.sqrt(torch.mean(logits**2))
        # Kernel widths: N**(-1/3) times the variance of each variable.
        factor_2d = (1.0*N) ** (-2/6)
        covs = (factor_2d * torch.var(masses), factor_2d * torch.var(scores))
        # Pairwise differences, shape (N, N).
        m_diffs = torch.unsqueeze(masses, 1) - torch.unsqueeze(masses, 0)
        s_diffs = torch.unsqueeze(scores, 1) - torch.unsqueeze(scores, 0)
        ymm = torch.exp(- (m_diffs**2) / (4 * covs[0]))
        yss = torch.exp(- (s_diffs**2) / (4 * covs[1]))
        # sum_ij ymm_ij * yss_ij
        integral_rho_2d_rho_2d = torch.einsum('ij,ij->', ymm, yss)
        # (sum_ij ymm_ij) * (sum_kl yss_kl)
        integral_rho_1d_rho_1d = torch.einsum('ij,kl->', ymm, yss)
        # sum_i (sum_j ymm_ij) * (sum_k yss_ik)
        integral_rho_2d_rho_1d = torch.einsum('ij,ik->', ymm, yss)
        raw_integral = integral_rho_2d_rho_2d - 2 * integral_rho_2d_rho_1d / N + integral_rho_1d_rho_1d / N**2
        return raw_integral / (4 * torch.pi * N**2)
class MultiLabelLoss():
    """Weighted sum of per-label losses, averaged over the batch.

    Args:
        label_names: Accepted for configuration symmetry; not used here.
        label_types: One entry per output column. ``"r"`` uses an element-wise
            ``MSELoss``, ``"c"`` uses ``BCEWithLogitsLoss``. Any other value
            leaves that column out of the loss.
        label_weights: Optional per-column weights, indexed like
            ``label_types``. Defaults to all ones.
    """
    def __init__(self, label_names, label_types, label_weights = None):
        self.loss_fcn = []
        # Column of logits/targets each entry of ``loss_fcn`` applies to. Kept
        # separately so an unrecognised label type cannot shift later losses
        # onto the wrong column.
        self.label_idx = []
        if label_weights:
            self.weights = torch.tensor(label_weights)
        else:
            self.weights = torch.ones(len(label_types))
        for column, label_type in enumerate(label_types):
            if label_type == "r":
                fcn = torch.nn.MSELoss(reduction='none')
            elif label_type == "c":
                fcn = torch.nn.BCEWithLogitsLoss()
            else:
                continue
            self.loss_fcn.append(fcn)
            self.label_idx.append(column)
        print(f"self.weights = {self.weights}")
    def __call__(self, logits, targets):
        """Return the batch mean of the weighted per-label losses.

        ``logits`` and ``targets`` are indexed as ``[:, column]``; targets are
        cast to float first.
        """
        targets = targets.float()
        # ``logits.device`` is valid on CPU as well; ``get_device()`` returns -1
        # there, which ``torch.zeros`` rejects.
        loss = torch.zeros(logits.shape[0], device = logits.device)
        for column, fcn in zip(self.label_idx, self.loss_fcn):
            # The scalar BCE mean broadcasts over the per-sample vector.
            loss += self.weights[column] * fcn(logits[:, column], targets[:, column])
        return torch.mean(loss)
class MultiLabelFinish():
    """Post-process raw outputs: sigmoid on ``"c"`` columns, everything else untouched.

    Args:
        label_names: Accepted for configuration symmetry; not used here.
        label_types: One entry per output column. ``"c"`` columns are passed
            through ``torch.special.expit``; all other columns are left as is.
    """
    def __init__(self, label_names, label_types):
        # One slot per column (``None`` = no transform) so indices always line
        # up with the logit columns, even for unrecognised label types.
        self.finish_fcn = []
        for label_type in label_types:
            if label_type == "c":
                self.finish_fcn.append(torch.special.expit)
            else:
                self.finish_fcn.append(None)
    def __call__(self, logits):
        """Transform ``logits`` in place and return the same tensor."""
        for i, fcn in enumerate(self.finish_fcn):
            if fcn is not None:
                # Apply the sigmoid to the floating-point logits directly;
                # casting to an integer dtype first would truncate them.
                logits[:, i] = fcn(logits[:, i])
        return logits
class ContrastiveClusterLoss():
    """Contrastive + clustering + variance loss on a pair of embedding views.

    The model output is split column-wise into two halves: the first half is
    the embedding, the second half the embedding of the augmented input.
    """
    def __init__(self, k=10, temperature=1, alpha=1):
        self.k, self.temperature, self.alpha = k, temperature, alpha
    def __call__(self, logits, targets):
        """Return the mean of contrastive + clustering + alpha * variance terms."""
        # The float copy of the targets is not used by any term below.
        targets = targets.float()
        combined = logits.float()
        half = len(logits[0]) // 2
        # Both halves are scaled to unit row norm before any term is computed.
        view = normalize_embeddings(combined[:, :half])
        view_augmented = normalize_embeddings(combined[:, half:])
        contrastive_term = contrastive_loss(view, view_augmented, self.temperature)
        clustering_term, _ = clustering_loss(view, self.k)
        variance_term = variance_regularization(view) + variance_regularization(view_augmented)
        return torch.mean(contrastive_term + clustering_term + self.alpha * variance_term)
class ContrastiveClusterFinish():
    """Report the contrastive, clustering and variance terms separately.

    Unlike ``ContrastiveClusterLoss`` the two halves are used as they come,
    without row normalisation, and the k-means iteration cap is configurable.
    """
    def __init__(self, k = 10, temperature = 1, max_cluster_iterations = 10):
        self.max_cluster_iterations = max_cluster_iterations
        self.temperature = temperature
        self.k = k
        print(f"ContrastiveClusterFinish: k = {k}, temperature = {temperature}")
    def __call__(self, logits):
        """Return ``(contrastive, clustering, variance)`` for the two halves of ``logits``."""
        combined = logits.float()
        half = len(logits[0]) // 2
        view, view_augmented = combined[:, :half], combined[:, half:]
        contrastive_term = contrastive_loss(view, view_augmented, self.temperature)
        clustering_term, _ = clustering_loss(view, self.k, self.max_cluster_iterations)
        variance_term = variance_regularization(view) + variance_regularization(view_augmented)
        return contrastive_term, clustering_term, variance_term
def s(z_i, z_j):
    """Return the matrix of pairwise Euclidean distances between rows of ``z_i`` and ``z_j``.

    Non-tensor inputs are converted with ``torch.tensor`` first.
    """
    # An earlier cosine-similarity formulation (dot product over the product
    # of the norms) was replaced by this distance.
    if not isinstance(z_i, torch.Tensor):
        z_i = torch.tensor(z_i)
    if not isinstance(z_j, torch.Tensor):
        z_j = torch.tensor(z_j)
    return torch.cdist(z_i, z_j, p=2)
def contrastive_loss(logits, logits_augmented, temperature=1, margin=1.0):
    """Sum of cross-entropy terms pulling each embedding towards its augmented twin.

    For anchor row ``k`` of ``logits`` the positive is row ``k`` of
    ``logits_augmented``; every other row of the concatenated batch (except
    the anchor itself) appears in the denominator. Similarities are cosine
    similarities divided by ``temperature``.

    Args:
        logits: ``(N, D)`` floating-point embeddings (tensor or nested list).
        logits_augmented: Embeddings of the augmented inputs, same layout.
        temperature: Divides the cosine similarities.
        margin: Unused; kept for interface compatibility.

    Returns:
        A zero-dim tensor with the summed loss, or ``0`` when ``logits`` has
        no rows.
    """
    if not isinstance(logits, torch.Tensor):
        logits = torch.tensor(logits)
    if not isinstance(logits_augmented, torch.Tensor):
        logits_augmented = torch.tensor(logits_augmented)
    n_anchor = len(logits)
    if n_anchor == 0:
        return 0
    z = torch.cat((logits, logits_augmented), dim=0)
    z = z / torch.linalg.norm(z, dim=1, keepdim=True)
    # Only the first N rows act as anchors, so only those rows are needed.
    anchor_sims = torch.mm(z[:n_anchor], z.t()) / temperature
    rows = torch.arange(n_anchor, device=anchor_sims.device)
    positives = anchor_sims[rows, rows + n_anchor]
    # Exclude each anchor's similarity with itself from the denominator.
    self_pairs = torch.zeros_like(anchor_sims, dtype=torch.bool)
    self_pairs[rows, rows] = True
    # -log(exp(pos) / sum(exp(others))) == logsumexp(others) - pos. Computing it
    # in log space avoids the inf/inf = NaN that exp() gives for small
    # temperatures.
    log_denominator = torch.logsumexp(anchor_sims.masked_fill(self_pairs, float('-inf')), dim=1)
    return torch.sum(log_denominator - positives)
def clustering_loss(logits, k=10, max_iterations=10):
    """Run a short k-means on ``logits`` and return the within-cluster squared distance.

    Args:
        logits: ``(N, D)`` tensor of points.
        k: Requested number of clusters; capped at ``N`` so that small batches
            do not fail.
        max_iterations: Upper bound on assignment/update rounds.

    Returns:
        ``(loss, cluster_means)`` where ``loss`` is the sum over points of the
        squared distance to the nearest cluster mean. An empty ``logits``
        yields a zero loss and an empty set of means.

    Initial centres (and replacements for empty clusters) are drawn at random,
    so results depend on the torch RNG state.
    """
    n_points = logits.size(0)
    if n_points == 0:
        return logits.new_zeros(()), logits[:0]
    # More clusters than points cannot be initialised from distinct points.
    k = min(k, n_points)
    # Step 1: Initialize cluster means from k distinct random points
    indices = torch.randperm(n_points)[:k]
    cluster_means = logits[indices]
    assignment_history = []
    for _ in range(max_iterations):
        # Step 2: Assign each data point to the nearest cluster mean
        distances = torch.cdist(logits, cluster_means, p=2)
        cluster_assignments = torch.argmin(distances, dim=1)
        # Stop on convergence or on a cycle: both show up as an assignment
        # that has been seen before (the previous one included).
        if any(torch.equal(cluster_assignments, prev) for prev in assignment_history):
            break
        assignment_history.append(cluster_assignments.clone())
        # Step 3: Update cluster means based on assignments
        new_cluster_means = torch.zeros_like(cluster_means)
        for i in range(k):
            assigned_points = logits[cluster_assignments == i]
            if assigned_points.size(0) > 0:
                new_cluster_means[i] = assigned_points.mean(dim=0)
            else:
                # If no points are assigned to the cluster, reinitialize the mean randomly
                new_cluster_means[i] = logits[torch.randint(0, n_points, (1,)).item()]
        cluster_means = new_cluster_means
    # Step 4: Compute the clustering loss
    distances = torch.cdist(logits, cluster_means, p=2)
    min_distances = torch.min(distances, dim=1)[0]
    loss = torch.sum(min_distances ** 2)
    return loss, cluster_means
def normalize_embeddings(embeddings):
    """Divide every row of ``embeddings`` by its norm taken along dim 1."""
    row_norms = embeddings.norm(dim=1, keepdim=True)
    return torch.div(embeddings, row_norms)
def variance_regularization(embeddings):
    """Return the mean squared deviation of ``embeddings`` from their dim-0 mean."""
    centered = embeddings - embeddings.mean(dim=0)
    return torch.mean(centered ** 2)

root_gnn_base/batched_dataset.py ADDED Viewed

	@@ -0,0 +1,190 @@

+from dgl.dataloading import GraphDataLoader
+from torch.utils.data.sampler import SubsetRandomSampler
+from torch.utils.data.sampler import SequentialSampler
+from dgl.data import DGLDataset
+import torch
+import time
+import os
+import dgl
+from root_gnn_base import utils
def GetBatchedLoader(dataset, batch_size, mask_fn = None, drop_last=True, **kwargs):
    """Build a randomly sampling ``GraphDataLoader`` over the selected events.

    Args:
        dataset: Dataset to draw graphs from.
        batch_size: Number of graphs per batch.
        mask_fn: Optional callable mapping the dataset to a boolean mask over
            its entries; ``None`` selects every entry.
        drop_last: Forwarded to the loader.
        **kwargs: Accepted and ignored.
    """
    if mask_fn is None:
        selected = torch.ones(len(dataset), dtype=torch.bool)
    else:
        selected = mask_fn(dataset)
    indices = torch.arange(len(dataset))[selected]
    dloader = GraphDataLoader(dataset, sampler=SubsetRandomSampler(indices), batch_size=batch_size, drop_last=drop_last, num_workers = 0)
    return dloader
# Dataset which contains prebatched (optionally shuffled and padded) graphs. The
# per-graph batching info is stored next to the graphs in save() and re-applied in load().
class PreBatchedDataset(DGLDataset):
    """Batch ``start_dataset`` once up front and serve whole batches as items.

    Each item is ``(batched_graph, labels, tracking, globals)``. The dataset can
    be produced in ``chunks`` pieces: an instance with ``chunkno = c`` processes
    and saves only chunk ``c``, while ``has_cache``/``load`` expect the files of
    all chunks ``0 .. chunks-1`` to exist.

    Args:
        start_dataset: Source dataset; its ``load()`` is called immediately.
        batch_size: Graphs per batch.
        mask_fn: Optional callable returning a boolean mask over the source
            dataset; ``None`` selects every entry.
        drop_last: Forwarded to the data loader.
        save_to_disk: When false, ``save``/``load`` do nothing and
            ``has_cache`` reports no cache.
        suffix: Appended to the cache file names.
        chunks: Total number of chunks.
        chunkno: Chunk handled by this instance (negative = whole range).
        shuffle: Random sampling of the masked chunk range when true,
            sequential sampling otherwise.
        padding_mode: ``'STEPS'``, ``'FIXED'``, ``'NODE'`` or ``'NONE'``.
        **kwargs: Reported and ignored.
    """
    def __init__(self, start_dataset, batch_size, mask_fn = None, drop_last=True, save_to_disk = True, suffix = '', chunks = 1, chunkno = -1, shuffle = True, padding_mode = 'NONE', **kwargs):
        print(f'Unused kwargs: {kwargs}')
        self.start_dataset = start_dataset
        self.start_dataset.load()
        self.batch_size = batch_size
        self.chunks = chunks
        self.chunkno = chunkno
        self.mask_fn = mask_fn
        self.drop_last = drop_last
        self.graphs = []
        self.label = []  # legacy attribute name, kept for backward compatibility
        self.labels = []
        self.tracking = []
        self.globals = []
        self.padding_mode = padding_mode
        self.save_to_disk = save_to_disk
        self.shuffle = shuffle
        self.suffix = suffix
        self.current_chunk = None
        self.current_chunk_idx = -1
        # All attributes must exist before the base initialiser runs, since the
        # DGLDataset pipeline (has_cache/load or process/save) reads them.
        super().__init__(name = start_dataset.name + '_prebatched_padded', save_dir=start_dataset.save_dir)
    def _chunk_path(self, chunk):
        """Return the cache file path for chunk number ``chunk``."""
        return os.path.join(self.save_dir, f'{self.name}_{chunk}_{self.suffix}.bin')
    def process(self):
        """Batch the selected events of this chunk and pad the batches."""
        n_events = len(self.start_dataset)
        first = 0
        last = n_events
        if self.chunks > 1 and self.chunkno >= 0:
            first = int(self.chunkno / self.chunks * n_events)
            last = int((self.chunkno + 1) / self.chunks * n_events)
        print(f'Processing chunk {self.chunkno} of {self.chunks} from {first} to {last} of {len(self.start_dataset)}')
        positions = torch.arange(n_events)
        # A missing mask function selects every event (same convention as
        # GetBatchedLoader) instead of failing on a call to None.
        if self.mask_fn is None:
            selected = torch.ones(n_events, dtype=torch.bool)
        else:
            selected = self.mask_fn(self.start_dataset)
        mask = torch.logical_and(torch.logical_and(selected, positions >= first), positions < last)
        if self.shuffle:
            dloader = GraphDataLoader(self.start_dataset, sampler=SubsetRandomSampler(positions[mask]), batch_size=self.batch_size, drop_last=self.drop_last)
        else: #Only don't shuffle if we're doing inference. Then we want all of the events anyways?
            # NOTE(review): this branch ignores both the mask and the chunk
            # range, so with chunks > 1 every chunk would hold the full
            # dataset -- confirm this is intended.
            dloader = GraphDataLoader(self.start_dataset, sampler=SequentialSampler(self.start_dataset), batch_size=self.batch_size, drop_last=self.drop_last)
        self.graphs = []
        self.labels = []
        self.tracking = []
        self.globals = []
        self.batch_num_nodes = []
        self.batch_num_edges = []
        max_edges = 0
        max_nodes = 0
        load_batch_start = time.time()
        for batch, label, tracking, global_feat in dloader:
            max_edges = max(max_edges, batch.num_edges())
            max_nodes = max(max_nodes, batch.num_nodes())
            self.graphs.append(batch)
            self.labels.append(label)
            self.tracking.append(tracking)
            self.globals.append(global_feat)
        load_batch_end = time.time()
        print(f'Loaded {len(self.graphs)} batches in {load_batch_end - load_batch_start} seconds')
        if self.padding_mode == 'STEPS':
            pad_node, pad_edge = utils.pad_size(self.batch_size, max_edges, max_nodes)
        elif self.padding_mode == 'FIXED':
            print('Padding to fixed size. This is currently hardcoded.')
            pad_node = 16000
            pad_edge = 104000
        else:
            # 'NONE', 'NODE' and any unrecognised mode use zero padding targets.
            pad_node = 0
            pad_edge = 0
        print(f'Max edges: {max_edges}, Max nodes: {max_nodes}, Padding to {pad_edge} edges and {pad_node} nodes')
        pad_start = time.time()
        for i in range(len(self.graphs)):
            if self.padding_mode == 'NODE':
                # Pad relative to the largest graph of this batch.
                unbatched_g = dgl.unbatch(self.graphs[i])
                max_num_nodes = max(g.number_of_nodes() for g in unbatched_g)
                self.graphs[i] = utils.pad_batch_num_nodes(self.graphs[i], max_num_nodes)
            else:
                self.graphs[i] = utils.pad_batch(self.graphs[i], pad_edge, pad_node)
            self.batch_num_nodes.append(self.graphs[i].batch_num_nodes())
            self.batch_num_edges.append(self.graphs[i].batch_num_edges())
        pad_end = time.time()
        print(f'Padded {len(self.graphs)} batches in {pad_end - pad_start} seconds')
    def save(self):
        """Write this chunk's batches, labels and batching info to its cache file."""
        if not self.save_to_disk:
            return
        graph_path = self._chunk_path(self.chunkno)
        print(f'Saving dataset to {graph_path}')
        if len(self.graphs) == 0:
            return
        # torch.stack needs equally shaped entries for every batch.
        dgl.save_graphs(str(graph_path), self.graphs, {'labels': torch.stack(self.labels), 'batch_num_nodes': torch.stack(self.batch_num_nodes), 'batch_num_edges': torch.stack(self.batch_num_edges), 'tracking': torch.stack(self.tracking), 'globals': torch.stack(self.globals)})
    def has_cache(self):
        """Return True only if the cache files of all chunks ``0 .. chunks-1`` exist."""
        if not self.save_to_disk:
            return False
        for ch in range(self.chunks):
            graph_path = self._chunk_path(ch)
            if not os.path.exists(graph_path):
                print(f'Cache file {graph_path} does not exist, not loading from cache.')
                return False
        return True
    def load(self):
        """Load every chunk file and re-apply the stored batching info to each graph."""
        if not self.save_to_disk:
            return
        self.graphs = []
        label_chunks = []
        tracking_chunks = []
        global_chunks = []
        for ch in range(self.chunks):
            graph_path = self._chunk_path(ch)
            print(f'Loading dataset from {graph_path}')
            graphs, label_dict = dgl.load_graphs(graph_path)
            label_chunks.append(label_dict['labels'])
            tracking_chunks.append(label_dict['tracking'])
            global_chunks.append(label_dict['globals'])
            for g, bnn, bne in zip(graphs, label_dict['batch_num_nodes'], label_dict['batch_num_edges']):
                g.set_batch_num_nodes(bnn)
                g.set_batch_num_edges(bne)
            self.graphs.extend(graphs)
        self.labels = torch.cat(label_chunks)
        self.tracking = torch.cat(tracking_chunks)
        self.globals = torch.cat(global_chunks)
    def __getitem__(self, idx):
        """Return ``(graph, labels, tracking, globals)`` of batch ``idx``."""
        return self.graphs[idx], self.labels[idx], self.tracking[idx], self.globals[idx]
    def __len__(self):
        """Number of batches held in memory."""
        return len(self.graphs)
# Variant of PreBatchedDataset that keeps only the label dictionaries in memory
# and reads the graphs from disk one chunk file at a time.
class LazyPreBatchedDataset(PreBatchedDataset):
    """Prebatched dataset that loads graph chunks on demand.

    ``load()`` reads only the label dictionaries of every chunk file;
    ``__getitem__`` loads the graphs of the chunk containing the requested
    batch and keeps that single chunk cached.

    NOTE(review): ``label_chunks`` is only filled by ``load()``; the length is 0
    whenever ``load()`` has not run on this instance.
    """
    def __init__(self, **kwargs):
        # print(f'Unused kwargs: {kwargs}')
        self.current_chunk = None
        self.current_chunk_idx = -10
        self.label_chunks = []
        super().__init__(**kwargs)
    def load(self):
        """Read the label dictionary (not the graphs) of every chunk file."""
        if not self.save_to_disk:
            return
        self.label_chunks = []
        for ch in range(self.chunks):
            graph_path = os.path.join(self.save_dir, f'{self.name}_{ch}_{self.suffix}.bin')
            print(f'Loading dataset from {graph_path}')
            label_dict = dgl.data.graph_serialize.load_labels_v2(graph_path)
            self.label_chunks.append(label_dict)
    def __getitem__(self, idx):
        """Return ``(graph, labels, tracking, globals)`` of batch ``idx``.

        Raises:
            IndexError: If ``idx`` is outside ``[-len(self), len(self))``.
        """
        total = len(self)
        # int() also accepts zero-dim index tensors and never mutates the caller's object.
        position = int(idx)
        if position < 0:
            position += total
        if position < 0 or position >= total:
            raise IndexError(f'index {idx} is out of range for dataset of length {total}')
        # Locate the chunk holding this batch and the offset inside it.
        offset = 0
        for chunk_idx, chunk in enumerate(self.label_chunks):
            count = len(chunk['labels'])
            if position < offset + count:
                ev_idx = position - offset
                break
            offset += count
        if chunk_idx != self.current_chunk_idx:
            # print(f"rank {self.rank} getting data from {self.name}_{chunk_idx}_{self.suffix}.bin")
            self.current_chunk, _ = dgl.load_graphs(os.path.join(self.save_dir, f'{self.name}_{chunk_idx}_{self.suffix}.bin'))
            self.current_chunk_idx = chunk_idx
        labels = self.label_chunks[chunk_idx]
        g = self.current_chunk[ev_idx]
        g.set_batch_num_nodes(labels['batch_num_nodes'][ev_idx])
        g.set_batch_num_edges(labels['batch_num_edges'][ev_idx])
        return g, labels['labels'][ev_idx], labels['tracking'][ev_idx], labels['globals'][ev_idx]
    def __len__(self):
        """Total number of batches across all loaded label chunks."""
        return sum(len(chunk['labels']) for chunk in self.label_chunks)

root_gnn_base/custom_scheduler.py ADDED Viewed

	@@ -0,0 +1,565 @@

+import types
+import math
+import torch
+from torch import inf
+from functools import wraps, partial
+import warnings
+import weakref
+from collections import Counter
+from bisect import bisect_right
+from models import GCN
### Code from: https://pytorch.org/docs/stable/_modules/torch/optim/lr_scheduler.html#ReduceLROnPlateau
# Short alias used by the isinstance checks of the scheduler classes below.
Optimizer = torch.optim.Optimizer
# NOTE(review): this list names many schedulers (e.g. 'LambdaLR', 'ReduceLROnPlateau')
# that are not defined in the visible part of this module; if they are not defined
# further down, `from <module> import *` raises AttributeError -- verify.
__all__ = ['LambdaLR', 'MultiplicativeLR', 'StepLR', 'MultiStepLR', 'ConstantLR', 'LinearLR',
           'ExponentialLR', 'SequentialLR', 'CosineAnnealingLR', 'ChainedScheduler', 'ReduceLROnPlateau',
           'CyclicLR', 'CosineAnnealingWarmRestarts', 'OneCycleLR', 'PolynomialLR', 'LRScheduler']
# Warning text emitted when a caller passes an explicit `epoch` to a scheduler's step().
EPOCH_DEPRECATION_WARNING = (
    "The epoch parameter in `scheduler.step()` was not necessary and is being "
    "deprecated where possible. Please use `scheduler.step()` to step the "
    "scheduler. During the deprecation, if epoch is different from None, the "
    "closed form is used instead of the new chainable form, where available. "
    "Please open an issue if you are unable to replicate your use case: "
    "https://github.com/pytorch/pytorch/issues/new/choose."
)
def update_LR(opt, lr):
    """Overwrite the ``'lr'`` entry of every parameter group of ``opt`` with ``lr``."""
    for group in opt.param_groups:
        group.update(lr=lr)
def print_LR(opt):
    """Print the current learning rate of every parameter group of ``opt``, one per line."""
    rates = (group['lr'] for group in opt.param_groups)
    for rate in rates:
        print(f"LR = {rate}")
def _check_verbose_deprecated_warning(verbose):
    """Warn when ``verbose`` was set explicitly and return the effective flag.

    The sentinel ``"deprecated"`` means "not set" and maps to ``False`` without
    a warning; any other value triggers a ``UserWarning`` and is returned as is.
    """
    if verbose == "deprecated":
        return False
    warnings.warn("The verbose parameter is deprecated. Please use get_last_lr() "
                  "to access the learning rate.", UserWarning)
    return verbose
class LRScheduler:
    """Base class for learning-rate schedulers (vendored from PyTorch).

    Subclasses implement ``get_lr()``; ``step()`` advances ``last_epoch`` and
    writes the returned values into the optimizer's parameter groups.
    """
    def __init__(self, optimizer, last_epoch=-1, verbose="deprecated"):
        """Attach to ``optimizer``, record base learning rates and perform the initial step.

        Raises:
            TypeError: If ``optimizer`` is not an ``Optimizer``.
            KeyError: If ``last_epoch != -1`` and a parameter group lacks ``'initial_lr'``.
        """
        # Attach optimizer
        if not isinstance(optimizer, Optimizer):
            raise TypeError(f'{type(optimizer).__name__} is not an Optimizer')
        self.optimizer = optimizer
        # Initialize epoch and base learning rates
        if last_epoch == -1:
            for group in optimizer.param_groups:
                group.setdefault('initial_lr', group['lr'])
        else:
            for i, group in enumerate(optimizer.param_groups):
                if 'initial_lr' not in group:
                    raise KeyError("param 'initial_lr' is not specified "
                                   f"in param_groups[{i}] when resuming an optimizer")
        self.base_lrs = [group['initial_lr'] for group in optimizer.param_groups]
        self.last_epoch = last_epoch
        # Following https://github.com/pytorch/pytorch/issues/20124
        # We would like to ensure that `lr_scheduler.step()` is called after
        # `optimizer.step()`
        def with_counter(method):
            # Wrap a bound `optimizer.step` so every call increments the
            # optimizer's `_step_count` before delegating to the original.
            if getattr(method, '_with_counter', False):
                # `optimizer.step()` has already been replaced, return.
                return method
            # Keep a weak reference to the optimizer instance to prevent
            # cyclic references.
            instance_ref = weakref.ref(method.__self__)
            # Get the unbound method for the same purpose.
            func = method.__func__
            cls = instance_ref().__class__
            del method
            @wraps(func)
            def wrapper(*args, **kwargs):
                instance = instance_ref()
                instance._step_count += 1
                wrapped = func.__get__(instance, cls)
                return wrapped(*args, **kwargs)
            # Note that the returned function here is no longer a bound method,
            # so attributes like `__func__` and `__self__` no longer exist.
            wrapper._with_counter = True
            return wrapper
        self.optimizer.step = with_counter(self.optimizer.step)
        self.verbose = _check_verbose_deprecated_warning(verbose)
        self._initial_step()
    def _initial_step(self):
        """Initialize step counts and performs a step"""
        self.optimizer._step_count = 0
        self._step_count = 0
        self.step()
    def state_dict(self):
        """Returns the state of the scheduler as a :class:`dict`.
        It contains an entry for every variable in self.__dict__ which
        is not the optimizer.
        """
        return {key: value for key, value in self.__dict__.items() if key != 'optimizer'}
    def load_state_dict(self, state_dict):
        """Loads the schedulers state.
        Args:
            state_dict (dict): scheduler state. Should be an object returned
                from a call to :meth:`state_dict`.
        """
        self.__dict__.update(state_dict)
    def get_last_lr(self):
        """ Return last computed learning rate by current scheduler.
        """
        return self._last_lr
    def get_lr(self):
        """Return the learning rates for the current epoch; must be overridden by subclasses."""
        # Compute learning rate using chainable form of the scheduler
        raise NotImplementedError
    def print_lr(self, is_verbose, group, lr, epoch=None):
        """Display the current learning rate.
        """
        if is_verbose:
            if epoch is None:
                print(f'Adjusting learning rate of group {group} to {lr:.4e}.')
            else:
                epoch_str = ("%.2f" if isinstance(epoch, float) else
                             "%.5d") % epoch
                print(f'Epoch {epoch_str}: adjusting learning rate of group {group} to {lr:.4e}.')
    def step(self, epoch=None):
        """Advance the schedule by one step and write the new learning rates to the optimizer.

        Passing ``epoch`` is deprecated: it emits a warning, sets ``last_epoch``
        to that value and uses ``_get_closed_form_lr`` when the subclass
        provides it.
        """
        # Raise a warning if old pattern is detected
        # https://github.com/pytorch/pytorch/issues/20124
        if self._step_count == 1:
            if not hasattr(self.optimizer.step, "_with_counter"):
                warnings.warn("Seems like `optimizer.step()` has been overridden after learning rate scheduler "
                              "initialization. Please, make sure to call `optimizer.step()` before "
                              "`lr_scheduler.step()`. See more details at "
                              "https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate", UserWarning)
            # Just check if there were two first lr_scheduler.step() calls before optimizer.step()
            elif self.optimizer._step_count < 1:
                warnings.warn("Detected call of `lr_scheduler.step()` before `optimizer.step()`. "
                              "In PyTorch 1.1.0 and later, you should call them in the opposite order: "
                              "`optimizer.step()` before `lr_scheduler.step()`.  Failure to do this "
                              "will result in PyTorch skipping the first value of the learning rate schedule. "
                              "See more details at "
                              "https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate", UserWarning)
        self._step_count += 1
        # The context manager flags the scheduler while get_lr() runs.
        with _enable_get_lr_call(self):
            if epoch is None:
                self.last_epoch += 1
                values = self.get_lr()
            else:
                warnings.warn(EPOCH_DEPRECATION_WARNING, UserWarning)
                self.last_epoch = epoch
                if hasattr(self, "_get_closed_form_lr"):
                    values = self._get_closed_form_lr()
                else:
                    values = self.get_lr()
        # Write one value per parameter group; the index `i` is not used.
        for i, data in enumerate(zip(self.optimizer.param_groups, values)):
            param_group, lr = data
            param_group['lr'] = lr
        self._last_lr = [group['lr'] for group in self.optimizer.param_groups]
# Kept so code written against the old private name keeps working.
# A subclass (rather than an alias) is used so that the class's __name__ stays "_LRScheduler".
class _LRScheduler(LRScheduler):
    """Backwards-compatible name for :class:`LRScheduler`; adds no behaviour."""
class _enable_get_lr_call:
    """Context manager that sets ``o._get_lr_called_within_step`` to True inside the block and False on exit."""
    def __init__(self, o):
        self.o = o
    def __enter__(self):
        setattr(self.o, '_get_lr_called_within_step', True)
        return self
    def __exit__(self, type, value, traceback):
        # Returns None, so an exception raised inside the block propagates.
        setattr(self.o, '_get_lr_called_within_step', False)
class Dynamic_LR(LRScheduler):
    """Rescale the learning rate when a monitored metric has stopped improving.

    Adapted from PyTorch's ``ReduceLROnPlateau``. Differences visible in this
    class: the metric is read from a dictionary (``metrics[plateau_var]``), the
    new learning rate is also capped from above by ``max_lr``, ``factor`` is not
    restricted to values below 1, and the best value seen so far is reset each
    time the learning rate is adjusted.

    Args:
        optimizer (Optimizer): Wrapped optimizer.
        mode (str): One of `min`, `max`. In `min` mode the metric counts as
            improved when it decreases; in `max` mode when it increases.
            Default: 'max'.
        factor (float): Factor applied to the learning rate.
            new_lr = lr * factor, clamped to [min_lr, max_lr]. Default: 0.1.
        patience (int): Number of epochs with no improvement that are
            tolerated; the learning rate is adjusted once the count of bad
            epochs exceeds this value. Default: 10.
        plateau_var (str): Key of the monitored quantity in the ``metrics``
            dictionary passed to :meth:`step`. Default: "test_auc".
        threshold (float): Threshold for measuring the new optimum,
            to only focus on significant changes. Default: 1e-4.
        threshold_mode (str): One of `rel`, `abs`. In `rel` mode,
            dynamic_threshold = best * ( 1 + threshold ) in 'max'
            mode or best * ( 1 - threshold ) in `min` mode.
            In `abs` mode, dynamic_threshold = best + threshold in
            `max` mode or best - threshold in `min` mode. Default: 'rel'.
        cooldown (int): Number of epochs to wait before resuming
            normal operation after lr has been adjusted. Default: 0.
        min_lr (float or list): A scalar or one value per param group; lower
            bound on the learning rate. Default: 0.
        max_lr (float or list): A scalar or one value per param group; upper
            bound on the learning rate. Default: 1e-4.
        eps (float): If the difference between new and old lr is not larger
            than eps, the update is ignored. Default: 1e-8.
        verbose (bool): If ``True``, prints whether the metric improved on
            every step. Default: ``False``.

    Example:
        >>> # xdoctest: +SKIP
        >>> optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9)
        >>> scheduler = Dynamic_LR(optimizer, mode='max', plateau_var='test_auc')
        >>> for epoch in range(10):
        >>>     train(...)
        >>>     metrics = validate(...)  # e.g. {'test_auc': 0.91}
        >>>     # Note that step should be called after validate()
        >>>     scheduler.step(model, metrics)
    """
    def __init__(self, optimizer, mode = 'max', factor=0.1, patience=10,
                 plateau_var = "test_auc",
                 threshold=1e-4, threshold_mode='rel', cooldown=0,
                 min_lr=0, max_lr=1e-4, eps=1e-8, verbose=False):
        # The upstream check rejecting factor >= 1.0 is intentionally absent:
        # the result of lr * factor is clamped by max_lr in _reduce_lr.
        self.factor = factor
        # Attach optimizer
        if not isinstance(optimizer, Optimizer):
            raise TypeError(f'{type(optimizer).__name__} is not an Optimizer')
        self.optimizer = optimizer
        # Each bound is expanded on its own, so a per-group list for one bound
        # can be combined with a scalar for the other.
        n_groups = len(optimizer.param_groups)
        self.min_lrs = self._per_group_bounds(min_lr, n_groups, "min_lr")
        self.max_lrs = self._per_group_bounds(max_lr, n_groups, "max_lr")
        self.patience = patience
        self.plateau_var = plateau_var
        self.verbose = verbose
        self.cooldown = cooldown
        self.cooldown_counter = 0
        self.mode = mode
        self.threshold = threshold
        self.threshold_mode = threshold_mode
        self.best = None
        self.num_bad_epochs = None
        self.mode_worse = None  # the worse value for the chosen mode
        self.eps = eps
        self.last_epoch = 0
        self._last_lr = [group['lr'] for group in self.optimizer.param_groups]
        self._init_is_better(mode=mode, threshold=threshold,
                             threshold_mode=threshold_mode)
        self._reset()
    @staticmethod
    def _per_group_bounds(bound, n_groups, label):
        """Expand a scalar or per-group sequence into a list with one bound per param group."""
        if isinstance(bound, (list, tuple)):
            if len(bound) != n_groups:
                raise ValueError(f"expected {n_groups} {label}s, got {len(bound)}")
            return list(bound)
        return [bound] * n_groups
    def _reset(self):
        """Resets the best value, num_bad_epochs counter and cooldown counter."""
        self.best = self.mode_worse
        self.cooldown_counter = 0
        self.num_bad_epochs = 0
    def step(self, model, metrics, epoch=None):
        """Record this epoch's metric and adjust the learning rate if patience is exceeded.

        Args:
            model: Not used by this scheduler.
            metrics: Mapping containing ``plateau_var``.
            epoch: Deprecated explicit epoch number.
        """
        # convert the monitored entry to float, in case it's a zero-dim Tensor
        current = float(metrics[self.plateau_var])
        if epoch is None:
            epoch = self.last_epoch + 1
        else:
            warnings.warn(EPOCH_DEPRECATION_WARNING, UserWarning)
        self.last_epoch = epoch
        if self.is_better(current, self.best):
            if self.verbose:
                print("Model is improving!")
            self.best = current
            self.num_bad_epochs = 0
        else:
            if self.verbose:
                print(f"Model is not improving :( best = {self.best}, current = {current}")
            self.num_bad_epochs += 1
        if self.in_cooldown:
            self.cooldown_counter -= 1
            self.num_bad_epochs = 0  # ignore any bad epochs in cooldown
        if self.num_bad_epochs > self.patience:
            self._reduce_lr(epoch)
            self.cooldown_counter = self.cooldown
            self.num_bad_epochs = 0
        self._last_lr = [group['lr'] for group in self.optimizer.param_groups]
    def _reduce_lr(self, epoch):
        """Multiply every group's lr by ``factor``, clamp it to its bounds and reset the tracking state."""
        print("Adjusting Learning Rate")
        self._reset()
        for i, param_group in enumerate(self.optimizer.param_groups):
            old_lr = float(param_group['lr'])
            new_lr = max(old_lr * self.factor, self.min_lrs[i])
            new_lr = min(new_lr, self.max_lrs[i])
            # Changes not larger than eps are skipped.
            if abs(old_lr - new_lr) > self.eps:
                param_group['lr'] = new_lr
    def get_last_lr(self):
        """Return the learning rates recorded at the end of the last step."""
        return self._last_lr
    @property
    def in_cooldown(self):
        """True while the cooldown counter is positive."""
        return self.cooldown_counter > 0
    def is_better(self, a, best):
        """Return whether ``a`` improves on ``best`` under the configured mode and threshold."""
        if self.mode == 'min' and self.threshold_mode == 'rel':
            rel_epsilon = 1. - self.threshold
            return a < best * rel_epsilon
        elif self.mode == 'min' and self.threshold_mode == 'abs':
            return a < best - self.threshold
        elif self.mode == 'max' and self.threshold_mode == 'rel':
            rel_epsilon = self.threshold + 1.
            return a > best * rel_epsilon
        else:  # mode == 'max' and epsilon_mode == 'abs':
            return a > best + self.threshold
    def _init_is_better(self, mode, threshold, threshold_mode):
        """Validate mode settings and set ``mode_worse`` to the worst possible metric value.

        Raises:
            ValueError: If ``mode`` or ``threshold_mode`` is unknown.
        """
        if mode not in {'min', 'max'}:
            raise ValueError('mode ' + mode + ' is unknown!')
        if threshold_mode not in {'rel', 'abs'}:
            raise ValueError('threshold mode ' + threshold_mode + ' is unknown!')
        if mode == 'min':
            self.mode_worse = inf
        else:  # mode == 'max':
            self.mode_worse = -inf
        self.mode = mode
        self.threshold = threshold
        self.threshold_mode = threshold_mode
    def state_dict(self):
        """Return every attribute except the optimizer as a dict."""
        return {key: value for key, value in self.__dict__.items() if key != 'optimizer'}
    def load_state_dict(self, state_dict):
        """Restore attributes from ``state_dict`` and re-derive ``mode_worse``."""
        self.__dict__.update(state_dict)
        self._init_is_better(mode=self.mode, threshold=self.threshold, threshold_mode=self.threshold_mode)
+class Action_On_Plateau():
+    def __init__(self, mode = 'max', patience=10,
+                 plateau_var = "test_auc",
+                 threshold=1e-4, threshold_mode='rel', cooldown=0,
+                 eps=1e-8, verbose=False):
+        self.patience = patience
+        self.plateau_var = plateau_var
+        self.verbose = verbose
+        self.cooldown = cooldown
+        self.cooldown_counter = 0
+        self.mode = mode
+        self.threshold = threshold
+        self.threshold_mode = threshold_mode
+        self.best = None
+        self.num_bad_epochs = None
+        self.mode_worse = None  # the worse value for the chosen mode
+        self.eps = eps
+        self.last_epoch = 0
+        self._init_is_better(mode=mode, threshold=threshold,
+                             threshold_mode=threshold_mode)
+        self._reset()
+    def _reset(self):
+        """Resets num_bad_epochs counter and cooldown counter."""
+        self.best = self.mode_worse
+        self.cooldown_counter = 0
+        self.num_bad_epochs = 0
+    def step(self, model, metrics, epoch=None):
+        # convert `metrics` to float, in case it's a zero-dim Tensor
+        current = float(metrics[self.plateau_var])
+        if epoch is None:
+            epoch = self.last_epoch + 1
+        else:
+            warnings.warn(EPOCH_DEPRECATION_WARNING, UserWarning)
+        self.last_epoch = epoch
+        if self.is_better(current, self.best):
+            if(self.verbose):
+                print("Model is improving!")
+            self.best = current
+            self.num_bad_epochs = 0
+        else:
+            if(self.verbose):
+                print(f"Model is not improving :( best = {self.best}, current = {current}")
+            self.num_bad_epochs += 1
+        if self.in_cooldown:
+            self.cooldown_counter -= 1
+            self.num_bad_epochs = 0  # ignore any bad epochs in cooldown
+        if self.num_bad_epochs > self.patience:
+            self.action(model, metrics, epoch)
+    def action(self, model, metrics, epoch=None):
+        if(self.verbose):
+            print("Doing my action")
+    @property
+    def in_cooldown(self):
+        return self.cooldown_counter > 0
+    def is_better(self, a, best):
+        if self.mode == 'min' and self.threshold_mode == 'rel':
+            rel_epsilon = 1. - self.threshold
+            return a < best * rel_epsilon
+        elif self.mode == 'min' and self.threshold_mode == 'abs':
+            return a < best - self.threshold
+        elif self.mode == 'max' and self.threshold_mode == 'rel':
+            rel_epsilon = self.threshold + 1.
+            return a > best * rel_epsilon
+        else:  # mode == 'max' and epsilon_mode == 'abs':
+            return a > best + self.threshold
+    def _init_is_better(self, mode, threshold, threshold_mode):
+        if mode not in {'min', 'max'}:
+            raise ValueError('mode ' + mode + ' is unknown!')
+        if threshold_mode not in {'rel', 'abs'}:
+            raise ValueError('threshold mode ' + threshold_mode + ' is unknown!')
+        if mode == 'min':
+            self.mode_worse = inf
+        else:  # mode == 'max':
+            self.mode_worse = -inf
+        self.mode = mode
+        self.threshold = threshold
+        self.threshold_mode = threshold_mode
+class Partial_Reset(Action_On_Plateau):
+    def __init__(self, mode='max', patience=10, plateau_var="test_auc",
+                        threshold=0.0001, threshold_mode='rel', cooldown=0,
+                        eps=1e-8, verbose=False):
+        super().__init__(mode, patience, plateau_var, threshold,
+                         threshold_mode, cooldown, eps, verbose)
+    def action(self, model, metrics, epoch=None):
+        print("Partial Reset!!")
+        GCN.partial_reset(model)
+        self._reset()
+        self.cooldown_counter = self.cooldown
+        self.num_bad_epochs = 0
+class Full_Reset(Action_On_Plateau):
+    def __init__(self, mode='max', patience=10, plateau_var="test_auc",
+                        threshold=0.0001, threshold_mode='rel', cooldown=0,
+                        eps=1e-8, verbose=False):
+        super().__init__(mode, patience, plateau_var, threshold,
+                         threshold_mode, cooldown, eps, verbose)
+    def action(self, model, metrics, epoch=None):
+        print("Full Reset!!")
+        GCN.full_reset(model)
+        self._reset()
+        self.cooldown_counter = self.cooldown
+        self.num_bad_epochs = 0
+class Dynamic_LR_AND_Partial_Reset():
+    def __init__(self, optimizer, mode = 'max', factor=0.1, patience=10,
+                plateau_var = "test_auc", reset_patience=None, reset_plateau_var=None,
+                threshold=1e-4, threshold_mode='rel', cooldown=0,
+                min_lr=0, max_lr=1e-4, eps=1e-8, verbose=False):
+        if (reset_patience == None):
+            reset_patience = patience
+        if(reset_plateau_var == None):
+            reset_plateau_var = plateau_var
+        self.dynamic_lr = Dynamic_LR(optimizer, mode=mode, factor=factor, patience = patience,
+                        plateau_var=plateau_var, threshold=threshold, threshold_mode =threshold_mode,
+                        cooldown=cooldown, min_lr=min_lr, max_lr=max_lr, eps=eps, verbose=verbose)
+        self.partial_reset = Partial_Reset(mode=mode, patience=reset_patience, plateau_var=reset_plateau_var,
+                        threshold=threshold, threshold_mode=threshold_mode, cooldown=cooldown,
+                        eps=eps)
+    def step(self, model, metrics, epoch=None):
+        self.dynamic_lr.step(model=model, metrics=metrics, epoch=epoch)
+        self.partial_reset.step(model=model, metrics=metrics, epoch=epoch)
+class Dynamic_LR_AND_Full_Reset():
+    def __init__(self, optimizer, mode = 'max', factor=0.1, patience=10,
+                plateau_var = "test_auc", reset_patience=None, reset_plateau_var=None,
+                threshold=1e-4, threshold_mode='rel', cooldown=0,
+                min_lr=0, max_lr=1e-4, eps=1e-8, verbose=False):
+        if (reset_patience == None):
+            reset_patience = patience
+        if(reset_plateau_var == None):
+            reset_plateau_var = plateau_var
+        self.dynamic_lr = Dynamic_LR(optimizer, mode=mode, factor=factor, patience = patience,
+                        plateau_var=plateau_var, threshold=threshold, threshold_mode =threshold_mode,
+                        cooldown=cooldown, min_lr=min_lr, max_lr=max_lr, eps=eps, verbose=verbose)
+        self.full_reset = Full_Reset(mode=mode, patience=reset_patience, plateau_var=reset_plateau_var,
+                        threshold=threshold, threshold_mode=threshold_mode, cooldown=cooldown,
+                        eps=eps)
+    def step(self, model, metrics, epoch=None):
+        self.dynamic_lr.step(model=model, metrics=metrics, epoch=epoch)
+        self.full_reset.step(model=model, metrics=metrics, epoch=epoch)

root_gnn_base/dataset.py ADDED Viewed

	@@ -0,0 +1,685 @@

+from dgl.data import DGLDataset
+import dgl
+import ROOT
+import torch
+import os
+import glob
+import time
+import numpy as np
+from root_gnn_base import utils
+def node_features_from_tree(ch, node_branch_names, node_branch_types, node_feature_scales):
+    lengths = []
+    for branch, node_type in zip(node_branch_names[0], node_branch_types):
+        if node_type == 'single':
+            lengths.append(1)
+        elif node_type == 'vector':
+            lengths.append(len(getattr(ch, branch)))
+        else:
+            print('Unknown node branch type: {}'.format(node_type))
+    features = []
+    for node_feat in node_branch_names:
+        if node_feat == 'CALC_E':
+            features.append(features[0]*torch.cosh(features[1]))
+            continue
+        elif node_feat == 'NODE_TYPE':
+            feat = []
+            for i, length in enumerate(lengths):
+                feat.extend([i,]*length)
+            features.append(torch.tensor(feat))
+            continue
+        feat = []
+        itype = 0
+        for length, branch, node_type in zip(lengths, node_feat, node_branch_types):
+            if isinstance(branch, (int, float, complex)):
+                feat.extend([branch,]*length)
+            elif branch == 'CALC_E':
+                this_type_starts_at = sum(lengths[:itype])
+                this_type_ends_at = sum(lengths[:itype+1])
+                feat.extend(features[0][this_type_starts_at:this_type_ends_at]*torch.cosh(features[1][this_type_starts_at:this_type_ends_at]))
+            elif node_type == 'single':
+                feat.append(getattr(ch, branch))
+            elif node_type == 'vector':
+                feat.extend(getattr(ch, branch))
+            itype += 1
+        features.append(torch.tensor(feat))
+    return torch.stack(features, dim=1) * node_feature_scales, lengths
+def full_connected_graph(n_nodes, self_loops=True):
+    senders = []
+    receivers = []
+    senders = np.arange(n_nodes*n_nodes) // n_nodes
+    receivers = np.arange(n_nodes*n_nodes) % n_nodes
+    if not self_loops and n_nodes > 1:
+        mask = senders != receivers
+        senders = senders[mask]
+        receivers = receivers[mask]
+    return dgl.graph((senders, receivers))
+def check_selection(ch, selection):
+    var, cut, op = selection
+    if op == '>':
+        return getattr(ch, var) > cut
+    elif op == '<':
+        return getattr(ch, var) < cut
+    elif op == '==':
+        return getattr(ch, var) == cut
+def check_selections(ch, selections):
+    for selection in selections:
+        if not check_selection(ch, selection):
+            return False
+    return True
+#Base dataset class for making graphs from ROOT ntuples.
+class RootDataset(DGLDataset):
+    def __init__(self, name=None, raw_dir=None, save_dir=None, label=1, file_names = '*.root', node_branch_names=None, node_branch_types=None, node_feature_scales=None,
+                 selections=[], save=True, tree_name = 'nominal_Loose', fold_var = 'eventNumber', weight_var = None, chunks = 1, process_chunks = None, global_features = [], tracking_info = [], **kwargs):
+        print(f'Unused args while creating RootDataset: {kwargs}')
+        self.label = label
+        self.counts = []
+        self.selections = selections
+        self.save_to_disk = save
+        self.file_names = file_names
+        self.node_branch_names = node_branch_names
+        self.node_branch_types = node_branch_types
+        self.node_feature_scales = torch.tensor([float(sf) for sf in node_feature_scales])
+        self.tree_name = tree_name
+        self.fold_var = fold_var
+        self.tracking_info = tracking_info
+        self.tracking_info.insert(0, fold_var)
+        if weight_var == None:
+            weight_var = 1
+        self.tracking_info.insert(1, weight_var)
+        self.global_features = global_features
+        self.chunks = chunks
+        self.process_chunks = process_chunks
+        if self.process_chunks is None:
+            self.process_chunks = [i for i in range(self.chunks)]
+        self.times = [0, 0]
+        super().__init__(name=name, raw_dir=raw_dir, save_dir=save_dir)
+    def get_list_of_branches(self):
+        branches = []
+        for feat in self.node_branch_names:
+            if isinstance(feat, list):
+                for branch in feat:
+                    if branch == 'CALC_E':
+                        continue
+                    if isinstance(branch, str):
+                        branches.append(branch)
+        for feat in self.global_features:
+            if isinstance(feat, str):
+                branches.append(feat)
+        for feat in self.tracking_info:
+            if isinstance(feat, str):
+                branches.append(feat)
+        for selection in self.selections:
+            branches.append(selection[0])
+        return branches
+    def make_graph(self, ch):
+        t1 = time.time()
+        features, _ = node_features_from_tree(ch, self.node_branch_names, self.node_branch_types, self.node_feature_scales)
+        features = features[features[:,0] != 0]
+        t2 = time.time()
+        g = full_connected_graph(features.shape[0], self_loops=False)
+        g.ndata['features'] = features
+        t3 = time.time()
+        self.times[0] += t2 - t1
+        self.times[1] += t3 - t2
+        return g
+    def process(self):
+        times = [0, 0, 0]
+        oldtime = time.time()
+        if isinstance(self.file_names, str):
+            self.files = glob.glob(os.path.join(self.raw_dir, self.file_names))
+        else:
+            self.files = []
+            for file_name in self.file_names:
+                self.files.extend(glob.glob(os.path.join(self.raw_dir, file_name)))
+        self.chain = ROOT.TChain(self.tree_name)
+        if len(self.files) == 0:
+            print('No files found in {}'.format(os.path.join(self.raw_dir, self.file_names)))
+        for file in self.files:
+            utils.set_timeout(60*2)
+            self.chain.Add(file)
+            utils.unset_timeout()
+        branches = self.get_list_of_branches()
+        self.chain.SetBranchStatus('*', 0)
+        for branch in branches:
+            self.chain.SetBranchStatus(branch, 1)
+        newtime = time.time()
+        times[0] += newtime - oldtime
+        chunks = np.array_split(np.arange(self.chain.GetEntries()), self.chunks)
+        chunks = [chunk for i, chunk in enumerate(chunks) if i in self.process_chunks]
+        self.graph_chunks = []
+        self.label_chunks = []
+        self.tracking_chunks = []
+        self.global_chunks = []
+        chunk_id = -1
+        for chunk in chunks:
+            chunk_id += 1
+            graphs = []
+            labels = []
+            tracking = []
+            globals = []
+            for ientry in chunk:
+                if (ientry % 10000 == 0):
+                    print('Processing event {}/{}'.format(ientry, self.chain.GetEntries()), flush=True)
+                self.chain.GetEntry(ientry)
+                passed = True
+                for selection in self.selections:
+                    if not check_selection(self.chain, selection):
+                        passed = False
+                        continue
+                oldtime = newtime
+                newtime = time.time()
+                times[1] += newtime - oldtime
+                if passed:
+                    graphs.append(self.make_graph(self.chain))
+                    labels.append( self.label )
+                    tracking.append(torch.zeros(len(self.tracking_info), dtype=torch.double))
+                    globals.append(torch.zeros(len(self.global_features)))
+                    for i_ti, tr_branch in enumerate(self.tracking_info):
+                        if isinstance(tr_branch, str):
+                            tracking[-1][i_ti] = getattr(self.chain, tr_branch)
+                        else:
+                            tracking[-1][i_ti] = tr_branch
+                    for i_gl, gl_branch in enumerate(self.global_features):
+                        globals[-1][i_gl] = getattr(self.chain, gl_branch)
+                oldtime = newtime
+                newtime = time.time()
+                times[2] += newtime - oldtime
+            labels = torch.tensor(labels)
+            tracking = torch.stack(tracking)
+            globals = torch.stack(globals)
+            # self.labels = labels
+            # self.tracking = tracking
+            # self.global_features = globals
+            # self.graphs = graphs
+            self.save_chunk(chunk_id, graphs, labels, tracking, globals)
+        return
+        self.graphs = self.graph_chunks[0]
+        for chunk in self.graph_chunks[1:]:
+            self.graphs += chunk
+        self.labels = torch.cat(self.label_chunks)
+        self.tracking = torch.cat(self.tracking_chunks)
+        self.global_features = torch.cat(self.global_chunks)
+        print('Time spent: Creating TChain: {}s, Getting Entries and Selection: {}s, Graph Creation: {}s'.format(*times))
+        print('Time spent in node_features_from_tree: {}s, full_connected_graph: {}s'.format(*self.times))
+    def save(self):
+        """save the graph list and the labels"""
+        if not self.save_to_disk:
+            return
+        graph_path = os.path.join(self.save_dir, self.name + '.bin')
+        if self.chunks == 1:
+            # print(len(self.graphs))
+            # print(len(self.labels))
+            # print(len(self.tracking))
+            # print(len(self.globals))
+            print(f'Saving dataset to {os.path.join(self.save_dir, self.name + ".bin")}')
+            dgl.save_graphs(str(graph_path), self.graphs, {'labels': torch.tensor(self.labels), 'tracking': torch.tensor(self.tracking), 'global': torch.tensor(self.global_features)})
+        else:
+            print(len(self.graph_chunks))
+            for i in range(len(self.process_chunks)):
+                print(f'Saving dataset to {os.path.join(self.save_dir, self.name + f"_{self.process_chunks[i]}.bin")}')
+                dgl.save_graphs(str(graph_path).replace('.bin', f'_{self.process_chunks[i]}.bin'), self.graph_chunks[i], {'labels': self.label_chunks[i], 'tracking': self.tracking_chunks[i], 'global': self.global_chunks[i]})
+    def save_chunk(self, chunk_id, graphs, labels, tracking, globals):
+        if not self.save_to_disk:
+            return
+        graph_path = os.path.join(self.save_dir, self.name + '.bin')
+        print(f'Saving dataset to {os.path.join(self.save_dir, self.name + f"_{self.process_chunks[chunk_id]}.bin")}')
+        dgl.save_graphs(str(graph_path).replace('.bin', f'_{self.process_chunks[chunk_id]}.bin'), graphs, {'labels': labels, 'tracking': tracking, 'global': globals})
+    def has_cache(self):
+        print(f'Checking for cache of {self.name}')
+        if not self.save_to_disk:
+            print('Skipping load.')
+            return False
+        if self.chunks == 1:
+            graph_path = os.path.join(self.save_dir, self.name + '.bin')
+            return os.path.exists(graph_path)
+        else:
+            for i in range(len(self.process_chunks)):
+                graph_path = os.path.join(self.save_dir, self.name + f'_{self.process_chunks[i]}.bin')
+                if not os.path.exists(graph_path):
+                    print(f'File {graph_path} does not exist, processing.')
+                    return False
+            return True
+    def load(self):
+        if self.chunks == 1:
+            print(f'Loading dataset from {os.path.join(self.save_dir, self.name + ".bin")}')
+            graphs, label_dict = dgl.load_graphs(os.path.join(self.save_dir, self.name + '.bin'))
+            self.graphs = graphs
+            self.labels = label_dict['labels']
+            self.tracking = label_dict['tracking']
+            self.global_features = label_dict['global']
+        else:
+            self.graphs = []
+            self.labels = []
+            self.tracking = []
+            self.global_features = []
+            for i in range(self.chunks):
+                try:
+                    print(f'Loading dataset from {os.path.join(self.save_dir, self.name + f"_{self.process_chunks[i]}.bin")}')
+                    graphs, label = dgl.load_graphs(os.path.join(self.save_dir, self.name + f'_{self.process_chunks[i]}.bin'))
+                    self.graphs.extend(graphs)
+                    self.labels.append(label['labels'])
+                    self.tracking.append(label['tracking'])
+                    self.global_features.append(label['global'])
+                except Exception as e:
+                    print(e)
+            self.labels = torch.cat(self.labels)
+            self.tracking = torch.cat(self.tracking)
+            self.global_features = torch.cat(self.global_features)
+    def __getitem__(self, idx):
+        return self.graphs[idx], self.labels[idx], self.tracking[idx], self.global_features[idx]
+    def __len__(self):
+        return len(self.graphs)
+#Dataset with edge features added (deta, dphi, dR)
+class EdgeDataset(RootDataset):
+    def make_graph(self, ch):
+        g = super().make_graph(ch)
+        u, v = g.edges()
+        deta = g.ndata['features'][u, 1] - g.ndata['features'][v, 1]
+        dphi = g.ndata['features'][u, 2] - g.ndata['features'][v, 2]
+        dphi = torch.where(dphi > np.pi, dphi - 2*np.pi, dphi)
+        dphi = torch.where(dphi < -np.pi, dphi + 2*np.pi, dphi)
+        dR   = torch.sqrt(deta**2 + dphi**2)
+        g.edata['features'] = torch.stack([deta, dphi, dR], dim=1)
+        return g
+class tHbbEdgeDataset(RootDataset):
+    def __init__(self, exclude_branches=None, **kwargs):
+        self.exclude_branches = exclude_branches
+        super().__init__(**kwargs)
+    def get_list_of_branches(self):
+        br = super().get_list_of_branches()
+        for sector in self.exclude_branches:
+            if sector == None:
+                continue
+            for excl in sector:
+                if type(excl) == str:
+                    br.append(excl)
+        return br
+    def make_graph(self, ch):
+        features, lengths = node_features_from_tree(ch, self.node_branch_names, self.node_branch_types, self.node_feature_scales)
+        include_mask = torch.ones(features.shape[0], dtype=torch.bool)
+        node_idx = 0
+        for sector, length in zip(self.exclude_branches, lengths):
+            if sector == None:
+                node_idx += length
+                continue
+            for excl in sector:
+                if type(excl) == int:
+                    include_mask[excl + node_idx] = False
+                elif type(excl) == str:
+                    include_mask[getattr(self.chain, excl) + node_idx] = False
+        g = full_connected_graph(features[include_mask].shape[0], self_loops=False)
+        g.ndata['features'] = features[include_mask]
+        u, v = g.edges()
+        deta = g.ndata['features'][u, 1] - g.ndata['features'][v, 1]
+        dphi = g.ndata['features'][u, 2] - g.ndata['features'][v, 2]
+        dphi = torch.where(dphi > np.pi, dphi - 2*np.pi, dphi)
+        dphi = torch.where(dphi < -np.pi, dphi + 2*np.pi, dphi)
+        dR   = torch.sqrt(deta**2 + dphi**2)
+        g.edata['features'] = torch.stack([deta, dphi, dR], dim=1)
+        return g
+class LazyDataset(EdgeDataset):
+    def __init__(self, buffer_size = 2, **kwargs):
+        self.buffer = [None,] * buffer_size
+        self.buffer_ptr = 0
+        self.get_item_calls = 0
+        self.buffer_indices = [-1,] * buffer_size
+        super().__init__(**kwargs)
+    def __getitem__(self, idx):
+        self.get_item_calls += 1
+        chunk_idx = -1
+        sum = 0
+        ev_idx = -999
+        for i, count in enumerate(self.counts):
+            sum += count
+            if idx < sum:
+                chunk_idx = i
+                ev_idx = idx - sum + count
+                break
+        buf_idx = self.buffer_get(chunk_idx)
+        if ev_idx >= len(self.buffer[buf_idx][0]):
+            print(f'Getting event {ev_idx} from chunk {chunk_idx} from buffer {buf_idx}. Calls: {self.get_item_calls}')
+            print(len(self.buffer))
+            print(self.counts)
+            print(len(self.buffer[buf_idx][0]))
+        return self.buffer[buf_idx][0][ev_idx], self.buffer[buf_idx][1]['labels'][ev_idx], self.buffer[buf_idx][1]['tracking'][ev_idx], self.buffer[buf_idx][1]['global'][ev_idx]
+    def buffer_get(self, buffer_idx):
+        if buffer_idx in self.buffer_indices:
+            for i in range(len(self.buffer)):
+                if self.buffer_indices[i] == buffer_idx:
+                    return i
+        else:
+            print(f'Loading dataset from {os.path.join(self.save_dir, self.name + f"_{buffer_idx}.bin")}', flush=True)
+            self.buffer_ptr = (self.buffer_ptr + 1) % len(self.buffer)
+            self.buffer[self.buffer_ptr] = dgl.load_graphs(os.path.join(self.save_dir, self.name + f'_{buffer_idx}.bin'))
+            self.buffer_indices[self.buffer_ptr] = buffer_idx
+            return self.buffer_ptr
+    def load(self):
+        self.counts = []
+        self.tracking = []
+        try:
+            for i in range(self.chunks):
+                print(f'Loading dataset from {os.path.join(self.save_dir, self.name + f"_{self.process_chunks[i]}.bin")}')
+                l = dgl.data.graph_serialize.load_labels_v2(os.path.join(self.save_dir, self.name + f'_{self.process_chunks[i]}.bin'))
+                self.counts.append(len(l['tracking']))
+                self.tracking.append(l['tracking'])
+            self.tracking = torch.cat(self.tracking)
+        except Exception as e:
+            print(e)
+    def __len__(self):
+        return sum(self.counts)
+class MultiLabelDataset(EdgeDataset):
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
+    def get_list_of_branches(self):
+        br =  super().get_list_of_branches()
+        for l in self.label:
+            if isinstance(l, str):
+                br.append(l)
+            if isinstance(l, dict):
+                br.append(l['branch'])
+        return br
+    def get_label(self, ch):
+        label = []
+        for l in self.label:
+            if isinstance(l, str):
+                label.append((getattr(ch, l)))
+            if isinstance(l, dict):
+                label.append(getattr(ch, l['branch'])*float(l['scale']))
+            if isinstance(l, float) or isinstance(l, int):
+                label.append(l)
+        return torch.tensor(label)
+    def process(self):
+        times = [0, 0, 0]
+        oldtime = time.time()
+        if isinstance(self.file_names, str):
+            self.files = glob.glob(os.path.join(self.raw_dir, self.file_names))
+        else:
+            self.files = []
+            for file_name in self.file_names:
+                self.files.extend(glob.glob(os.path.join(self.raw_dir, file_name)))
+        self.chain = ROOT.TChain(self.tree_name)
+        if len(self.files) == 0:
+            print('No files found in {}'.format(os.path.join(self.raw_dir, self.file_names)))
+        for file in self.files:
+            utils.set_timeout(60*2)
+            self.chain.Add(file)
+            utils.unset_timeout()
+        branches = self.get_list_of_branches()
+        self.chain.SetBranchStatus('*', 0)
+        for branch in branches:
+            self.chain.SetBranchStatus(branch, 1)
+        newtime = time.time()
+        times[0] += newtime - oldtime
+        chunks = np.array_split(np.arange(self.chain.GetEntries()), self.chunks)
+        chunks = [chunk for i, chunk in enumerate(chunks) if i in self.process_chunks]
+        self.graph_chunks = []
+        self.label_chunks = []
+        self.tracking_chunks = []
+        self.global_chunks = []
+        chunk_id = -1
+        for chunk in chunks:
+            chunk_id += 1
+            graphs = []
+            labels = []
+            tracking = []
+            globals = []
+            for ientry in chunk:
+                if (ientry % 10000 == 0):
+                    print('Processing event {}/{}'.format(ientry, self.chain.GetEntries()), flush=True)
+                self.chain.GetEntry(ientry)
+                passed = True
+                for selection in self.selections:
+                    if not check_selection(self.chain, selection):
+                        passed = False
+                        continue
+                oldtime = newtime
+                newtime = time.time()
+                times[1] += newtime - oldtime
+                if passed:
+                    graphs.append(self.make_graph(self.chain))
+                    labels.append(self.get_label(self.chain))
+                    tracking.append(torch.zeros(len(self.tracking_info), dtype=torch.double))
+                    globals.append(torch.zeros(len(self.global_features)))
+                    for i_ti, tr_branch in enumerate(self.tracking_info):
+                        if isinstance(tr_branch, str):
+                            tracking[-1][i_ti] = getattr(self.chain, tr_branch)
+                        else:
+                            tracking[-1][i_ti] = tr_branch
+                    for i_gl, gl_branch in enumerate(self.global_features):
+                        globals[-1][i_gl] = getattr(self.chain, gl_branch)
+                oldtime = newtime
+                newtime = time.time()
+                times[2] += newtime - oldtime
+            labels = torch.stack(labels)
+            self.save_chunk(chunk_id, graphs, labels, torch.stack(tracking), torch.stack(globals))
+            # self.graph_chunks.append(graphs)
+            # self.label_chunks.append(labels)
+            # self.tracking_chunks.append(torch.stack(tracking))
+            # self.global_chunks.append(torch.stack(globals))
+            # self.counts.append(len(graphs))
+        return
+        self.graphs = self.graph_chunks[0]
+        for chunk in self.graph_chunks[1:]:
+            self.graphs += chunk
+        self.labels = torch.cat(self.label_chunks)
+        self.tracking = torch.cat(self.tracking_chunks)
+        self.global_features = torch.cat(self.global_chunks)
+        print('Time spent: Creating TChain: {}s, Getting Entries and Selection: {}s, Graph Creation: {}s'.format(*times))
+        print('Time spent in node_features_from_tree: {}s, full_connected_graph: {}s'.format(*self.times))
+class LazyMultiLabelDataset(MultiLabelDataset, LazyDataset):
+    """Combines the label handling of ``MultiLabelDataset`` with the chunk buffer of ``LazyDataset``."""
+    def __init__(self, buffer_size = 2, **kwargs):
+        # Call LazyDataset.__init__ directly so buffer_size reaches it and the
+        # buffer attributes are created.
+        LazyDataset.__init__(self, buffer_size=buffer_size, **kwargs)
+class MultiLabeltHbbDataset(MultiLabelDataset, tHbbEdgeDataset):
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
+    def get_list_of_branches(self):
+        br =  super().get_list_of_branches()
+        for sector in self.exclude_branches:
+            if sector == None:
+                continue
+            for excl in sector:
+                if type(excl) == str:
+                    br.append(excl)
+        return br
+class AugmentedDataset(RootDataset):
+    def __init__(self, seed = 2, feature_index = None, node_mapping = None, **kwargs):
+        self.seed = seed
+        np.random.seed(seed)
+        if(feature_index == None):
+            self.feature_index = {"pt": 0, "eta": 1, "phi": 2, "energy": 3, "btag": 4, "charge": 5, "node_type": 6}
+        if (node_mapping == None):
+            self.node_mapping = {"jet": 0, "ele": 1, "mu": 2, "ph": 3, "MET": 4}
+        super().__init__(**kwargs)
+    def detector_noise(self, node_features):
+        noise = np.zeros_like(node_features)
+        node_types = node_features[:, self.feature_index["node_type"]]
+        pts = node_features[:, self.feature_index["pt"]]
+        etas = node_features[:, self.feature_index["eta"]]
+        energies = node_features[:, self.feature_index["energy"]]
+        # Noise calculation for jets
+        jet_mask = (node_types == self.node_mapping["jet"])
+        jet_pts = pts[jet_mask]
+        jet_etas = etas[jet_mask]
+        if (jet_mask.sum() > 0):
+            jet_resolutions = np.where(
+                jet_pts <= 0.1, 0.0,
+                np.where(
+                    np.abs(jet_etas) <= 0.5, np.sqrt(0.06**2 + jet_pts**2 * 1.3e-3**2),
+                    np.where(
+                        np.abs(jet_etas) <= 1.5, np.sqrt(0.10**2 + jet_pts**2 * 1.7e-3**2),
+                        np.where(
+                            np.abs(jet_etas) <= 2.5, np.sqrt(0.25**2 + jet_pts**2 * 3.1e-3**2),
+                            0.0
+                        )
+                    )
+                )
+            )
+            noise[jet_mask, self.feature_index["pt"]] = np.random.normal(loc=0.0, scale=jet_resolutions)
+        # Noise calculation for electrons
+        ele_mask = (node_types == self.node_mapping["ele"])
+        ele_pts = pts[ele_mask]
+        ele_etas = etas[ele_mask]
+        if (ele_mask.sum() > 0):
+            ele_resolutions = np.where(
+                np.abs(ele_etas) <= 0.5, np.sqrt(0.03**2 + ele_pts**2 * 1.3e-3**2),
+                np.where(
+                    np.abs(ele_etas) <= 1.5, np.sqrt(0.05**2 + ele_pts**2 * 1.7e-3**2),
+                    np.where(
+                        np.abs(ele_etas) <= 2.5, np.sqrt(0.15**2 + ele_pts**2 * 3.1e-3**2),
+                        0.0
+                    )
+                )
+            )
+            noise[ele_mask, self.feature_index["pt"]] = np.random.normal(loc=0.0, scale=ele_resolutions)
+        # Noise calculation for muons
+        mu_mask = (node_types == self.node_mapping["mu"])
+        mu_pts = pts[mu_mask]
+        mu_etas = etas[mu_mask]
+        if (mu_mask.sum() > 0):
+            mu_resolutions = np.where(
+                np.abs(mu_etas) <= 0.5, np.sqrt(0.01**2 + mu_pts**2 * 1.0e-4**2),
+                np.where(
+                    np.abs(mu_etas) <= 1.5, np.sqrt(0.015**2 + mu_pts**2 * 1.5e-4**2),
+                    np.where(
+                        np.abs(mu_etas) <= 2.5, np.sqrt(0.025**2 + mu_pts**2 * 3.5e-4**2),
+                        0.0
+                    )
+                )
+            )
+            noise[mu_mask, self.feature_index["pt"]] = np.random.normal(loc=0.0, scale=mu_resolutions)
+        # Noise calculation for photons
+        ph_mask = (node_types == self.node_mapping["ph"])
+        ph_etas = etas[ph_mask]
+        ph_energies = energies[ph_mask]
+        if (ph_mask.sum() > 0):
+            ph_resolutions = np.where(
+                np.abs(ph_etas) <= 3.2, np.sqrt(ph_energies**2 * 0.0017**2 + ph_energies * 0.101**2),
+                np.where(
+                    np.abs(ph_etas) <= 4.9, np.sqrt(ph_energies**2 * 0.0350**2 + ph_energies * 0.285**2),
+                    0.0
+                )
+            )
+            noise[ph_mask, self.feature_index["energy"]] = np.random.normal(loc=0.0, scale=ph_resolutions)
+        return noise
+    def make_graph(self, ch):
+        g = super().make_graph(ch)
+        g.ndata['augmented_features'] = g.ndata['features']
+        num_nodes = len(g.ndata['features'][:, 0])
+        # Rotations: phi -> phi + delta_phi
+        phi_index = self.feature_index["phi"]
+        # Generate a single delta_phi for all nodes
+        delta_phi = np.random.uniform(low=-np.pi, high=np.pi)
+        # Apply the same delta_phi to all nodes
+        g.ndata['augmented_features'][:, phi_index] = (g.ndata['augmented_features'][:, phi_index] + delta_phi + np.pi) % (2 * np.pi) - np.pi
+        # Reflections: eta -> -1 * eta, phi -> -1 * phi
+        eta_index = self.feature_index["eta"]
+        eta_reflection = np.random.choice([-1, 1])
+        phi_reflection = np.random.choice([-1, 1])
+        g.ndata['augmented_features'][:, eta_index] = g.ndata['augmented_features'][:, eta_index] * eta_reflection
+        g.ndata['augmented_features'][:, phi_index] = g.ndata['augmented_features'][:, phi_index] * phi_reflection
+        # Detector Noise: pt -> pt + normal(pt, noise(pt))
+        noise = self.detector_noise(g.ndata['augmented_features'])
+        g.ndata['augmented_features'] = g.ndata['augmented_features'] + noise
+        pt_index = self.feature_index["pt"]
+        if (g.ndata['augmented_features'][-1][self.feature_index["node_type"]] == self.node_mapping["MET"]):
+            # Initialize sums of px and py
+            sum_px = 0
+            sum_py = 0
+            # Loop over all nodes except the last one (MET node)
+            for i in range(len(g.ndata['augmented_features']) - 1):
+                pt = g.ndata['augmented_features'][i][pt_index]
+                phi = g.ndata['augmented_features'][i][phi_index]
+                # Compute px and py
+                px = pt * np.cos(phi)
+                py = pt * np.sin(phi)
+                # Sum px and py
+                sum_px += px
+                sum_py += py
+            # Calculate MET
+            g.ndata['augmented_features'][-1][pt_index] = np.sqrt(sum_px**2 + sum_py**2)
+        u, v = g.edges()
+        deta = g.ndata['features'][u, 1] - g.ndata['features'][v, 1]
+        dphi = g.ndata['features'][u, 2] - g.ndata['features'][v, 2]
+        dphi = torch.where(dphi > np.pi, dphi - 2*np.pi, dphi)
+        dphi = torch.where(dphi < -np.pi, dphi + 2*np.pi, dphi)
+        dR   = torch.sqrt(deta**2 + dphi**2)
+        g.edata['features'] = torch.stack([deta, dphi, dR], dim=1)
+        deta = g.ndata['augmented_features'][u, 1] - g.ndata['augmented_features'][v, 1]
+        dphi = g.ndata['augmented_features'][u, 2] - g.ndata['augmented_features'][v, 2]
+        dphi = torch.where(dphi > np.pi, dphi - 2*np.pi, dphi)
+        dphi = torch.where(dphi < -np.pi, dphi + 2*np.pi, dphi)
+        dR   = torch.sqrt(deta**2 + dphi**2)
+        g.edata['augmented_features'] = torch.stack([deta, dphi, dR], dim=1)
+        return g

root_gnn_base/photon_ID_dataset.py ADDED Viewed

	@@ -0,0 +1,44 @@

+from root_gnn_base import dataset
+import dgl
+import torch
+import numpy as np
+def radius_graph(features, radii, self_loops=False):
+    senders = []
+    receivers = []
+    n_nodes = features.shape[0]
+    senders = np.arange(n_nodes*n_nodes) // n_nodes
+    receivers = np.arange(n_nodes*n_nodes) % n_nodes
+    if not self_loops and n_nodes > 1:
+        mask = senders != receivers
+        senders = senders[mask]
+        receivers = receivers[mask]
+    for k, r in radii.items():
+        d = features[senders, k] - features[receivers, k]
+        mask = np.abs(d) < r
+        senders = senders[mask]
+        receivers = receivers[mask]
+    return dgl.graph((senders, receivers))
+class PhotonIDDataset(dataset.LazyMultiLabelDataset):
+    def __init__(self, eta_radius, phi_radius, **kwargs):
+        self.eta_radius = eta_radius
+        self.phi_radius = phi_radius
+        super().__init__(**kwargs)
+    def make_graph(self, ch):
+        features, _ = dataset.node_features_from_tree(ch, self.node_branch_names, self.node_branch_types, self.node_feature_scales)
+        features = features[features[:,0] != 0]
+        #Delta Eta, Delta Phi, Adjacent Layer
+        g = radius_graph(features, {1: self.eta_radius, 2: self.phi_radius, 6: 1.1}, self_loops=True) #Self loops ensure last cell is included even if disconnected
+        g.ndata['features'] = features
+        u, v = g.edges()
+        deta = features[u, 1] - features[v, 1]
+        dphi = g.ndata['features'][u, 2] - g.ndata['features'][v, 2]
+        dphi = torch.where(dphi > np.pi, dphi - 2*np.pi, dphi)
+        dphi = torch.where(dphi < -np.pi, dphi + 2*np.pi, dphi)
+        dR   = torch.sqrt(deta**2 + dphi**2)
+        dx   = features[u, 3] - features[v, 3]
+        dy   = features[u, 4] - features[v, 4]
+        dz   = features[u, 5] - features[v, 5]
+        g.edata['features'] = torch.stack([deta, dphi, dR, dx, dy, dz], dim=1)
+        return g

root_gnn_base/similarity.py ADDED Viewed

	@@ -0,0 +1,158 @@

+import numpy as np
+import scipy
+from sklearn.decomposition import PCA
+from sklearn.metrics.pairwise import cosine_similarity
+from sklearn.metrics.pairwise import euclidean_distances
+from sklearn.preprocessing import StandardScaler
+from scipy.stats import wasserstein_distance
+def cka(rep_a, rep_b, size=None):
+    """
+    Computes the Centered Kernel Alignment (CKA) between two large representation matrices rep_a and rep_b.
+    If size is provided, it performs CKA on a randomly selected subset of the data.
+    Parameters:
+    rep_a : np.ndarray
+        First representation matrix of size (n_samples, n_features_a).
+    rep_b : np.ndarray
+        Second representation matrix of size (n_samples, n_features_b).
+    size : int, optional
+        Number of samples to use for the CKA calculation. If None, use the full dataset.
+    Returns:
+    float
+        CKA similarity between rep_a and rep_b.
+    """
+    def gram_linear(x):
+        """Compute the Gram (kernel) matrix using a linear kernel."""
+        return x @ x.T
+    def center_gram(gram):
+        """Center the Gram matrix."""
+        n = gram.shape[0]
+        identity = np.eye(n)
+        ones = np.ones((n, n)) / n
+        return gram - ones @ gram - gram @ ones + ones @ gram @ ones
+    # If sample_size is specified, randomly sample a subset of the data
+    if size is not None and size < rep_a.shape[0]:
+        indices = np.random.choice(rep_a.shape[0], size, replace=False)
+        rep_a = rep_a[indices]
+        rep_b = rep_b[indices]
+    # Compute the Gram matrices
+    gram_a = gram_linear(rep_a)
+    gram_b = gram_linear(rep_b)
+    # Center the Gram matrices
+    centered_gram_a = center_gram(gram_a)
+    centered_gram_b = center_gram(gram_b)
+    # Compute the CKA similarity
+    numerator = np.sum(centered_gram_a * centered_gram_b)
+    denominator = np.sqrt(np.sum(centered_gram_a**2) * np.sum(centered_gram_b**2))
+    return numerator / denominator if denominator != 0 else 0
+def cca(X, Y, size = None, num_components=10):
+    """
+    Perform Canonical Correlation Analysis (CCA) between two datasets.
+    Parameters:
+    X : np.ndarray
+        First dataset, shape (n_samples, n_features_X).
+    Y : np.ndarray
+        Second dataset, shape (n_samples, n_features_Y).
+    num_components : int
+        Number of CCA components to return.
+    Returns:
+    w_X : np.ndarray
+        Canonical weights for the first dataset, shape (n_features_X, num_components).
+    w_Y : np.ndarray
+        Canonical weights for the second dataset, shape (n_features_Y, num_components).
+    corrs : np.ndarray
+        Array of canonical correlations for each component.
+    """
+    # If sample size is specified, randomly sample a subset of the data
+    if size is not None and size < X.shape[0]:
+        indices = np.random.choice(X.shape[0], size, replace=False)
+        X = X[indices]
+        Y = Y[indices]
+    # Standardize both datasets (mean = 0, variance = 1)
+    scaler_X = StandardScaler()
+    scaler_Y = StandardScaler()
+    X = scaler_X.fit_transform(X)
+    Y = scaler_Y.fit_transform(Y)
+    # Covariance matrices
+    C_XX = np.cov(X, rowvar=False)  # Covariance of X
+    C_YY = np.cov(Y, rowvar=False)  # Covariance of Y
+    C_XY = np.cov(X, Y, rowvar=False)[:X.shape[1], X.shape[1]:]  # Cross-covariance of X and Y
+    # Regularization term to avoid singular matrices
+    reg = 1e-6
+    inv_C_XX = np.linalg.inv(C_XX + reg * np.eye(C_XX.shape[0]))
+    inv_C_YY = np.linalg.inv(C_YY + reg * np.eye(C_YY.shape[0]))
+    # Solve the generalized eigenvalue problem for CCA
+    # (inv_C_XX @ C_XY @ inv_C_YY @ C_XY.T) and vice versa for Y
+    A = inv_C_XX @ C_XY @ inv_C_YY @ C_XY.T
+    B = inv_C_YY @ C_XY.T @ inv_C_XX @ C_XY
+    # Perform eigenvalue decomposition
+    eigvals_X, eigvecs_X = np.linalg.eigh(A)
+    eigvals_Y, eigvecs_Y = np.linalg.eigh(B)
+    # Sort the eigenvalues and eigenvectors in descending order
+    idx_X = np.argsort(eigvals_X)[::-1]
+    idx_Y = np.argsort(eigvals_Y)[::-1]
+    eigvecs_X = eigvecs_X[:, idx_X]
+    eigvecs_Y = eigvecs_Y[:, idx_Y]
+    # Canonical weights (the first `num_components` components)
+    w_X = eigvecs_X[:, :num_components]
+    w_Y = eigvecs_Y[:, :num_components]
+    # Canonical correlations (square root of the eigenvalues, constrained to [0,1])
+    corrs = np.sqrt(np.clip(eigvals_X[:num_components], 0, 1))
+    return np.mean(corrs)
+    return w_X, w_Y, corrs
+def pca(X, Y, size=1000, n_components=3, bins=30):
+    pca_X = PCA(n_components=n_components)
+    X_pca = pca_X.fit_transform(X)
+    pca_Y = PCA(n_components=n_components)
+    Y_pca = pca_Y.fit_transform(Y)
+    # Step 2: Determine common bin edges based on the range of PCA components
+    min_value = min(X_pca.min(), Y_pca.min())
+    max_value = max(X_pca.max(), Y_pca.max())
+    bin_edges = np.linspace(min_value, max_value, bins + 1)
+    # Step 3: Calculate histograms for each PCA component using the same bins
+    histograms_X = [np.histogram(X_pca[:, i], bins=bin_edges, density=True)[0] for i in range(n_components)]
+    histograms_Y = [np.histogram(Y_pca[:, i], bins=bin_edges, density=True)[0] for i in range(n_components)]
+    # Step 4: Calculate Wasserstein distance between corresponding histograms
+    total_distance = 0
+    for i in range(n_components):
+        total_distance += wasserstein_distance(histograms_X[i], histograms_Y[i])
+    # Step 5: Normalize the total distance for a similarity score
+    # Calculate the maximum possible distance (theoretical max could be based on histogram size)
+    # This could be replaced with a more complex calculation if necessary.
+    max_distance = 1.0  # Replace this with a suitable maximum based on your dataset properties.
+    similarity_score = 1 - (total_distance / max_distance)
+    return max(0, min(1, similarity_score))  # Ensure the score stays in [0, 1]

root_gnn_base/uproot_dataset.py ADDED Viewed

	@@ -0,0 +1,54 @@

+from root_gnn_base import dataset
+import torch
+import uproot
+import glob
+import os
+import awkward as ak
+import numpy as np
+import time
+def node_features_from_ak(ch, node_branch_names, node_branch_types, node_feature_scales):
+    node_types = []
+    n_types = len(node_branch_names[0])
+    for i in range(n_types):
+        features = []
+        branch_type = node_branch_types[i]
+        for j in range(len(node_branch_names)):
+            if node_branch_names[j] == 'CALC_E':
+                features.append(features[0] * np.cosh(features[1]))
+            elif node_branch_names[j] == 'NODE_TYPE':
+                features.append(ak.full_like(features[0], i))
+            elif isinstance(node_branch_names[j][i], str):
+                features.append(ch[node_branch_names[j][i]])
+            elif isinstance(node_branch_names[j][i], (int, float)):
+                features.append(ak.full_like(features[0], node_branch_names[j][i]))
+        if branch_type == 'single':
+            features = [f[:,np.newaxis] for f in features]
+        node_types.append(ak.Array(features))
+    node_features = ak.concatenate(node_types, axis=2) * node_feature_scales #axis order at this point is (feature, event, node)
+    return node_features
+class UprootDataset(dataset.RootDataset):
+    def process(self):
+        starttime = time.time()
+        self.files = glob.glob(os.path.join(self.raw_dir, self.file_names))
+        branches = self.get_list_of_branches()
+        self.chain = uproot.concatenate([f + ':' + self.tree_name for f in self.files], branches, num_workers=4)
+        node_features = node_features_from_ak(self.chain, self.node_branch_names, self.node_branch_types, self.node_feature_scales)
+        loadtime = time.time()
+        n_nodes = ak.num(node_features[0], axis=1) #number of nodes for each event
+        ftime = time.time()
+        self.graphs = [dataset.full_connected_graph(n, False) for n in n_nodes]
+        itime = time.time()
+        for i in range(len(self.graphs)):
+            if i % 10000 == 0:
+                print(f'Processing event {i}/{len(self.graphs)}')
+            self.graphs[i].ndata['features'] = torch.transpose(torch.tensor(node_features[:,i,:]),0,1).to(torch.float)
+        self.label = torch.stack([torch.full((len(self.graphs),),torch.tensor(self.label)), torch.tensor(ak.values_astype(self.chain[self.fold_var], np.int64))], dim=1)
+        gtime = time.time()
+        print()
+        print(f'load time: {loadtime - starttime} s')
+        print(f'feature time: {ftime - loadtime} s')
+        print(f'graph time: {itime - ftime} s')
+        print(f'graph data time: {gtime - itime} s')

root_gnn_base/utils.py ADDED Viewed

	@@ -0,0 +1,307 @@

+import importlib
+import yaml
+import os
+import torch
+import numpy as np
+import matplotlib.pyplot as plt
+import dgl
+import signal
+def buildFromConfig(conf, run_time_args = {}):
+    if 'module' in conf:
+        module = importlib.import_module(conf['module'])
+        cls = getattr(module, conf['class'])
+        return cls(**conf['args'], **run_time_args)
+    else:
+        print('No module specified in config. Returning None.')
+def cycler(iterable):
+    while True:
+        #print('Cycler is cycling...')
+        for i in iterable:
+            yield i
+def include_config(conf):
+    if 'include' in conf:
+        for i in conf['include']:
+            with open(i) as f:
+                conf.update(yaml.load(f, Loader=yaml.FullLoader))
+        del conf['include']
+def load_config(config_file):
+    with open(config_file) as f:
+        conf = yaml.load(f, Loader=yaml.FullLoader)
+    include_config(conf)
+    return conf
+#Timeout function from https://stackoverflow.com/questions/492519/timeout-on-a-function-call
+class TimeoutException(Exception):
+    pass
+def timeout_handler(signum, frame):
+    """Signal handler (signature required by ``signal.signal``) that raises TimeoutException."""
+    raise TimeoutException()
+def set_timeout(timeout):
+    """Install ``timeout_handler`` for SIGALRM and schedule an alarm ``timeout`` seconds from now."""
+    # The handler is installed before the alarm is armed so the signal cannot hit the default action
+    signal.signal(signal.SIGALRM, timeout_handler)
+    signal.alarm(timeout)
+def unset_timeout():
+    """Cancel any pending alarm and restore the default SIGALRM handler."""
+    signal.alarm(0)  # alarm(0) cancels a scheduled alarm
+    signal.signal(signal.SIGALRM, signal.SIG_DFL)
+def make_padding_graph(batch, pad_nodes, pad_edges):
+    senders = []
+    receivers = []
+    senders = torch.arange(0,pad_edges) // pad_nodes
+    receivers = torch.arange(1,pad_edges+1) % pad_nodes
+    if pad_nodes < 0 or pad_edges < 0 or pad_edges > pad_nodes * pad_nodes / 2:
+        print('Batch is larger than padding size or e > n^2/2. Repeating edges as necessary.')
+        print(f'Batch nodes: {batch.num_nodes()}, Batch edges: {batch.num_edges()}, Padding nodes: {pad_nodes}, Padding edges: {pad_edges}')
+        senders = senders % pad_nodes
+    padg = dgl.graph((senders[:pad_edges], receivers[:pad_edges]), num_nodes = pad_nodes)
+    for k in batch.ndata.keys():
+        padg.ndata[k] = torch.zeros( (pad_nodes, batch.ndata[k].shape[1]) )
+    for k in batch.edata.keys():
+        padg.edata[k] = torch.zeros( (pad_edges, batch.edata[k].shape[1]) )
+    return dgl.batch([batch, padg.to(batch.device)])
+def pad_size(graphs, edges, nodes, edge_per_graph=3, node_per_graph=14):
+    pad_nodes = ((nodes // (node_per_graph * graphs))+1) * graphs * node_per_graph
+    pad_edges = ((edges // (edge_per_graph * graphs))+1) * graphs * edge_per_graph
+    return pad_nodes, pad_edges
+def pad_batch_to_step_per_graph(batch, edge_per_graph=3, node_per_graph=14):
+    n_graphs = batch.batch_num_nodes().shape[0]
+    pad_nodes = (batch.num_nodes() + node_per_graph * n_graphs) % int(n_graphs * node_per_graph)
+    pad_edges = (batch.num_edges() + edge_per_graph * n_graphs) % int(n_graphs * edge_per_graph)
+    return make_padding_graph(batch, pad_nodes, pad_edges)
+def pad_batch(batch, edges = 104000, nodes = 16000):
+    if edges == 0 and nodes == 0:
+        return batch
+    pad_nodes = 0
+    pad_edges = 0
+    pad_nodes = nodes - batch.num_nodes()
+    pad_edges = edges - batch.num_edges()
+    return make_padding_graph(batch, pad_nodes, pad_edges)
+def pad_batch_num_nodes(batch, max_num_nodes, hid_size = 64):
+    print(f"Padding each graph to have {max_num_nodes} nodes")
+    unbatched = dgl.unbatch(batch)
+    for g in unbatched:
+        num_nodes_to_add = max_num_nodes - g.number_of_nodes()
+        if num_nodes_to_add > 0:
+            g.add_nodes(num_nodes_to_add)  # Add isolated nodes
+    batch = dgl.batch(unbatched)
+    padding_mask = torch.zeros((batch.ndata['features'].shape[0]), dtype=torch.bool)
+    global_update_weights = torch.ones((batch.ndata['features'].shape[0], hid_size))
+    for i in range(len(batch.ndata['features'])):
+        if (torch.count_nonzero(batch.ndata['features'][i]) == 0):
+            padding_mask[i] = True
+            global_update_weights[i] = 0
+    batch.ndata['w'] = global_update_weights
+    batch.ndata['padding_mask'] = padding_mask
+    return batch
+def fold_selection(fold_config, sample):
+    n_folds = fold_config['n_folds']
+    folds_opt = fold_config[sample]
+    folds = []
+    if type(folds_opt) == int:
+        return lambda x : x.tracking[:,0] % n_folds == folds_opt
+    elif type(folds_opt) == list:
+        print("fold type is list")
+        print(f"fold_config = {fold_config}")
+        print(f"folds_opt = {folds_opt}")
+        return lambda x : sum([x.tracking[:,0] % n_folds == f for f in folds_opt]) == 1
+    else:
+        raise ValueError("Invalid fold selection option with type {}".format(type(folds_opt)))
+def fold_selection_name(fold_config, sample):
+    n_folds = fold_config['n_folds']
+    folds_opt = fold_config[sample]
+    if type(folds_opt) == int:
+        return f'n_{n_folds}_f_{folds_opt}'
+    elif type(folds_opt) == list:
+        return f'n_{n_folds}_f_{"_".join([str(f) for f in folds_opt])}'
+    else:
+        raise ValueError("Invalid fold selection option with type {}".format(type(folds_opt)))
+#Return the index and checkpoint of the last epoch.
+def get_last_epoch(config, max_ep = -1, device = None):
+    last_epoch = -1
+    checkpoint = None
+    if max_ep < 0:
+        max_ep = config['Training']['epochs']
+    for ep in range(max_ep):
+        if os.path.exists(os.path.join(config['Training_Directory'], f'model_epoch_{ep}.pt')):
+            last_epoch = ep
+        else:
+            print(f'Epoch {ep} not found. Stopping at epoch {last_epoch}')
+            print('File not found: ', os.path.join(config['Training_Directory'], f'model_epoch_{ep}.pt'))
+            break
+    if last_epoch >= 0:
+        checkpoint = torch.load(os.path.join(config['Training_Directory'], f'model_epoch_{last_epoch}.pt'), map_location=device)
+    return last_epoch, checkpoint
+#Return the index and checkpoint of the requested epoch, or of the last consecutive epoch found before it.
+def get_specific_epoch(config, target_epoch, device = None, from_ryan = False):
+    last_epoch = -1
+    checkpoint = None
+    for ep in range(target_epoch + 1):
+        if (from_ryan):
+            if os.path.exists(os.path.join('/global/cfs/cdirs/atlas/berobert/root_gnn_dgl/' + config['Training_Directory'], f'model_epoch_{ep}.pt')):
+                last_epoch = ep
+            else:
+                print(f'Epoch {ep} not found. Stopping at epoch {last_epoch}')
+                print('File not found: ', os.path.join('/global/cfs/cdirs/atlas/berobert/root_gnn_dgl/' + config['Training_Directory'], f'model_epoch_{ep}.pt'))
+                break
+        else:
+            if os.path.exists(os.path.join(config['Training_Directory'], f'model_epoch_{ep}.pt')):
+                last_epoch = ep
+            else:
+                print(f'Epoch {ep} not found. Stopping at epoch {last_epoch}')
+                print('File not found: ', os.path.join(config['Training_Directory'], f'model_epoch_{ep}.pt'))
+                break
+    if last_epoch >= 0:
+        if (from_ryan):
+            checkpoint = torch.load('/global/cfs/cdirs/atlas/berobert/root_gnn_dgl/' + os.path.join(config['Training_Directory'], f'model_epoch_{last_epoch}.pt'), map_location=device)
+        else:
+            checkpoint = torch.load(os.path.join(config['Training_Directory'], f'model_epoch_{last_epoch}.pt'), map_location=device)
+    return last_epoch, checkpoint
+#Convert training logs into dict for plotting.
+def read_log(config):
+    lines = []
+    with open(config['Training_Directory'] + '/training.log', 'r') as f:
+        lines = f.readlines()
+    lines = [ l for l in lines if 'Epoch' in l ]
+    nlines = len(lines)
+    labels = []
+    for field in lines[0].split('|'):
+        labels.append(field.split()[0])
+    log = {label : np.zeros(nlines) for label in labels}
+    for i, line in enumerate(lines):
+        for field in line.split('|'):
+            spl = field.split()
+            log[spl[0]][i] = float(spl[1])
+    return log
+#Plot training logs.
+def plot_log(log, output_file):
+    fig, ax = plt.subplots(2, 2, figsize=(10,10))
+    #Time
+    ax[0][0].plot(log['Epoch'], np.cumsum(log['Time']), label='Time')
+    ax[0][0].set_xlabel('Epoch')
+    ax[0][0].set_ylabel('Time (s)')
+    ax[0][0].legend()
+    """
+    ax[0][0].plot(log['Epoch'], log['LR'], label='Learning Rate')
+    ax[0][0].set_xlabel('Epoch')
+    ax[0][0].set_ylabel('Learning Rate')
+    ax[0][0].set_yscale('log')
+    ax[0][0].legend()
+    """
+    #Loss
+    ax[0][1].plot(log['Epoch'], log['Loss'], label='Train Loss')
+    ax[0][1].plot(log['Epoch'], log['Test_Loss'], label='Test Loss')
+    ax[0][1].set_xlabel('Epoch')
+    ax[0][1].set_ylabel('Loss')
+    ax[0][1].legend()
+    #Accuracy
+    ax[1][0].plot(log['Epoch'], log['Accuracy'], label='Test Accuracy')
+    ax[1][0].set_xlabel('Epoch')
+    ax[1][0].set_ylabel('Accuracy')
+    ax[1][0].set_ylim((0.44, 0.56))
+    ax[1][0].legend()
+    #AUC
+    ax[1][1].plot(log['Epoch'], log['Test_AUC'], label='Test AUC')
+    ax[1][1].set_xlabel('Epoch')
+    ax[1][1].set_ylabel('AUC')
+    ax[1][1].legend()
+    fig.savefig(output_file)
+class EarlyStop():
+    def __init__(self, patience=15, threshold=1e-8, mode='min'):
+        self.patience = patience
+        self.threshold = threshold
+        self.mode = mode
+        self.count = 0
+        self.current_best = np.inf if mode == 'min' else -np.inf
+        self.should_stop = False
+    def update(self, value):
+        if self.mode == 'min':  # Minimizing loss
+            if value < self.current_best - self.threshold:
+                self.current_best = value
+                self.count = 0
+            else:
+                self.count += 1
+        elif self.mode == 'max':  # Maximizing metric
+            if value > self.current_best + self.threshold:
+                self.current_best = value
+                self.count = 0
+            else:
+                self.count += 1
+        # Check if patience is exceeded
+        if self.count >= self.patience:
+            self.should_stop = True
+    def reset(self):
+        self.count = 0
+        self.current_best = np.inf if self.mode == 'min' else -np.inf
+        self.should_stop = False
+    def to_str(self):
+        status = (
+            f"EarlyStop Status:\n"
+            f"  Mode: {'Minimize' if self.mode == 'min' else 'Maximize'}\n"
+            f"  Patience: {self.patience}\n"
+            f"  Threshold: {self.threshold:.3e}\n"
+            f"  Current Best: {self.current_best:.6f}\n"
+            f"  Consecutive Epochs Without Improvement: {self.count}\n"
+            f"  Stopping Triggered: {'Yes' if self.should_stop else 'No'}"
+        )
+        return status
+    def to_dict(self):
+        return {
+            'patience': self.patience,
+            'threshold': self.threshold,
+            'mode': self.mode,
+            'count': self.count,
+            'current_best': self.current_best,
+            'should_stop': self.should_stop,
+        }
+    @classmethod
+    def load_from_dict(cls, state_dict):
+        instance = cls(
+            patience=state_dict['patience'],
+            threshold=state_dict['threshold'],
+            mode=state_dict['mode']
+        )
+        instance.count = state_dict['count']
+        instance.current_best = state_dict['current_best']
+        instance.should_stop = state_dict['should_stop']
+        return instance
+def graph_augmentation(graph):
+    print("Augmenting Graph")
+    return

scripts/find_free_port.py ADDED Viewed

	@@ -0,0 +1,12 @@

+# find_free_port.py
+def find_free_port():
+    import socket
+    from contextlib import closing
+    with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as s:
+        s.bind(('', 0))
+        s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
+        return str(s.getsockname()[1])
+# Script entry point: print one free port number to stdout
+if __name__ == "__main__":
+    print(find_free_port())

scripts/inference.py ADDED Viewed

	@@ -0,0 +1,289 @@

+import sys
+import os
+file_path = os.getcwd()
+sys.path.append(file_path)
+import argparse
+import yaml
+import torch
+import dgl
+from dgl.data import DGLDataset
+from dgl.dataloading import GraphDataLoader
+from torch.utils.data import SubsetRandomSampler, SequentialSampler
+def my_error_handler(level, abort, location, msg):
+    # Log the error message to a file instead of printing
+    with open("error_log.txt", "a") as log_file:
+        log_file.write(f"Error in {location}: {msg}\n")
+    # Optionally, print the error message to the console
+    # print(f"Error in {location}: {msg}")
+    # Decide whether to abort based on the error level
+    if abort:
+        raise RuntimeError(f"Fatal error in {location}: {msg}")
+class CustomPreBatchedDataset(DGLDataset):
+    def __init__(self, start_dataset, batch_size, mask_fn=None, drop_last=False, shuffle=False, **kwargs):
+        self.start_dataset = start_dataset
+        self.batch_size = batch_size
+        self.mask_fn = mask_fn or (lambda x: torch.ones(len(x), dtype=torch.bool))
+        self.drop_last = drop_last
+        self.shuffle = shuffle
+        super().__init__(name=start_dataset.name + '_custom_prebatched', save_dir=start_dataset.save_dir)
+    def process(self):
+        mask = self.mask_fn(self.start_dataset)
+        indices = torch.arange(len(self.start_dataset))[mask]
+        print(f"Number of elements after masking: {len(indices)}")  # Debugging print
+        if self.shuffle:
+            sampler = SubsetRandomSampler(indices)
+        else:
+            sampler = SequentialSampler(indices)
+        self.dataloader = GraphDataLoader(
+            self.start_dataset,
+            sampler=sampler,
+            batch_size=self.batch_size,
+            drop_last=self.drop_last
+        )
+        print(f"Batch size set in DataLoader: {self.batch_size}")  # Debugging print
+    def __getitem__(self, idx):
+        if isinstance(idx, int):
+            idx = [idx]
+        sampler = SequentialSampler(idx)
+        dloader = GraphDataLoader(self.start_dataset, sampler=sampler, batch_size=self.batch_size, drop_last=False)
+        return next(iter(dloader))
+    def __len__(self):
+        return len(self.start_dataset)
+def include_config(conf):
+    if 'include' in conf:
+        for i in conf['include']:
+            with open(i) as f:
+                conf.update(yaml.load(f, Loader=yaml.FullLoader))
+        del conf['include']
+def load_config(config_file):
+    with open(config_file) as f:
+        conf = yaml.load(f, Loader=yaml.FullLoader)
+    include_config(conf)
+    return conf
+def main():
+    parser = argparse.ArgumentParser()
+    add_arg = parser.add_argument
+    add_arg('--config', type=str, required=True)
+    add_arg('--target', type=str, required=True)
+    add_arg('--destination', type=str, default='')
+    add_arg('--chunkno', type=int, default=0)
+    add_arg('--chunks', type=int, default=1)
+    add_arg('--write', action='store_true')
+    add_arg('--ckpt', type=int, default=-1)
+    add_arg('--clobber', action='store_true')
+    add_arg('--tree', type=str, default='')
+    add_arg('--branch_name', type=str, default='score')
+    args = parser.parse_args()
+    config = load_config(args.config)
+    if args.destination == '':
+        args.destination = os.path.join(config['Training_Directory'], 'inference/', os.path.split(args.target)[1])
+    else:
+        args.destination = args.destination
+    if not args.write:
+        args.destination = args.destination.replace('.root', '') + f'_chunk{args.chunkno}.npz'
+    if os.path.exists(args.destination):
+        print(f'File {args.destination} already exists.')
+        if args.clobber:
+            print('Clobbering.')
+        else:
+            print('Exiting.')
+            return
+    else:
+        print(f'Writing to {args.destination}')
+    import time
+    start  = time.time()
+    import ROOT
+    import torch
+    from array import array
+    import numpy as np
+    from root_gnn_base import batched_dataset as dataset
+    from root_gnn_base import utils
+    end = time.time()
+    print('Imports finished in {:.2f} seconds'.format(end - start))
+    start = time.time()
+    dset_config = config['Datasets'][list(config['Datasets'].keys())[0]]
+    if dset_config['class'] == 'LazyDataset':
+        dset_config['class'] = 'EdgeDataset'
+    elif dset_config['class'] == 'LazyMultiLabelDataset':
+        dset_config['class'] = 'MultiLabelDataset'
+    elif dset_config['class'] == 'PhotonIDDataset':
+        dset_config['class'] = 'UnlazyPhotonIDDataset'
+    elif dset_config['class'] == 'kNNDataset':
+        dset_config['class'] = 'UnlazyKNNDataset'
+    dset_config['args']['raw_dir'] = os.path.split(args.target)[0]
+    dset_config['args']['file_names'] = os.path.split(args.target)[1]
+    dset_config['args']['save'] = False
+    dset_config['args']['chunks'] = args.chunks
+    dset_config['args']['process_chunks'] = [args.chunkno,]
+    dset_config['args']['selections'] = []
+    dset_config['args']['save_dir'] = os.path.dirname(args.destination)
+    if args.tree != '':
+        dset_config['args']['tree_name'] = args.tree
+    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+    dstart = time.time()
+    dset = utils.buildFromConfig(dset_config)
+    dend = time.time()
+    print('Dataset finished in {:.2f} seconds'.format(dend - dstart))
+    print(dset)
+    batch_size = config['Training']['batch_size']
+    lstart = time.time()
+    loader = CustomPreBatchedDataset(dset, batch_size)
+    loader.process()
+    # loader = dataset.PreBatchedDataset(dset, batch_size, shuffle=False, drop_last=False, save_to_disk=False, chunks = 1, num_workers=0)
+    lend = time.time()
+    print('Loader finished in {:.2f} seconds'.format(lend - lstart))
+    sample_graph, _, _, global_sample = loader[0]
+    print('dset length =', len(dset))
+    print('loader length =', len(loader))
+    model = utils.buildFromConfig(config['Model'], {'sample_graph' : sample_graph, 'sample_global': global_sample}).to(device)
+    if args.ckpt < 0:
+        ep, checkpoint = utils.get_last_epoch(config, args.ckpt, device=device)
+    else:
+        ep, checkpoint = utils.get_specific_epoch(config, args.ckpt, device=device)
+    #Bad filler for models which were compiled. Have to remove this prefix.
+    mds_copy = {}
+    for key in checkpoint['model_state_dict'].keys():
+        newkey = key.replace('module.', '')
+        newkey = newkey.replace('_orig_mod.', '')
+        mds_copy[newkey] = checkpoint['model_state_dict'][key]
+    model.load_state_dict(mds_copy)
+    model.eval()
+    end = time.time()
+    print('Model and dataset finished in {:.2f} seconds'.format(end - start))
+    print('Starting inference')
+    start = time.time()
+    finish_fn = torch.nn.Sigmoid()
+    if 'Loss' in config:
+        finish_fn = utils.buildFromConfig(config['Loss']['finish'])
+    scores = []
+    labels = []
+    tracking_info = []
+    ibatch = 0
+    for batch, label, track, globals in loader.dataloader:
+        batch = batch.to(device)
+        pred = model(batch, globals.to(device))
+        ibatch += 1
+        # scores.append(finish_fn(pred).detach().cpu().numpy())
+        if (finish_fn.__class__.__name__ == "ContrastiveClusterFinish"):
+            scores.append(pred.detach().cpu().numpy())
+        else:
+            scores.append(finish_fn(pred).detach().cpu().numpy())
+        labels.append(label.detach().cpu().numpy())
+        tracking_info.append(track.detach().cpu().numpy())
+    # for batch, label, track, globals in loader:
+    #     batch = batch.to(device)
+    #     pred = model(batch, globals.to(device))
+    #     print(f'Batch size: {batch.batch_size if hasattr(batch, "batch_size") else "Unavailable"}')
+    #     print(f'Prediction shape: {pred.shape}')
+    #     ibatch += 1
+    #     scores.append(finish_fn(pred).detach().cpu().numpy())
+    #     labels.append(label.detach().cpu().numpy())
+    #     tracking_info.append(track.detach().cpu().numpy())
+    #     exit()
+    score_size = scores[0].shape[1]
+    scores = np.concatenate(scores)
+    labels = np.concatenate(labels)
+    tracking_info = np.concatenate(tracking_info)
+    end = time.time()
+    print('Inference finished in {:.2f} seconds'.format(end - start))
+    if args.write:
+        # ROOT.SetErrorHandler(my_error_handler)
+        ROOT.gErrorIgnoreLevel = ROOT.kFatal
+        # ROOT.gSystem.RedirectOutput("/dev/null", "w")
+        # Open the original ROOT file
+        infile = ROOT.TFile.Open(args.target)
+        tree = infile.Get(dset_config['args']['tree_name'])
+        # Create the destination directory if it doesn't exist
+        os.makedirs(os.path.split(args.destination)[0], exist_ok=True)
+        # Create a new ROOT file to write the modified tree
+        outfile = ROOT.TFile.Open(args.destination, 'RECREATE')
+        # Clone the original tree, including data
+        outtree = tree.CloneTree(0)  # Clone all entries
+        # Determine if scores is a list of single values or vectors
+        from ROOT import std
+        if isinstance(scores[0], (list, tuple, np.ndarray)):  # Check if scores contains vectors
+            # Create a new branch for scores as a vector of floats
+            scores_branch_vec = std.vector('float')()
+            outtree.Branch(args.branch_name, scores_branch_vec)
+            is_vector = True
+        else:  # Scores contains single values
+            # Create a new branch for scores as a single float
+            score_branch_arr = array('f', [0])
+            outtree.Branch(args.branch_name, score_branch_arr, f'{args.branch_name}/F')
+            is_vector = False
+        # Write scores to the new branch
+        print(f'Writing {len(scores)} scores to tree')
+        for i in range(tree.GetEntries()):
+            tree.GetEntry(i)
+            if is_vector:
+                # Clear the vector
+                scores_branch_vec.clear()
+                # Add all elements from scores[i] to the vector
+                for value in scores[i]:
+                    scores_branch_vec.push_back(float(value))  # Use push_back to add elements one by one
+            else:
+                # Fill the score branch with the current single score
+                score_branch_arr[0] = float(scores[i])  # Ensure the value is a float
+            # Fill the output tree with all branches, including the new scores branch
+            outtree.Fill()
+        # Write the modified tree to the new file
+        print(f'Writing to file {args.destination}')
+        print(f'Input entries: {tree.GetEntries()}, Output entries: {outtree.GetEntries()}')
+        outtree.Write()
+        outfile.Close()
+        infile.Close()
+    else:
+        os.makedirs(os.path.split(args.destination)[0], exist_ok=True)
+        np.savez(args.destination, scores=scores, labels=labels, tracking_info=tracking_info)
+if __name__ == '__main__':
+    main()

scripts/prep_data.py ADDED Viewed

	@@ -0,0 +1,43 @@

+import sys
+import os
+file_path = os.getcwd()
+sys.path.append(file_path)
+import root_gnn_base.utils as utils
+import argparse
+from root_gnn_base.batched_dataset import PreBatchedDataset
+from root_gnn_base.batched_dataset import LazyPreBatchedDataset
+def main():
+    parser = argparse.ArgumentParser()
+    add_arg = parser.add_argument
+    add_arg('--config', type=str, required=True)
+    add_arg('--dataset', type=str, required=True)
+    add_arg('--chunk', type=int, default=0)
+    add_arg('--shuffle_mode', action='store_true', help='Shuffle the dataset before training.')
+    args = parser.parse_args()
+    config = utils.load_config(args.config)
+    dset_config = config['Datasets'][args.dataset]
+    batch_size = config['Training']['batch_size']
+    if not args.shuffle_mode:
+        dset = utils.buildFromConfig(dset_config, {'process_chunks': [args.chunk,]})
+    else:
+        dset = utils.buildFromConfig(dset_config)
+        if 'batch_size' in dset_config:
+            batch_size = dset_config['batch_size']
+        shuffle_chunks = dset_config.get('shuffle_chunks', 10)
+        padding_mode = dset_config.get('padding_mode', 'STEPS')
+        fold_conf = dset_config["folding"]
+        print(f"shuffle_chunks = {shuffle_chunks}, args.chunk = {args.chunk}, padding_mode = {padding_mode}")
+        if dset_config["class"] == "LazyMultiLabelDataset":
+            LazyPreBatchedDataset(start_dataset = dset, batch_size = batch_size, mask_fn = utils.fold_selection(fold_conf, "train"), suffix = utils.fold_selection_name(fold_conf, "train"), chunks = shuffle_chunks, chunkno = args.chunk, padding_mode = padding_mode)
+            LazyPreBatchedDataset(start_dataset = dset, batch_size = batch_size, mask_fn = utils.fold_selection(fold_conf, "test"),  suffix = utils.fold_selection_name(fold_conf, 'test'), chunks = shuffle_chunks, chunkno = args.chunk, padding_mode = padding_mode)
+        else:
+            PreBatchedDataset(dset, batch_size, utils.fold_selection(fold_conf, "train"), suffix = utils.fold_selection_name(fold_conf, "train"), chunks = shuffle_chunks, chunkno = args.chunk, padding_mode = padding_mode)
+            PreBatchedDataset(dset, batch_size, utils.fold_selection(fold_conf, "test"),  suffix = utils.fold_selection_name(fold_conf, 'test'), chunks = shuffle_chunks, chunkno = args.chunk, padding_mode = padding_mode)
+if __name__ == "__main__":
+    main()

scripts/training_script.py ADDED Viewed

	@@ -0,0 +1,755 @@

+import argparse
+import time
+import datetime
+import yaml
+import os
+start_time = time.time()
+import dgl
+import torch
+import torch.nn as nn
+import sys
+file_path = os.getcwd()
+sys.path.append(file_path)
+import root_gnn_base.batched_dataset as datasets
+from root_gnn_base import utils
+import root_gnn_base.custom_scheduler as lr_utils
+from models import GCN
+import numpy as np
+from sklearn.metrics import roc_auc_score
+import resource
+import gc
+import torch.distributed as dist
+import torch.multiprocessing as mp
+from torch.utils.data.distributed import DistributedSampler
+from torch.nn.parallel import DistributedDataParallel as DDP
+print("import time: {:.4f} s".format(time.time() - start_time))
+def mem():
+    print(f'Current memory usage: {resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1024 / 1024} GB')
+def gpu_mem():
+    print()
+    print('GPU Memory Usage:')
+    sum = 0
+    # for obj in gc.get_objects():
+    #     try:
+    #         if torch.is_tensor(obj) or (hasattr(obj, 'data') and torch.is_tensor(obj.data)):
+    #             print(obj.numel() if len(obj.size()) > 0 else 0, type(obj), obj.size())
+    #             sum += obj.numel() if len(obj.size()) > 0 else 0
+    #     except:
+    #         pass
+    print(f'Current GPU memory usage: {torch.cuda.memory_allocated() / 1024 / 1024 / 1024} GB')
+    print(f'Current GPU cache usage: {torch.cuda.memory_cached() / 1024 / 1024 / 1024} GB')
+    print(f'Current GPU max memory usage: {torch.cuda.max_memory_allocated() / 1024 / 1024 / 1024} GB')
+    print(f'Current GPU max cache usage: {torch.cuda.max_memory_cached() / 1024 / 1024 / 1024} GB')
+    print(f'Numel in current tensors: {sum}')
+    mem()
+## epoch stores the epoch number I want to evaluate the model at
+def evaluate(val_loaders, model, config, device, epoch = -1):
+    print("Evaluating")
+    if (epoch != -1) :
+        print(f"Evalulating at epoch {epoch}")
+        last_ep, checkpoint = utils.get_specific_epoch(config, epoch, from_ryan=False)
+        print(f"Evaluating at epoch = {last_ep}")
+    else:
+        starting_epoch = 0
+        last_ep, checkpoint = utils.get_last_epoch(config)
+    if checkpoint != None:
+        ep = last_ep
+        state_dict = checkpoint['model_state_dict']
+        new_state_dict = {}
+        for k, v in state_dict.items():
+            new_key = k.replace('module.', '')
+            new_state_dict[new_key] = v
+        model.load_state_dict(new_state_dict)
+        starting_epoch = checkpoint['epoch'] + 1
+        print(f"Loaded epoch {checkpoint['epoch']} from checkpoint")
+    if 'Loss' not in config:
+        loss_fcn = nn.BCEWithLogitsLoss()
+    else:
+        loss_fcn = utils.buildFromConfig(config['Loss'])
+    if len(val_loaders) == 0:
+        return "No validation data"
+    start = time.time()
+    scores = []
+    labels = []
+    weights = []
+    before_decoder = []
+    after_decoder = []
+    tracking = []
+    batch_size = config["Training"]["batch_size"]
+    batch_limit = int(np.ceil(1e5 / batch_size))
+    model.eval()
+    with torch.no_grad():
+        for loader in val_loaders:
+            batch_count = 0
+            for batch, label, track, global_feats in loader:
+                #Don't use compiled model for testing since we can't control the batch size.
+                #We could before, but it assumes each dataset has the same number of batches...
+                before_global_decoder, after_global_decoder, after_classify = model.representation(batch.to(device), global_feats.to(device))
+                scores.append(after_classify.to("cpu"))
+                before_decoder.append(before_global_decoder.to("cpu"))
+                after_decoder.append(after_global_decoder.to("cpu"))
+                labels.append(label.to("cpu"))
+                weights.append(track[:,1].to("cpu"))
+                tracking.append(track.to("cpu"))
+                batch_count += 1
+                if batch_count >= batch_limit:
+                    break
+    if scores == []: #If validation set is empty.
+        return
+    logits = torch.concatenate(scores)
+    scores = torch.sigmoid(logits)
+    labels = torch.concatenate(labels)
+    weights = torch.concatenate(weights)
+    before_decoder = torch.concatenate(before_decoder)
+    after_decoder = torch.concatenate(after_decoder)
+    tracking = torch.concatenate(tracking)
+    logits = logits.to("cpu").numpy()
+    scores = scores.to("cpu").numpy()
+    labels = labels.to("cpu").numpy()
+    before_decoder = before_decoder.to("cpu").numpy()
+    after_decoder = after_decoder.to("cpu").numpy()
+    tracking = tracking.to("cpu").numpy()
+    # Save the NumPy arrays to a .npz file
+    outfile = f"{config['Training_Directory']}/evaluation_{epoch}.npz"
+    np.savez(outfile, logits=logits, scores=scores, labels=labels, before_decoder=before_decoder, after_decoder=after_decoder, tracking=tracking)
+    print(f"saved scores to {outfile}")
+    return
+def train(train_loaders, test_loaders, model, device, config, args, rank):
+    nocompile = args.nocompile
+    restart = args.restart
+    # define train/val samples, loss function and optimizer
+    if 'Loss' not in config:
+        loss_fcn = nn.BCEWithLogitsLoss()
+        finish_fn = torch.nn.Sigmoid()
+    else:
+        loss_fcn = utils.buildFromConfig(config['Loss'])
+        finish_fn = utils.buildFromConfig(config['Loss']['finish'])
+    optimizer = torch.optim.Adam(model.parameters(), lr=config['Training']['learning_rate'])
+    if 'gamma' in config['Training']:
+        gamma = config['Training']['gamma']
+    else:
+        gamma = 1
+    if 'dynamic_lr' in config['Training']:
+        factor = config['Training']['dynamic_lr']['factor']
+        patience = config['Training']['dynamic_lr']['patience']
+    else:
+        factor = 1
+        patience = 1
+    early_termination = utils.EarlyStop()
+    if 'early_termination' in config['Training']:
+        early_termination.patience = config['Training']['early_termination']['patience']
+        early_termination.threshold = config['Training']['early_termination']['threshold']
+        early_termination.mode = config['Training']['early_termination']['mode']
+    scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma = gamma)
+    #scheduler_reset =  custom_scheduler.Dynamic_LR(optimizer, 'max', factor = factor, patience = patience)
+    custom_scheduler = None
+    if ('custom_scheduler' in config['Training']):
+        run_time_args = {}
+        scheduler_class = config['Training']['custom_scheduler']['class']
+        if (scheduler_class == 'Dynamic_LR' or
+                scheduler_class == 'Dynamic_LR_AND_Partial_Reset' or
+                    scheduler_class == 'Dynamic_LR_AND_Full_Reset'):
+            run_time_args={'optimizer': optimizer}
+        custom_scheduler = utils.buildFromConfig(config['Training']['custom_scheduler'], run_time_args=run_time_args)
+    starting_epoch = 0
+    if not restart:
+        last_ep, checkpoint = utils.get_last_epoch(config)
+        if checkpoint != None:
+            ep = starting_epoch - 1
+            if nocompile:
+                new_state_dict = {}
+                for k, v in checkpoint['model_state_dict'].items():
+                    new_key = k.replace('module.', '')
+                    new_state_dict[new_key] = v
+                checkpoint['model_state_dict'] = new_state_dict
+                if (args.multinode or args.multigpu):
+                    new_state_dict = {}
+                    for k, v in checkpoint['model_state_dict'].items():
+                        new_key = 'module.' + k
+                        new_state_dict[new_key] = v
+                    checkpoint['model_state_dict'] = new_state_dict
+                model.load_state_dict(checkpoint['model_state_dict'])
+            else:
+                model._orig_mod.load_state_dict(checkpoint['model_state_dict'])
+            optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
+            starting_epoch = checkpoint['epoch'] + 1
+            if 'early_stop' in checkpoint:
+                early_termination = utils.EarlyStop.load_from_dict(checkpoint['early_stop'])
+                print(early_termination.to_str())
+                print("EarlyStop state restored successfully.")
+                if early_termination.should_stop:
+                    print(f"Early Termination at Epoch {epoch}")
+                    return
+            else:
+                print("'early_stop' not found in checkpoint. Initializing a new EarlyStop instance.")
+                early_termination = utils.EarlyStop()
+            print(f"Loaded epoch {checkpoint['epoch']} from checkpoint")
+        log = open(config['Training_Directory'] + '/training.log', 'a', buffering=1)
+    else:
+        log = open(config['Training_Directory'] + '/training.log', 'w', buffering=1)
+    train_cyclers = []
+    for loader in train_loaders:
+        train_cyclers.append(utils.cycler((loader)))
+    if args.savecache:
+        max_batch = [None,] * len(train_loaders)
+        for dset_i, loader in enumerate(train_loaders):
+            mbs = 0
+            for batch_i, batch in enumerate(loader):
+                if batch[0].num_nodes() > mbs:
+                    mbs = batch[0].num_nodes()
+                    max_batch[dset_i] = batch[0]
+                    print(f'Max batch size for dataset {dset_i}: {mbs}')
+        big_batch = dgl.batch(max_batch).to(device)
+        with torch.no_grad():
+            model(big_batch)
+    cumulative_times = [0,0,0,0,0]
+    log.write(f'Training {config["Training_Name"]} {datetime.datetime.now()} \n')
+    print(f"Starting training for {config['Training']['epochs']} epochs")
+    if hasattr(train_loaders[0].dataset, 'padding_mode'):
+        is_padded = train_loaders[0].dataset.padding_mode != 'NONE'
+        if (train_loaders[0].dataset.padding_mode == 'NODE'):
+            is_padded = False
+    else:
+        is_padded = False
+    lr_utils.print_LR(optimizer)
+    # torch.save({
+    #             'epoch': 0,
+    #             'model_state_dict': model.state_dict(),
+    #             'optimizer_state_dict': optimizer.state_dict(),
+    #             }, os.path.join(config['Training_Directory'], f"model_epoch_{0}.pt"))
+    # exit()
+    # training loop
+    # gpu_mem()
+    for epoch in range(starting_epoch, config['Training']['epochs']):
+        start = time.time()
+        run = start
+        if (args.multigpu or args.multinode):
+            dist.barrier()
+        if (epoch == 2):
+            # torch.cuda.cudart().cudaProfilerStart()
+            pass
+        # training
+        model.train()
+        ibatch = 0
+        total_loss = 0
+        for batched_graph, labels, _, global_feats in train_loaders[0]:
+            # # need to fix padded case
+            # if is_padded:
+            #     tglobals.append(torch.zeros(1, len(global_feats[0])))
+            batch_start = time.time()
+            logits = torch.tensor([])
+            tlabels = torch.tensor([])
+            batch_lengths = []
+            for cycler in train_cyclers:
+                graph, label, _, global_feats = next(cycler)
+                graph = graph.to(device)
+                label = label.to(device)
+                global_feats = global_feats.to(device)
+                if is_padded: #Padding the globals to match padded graphs.
+                    global_feats = torch.concatenate((global_feats, torch.zeros(1, len(global_feats[0])).to(device)))
+                load = time.time()
+                if (len(logits) == 0):
+                    logits = model(graph, global_feats)
+                    tlabels = label
+                else:
+                    logits = torch.concatenate((logits, model(graph, global_feats)), dim=0)
+                    tlabels = torch.concatenate((tlabels, label), dim=0)
+                batch_lengths.append(logits.shape[0] - 1)
+            if is_padded:
+                keepmask = torch.full_like(logits[:,0], True, dtype=torch.bool)
+                keepmask[batch_lengths] = False
+                logits = logits[keepmask]
+            tlabels = tlabels.to(torch.float)
+            if logits.shape[1] == 1 and loss_fcn.__class__.__name__ == 'BCEWithLogitsLoss':
+                logits = logits[:,0]
+                tlabels = tlabels.to(torch.float)
+            if loss_fcn.__class__.__name__ == 'CrossEntropyLoss':
+                tlabels = tlabels.to(torch.long)
+            loss = loss_fcn(logits, tlabels.to(device)) # changed logits from logits[:,0] and left labels as int for multiclass. Does this break binary? Yes.
+            optimizer.zero_grad()
+            loss.backward()
+            optimizer.step()
+            total_loss += loss.detach().cpu().item()
+            ibatch += 1
+            cumulative_times[0] += batch_start - run
+            cumulative_times[1] += load - batch_start
+            run = time.time()
+            cumulative_times[2] += run - load
+            if ibatch % 1000 == 0:
+                print(f'Batch {ibatch} out of {len(train_loaders[0])}', end='\r')
+                # gpu_mem()
+        if (args.multigpu):
+            print(f'Rank {rank} Epoch Done.')
+        elif (args.multinode):
+            print(f'Rank {args.global_rank} Epoch Done.')
+        else:
+            print("Epoch Done.")
+        # validation
+        scores = []
+        labels = []
+        weights = []
+        model.eval()
+        with torch.no_grad():
+            for loader in test_loaders:
+                for batch, label, track, global_feats in loader:
+                    #Don't use compiled model for testing since we can't control the batch size.
+                    #We could before, but it assumes each dataset has the same number of batches...
+                    if is_padded:
+                        global_feats = torch.cat([global_feats, torch.zeros(1, len(global_feats[0]))])
+                    if nocompile:
+                        batch_scores = model(batch.to(device), global_feats.to(device))
+                    else:
+                        batch_scores = model._orig_mod(batch.to(device), global_feats.to(device))
+                    if is_padded:
+                        scores.append(batch_scores[:-1,:])
+                    else:
+                        scores.append(batch_scores)
+                    labels.append(label)
+                    weights.append(track[:,1])
+        eval_end = time.time()
+        cumulative_times[3] += eval_end - run
+        if scores == []: #If validation set is empty.
+            continue
+        logits = torch.concatenate(scores).to(device)
+        labels = torch.concatenate(labels).to(device)
+        weights = torch.concatenate(weights).to(device)
+        if (args.multigpu or args.multinode):
+            gathered_logits = [torch.zeros_like(logits) for _ in range(dist.get_world_size())]
+            gathered_labels = [torch.zeros_like(labels) for _ in range(dist.get_world_size())]
+            gathered_weights = [torch.zeros_like(weights) for _ in range(dist.get_world_size())]
+        if (args.multigpu or args.multinode):
+            dist.barrier()
+            if (args.multigpu and rank != 0) or (args.multinode and args.global_rank != 0):
+                dist.gather(logits, dst=0)
+                dist.gather(labels, dst=0)
+                dist.gather(weights, dst=0)
+                continue
+            else:
+                dist.gather(logits, gather_list=gathered_logits)
+                dist.gather(labels, gather_list=gathered_labels)
+                dist.gather(weights, gather_list=gathered_weights)
+            logits = torch.concatenate(gathered_logits)
+            labels = torch.concatenate(gathered_labels)
+            weights = torch.concatenate(gathered_weights)
+        wgt_mask = weights > 0
+        print(f"Num batches trained = {ibatch}")
+        #Note: This section is a bit ugly. Very conditional. Should maybe config defined behavior?
+        if (loss_fcn.__class__.__name__ == "ContrastiveClusterLoss"):
+            scores = logits
+            preds = scores
+            accuracy = 0
+            test_auc = 0
+            acc = 0
+            contrastive_cluster_loss = finish_fn(logits)
+        elif (loss_fcn.__class__.__name__ == "MultiLabelLoss"):
+            scores = finish_fn(logits)
+            preds = torch.round(scores)
+            multilabel_accuracy = []
+            threshold = 0.1  # 10% threshold
+            for i in range(len(labels[0])):
+                # accurate_count = torch.sum(torch.abs(preds[:, i].to("cpu") - labels[:, i].to("cpu")) / labels[:, i].to("cpu") <= threshold)
+                # multilabel_accruacy.append(accurate_count / len(labels))
+                multilabel_accuracy.append(torch.sum(preds[:, i].to("cpu") == labels[:, i].to("cpu")) / len(labels))
+            test_auc = 0
+            acc = np.mean(multilabel_accuracy)
+        elif logits.shape[1] == 1 and loss_fcn.__class__.__name__ == 'BCEWithLogitsLoss': #Proxy for binary classification.
+            test_auc = 0
+            acc = 0
+            logits = logits[:,0]
+            scores = finish_fn(logits)
+            labels =labels.to(torch.float)
+            preds = scores > 0.5
+            test_auc = roc_auc_score(labels[wgt_mask].to("cpu") == 1, scores[wgt_mask].to("cpu"), sample_weight=weights[wgt_mask].to("cpu"))
+            acc = torch.sum(preds.to("cpu") == labels.to("cpu")) / len(labels)
+        elif logits.shape[1] == 1 and loss_fcn.__class__.__name__ == 'MSELoss':
+            logits = logits[:,0]
+            scores = finish_fn(logits)
+            labels = labels.to(torch.float)
+            acc = 0
+            test_auc = 0
+        else:
+            preds = torch.argmax(logits, dim=1)
+            scores = finish_fn(logits)
+            if labels.dim() == 1: #Multi-class
+                acc = torch.sum(preds.to("cpu") == labels.to("cpu")) / len(labels) #TODO: Make each class weighted equally?
+                labels = labels.to("cpu")
+                weights = weights.to("cpu")
+                logits = logits.to("cpu")
+                wgt_mask = wgt_mask.to("cpu")
+                labels_onehot = np.zeros((len(labels), len(scores[0])))
+                labels_onehot[np.arange(len(labels)), labels] = 1
+                try:
+                    #test_auc = roc_auc_score(labels[wgt_mask].to("cpu") == 1, scores[wgt_mask].to("cpu"), multi_class='ovr', sample_weight=weights[wgt_mask].to("cpu"))
+                    test_auc = roc_auc_score(labels_onehot[wgt_mask], scores[wgt_mask].to("cpu"), multi_class='ovr', sample_weight=weights[wgt_mask].to("cpu"))
+                except ValueError:
+                    test_auc = np.nan
+            else: #Multi-loss
+                acc = torch.sum(preds.to("cpu") == labels[:,0].to("cpu")) / len(labels)
+                try:
+                    test_auc = roc_auc_score(labels[:,0][wgt_mask].to("cpu") == 1, scores[wgt_mask].to("cpu"), multi_class='ovr', sample_weight=weights[wgt_mask].to("cpu"))
+                except ValueError:
+                    test_auc = np.nan
+        # print(f"logits = {logits[:10]}")
+        # print(f"preds = {preds[:2]}")
+        # print(f"labels = {labels[:10]}")
+        # print(f"len(Unique logits) = {len(torch.unique(logits))}")
+        # print(f"Average of labels = {torch.mean(labels)}")
+        # print(f"unique logits = {torch.unique(logits)[0]:.4f}, {torch.unique(logits)[-1]:.4f}")
+        if (loss_fcn.__class__.__name__ == "MultiLabelLoss"):
+            multilabel_log_str = "MultiLabel_Accuracy "
+            for accuracy in multilabel_accuracy:
+                multilabel_log_str += f" | {accuracy:.4f}"
+            log.write(multilabel_log_str + '\n')
+            print(multilabel_log_str, flush=True)
+        elif (loss_fcn.__class__.__name__ == "ContrastiveClusterLoss"):
+            contrastive_cluster_log_str = "ContrastiveClusterLoss "
+            contrastive_cluster_log_str += f"Contrastive Loss: {contrastive_cluster_loss[0]:.4f}, Clustering Loss: {contrastive_cluster_loss[1]:.4f}, Variance Loss: {contrastive_cluster_loss[2]:.4f}"
+            log.write(contrastive_cluster_log_str + '\n')
+            print(contrastive_cluster_log_str, flush=True)
+        # test_loss = loss_fcn(logits, labels.to(device))
+        test_loss = loss_fcn(logits, labels)
+        end = time.time()
+        log_str = "Epoch {:05d} | LR {:.4e} | Loss {:.4f} | Accuracy {:.4f} | Test_Loss {:.4f} | Test_AUC {:.4f} | Time {:.4f} s".format(
+                epoch, optimizer.param_groups[0]['lr'], total_loss/ibatch, acc, test_loss, test_auc, end - start
+        )
+        log.write(log_str + '\n')
+        print(log_str, flush=True)
+        state_dict = model.state_dict()
+        if not nocompile:
+            state_dict = model._orig_mod.state_dict()
+        new_state_dict = {}
+        for k, v in state_dict.items():
+            new_key = k.replace('module.', '')
+            new_state_dict[new_key] = v
+        state_dict = new_state_dict
+        # print('Testing done')
+        # gpu_mem()
+        if epoch == 2:
+            # torch.cuda.cudart().cudaProfilerStop()
+            pass
+        torch.save({
+                'epoch': epoch,
+                'model_state_dict': state_dict,
+                'optimizer_state_dict': optimizer.state_dict(),
+                'early_stop': early_termination.to_dict()
+                }, os.path.join(config['Training_Directory'], f"model_epoch_{epoch}.pt"))
+        np.savez(os.path.join(config['Training_Directory'], f'model_epoch_{epoch}.npz'), scores=scores.to("cpu"), labels=labels.to("cpu"))
+        save_end = time.time()
+        cumulative_times[4] += save_end - eval_end
+        early_termination.update(test_loss)
+        if early_termination.should_stop:
+            log_str = f"Early Termination at Epoch {epoch}"
+            log.write(log_str + "\n")
+            print(log_str)
+            log_str = early_termination.to_str()
+            log.write(log_str + "\n")
+            print(log_str)
+            break
+        if (custom_scheduler):
+            custom_scheduler.step(model, {'test_auc':test_auc})
+        scheduler.step()
+    print(f"Load: {cumulative_times[0]:.4f} s")
+    print(f"Batch: {cumulative_times[1]:.4f} s")
+    print(f"Train: {cumulative_times[2]:.4f} s")
+    print(f"Eval: {cumulative_times[3]:.4f} s")
+    print(f"Save: {cumulative_times[4]:.4f} s")
+    log.close()
+def find_free_port():
+    import socket
+    from contextlib import closing
+    with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as s:
+        s.bind(('', 0))
+        s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
+        return str(s.getsockname()[1])
+def init_process_group(world_size, rank, port):
+    os.environ['MASTER_ADDR'] = 'localhost'
+    # os.environ['MASTER_PORT'] = find_free_port()
+    os.environ['MASTER_PORT'] = port
+    dist.init_process_group(
+        backend="nccl",  # change to 'nccl' for multiple GPUs (other was gloo)
+        init_method='env://',
+        world_size=world_size,
+        rank=rank,
+        timeout=datetime.timedelta(seconds=300),
+    )
+def main(rank=0, args=None, world_size=1, port=24500, seed=12345):
+    #Prevent simultaneous file access
+    #sleep_time = 120 * rank
+    #time.sleep(sleep_time)
+    #Load config file
+    config = utils.load_config(args.config)
+    if (args.directory):
+        print(f"New training directory: { config['Training_Directory'] + args.directory}")
+        config['Training_Directory'] = config['Training_Directory'] + args.directory
+    if not os.path.exists(config['Training_Directory']):
+        os.makedirs(config['Training_Directory'], exist_ok=True)
+    with open(config['Training_Directory'] + '/config.yaml', 'w') as f:
+        yaml.dump(config, f)
+    batch_size = config["Training"]["batch_size"]
+    if(args.plot):
+        rl = utils.read_log(config)
+        utils.plot_log(rl, config['Training_Directory'] + '/training.png')
+        print('Log at ' + config['Training_Directory'] + '/training.log')
+        print('Plotted at ' + config['Training_Directory'] + '/training.png')
+        exit()
+    if (args.multigpu):
+        print(f"Setting up multigpu")
+        start_time = time.time()
+        init_process_group(world_size, rank, port)
+        print("multigpu setup time: {:.4f} s".format(time.time() - start_time))
+        device = torch.device(f'cuda:{rank}')
+        torch.cuda.device(device)
+    elif (args.multinode):
+        device = torch.device(f'cuda:{rank}')
+        torch.cuda.device(device)
+        print(f"global rank = {args.global_rank}, local rank = {rank}, device = {device}")
+    else:
+        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    if (args.cpu):
+        print(f"Using CPU")
+        device = "cpu"
+    train_loaders = []
+    test_loaders = []
+    val_loaders = []
+    load_start = time.time()
+    torch.backends.cuda.matmul.allow_tf32 = True
+    ldr_type = datasets.LazyPreBatchedDataset if args.lazy else datasets.PreBatchedDataset
+    #Load datasets
+    if (pargs.statistics):
+        pargs.statistics = int(pargs.statistics)
+        print(f"Training Dataset Size: {pargs.statistics}")
+        num_batches = int(np.ceil(pargs.statistics / batch_size))
+        np.random.seed(pargs.seed)
+    for dset_conf in config["Datasets"]:
+        dset = utils.buildFromConfig(config["Datasets"][dset_conf])
+        if 'batch_size' in config["Datasets"][dset_conf]:
+            batch_size = config["Datasets"][dset_conf]['batch_size']
+        fold_conf = config["Datasets"][dset_conf]["folding"]
+        shuffle_chunks = config["Datasets"][dset_conf].get("shuffle_chunks", 10)
+        padding_mode = config["Datasets"][dset_conf].get("padding_mode", "STEPS")
+        mask_fn = utils.fold_selection(fold_conf, "train")
+        if args.preshuffle:
+            # ldr = ldr_type(start_dataset=dset, batch_size=batch_size, mask_fn=mask_fn, suffix = utils.fold_selection_name(fold_conf, 'train'), chunks = shuffle_chunks, padding_mode = padding_mode, use_ddp = args.multigpu, rank=rank, world_size=world_size)
+            ldr = ldr_type(start_dataset=dset, batch_size=batch_size, mask_fn=mask_fn, suffix = utils.fold_selection_name(fold_conf, 'train'), chunks = shuffle_chunks, padding_mode = padding_mode)
+            gsamp, _, _, global_samp = ldr[0]
+            sampler = None
+            if (pargs.statistics):
+                sampler = np.random.choice(range(len(ldr)), size=num_batches)
+            if (args.multigpu):
+                sampler = DistributedSampler(ldr, num_replicas=world_size, rank=rank, shuffle=False, drop_last=True)
+                # num_batches = len(ldr)
+                # sampler = list(sampler)
+                # if (sampler[0] >= num_batches % world_size):
+                #     sampler.pop()
+            if (args.multinode):
+                sampler = DistributedSampler(ldr, num_replicas=world_size, rank=pargs.global_rank, shuffle=False, drop_last=True)
+            train_loaders.append(torch.utils.data.DataLoader(ldr, batch_size = None, num_workers = 0, sampler = sampler))
+            sampler = None
+            ldr = ldr_type(start_dataset=dset, batch_size=batch_size, mask_fn=mask_fn, suffix = utils.fold_selection_name(fold_conf, 'test'), chunks = shuffle_chunks, padding_mode = padding_mode)
+            if (args.multigpu):
+                sampler = DistributedSampler(ldr, num_replicas=world_size, rank=rank, shuffle=False, drop_last=True)
+                # num_batches = len(ldr)
+                # sampler = list(sampler)
+                # if (rank >= num_batches % world_size):
+                #     sampler.pop()
+            if (args.multinode):
+                sampler = DistributedSampler(ldr, num_replicas=world_size, rank=pargs.global_rank, shuffle=False, drop_last=True)
+            test_loaders.append(torch.utils.data.DataLoader(ldr, batch_size = None, num_workers = 0, sampler=sampler))
+            if "validation" in fold_conf:
+                val_loaders.append(torch.utils.data.DataLoader((ldr_type(start_dataset=dset, batch_size=batch_size, mask_fn=utils.fold_selection(fold_conf, "validation"), suffix = utils.fold_selection_name(fold_conf, 'validation'), chunks = shuffle_chunks, padding_mode = padding_mode, rank=rank, world_size=1)), batch_size = None, num_workers = 0, sampler = sampler))
+            else:
+                print("No validation set for dataset ", dset_conf)
+        else:
+            train_loaders.append(datasets.GetBatchedLoader(dset, batch_size, utils.fold_selection(fold_conf, "train")))
+            gsamp, _, _, global_samp = dset[0]
+            test_loaders.append(datasets.GetBatchedLoader(dset, batch_size, utils.fold_selection(fold_conf, "test")))
+            if "validation" in fold_conf:
+                val_loaders.append(datasets.GetBatchedLoader(dset, batch_size, utils.fold_selection(fold_conf, "validation")))
+            else:
+                print("No validation set for dataset ", dset_conf)
+    load_end = time.time()
+    print("Load time: {:.4f} s".format(load_end - load_start))
+    model = utils.buildFromConfig(config["Model"], {'sample_graph': gsamp, 'sample_global': global_samp, 'seed': seed}).to(device)
+    if not args.nocompile:
+        model = torch.compile(model)
+    if args.multigpu:
+        print(f"Trying to create DDP model")
+        start_time = time.time()
+        model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[device])
+        print("model creation time: {:.4f} s".format(time.time() - start_time))
+    if (args.multinode):
+        print(f"Trying to create DDP model")
+        start_time = time.time()
+        model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[device])
+        print("model creation time: {:.4f} s".format(time.time() - start_time))
+    # total_params = 0
+    # for param_dict in model.parameters():
+    #     for param in param_dict['params']:
+    #         if param.requires_grad:
+    #             total_params += param.numel()
+    # print(f"Number of trainable parameters = {total_params}")
+    if(type(model) == GCN.Clustering):
+        print("clustering")
+    if args.evaluate != None:
+        evaluate(test_loaders, model, config, device, args.evaluate)
+        exit()
+    # model training
+    print("Training...")
+    gpu_mem()
+    train(train_loaders, test_loaders, model, device, config, args, rank)
+    # test the model
+    # print("Testing...")
+    # evaluate(val_loaders, model, config, device)
+    # if args.multigpu or args.multinode:
+    #     dist.destroy_process_group()
+    # if rank == 0:
+    #     rl = utils.read_log(config)
+    #     utils.plot_log(rl, config['Training_Directory'] + '/training.png')
+    #     print('Log at ' + config['Training_Directory'] + '/training.log')
+    #     print('Plotted at ' + config['Training_Directory'] + '/training.png')
+if __name__ == "__main__":
+    #Handle CLI arguments
+    parser = argparse.ArgumentParser()
+    add_arg = parser.add_argument
+    add_arg("--config", type=str, help="Config file.", required=True)
+    add_arg("--restart", action="store_true", help="Restart training from scratch.")
+    add_arg("--preshuffle", action="store_true", help="Shuffle data before training.")
+    add_arg("--lazy", action="store_true", help="Lazy loading of data.")
+    add_arg("--nocompile", action="store_true", help="Disable JIT compilation.")
+    add_arg("--evaluate", type = int, help="Skip training and go to evaluation.")
+    add_arg("--plot", action="store_true", help="Plot training logs.")
+    add_arg("--multigpu", action="store_true", help="Use multiple GPUs.")
+    add_arg("--multinode", action="store_true", help="Use multiple nodes.")
+    add_arg("--savecache", action="store_true", help="")
+    add_arg("--cpu", action="store_true", help="Uses the cpu only")
+    add_arg("--statistics", type=float, help="Size of training data")
+    add_arg("--directory", type=str, help="Append to Training Directory")
+    add_arg("--seed", type=int, default=2, help="Sets random seed")
+    pargs = parser.parse_args()
+    if pargs.multigpu:
+        port = find_free_port()
+        torch.backends.cudnn.enabled = False
+        mp.spawn(main, args=(pargs, 4, port), nprocs=4, join=True)
+    if pargs.multinode:
+        global_rank = int(os.environ["RANK"])
+        local_rank = int(os.environ["LOCAL_RANK"])
+        world_size = int(os.environ["WORLD_SIZE"])
+        print(f"global_rank = {global_rank}, local_rank = {local_rank}, world_size = {world_size}")
+        dist.init_process_group(backend="nccl")
+        torch.backends.cudnn.enabled = False
+        pargs.global_rank = global_rank
+        main(rank = local_rank, args=pargs, world_size=world_size)
+    else:
+        main(0, pargs)