ho22joshua commited on
Commit
5ca4f82
·
1 Parent(s): 4d16332

added root_gnn_dgl directory

Browse files
models/GCN.py DELETED
@@ -1,1944 +0,0 @@
1
- import dgl
2
- import dgl.nn as dglnn
3
-
4
- import torch
5
- import torch.nn as nn
6
- import torch.nn.functional as F
7
-
8
- import sys
9
- import os
10
- file_path = os.getcwd()
11
- sys.path.append(file_path)
12
-
13
- import root_gnn_base.dataset as datasets
14
- from root_gnn_base import utils
15
-
16
- import gc
17
-
18
def Make_SLP(in_size, out_size, activation = nn.ReLU, dropout = 0):
    """Build one "single-layer perceptron" block as a plain list of modules.

    The block is Linear(in_size -> out_size), then an instance of
    *activation*, then Dropout(dropout).  Returned as a list so callers
    can concatenate several blocks before wrapping them in nn.Sequential.
    """
    return [
        nn.Linear(in_size, out_size),
        activation(),
        nn.Dropout(dropout),
    ]
24
-
25
def Make_MLP(in_size, hid_size, out_size, n_layers, activation = nn.ReLU, dropout = 0):
    """Build an MLP of *n_layers* Make_SLP blocks, finished with LayerNorm.

    With n_layers <= 1 a single block maps in_size directly to out_size;
    otherwise the widths go in_size -> hid_size -> ... -> out_size.
    Returns an nn.Sequential.
    """
    if n_layers <= 1:
        blocks = Make_SLP(in_size, out_size, activation, dropout)
    else:
        # n_layers + 1 widths give exactly n_layers (in, out) pairs
        widths = [in_size] + [hid_size] * (n_layers - 1) + [out_size]
        blocks = []
        for w_in, w_out in zip(widths[:-1], widths[1:]):
            blocks += Make_SLP(w_in, w_out, activation, dropout)
    blocks.append(torch.nn.LayerNorm(out_size))
    return nn.Sequential(*blocks)
36
-
37
class MLP(nn.Module):
    """Plain MLP: a Make_MLP hidden stack followed by a final linear head."""

    def __init__(self, in_size, hid_size, out_size, n_layers, activation = nn.ReLU, dropout = 0, **kwargs):
        super().__init__()
        print(f'Unused args while creating MLP: {kwargs}')
        # Hidden stack keeps hid_size width; the head maps to out_size.
        self.layers = Make_MLP(in_size, hid_size, hid_size, n_layers-1, activation, dropout)
        self.linear = nn.Linear(hid_size, out_size)

    def forward(self, x):
        hidden = self.layers(x)
        return self.linear(hidden)
46
-
47
def broadcast_global_to_nodes(g, globals):
    """Repeat each graph's global feature row once per node of that graph.

    *globals* has one row per graph in the batched graph *g*; the result
    has one row per node, aligned with g.ndata ordering.
    """
    return torch.repeat_interleave(globals, g.batch_num_nodes(), dim=0)
50
-
51
def broadcast_global_to_edges(g, globals):
    """Repeat each graph's global feature row once per edge of that graph.

    *globals* has one row per graph in the batched graph *g*; the result
    has one row per edge, aligned with g.edata ordering.
    """
    return torch.repeat_interleave(globals, g.batch_num_edges(), dim=0)
54
-
55
def copy_v(edges):
    """DGL edge UDF: expose each edge's destination-node feature 'h' as message 'm_v'."""
    message = edges.dst['h']
    return {'m_v': message}
57
-
58
def partial_reset(model : nn.Module):
    """Re-initialise only the final `classify` linear head of *model*, in place.

    The replacement layer keeps the old in/out sizes and device.  The RNG is
    seeded so the re-initialised weights are reproducible; the new weights
    are printed for inspection.
    """
    out_size, in_size = model.classify.weight.shape
    device = model.classify.weight.device
    torch.manual_seed(2)  # deterministic re-init
    fresh = nn.Linear(in_size, out_size)
    fresh.to(device)
    model.classify = fresh
    print(model.classify.weight)
66
-
67
def print_model(model: nn.Module):
    """Dump the model's repr (its module tree) to stdout."""
    print(model)
69
-
70
def print_mlp(layer):
    """Print every child of *layer*: the full state_dict for Linear children,
    the module repr for anything else."""
    for child in layer.children():
        payload = child.state_dict() if isinstance(child, nn.Linear) else child
        print(payload)
76
-
77
-
78
def full_reset(model : nn.Module):
    """Reset the whole GNN in place: every encoder/update/decoder MLP, then
    the classify head (via partial_reset).  Returns None; *model* is mutated."""
    stacks = (model.node_encoder, model.edge_encoder, model.global_encoder,
              model.node_update, model.edge_update, model.global_update,
              model.global_decoder)
    for stack in stacks:
        for child in stack.children():
            # LayerNorm / Linear expose reset_parameters; Dropout/ReLU do not
            if hasattr(child, 'reset_parameters'):
                child.reset_parameters()
    partial_reset(model)
89
-
90
-
91
class GCN(nn.Module):
    """Graph classifier: linear pre-stack, GraphConv stack, linear post-stack,
    mean-node readout, linear classify head.  ReLU after every layer."""

    def __init__(self, in_size, hid_size, out_size, n_layers, **kwargs):
        super().__init__()
        print(f'Unused args while creating GCN: {kwargs}')
        self.n_layers = n_layers
        # Layer layout: 1 input Linear, n_layers hidden Linears,
        # n_layers GraphConvs, n_layers output-side Linears.
        pre = [nn.Linear(in_size, hid_size)]
        pre += [nn.Linear(hid_size, hid_size) for _ in range(n_layers)]
        convs = [dglnn.GraphConv(hid_size, hid_size) for _ in range(n_layers)]
        post = [nn.Linear(hid_size, hid_size) for _ in range(n_layers)]
        self.layers = nn.ModuleList()
        self.layers.extend(pre + convs + post)
        self.classify = nn.Linear(hid_size, out_size)

    def forward(self, g):
        h = g.ndata['features']
        conv_lo = self.n_layers + 1       # index of the first GraphConv
        conv_hi = 2 * self.n_layers + 1   # one past the last GraphConv
        for idx, layer in enumerate(self.layers):
            # GraphConv layers take the graph as well; Linears take only h
            h = layer(g, h) if conv_lo <= idx < conv_hi else layer(h)
            h = F.relu(h)
        with g.local_scope():
            g.ndata['h'] = h
            # Graph representation = average of node embeddings.
            return self.classify(dgl.mean_nodes(g, 'h'))
121
-
122
class GCN_global(nn.Module):
    """GCN that carries an explicit per-graph global state.

    The global state is initialised from each graph's node count and is
    refreshed after every GraphConv round with the mean node embedding.
    """

    def __init__(self, in_size, hid_size=4, out_size=1, n_layers=1, dropout=0, **kwargs):
        super().__init__()
        print(f'Unused args while creating GCN: {kwargs}')
        self.n_layers = n_layers

        # encoders
        self.node_encoder = Make_MLP(in_size, hid_size, hid_size, n_layers, dropout=dropout)
        self.global_encoder = Make_MLP(1, hid_size, hid_size, n_layers, dropout=dropout)

        # message passing
        self.node_update = Make_MLP(hid_size, hid_size, hid_size, n_layers, dropout=dropout)
        self.global_update = Make_MLP(2*hid_size, hid_size, hid_size, n_layers, dropout=dropout)
        self.conv = dglnn.GraphConv(hid_size, hid_size)

        # decoder
        self.global_decoder = Make_MLP(hid_size, hid_size, hid_size, n_layers, dropout=dropout)
        self.classify = nn.Linear(hid_size, out_size)

    def forward(self, g):
        node_h = self.node_encoder(g.ndata['features'])
        # global state starts from the node count of each graph
        counts = g.batch_num_nodes()[:, None].to(torch.float)
        global_h = self.global_encoder(counts)
        for _ in range(self.n_layers):
            node_h = self.conv(g, self.node_update(node_h))
            g.ndata['h'] = node_h
            pooled = dgl.mean_nodes(g, 'h')
            global_h = self.global_update(torch.cat((global_h, pooled), dim=1))
        return self.classify(self.global_decoder(global_h))
151
-
152
class GCN_global_2way(nn.Module):
    """Two-way variant of GCN_global: the global state also feeds back into
    the node update (broadcast to every node) each round."""

    def __init__(self, in_size, hid_size=4, out_size=1, n_layers=1, dropout=0, **kwargs):
        super().__init__()
        print(f'Unused args while creating GCN: {kwargs}')
        self.n_layers = n_layers

        # encoders
        self.node_encoder = Make_MLP(in_size, hid_size, hid_size, n_layers, dropout=dropout)
        self.global_encoder = Make_MLP(1, hid_size, hid_size, n_layers, dropout=dropout)

        # message passing (node update sees node state + broadcast global)
        self.node_update = Make_MLP(2*hid_size, hid_size, hid_size, n_layers, dropout=dropout)
        self.global_update = Make_MLP(2*hid_size, hid_size, hid_size, n_layers, dropout=dropout)
        self.conv = dglnn.GraphConv(hid_size, hid_size)

        # decoder
        self.global_decoder = Make_MLP(hid_size, hid_size, hid_size, n_layers, dropout=dropout)
        self.classify = nn.Linear(hid_size, out_size)

    def forward(self, g):
        node_h = self.node_encoder(g.ndata['features'])
        counts = g.batch_num_nodes()[:, None].to(torch.float)
        global_h = self.global_encoder(counts)
        for _ in range(self.n_layers):
            # each node also sees its graph's current global state
            node_in = torch.cat((node_h, broadcast_global_to_nodes(g, global_h)), dim=1)
            node_h = self.conv(g, self.node_update(node_in))
            g.ndata['h'] = node_h
            pooled = dgl.mean_nodes(g, 'h')
            global_h = self.global_update(torch.cat((global_h, pooled), dim=1))
        return self.classify(self.global_decoder(global_h))
181
-
182
class Edge_Network(nn.Module):
    """Full message-passing GNN with node, edge and global blocks.

    Each processing step runs edge update -> node update -> global update.
    If the graph carries node weights ('w' in g.ndata), node pooling is a
    weighted sum divided by the number of non-padded nodes per graph
    (rows of all-zero features count as padding).

    Fixes vs. the original:
    - `sum_weights` used `.repeat(1, 64)`, silently assuming hid_size == 64;
      a broadcastable column vector gives identical results for 64 and
      correct results for any other hidden size.
    - `forward` and `representation` duplicated the whole pipeline; `forward`
      now delegates to `representation`, and the shared stages live in
      private helpers.
    """

    def __init__(self, sample_graph, sample_global, hid_size, out_size, n_layers, n_proc_steps, dropout=0, **kwargs):
        super().__init__()
        print(f'Unused args while creating GCN: {kwargs}')
        self.n_layers = n_layers
        self.n_proc_steps = n_proc_steps
        self.layers = nn.ModuleList()
        # Global features are "present" only if the sample is non-empty and
        # has at least one column.
        self.has_global = len(sample_global) != 0 and sample_global.shape[1] != 0
        gl_size = sample_global.shape[1] if self.has_global else 1

        # encoders
        self.node_encoder = Make_MLP(sample_graph.ndata['features'].shape[1], hid_size, hid_size, n_layers, dropout=dropout)
        self.edge_encoder = Make_MLP(sample_graph.edata['features'].shape[1], hid_size, hid_size, n_layers, dropout=dropout)
        self.global_encoder = Make_MLP(gl_size, hid_size, hid_size, n_layers, dropout=dropout)

        # GNN update blocks
        self.node_update = Make_MLP(3*hid_size, hid_size, hid_size, n_layers, dropout=dropout)
        self.edge_update = Make_MLP(4*hid_size, hid_size, hid_size, n_layers, dropout=dropout)
        self.global_update = Make_MLP(3*hid_size, hid_size, hid_size, n_layers, dropout=dropout)

        # decoder
        self.global_decoder = Make_MLP(hid_size, hid_size, hid_size, n_layers, dropout=dropout)
        self.classify = nn.Linear(hid_size, out_size)

    def _non_padded_counts(self, g):
        """Per-graph count of nodes whose feature row is not all zeros."""
        mask = torch.any(g.ndata['features'] != 0, dim=1)
        counts = []
        start = 0
        for num_nodes in g.batch_num_nodes():
            end = start + num_nodes
            counts.append(mask[start:end].sum().item())
            start = end
        return torch.tensor(counts, device=g.ndata['features'].device)

    def _encode(self, g, global_feats):
        """Encode node/edge/global features in place; return (h_global, sum_weights)."""
        g.ndata['h'] = self.node_encoder(g.ndata['features'])
        g.edata['e'] = self.edge_encoder(g.edata['features'])
        if not self.has_global:
            # fall back to node counts as the sole global feature
            global_feats = g.batch_num_nodes()[:, None].to(torch.float)
        sum_weights = None
        if "w" in g.ndata:
            counts = self._non_padded_counts(g)
            # column vector; broadcasts over the hidden dim in the division
            # (the old code hard-coded .repeat(1, 64))
            sum_weights = counts[:, None]
            global_feats = counts[:, None].to(torch.float)
        return self.global_encoder(global_feats), sum_weights

    def _propagate(self, g, h_global, sum_weights):
        """One edge -> node -> global message-passing round; return new h_global."""
        g.apply_edges(dgl.function.copy_u('h', 'm_u'))
        g.apply_edges(copy_v)
        g.edata['e'] = self.edge_update(torch.cat((g.edata['e'], g.edata['m_u'], g.edata['m_v'], broadcast_global_to_edges(g, h_global)), dim=1))
        g.update_all(dgl.function.copy_e('e', 'm'), dgl.function.sum('m', 'h_e'))
        g.ndata['h'] = self.node_update(torch.cat((g.ndata['h'], g.ndata['h_e'], broadcast_global_to_nodes(g, h_global)), dim=1))
        if "w" in g.ndata:
            # weighted node sum normalised by the non-padded node count
            mean_nodes = dgl.sum_nodes(g, 'h', 'w') / sum_weights
        else:
            mean_nodes = dgl.mean_nodes(g, 'h')
        return self.global_update(torch.cat((h_global, mean_nodes, dgl.mean_edges(g, 'e')), dim=1))

    def forward(self, g, global_feats):
        """Return classification logits per graph in the batch."""
        return self.representation(g, global_feats)[2]

    def representation(self, g, global_feats):
        """Return (pre-decoder, post-decoder, post-classify) global embeddings."""
        h_global, sum_weights = self._encode(g, global_feats)
        for _ in range(self.n_proc_steps):
            h_global = self._propagate(g, h_global, sum_weights)
        before_global_decoder = h_global
        after_global_decoder = self.global_decoder(before_global_decoder)
        after_classify = self.classify(after_global_decoder)
        return before_global_decoder, after_global_decoder, after_classify

    def __str__(self):
        """Print every sub-MLP's Linear weights plus the classify head; return ''."""
        layer_names = ["node_encoder", "edge_encoder", "global_encoder",
                       "node_update", "edge_update", "global_update", "global_decoder"]
        layers = [self.node_encoder, self.edge_encoder, self.global_encoder,
                  self.node_update, self.edge_update, self.global_update, self.global_decoder]
        for name, stack in zip(layer_names, layers):
            print(name)
            for layer in stack.children():
                if isinstance(layer, nn.Linear):
                    print(layer.state_dict())
        print("classify")
        print(self.classify.weight)
        return ""
311
-
312
class Transferred_Learning(nn.Module):
    """Frozen pretrained GNN backbone followed by a fresh decoder + classify head.

    The backbone is rebuilt from a config (utils.buildFromConfig), loaded from
    a checkpoint, stripped of its old classify head, and frozen.  The TL_*
    accessors run inputs through the backbone's sub-modules by index.
    """

    def __init__(self, pretraining_path, pretraining_model, sample_graph, sample_global, hid_size, out_size, n_layers, n_proc_steps, dropout=0, **kwargs):
        super().__init__()
        print(f'Unused args while creating GCN: {kwargs}')
        self.n_layers = n_layers
        self.n_proc_steps = n_proc_steps
        self.layers = nn.ModuleList()

        self.has_global = len(sample_global) != 0 and sample_global.shape[1] != 0
        gl_size = sample_global.shape[1] if self.has_global else 1

        self.pretrained_model = utils.buildFromConfig(pretraining_model, {'sample_graph': sample_graph, 'sample_global': sample_global})
        checkpoint = torch.load(pretraining_path)
        self.pretrained_model.load_state_dict(checkpoint['model_state_dict'])
        # drop the old classify head; keep the rest as an indexable Sequential
        backbone = list(self.pretrained_model.children())[:-1]
        self.pretrained_model = nn.Sequential(*backbone)

        # freeze the whole backbone
        for param in self.pretrained_model.parameters():
            param.requires_grad = False

        self.global_decoder = Make_MLP(pretraining_model['args']['hid_size'], hid_size, hid_size, n_layers, dropout=dropout)
        self.classify = nn.Linear(hid_size, out_size)

    def _apply_pretrained(self, idx, x):
        """Run *x* through frozen backbone sub-module *idx*, layer by layer."""
        for sub in self.pretrained_model[idx]:
            x = sub(x)
        return x

    def TL_node_encoder(self, x):
        return self._apply_pretrained(1, x)

    def TL_edge_encoder(self, x):
        return self._apply_pretrained(2, x)

    def TL_global_encoder(self, x):
        return self._apply_pretrained(3, x)

    def TL_node_update(self, x):
        return self._apply_pretrained(4, x)

    def TL_edge_update(self, x):
        return self._apply_pretrained(5, x)

    def TL_global_update(self, x):
        return self._apply_pretrained(6, x)

    def TL_global_decoder(self, x):
        return self._apply_pretrained(7, x)

    def forward(self, g, global_feats):
        g.ndata['h'] = self.TL_node_encoder(g.ndata['features'])
        g.edata['e'] = self.TL_edge_encoder(g.edata['features'])
        if not self.has_global:
            global_feats = g.batch_num_nodes()[:, None].to(torch.float)
        h_global = self.TL_global_encoder(global_feats)
        for _ in range(self.n_proc_steps):
            g.apply_edges(dgl.function.copy_u('h', 'm_u'))
            g.apply_edges(copy_v)
            edge_in = torch.cat((g.edata['e'], g.edata['m_u'], g.edata['m_v'], broadcast_global_to_edges(g, h_global)), dim=1)
            g.edata['e'] = self.TL_edge_update(edge_in)
            g.update_all(dgl.function.copy_e('e', 'm'), dgl.function.sum('m', 'h_e'))
            node_in = torch.cat((g.ndata['h'], g.ndata['h_e'], broadcast_global_to_nodes(g, h_global)), dim=1)
            g.ndata['h'] = self.TL_node_update(node_in)
            h_global = self.TL_global_update(torch.cat((h_global, dgl.mean_nodes(g, 'h'), dgl.mean_edges(g, 'e')), dim=1))
        h_global = self.TL_global_decoder(h_global)
        return self.classify(self.global_decoder(h_global))
393
-
394
class Transferred_Learning_Graph(nn.Module):
    # Frozen pretrained GNN backbone whose message passing is continued by
    # freshly trained update blocks, then a new decoder/classify head.
    def __init__(self, pretraining_path, pretraining_model, sample_graph, sample_global, hid_size, out_size, n_layers, n_proc_steps, additional_proc_steps=1, dropout=0, **kwargs):
        """Build the frozen backbone plus new trainable update/decoder modules.

        pretraining_path: checkpoint file holding 'model_state_dict'.
        pretraining_model: config dict consumed by utils.buildFromConfig.
            NOTE(review): hid_size presumably must match the backbone's hidden
            size for the new update blocks to accept its embeddings — not
            checked here; confirm against the config.
        additional_proc_steps: extra trainable message-passing rounds run
            after the backbone's n_proc_steps frozen rounds.
        """
        super().__init__()
        print(f'Unused args while creating GCN: {kwargs}')
        self.n_layers = n_layers
        self.n_proc_steps = n_proc_steps
        self.layers = nn.ModuleList()

        if (len(sample_global) == 0):
            self.has_global = False
        else:
            self.has_global = sample_global.shape[1] != 0
        gl_size = sample_global.shape[1] if self.has_global else 1

        self.pretrained_model = utils.buildFromConfig(pretraining_model, {'sample_graph': sample_graph, 'sample_global': sample_global})

        checkpoint = torch.load(pretraining_path)
        self.pretrained_model.load_state_dict(checkpoint['model_state_dict'])
        # Drop the backbone's classify head; keep the rest indexable by position.
        pretrained_layers = list(self.pretrained_model.children())
        pretrained_layers = pretrained_layers[:-1]
        self.pretrained_model = nn.Sequential(*pretrained_layers)

        self.additional_proc_steps = additional_proc_steps

        # Freeze Weights
        for param in self.pretrained_model.parameters():
            param.requires_grad = False  # Freeze all layers

        # GNN: new, trainable update blocks for the extra processing rounds
        self.node_update = Make_MLP(3*hid_size, hid_size, hid_size, n_layers, dropout=dropout)
        self.edge_update = Make_MLP(4*hid_size, hid_size, hid_size, n_layers, dropout=dropout)
        self.global_update = Make_MLP(3*hid_size, hid_size, hid_size, n_layers, dropout=dropout)

        # decoder
        self.global_decoder = Make_MLP(hid_size, hid_size, hid_size, n_layers, dropout=dropout)
        self.classify = nn.Linear(hid_size, out_size)

    # The TL_* helpers below run x through the frozen backbone's sub-module
    # at a fixed positional index of the stripped nn.Sequential.
    def TL_node_encoder(self, x):
        for layer in self.pretrained_model[1]:
            x = layer(x)
        return x

    def TL_edge_encoder(self, x):
        for layer in self.pretrained_model[2]:
            x = layer(x)
        return x

    def TL_global_encoder(self, x):
        for layer in self.pretrained_model[3]:
            x = layer(x)
        return x

    def TL_node_update(self, x):
        for layer in self.pretrained_model[4]:
            x = layer(x)
        return x

    def TL_edge_update(self, x):
        for layer in self.pretrained_model[5]:
            x = layer(x)
        return x

    def TL_global_update(self, x):
        for layer in self.pretrained_model[6]:
            x = layer(x)
        return x

    def forward(self, g, global_feats):
        """Frozen rounds first, then trainable rounds, then decode + classify."""
        h = self.TL_node_encoder(g.ndata['features'])
        e = self.TL_edge_encoder(g.edata['features'])
        g.ndata['h'] = h
        g.edata['e'] = e
        if not self.has_global:
            # fall back to node counts as the sole global feature
            global_feats = g.batch_num_nodes()[:, None].to(torch.float)
        h_global = self.TL_global_encoder(global_feats)
        # frozen message-passing rounds (backbone weights)
        for i in range(self.n_proc_steps):
            g.apply_edges(dgl.function.copy_u('h', 'm_u'))
            g.apply_edges(copy_v)
            g.edata['e'] = self.TL_edge_update(torch.cat((g.edata['e'], g.edata['m_u'], g.edata['m_v'], broadcast_global_to_edges(g, h_global)), dim = 1))
            g.update_all(dgl.function.copy_e('e', 'm'), dgl.function.sum('m', 'h_e'))
            g.ndata['h'] = self.TL_node_update(torch.cat((g.ndata['h'], g.ndata['h_e'], broadcast_global_to_nodes(g, h_global)), dim = 1))
            h_global = self.TL_global_update(torch.cat((h_global, dgl.mean_nodes(g, 'h'), dgl.mean_edges(g, 'e')), dim = 1))
        # trainable message-passing rounds (new update blocks)
        for j in range(self.additional_proc_steps):
            g.apply_edges(dgl.function.copy_u('h', 'm_u'))
            g.apply_edges(copy_v)
            g.edata['e'] = self.edge_update(torch.cat((g.edata['e'], g.edata['m_u'], g.edata['m_v'], broadcast_global_to_edges(g, h_global)), dim = 1))
            g.update_all(dgl.function.copy_e('e', 'm'), dgl.function.sum('m', 'h_e'))
            g.ndata['h'] = self.node_update(torch.cat((g.ndata['h'], g.ndata['h_e'], broadcast_global_to_nodes(g, h_global)), dim = 1))
            h_global = self.global_update(torch.cat((h_global, dgl.mean_nodes(g, 'h'), dgl.mean_edges(g, 'e')), dim = 1))

        h_global = self.global_decoder(h_global)
        return self.classify(h_global)
486
-
487
class Transferred_Learning_Parallel(nn.Module):
    """Frozen pretrained backbone run in parallel with a fresh GNN branch;
    both global embeddings are concatenated before the classify head.

    Fix vs. the original: Pretrained_Output referenced `global_feats` without
    it ever being bound when `self.has_global` was True (NameError).  It now
    takes the global features as an optional argument, passed by forward().
    """

    def __init__(self, pretraining_path, pretraining_model, sample_graph, sample_global, hid_size, out_size, n_layers, n_proc_steps, dropout=0, **kwargs):
        super().__init__()
        print(f'Unused args while creating GCN: {kwargs}')
        self.n_layers = n_layers
        self.n_proc_steps = n_proc_steps
        self.layers = nn.ModuleList()
        self.has_global = sample_global.shape[1] != 0
        gl_size = sample_global.shape[1] if self.has_global else 1

        self.pretrained_model = utils.buildFromConfig(pretraining_model, {'sample_graph': sample_graph, 'sample_global': sample_global})
        checkpoint = torch.load(pretraining_path)
        self.pretrained_model.load_state_dict(checkpoint['model_state_dict'])
        # drop the backbone's classify head
        pretrained_layers = list(self.pretrained_model.children())[:-1]
        self.pretrained_model = nn.Sequential(*pretrained_layers)

        # freeze the whole backbone
        for param in self.pretrained_model.parameters():
            param.requires_grad = False

        # encoder (fresh branch)
        self.node_encoder = Make_MLP(sample_graph.ndata['features'].shape[1], hid_size, hid_size, n_layers, dropout=dropout)
        self.edge_encoder = Make_MLP(sample_graph.edata['features'].shape[1], hid_size, hid_size, n_layers, dropout=dropout)
        self.global_encoder = Make_MLP(gl_size, hid_size, hid_size, n_layers, dropout=dropout)

        # GNN (fresh branch)
        self.node_update = Make_MLP(3*hid_size, hid_size, hid_size, n_layers, dropout=dropout)
        self.edge_update = Make_MLP(4*hid_size, hid_size, hid_size, n_layers, dropout=dropout)
        self.global_update = Make_MLP(3*hid_size, hid_size, hid_size, n_layers, dropout=dropout)

        # decoder; the head sees both branches' embeddings concatenated
        self.global_decoder = Make_MLP(hid_size, hid_size, hid_size, n_layers, dropout=dropout)
        self.classify = nn.Linear(hid_size + pretraining_model['args']['hid_size'], out_size)

    def _apply_pretrained(self, idx, x):
        """Run *x* through frozen backbone sub-module *idx*, layer by layer."""
        for sub in self.pretrained_model[idx]:
            x = sub(x)
        return x

    def TL_node_encoder(self, x):
        return self._apply_pretrained(1, x)

    def TL_edge_encoder(self, x):
        return self._apply_pretrained(2, x)

    def TL_global_encoder(self, x):
        return self._apply_pretrained(3, x)

    def TL_node_update(self, x):
        return self._apply_pretrained(4, x)

    def TL_edge_update(self, x):
        return self._apply_pretrained(5, x)

    def TL_global_update(self, x):
        return self._apply_pretrained(6, x)

    def TL_global_decoder(self, x):
        return self._apply_pretrained(7, x)

    def Pretrained_Output(self, g, global_feats=None):
        """Run the frozen backbone on *g* and return its decoded global embedding.

        global_feats: per-graph global features; ignored (recomputed from node
        counts) when the model has no global features.  Optional for backward
        compatibility — the original signature took only *g*.
        """
        g.ndata['h'] = self.TL_node_encoder(g.ndata['features'])
        g.edata['e'] = self.TL_edge_encoder(g.edata['features'])
        if not self.has_global:
            global_feats = g.batch_num_nodes()[:, None].to(torch.float)
        h_global = self.TL_global_encoder(global_feats)
        for i in range(self.n_proc_steps):
            g.apply_edges(dgl.function.copy_u('h', 'm_u'))
            g.apply_edges(copy_v)
            g.edata['e'] = self.TL_edge_update(torch.cat((g.edata['e'], g.edata['m_u'], g.edata['m_v'], broadcast_global_to_edges(g, h_global)), dim = 1))
            g.update_all(dgl.function.copy_e('e', 'm'), dgl.function.sum('m', 'h_e'))
            g.ndata['h'] = self.TL_node_update(torch.cat((g.ndata['h'], g.ndata['h_e'], broadcast_global_to_nodes(g, h_global)), dim = 1))
            h_global = self.TL_global_update(torch.cat((h_global, dgl.mean_nodes(g, 'h'), dgl.mean_edges(g, 'e')), dim = 1))
        h_global = self.TL_global_decoder(h_global)
        return h_global

    def forward(self, g, global_feats):
        # frozen branch runs on a clone so its ndata/edata writes don't leak
        pretrained_global = self.Pretrained_Output(g.clone(), global_feats)
        g.ndata['h'] = self.node_encoder(g.ndata['features'])
        g.edata['e'] = self.edge_encoder(g.edata['features'])
        if not self.has_global:
            global_feats = g.batch_num_nodes()[:, None].to(torch.float)
        h_global = self.global_encoder(global_feats)
        for i in range(self.n_proc_steps):
            g.apply_edges(dgl.function.copy_u('h', 'm_u'))
            g.apply_edges(copy_v)
            g.edata['e'] = self.edge_update(torch.cat((g.edata['e'], g.edata['m_u'], g.edata['m_v'], broadcast_global_to_edges(g, h_global)), dim = 1))
            g.update_all(dgl.function.copy_e('e', 'm'), dgl.function.sum('m', 'h_e'))
            g.ndata['h'] = self.node_update(torch.cat((g.ndata['h'], g.ndata['h_e'], broadcast_global_to_nodes(g, h_global)), dim = 1))
            h_global = self.global_update(torch.cat((h_global, dgl.mean_nodes(g, 'h'), dgl.mean_edges(g, 'e')), dim = 1))
        h_global = self.global_decoder(h_global)

        return self.classify(torch.cat((pretrained_global, h_global), dim = 1))
594
-
595
class Transferred_Learning_Sequential(nn.Module):
    """Frozen pretrained backbone feeding a fresh MLP + classify head.

    Fix vs. the original: Pretrained_Output referenced `global_feats` without
    it ever being bound when `self.has_global` was True (NameError).  It now
    takes the global features as an optional argument, passed by forward().
    """

    def __init__(self, pretraining_path, pretraining_model, sample_graph, sample_global, hid_size, out_size, n_layers, n_proc_steps, dropout=0, **kwargs):
        super().__init__()
        print(f'Unused args while creating GCN: {kwargs}')
        self.n_layers = n_layers
        self.n_proc_steps = n_proc_steps
        self.layers = nn.ModuleList()
        self.has_global = sample_global.shape[1] != 0
        gl_size = sample_global.shape[1] if self.has_global else 1

        self.pretrained_model = utils.buildFromConfig(pretraining_model, {'sample_graph': sample_graph, 'sample_global': sample_global})
        checkpoint = torch.load(pretraining_path)
        self.pretrained_model.load_state_dict(checkpoint['model_state_dict'])
        # drop the backbone's classify head
        pretrained_layers = list(self.pretrained_model.children())[:-1]
        self.pretrained_model = nn.Sequential(*pretrained_layers)

        # freeze the whole backbone
        for param in self.pretrained_model.parameters():
            param.requires_grad = False

        # fresh head on top of the backbone's decoded global embedding
        self.mlp = Make_MLP(pretraining_model['args']['hid_size'], hid_size, hid_size, n_layers, dropout=dropout)

        self.classify = nn.Linear(hid_size, out_size)

    def _apply_pretrained(self, idx, x):
        """Run *x* through frozen backbone sub-module *idx*, layer by layer."""
        for sub in self.pretrained_model[idx]:
            x = sub(x)
        return x

    def TL_node_encoder(self, x):
        return self._apply_pretrained(1, x)

    def TL_edge_encoder(self, x):
        return self._apply_pretrained(2, x)

    def TL_global_encoder(self, x):
        return self._apply_pretrained(3, x)

    def TL_node_update(self, x):
        return self._apply_pretrained(4, x)

    def TL_edge_update(self, x):
        return self._apply_pretrained(5, x)

    def TL_global_update(self, x):
        return self._apply_pretrained(6, x)

    def TL_global_decoder(self, x):
        return self._apply_pretrained(7, x)

    def Pretrained_Output(self, g, global_feats=None):
        """Run the frozen backbone on *g* and return its decoded global embedding.

        global_feats: per-graph global features; ignored (recomputed from node
        counts) when the model has no global features.  Optional for backward
        compatibility — the original signature took only *g*.
        """
        g.ndata['h'] = self.TL_node_encoder(g.ndata['features'])
        g.edata['e'] = self.TL_edge_encoder(g.edata['features'])
        if not self.has_global:
            global_feats = g.batch_num_nodes()[:, None].to(torch.float)
        h_global = self.TL_global_encoder(global_feats)
        for i in range(self.n_proc_steps):
            g.apply_edges(dgl.function.copy_u('h', 'm_u'))
            g.apply_edges(copy_v)
            g.edata['e'] = self.TL_edge_update(torch.cat((g.edata['e'], g.edata['m_u'], g.edata['m_v'], broadcast_global_to_edges(g, h_global)), dim = 1))
            g.update_all(dgl.function.copy_e('e', 'm'), dgl.function.sum('m', 'h_e'))
            g.ndata['h'] = self.TL_node_update(torch.cat((g.ndata['h'], g.ndata['h_e'], broadcast_global_to_nodes(g, h_global)), dim = 1))
            h_global = self.TL_global_update(torch.cat((h_global, dgl.mean_nodes(g, 'h'), dgl.mean_edges(g, 'e')), dim = 1))
        h_global = self.TL_global_decoder(h_global)
        return h_global

    def forward(self, g, global_feats):
        # frozen backbone runs on a clone so its ndata/edata writes don't leak
        pretrained_global = self.Pretrained_Output(g.clone(), global_feats)
        global_features = self.mlp(pretrained_global)
        return self.classify(global_features)
678
-
679
-
680
class Transferred_Learning_Message_Passing(nn.Module):
    """Frozen backbone whose per-round global embeddings are concatenated
    (hid_size * n_proc_steps wide) and fed to a fresh MLP + classify head.

    Fix vs. the original: Pretrained_Output referenced `global_feats` without
    it ever being bound when `self.has_global` was True (NameError).  It now
    takes the global features as an optional argument, passed by forward().
    """

    def __init__(self, pretraining_path, pretraining_model, sample_graph, sample_global, hid_size, out_size, n_layers, n_proc_steps, dropout=0, **kwargs):
        super().__init__()
        print(f'Unused args while creating GCN: {kwargs}')
        self.n_layers = n_layers
        self.n_proc_steps = n_proc_steps
        self.layers = nn.ModuleList()
        self.has_global = sample_global.shape[1] != 0
        gl_size = sample_global.shape[1] if self.has_global else 1

        self.pretrained_model = utils.buildFromConfig(pretraining_model, {'sample_graph': sample_graph, 'sample_global': sample_global})
        checkpoint = torch.load(pretraining_path)
        self.pretrained_model.load_state_dict(checkpoint['model_state_dict'])
        # drop the backbone's classify head
        pretrained_layers = list(self.pretrained_model.children())[:-1]
        self.pretrained_model = nn.Sequential(*pretrained_layers)

        # freeze the whole backbone
        for param in self.pretrained_model.parameters():
            param.requires_grad = False

        # head input width = one backbone global embedding per processing round
        self.mlp = Make_MLP(pretraining_model['args']['hid_size']*pretraining_model['args']['n_proc_steps'], hid_size, hid_size, n_layers, dropout=dropout)

        self.classify = nn.Linear(hid_size, out_size)

    def _apply_pretrained(self, idx, x):
        """Run *x* through frozen backbone sub-module *idx*, layer by layer."""
        for sub in self.pretrained_model[idx]:
            x = sub(x)
        return x

    def TL_node_encoder(self, x):
        return self._apply_pretrained(1, x)

    def TL_edge_encoder(self, x):
        return self._apply_pretrained(2, x)

    def TL_global_encoder(self, x):
        return self._apply_pretrained(3, x)

    def TL_node_update(self, x):
        return self._apply_pretrained(4, x)

    def TL_edge_update(self, x):
        return self._apply_pretrained(5, x)

    def TL_global_update(self, x):
        return self._apply_pretrained(6, x)

    def TL_global_decoder(self, x):
        return self._apply_pretrained(7, x)

    def Pretrained_Output(self, g, global_feats=None):
        """Run the frozen backbone and return all rounds' global embeddings,
        concatenated along dim 1 (requires n_proc_steps >= 1).

        global_feats: per-graph global features; ignored (recomputed from node
        counts) when the model has no global features.  Optional for backward
        compatibility — the original signature took only *g*.
        """
        message_passing = None
        g.ndata['h'] = self.TL_node_encoder(g.ndata['features'])
        g.edata['e'] = self.TL_edge_encoder(g.edata['features'])
        if not self.has_global:
            global_feats = g.batch_num_nodes()[:, None].to(torch.float)
        h_global = self.TL_global_encoder(global_feats)
        for i in range(self.n_proc_steps):
            g.apply_edges(dgl.function.copy_u('h', 'm_u'))
            g.apply_edges(copy_v)
            g.edata['e'] = self.TL_edge_update(torch.cat((g.edata['e'], g.edata['m_u'], g.edata['m_v'], broadcast_global_to_edges(g, h_global)), dim = 1))
            g.update_all(dgl.function.copy_e('e', 'm'), dgl.function.sum('m', 'h_e'))
            g.ndata['h'] = self.TL_node_update(torch.cat((g.ndata['h'], g.ndata['h_e'], broadcast_global_to_nodes(g, h_global)), dim = 1))
            h_global = self.TL_global_update(torch.cat((h_global, dgl.mean_nodes(g, 'h'), dgl.mean_edges(g, 'e')), dim = 1))
            # accumulate each round's global state for the downstream head
            if (message_passing is None):
                message_passing = h_global.clone()
            else:
                message_passing = torch.cat((message_passing, h_global.clone()), dim=1)
        h_global = self.TL_global_decoder(h_global)
        return message_passing

    def forward(self, g, global_feats):
        # frozen backbone runs on a clone so its ndata/edata writes don't leak
        pretrained_global = self.Pretrained_Output(g.clone(), global_feats)
        global_features = self.mlp(pretrained_global)
        return self.classify(global_features)
770
-
771
class Transferred_Learning_Message_Passing_Parallel(nn.Module):
    """Two-branch transfer-learning GNN.

    A frozen pretrained GNN contributes the concatenation of its per-step
    global snapshots; a freshly trained GNN branch contributes its decoded
    global state. Both are concatenated and fed to a linear classifier.
    """

    def __init__(self, pretraining_path, pretraining_model, sample_graph, sample_global,
                 hid_size, out_size, n_layers, n_proc_steps, dropout=0, **kwargs):
        super().__init__()
        print(f'Unused args while creating GCN: {kwargs}')
        self.n_layers = n_layers
        self.n_proc_steps = n_proc_steps
        self.layers = nn.ModuleList()
        self.has_global = sample_global.shape[1] != 0
        gl_size = sample_global.shape[1] if self.has_global else 1

        # Rebuild the pretrained model from config, load its weights, and
        # drop its classification head (the last child module).
        self.pretrained_model = utils.buildFromConfig(pretraining_model, {'sample_graph': sample_graph, 'sample_global': sample_global})
        checkpoint = torch.load(pretraining_path)
        self.pretrained_model.load_state_dict(checkpoint['model_state_dict'])
        pretrained_layers = list(self.pretrained_model.children())
        pretrained_layers = pretrained_layers[:-1]
        self.pretrained_model = nn.Sequential(*pretrained_layers)

        # Fresh, trainable branch: encoders.
        self.node_encoder = Make_MLP(sample_graph.ndata['features'].shape[1], hid_size, hid_size, n_layers, dropout=dropout)
        self.edge_encoder = Make_MLP(sample_graph.edata['features'].shape[1], hid_size, hid_size, n_layers, dropout=dropout)
        self.global_encoder = Make_MLP(gl_size, hid_size, hid_size, n_layers, dropout=dropout)

        # Fresh branch: GNN update networks.
        self.node_update = Make_MLP(3*hid_size, hid_size, hid_size, n_layers, dropout=dropout)
        self.edge_update = Make_MLP(4*hid_size, hid_size, hid_size, n_layers, dropout=dropout)
        self.global_update = Make_MLP(3*hid_size, hid_size, hid_size, n_layers, dropout=dropout)

        # Fresh branch: decoder.
        self.global_decoder = Make_MLP(hid_size, hid_size, hid_size, n_layers, dropout=dropout)

        # Freeze the pretrained branch entirely.
        for param in self.pretrained_model.parameters():
            param.requires_grad = False

        # NOTE(review): assumes the pretrained model ran the same
        # n_proc_steps as this one — otherwise the snapshot width below
        # will not match; confirm against the pretraining config.
        self.classify = nn.Linear(pretraining_model['args']['hid_size']*pretraining_model['args']['n_proc_steps'] + hid_size, out_size)

    # --- Thin wrappers over the pretrained children (see __init__ order). ---
    def TL_node_encoder(self, x):
        return self.pretrained_model[1](x)

    def TL_edge_encoder(self, x):
        return self.pretrained_model[2](x)

    def TL_global_encoder(self, x):
        return self.pretrained_model[3](x)

    def TL_node_update(self, x):
        return self.pretrained_model[4](x)

    def TL_edge_update(self, x):
        return self.pretrained_model[5](x)

    def TL_global_update(self, x):
        return self.pretrained_model[6](x)

    def TL_global_decoder(self, x):
        return self.pretrained_model[7](x)

    def Pretrained_Output(self, g, global_feats=None):
        """Run the frozen branch; return the per-step global snapshots,
        concatenated along dim 1.

        Args:
            g: batched DGLGraph with 'features' on nodes and edges.
            global_feats: per-graph global features; required when
                ``self.has_global``. Defaults to None for backward
                compatibility with the old single-argument call.
        """
        message_passing = None
        g.ndata['h'] = self.TL_node_encoder(g.ndata['features'])
        g.edata['e'] = self.TL_edge_encoder(g.edata['features'])
        if not self.has_global:
            # Fall back to per-graph node counts as the global feature.
            global_feats = g.batch_num_nodes()[:, None].to(torch.float)
        elif global_feats is None:
            # BUG FIX: original read an undefined local (NameError) here
            # whenever self.has_global was True.
            raise ValueError("global_feats must be provided when the model has global features")
        h_global = self.TL_global_encoder(global_feats)
        for _ in range(self.n_proc_steps):
            g.apply_edges(dgl.function.copy_u('h', 'm_u'))
            g.apply_edges(copy_v)
            g.edata['e'] = self.TL_edge_update(torch.cat((g.edata['e'], g.edata['m_u'], g.edata['m_v'], broadcast_global_to_edges(g, h_global)), dim=1))
            g.update_all(dgl.function.copy_e('e', 'm'), dgl.function.sum('m', 'h_e'))
            g.ndata['h'] = self.TL_node_update(torch.cat((g.ndata['h'], g.ndata['h_e'], broadcast_global_to_nodes(g, h_global)), dim=1))
            h_global = self.TL_global_update(torch.cat((h_global, dgl.mean_nodes(g, 'h'), dgl.mean_edges(g, 'e')), dim=1))
            # Snapshot the global state after each processing step.
            if message_passing is None:
                message_passing = h_global.clone()
            else:
                message_passing = torch.cat((message_passing, h_global.clone()), dim=1)
        h_global = self.TL_global_decoder(h_global)
        return message_passing

    def forward(self, g, global_feats):
        """Run both branches and classify their concatenated global states."""
        # Clone so the frozen branch's message passing does not clobber the
        # node/edge state the trainable branch is about to write.
        pretrained_message = self.Pretrained_Output(g.clone(), global_feats)
        h = self.node_encoder(g.ndata['features'])
        e = self.edge_encoder(g.edata['features'])
        g.ndata['h'] = h
        g.edata['e'] = e
        if not self.has_global:
            global_feats = g.batch_num_nodes()[:, None].to(torch.float)
        h_global = self.global_encoder(global_feats)
        for _ in range(self.n_proc_steps):
            g.apply_edges(dgl.function.copy_u('h', 'm_u'))
            g.apply_edges(copy_v)
            g.edata['e'] = self.edge_update(torch.cat((g.edata['e'], g.edata['m_u'], g.edata['m_v'], broadcast_global_to_edges(g, h_global)), dim=1))
            g.update_all(dgl.function.copy_e('e', 'm'), dgl.function.sum('m', 'h_e'))
            g.ndata['h'] = self.node_update(torch.cat((g.ndata['h'], g.ndata['h_e'], broadcast_global_to_nodes(g, h_global)), dim=1))
            h_global = self.global_update(torch.cat((h_global, dgl.mean_nodes(g, 'h'), dgl.mean_edges(g, 'e')), dim=1))
        h_global = self.global_decoder(h_global)
        return self.classify(torch.cat((pretrained_message, h_global), dim=1))
883
-
884
class Transferred_Learning_Finetuning(nn.Module):
    """Fine-tune a pretrained message-passing GNN.

    Reuses the pretrained encode/process/decode stack (optionally frozen
    except for the global decoder) and trains a fresh linear classifier on
    top of the decoded global state.
    """

    def __init__(self, pretraining_path, pretraining_model, sample_graph, sample_global,
                 hid_size, out_size, n_layers, n_proc_steps, dropout=0,
                 frozen_pretraining=False, **kwargs):
        super().__init__()
        print(f'Unused args while creating GCN: {kwargs}')
        self.n_layers = n_layers
        self.n_proc_steps = n_proc_steps
        self.layers = nn.ModuleList()

        # sample_global may be an empty container; treat that as "no globals".
        if (len(sample_global) == 0):
            self.has_global = False
        else:
            self.has_global = sample_global.shape[1] != 0
        gl_size = sample_global.shape[1] if self.has_global else 1

        # Rebuild the pretrained model, load its weights, and drop its
        # classification head (the last child module).
        self.pretrained_model = utils.buildFromConfig(pretraining_model, {'sample_graph': sample_graph, 'sample_global': sample_global})
        checkpoint = torch.load(pretraining_path)
        self.pretrained_model.load_state_dict(checkpoint['model_state_dict'])
        pretrained_layers = list(self.pretrained_model.children())
        pretrained_layers = pretrained_layers[:-1]
        self.pretrained_model = nn.Sequential(*pretrained_layers)

        print(f"Freeze Pretraining = {frozen_pretraining}")
        if (frozen_pretraining):
            for param in self.pretrained_model.parameters():
                param.requires_grad = False  # Freeze all layers ...
            # BUG FIX: the original iterated the decoder's sub-MODULES and
            # set `requires_grad` on them, which does not unfreeze any
            # weights; iterate .parameters() so the global decoder actually
            # stays trainable as intended.
            for param in self.pretrained_model[7].parameters():
                param.requires_grad = True  # ... but keep the global decoder trainable.

        # Fixed seed so the new head initializes reproducibly.
        torch.manual_seed(2)
        self.classify = nn.Linear(pretraining_model['args']['hid_size'], out_size)

    # --- Thin wrappers over the pretrained children (see __init__ order). ---
    def TL_node_encoder(self, x):
        return self.pretrained_model[1](x)

    def TL_edge_encoder(self, x):
        return self.pretrained_model[2](x)

    def TL_global_encoder(self, x):
        return self.pretrained_model[3](x)

    def TL_node_update(self, x):
        return self.pretrained_model[4](x)

    def TL_edge_update(self, x):
        return self.pretrained_model[5](x)

    def TL_global_update(self, x):
        return self.pretrained_model[6](x)

    def TL_global_decoder(self, x):
        return self.pretrained_model[7](x)

    def Pretrained_Output(self, g, global_feats=None):
        """Run the pretrained stack on ``g`` and return the decoded global
        state.

        Args:
            g: batched DGLGraph with 'features' on nodes and edges.
            global_feats: per-graph global features; required when
                ``self.has_global``. Defaults to None for backward
                compatibility with the old single-argument call.
        """
        g.ndata['h'] = self.TL_node_encoder(g.ndata['features'])
        g.edata['e'] = self.TL_edge_encoder(g.edata['features'])
        if not self.has_global:
            global_feats = g.batch_num_nodes()[:, None].to(torch.float)
        elif global_feats is None:
            # BUG FIX: original read an undefined local (NameError) here
            # whenever self.has_global was True.
            raise ValueError("global_feats must be provided when the model has global features")
        h_global = self.TL_global_encoder(global_feats)
        for _ in range(self.n_proc_steps):
            g.apply_edges(dgl.function.copy_u('h', 'm_u'))
            g.apply_edges(copy_v)
            g.edata['e'] = self.TL_edge_update(torch.cat((g.edata['e'], g.edata['m_u'], g.edata['m_v'], broadcast_global_to_edges(g, h_global)), dim=1))
            g.update_all(dgl.function.copy_e('e', 'm'), dgl.function.sum('m', 'h_e'))
            g.ndata['h'] = self.TL_node_update(torch.cat((g.ndata['h'], g.ndata['h_e'], broadcast_global_to_nodes(g, h_global)), dim=1))
            h_global = self.TL_global_update(torch.cat((h_global, dgl.mean_nodes(g, 'h'), dgl.mean_edges(g, 'e')), dim=1))
        h_global = self.TL_global_decoder(h_global)
        return h_global

    def forward(self, g, global_feats):
        """Classify the pretrained stack's decoded global state."""
        h_global = self.Pretrained_Output(g.clone(), global_feats)
        return self.classify(h_global)

    def representation(self, g, global_feats):
        """Return intermediate embeddings for analysis.

        Returns a tuple: (global state before the decoder, after the
        decoder, and the classifier logits).
        """
        h = self.TL_node_encoder(g.ndata['features'])
        e = self.TL_edge_encoder(g.edata['features'])
        g.ndata['h'] = h
        g.edata['e'] = e
        if not self.has_global:
            global_feats = g.batch_num_nodes()[:, None].to(torch.float)
        h_global = self.TL_global_encoder(global_feats)
        for _ in range(self.n_proc_steps):
            g.apply_edges(dgl.function.copy_u('h', 'm_u'))
            g.apply_edges(copy_v)
            g.edata['e'] = self.TL_edge_update(torch.cat((g.edata['e'], g.edata['m_u'], g.edata['m_v'], broadcast_global_to_edges(g, h_global)), dim=1))
            g.update_all(dgl.function.copy_e('e', 'm'), dgl.function.sum('m', 'h_e'))
            g.ndata['h'] = self.TL_node_update(torch.cat((g.ndata['h'], g.ndata['h_e'], broadcast_global_to_nodes(g, h_global)), dim=1))
            h_global = self.TL_global_update(torch.cat((h_global, dgl.mean_nodes(g, 'h'), dgl.mean_edges(g, 'e')), dim=1))

        before_global_decoder = h_global
        after_global_decoder = self.TL_global_decoder(before_global_decoder)
        after_classify = self.classify(after_global_decoder)
        return before_global_decoder, after_global_decoder, after_classify

    def __str__(self):
        """Print the Linear weights of every pretrained sub-network plus the
        new head (debug helper); always returns the empty string."""
        layer_names = ["node_encoder", "edge_encoder", "global_encoder",
                       "node_update", "edge_update", "global_update", "global_decoder"]
        layers = [self.pretrained_model[1], self.pretrained_model[2], self.pretrained_model[3],
                  self.pretrained_model[4], self.pretrained_model[5], self.pretrained_model[6],
                  self.pretrained_model[7]]
        for i in range(len(layers)):
            print(layer_names[i])
            for layer in layers[i].children():
                if isinstance(layer, nn.Linear):
                    print(layer.state_dict())
        print("classify")
        print(self.classify.weight)
        return ""
1011
-
1012
-
1013
class Transferred_Learning_Parallel_Finetuning(nn.Module):
    """Two-branch transfer learning with per-branch learning rates.

    A pretrained GNN (fine-tuned, not frozen) runs in parallel with a fresh
    GNN branch; their decoded global states are concatenated and classified.
    ``parameters()`` is overridden to return optimizer param groups with a
    separate learning rate per branch.
    """

    def __init__(self, pretraining_path, pretraining_model, sample_graph, sample_global,
                 hid_size, out_size, n_layers, n_proc_steps, dropout=0,
                 learning_rate=0.0001, **kwargs):
        super().__init__()
        print(f'Unused args while creating GCN: {kwargs}')

        # Scalar, or dict with 'trainable_lr' / 'finetuning_lr' keys.
        self.learning_rate = learning_rate

        self.parallel_params = []
        self.finetuning_params = []

        self.n_layers = n_layers
        self.n_proc_steps = n_proc_steps
        self.layers = nn.ModuleList()
        self.has_global = sample_global.shape[1] != 0
        gl_size = sample_global.shape[1] if self.has_global else 1

        # Rebuild the pretrained model, load its weights, drop its head.
        self.pretrained_model = utils.buildFromConfig(pretraining_model, {'sample_graph': sample_graph, 'sample_global': sample_global})
        checkpoint = torch.load(pretraining_path)
        self.pretrained_model.load_state_dict(checkpoint['model_state_dict'])
        pretrained_layers = list(self.pretrained_model.children())
        pretrained_layers = pretrained_layers[:-1]
        self.pretrained_model = nn.Sequential(*pretrained_layers)

        # The pretrained branch is fine-tuned at the 'finetuning_lr' rate.
        self.finetuning_params.append(self.pretrained_model)

        # Fresh branch: encoders.
        self.node_encoder = Make_MLP(sample_graph.ndata['features'].shape[1], hid_size, hid_size, n_layers, dropout=dropout)
        self.edge_encoder = Make_MLP(sample_graph.edata['features'].shape[1], hid_size, hid_size, n_layers, dropout=dropout)
        self.global_encoder = Make_MLP(gl_size, hid_size, hid_size, n_layers, dropout=dropout)

        # Fresh branch: GNN update networks.
        self.node_update = Make_MLP(3*hid_size, hid_size, hid_size, n_layers, dropout=dropout)
        self.edge_update = Make_MLP(4*hid_size, hid_size, hid_size, n_layers, dropout=dropout)
        self.global_update = Make_MLP(3*hid_size, hid_size, hid_size, n_layers, dropout=dropout)

        # Fresh branch: decoder and joint classifier.
        self.global_decoder = Make_MLP(hid_size, hid_size, hid_size, n_layers, dropout=dropout)
        self.classify = nn.Linear(hid_size + pretraining_model['args']['hid_size'], out_size)

        self.parallel_params.append(self.node_encoder)
        self.parallel_params.append(self.edge_encoder)
        self.parallel_params.append(self.global_encoder)
        self.parallel_params.append(self.node_update)
        self.parallel_params.append(self.edge_update)
        self.parallel_params.append(self.global_update)
        self.parallel_params.append(self.global_decoder)
        self.parallel_params.append(self.classify)

    # --- Thin wrappers over the pretrained children (see __init__ order). ---
    def TL_node_encoder(self, x):
        return self.pretrained_model[1](x)

    def TL_edge_encoder(self, x):
        return self.pretrained_model[2](x)

    def TL_global_encoder(self, x):
        return self.pretrained_model[3](x)

    def TL_node_update(self, x):
        return self.pretrained_model[4](x)

    def TL_edge_update(self, x):
        return self.pretrained_model[5](x)

    def TL_global_update(self, x):
        return self.pretrained_model[6](x)

    def TL_global_decoder(self, x):
        return self.pretrained_model[7](x)

    def Pretrained_Output(self, g, global_feats=None):
        """Run the pretrained stack on ``g``; return its decoded global state.

        ``global_feats`` is required when ``self.has_global`` (defaults to
        None for backward compatibility with the old single-argument call).
        """
        g.ndata['h'] = self.TL_node_encoder(g.ndata['features'])
        g.edata['e'] = self.TL_edge_encoder(g.edata['features'])
        if not self.has_global:
            global_feats = g.batch_num_nodes()[:, None].to(torch.float)
        elif global_feats is None:
            # BUG FIX: original read an undefined local (NameError) here
            # whenever self.has_global was True.
            raise ValueError("global_feats must be provided when the model has global features")
        h_global = self.TL_global_encoder(global_feats)
        for _ in range(self.n_proc_steps):
            g.apply_edges(dgl.function.copy_u('h', 'm_u'))
            g.apply_edges(copy_v)
            g.edata['e'] = self.TL_edge_update(torch.cat((g.edata['e'], g.edata['m_u'], g.edata['m_v'], broadcast_global_to_edges(g, h_global)), dim=1))
            g.update_all(dgl.function.copy_e('e', 'm'), dgl.function.sum('m', 'h_e'))
            g.ndata['h'] = self.TL_node_update(torch.cat((g.ndata['h'], g.ndata['h_e'], broadcast_global_to_nodes(g, h_global)), dim=1))
            h_global = self.TL_global_update(torch.cat((h_global, dgl.mean_nodes(g, 'h'), dgl.mean_edges(g, 'e')), dim=1))
        h_global = self.TL_global_decoder(h_global)
        return h_global

    def forward(self, g, global_feats):
        """Run both branches; classify their concatenated global states."""
        # Clone so the pretrained branch does not clobber node/edge state
        # the fresh branch is about to write.
        pretrained_global = self.Pretrained_Output(g.clone(), global_feats)
        h = self.node_encoder(g.ndata['features'])
        e = self.edge_encoder(g.edata['features'])
        g.ndata['h'] = h
        g.edata['e'] = e
        if not self.has_global:
            global_feats = g.batch_num_nodes()[:, None].to(torch.float)
        h_global = self.global_encoder(global_feats)
        for _ in range(self.n_proc_steps):
            g.apply_edges(dgl.function.copy_u('h', 'm_u'))
            g.apply_edges(copy_v)
            g.edata['e'] = self.edge_update(torch.cat((g.edata['e'], g.edata['m_u'], g.edata['m_v'], broadcast_global_to_edges(g, h_global)), dim=1))
            g.update_all(dgl.function.copy_e('e', 'm'), dgl.function.sum('m', 'h_e'))
            g.ndata['h'] = self.node_update(torch.cat((g.ndata['h'], g.ndata['h_e'], broadcast_global_to_nodes(g, h_global)), dim=1))
            h_global = self.global_update(torch.cat((h_global, dgl.mean_nodes(g, 'h'), dgl.mean_edges(g, 'e')), dim=1))
        h_global = self.global_decoder(h_global)

        return self.classify(torch.cat((pretrained_global, h_global), dim=1))

    def parameters(self, recurse: bool = True):
        """Return optimizer parameter groups with separate learning rates for
        the fresh ('trainable_lr') and pretrained ('finetuning_lr') branches.

        NOTE: this override returns param-group dicts, not a parameter
        iterator — it is intended to be passed directly to an optimizer.
        """
        # BUG FIXES vs. the original: use isinstance instead of
        # `type(...) == dict`; use .get() so a dict missing a key falls back
        # instead of raising KeyError; honor a scalar learning_rate instead
        # of silently ignoring it (default 0.0001 is unchanged).
        default_lr = self.learning_rate if isinstance(self.learning_rate, (int, float)) else 0.0001
        params = []
        for model_section in self.parallel_params:
            lr = self.learning_rate.get("trainable_lr") if isinstance(self.learning_rate, dict) else None
            params.append({'params': model_section.parameters(), 'lr': lr if lr else default_lr})
        for model_section in self.finetuning_params:
            lr = self.learning_rate.get("finetuning_lr") if isinstance(self.learning_rate, dict) else None
            params.append({'params': model_section.parameters(), 'lr': lr if lr else default_lr})
        return params
1148
-
1149
class Attention(nn.Module):
    """Graph classifier using one multi-head self-attention pass over node
    embeddings (no edge network), followed by a global update and decoder."""

    def __init__(self, sample_graph, sample_global, hid_size, out_size,
                 n_layers, n_proc_steps, dropout=0, num_heads=1, **kwargs):
        super().__init__()
        print(f'Unused args while creating GCN: {kwargs}')
        self.n_layers = n_layers
        self.n_proc_steps = n_proc_steps
        # Kept so the weighted-mean width tracks the model width instead of
        # the previously hard-coded 64.
        self.hid_size = hid_size
        self.layers = nn.ModuleList()
        self.has_global = sample_global.shape[1] != 0
        gl_size = sample_global.shape[1] if self.has_global else 1

        # encoder
        self.node_encoder = Make_MLP(sample_graph.ndata['features'].shape[1], hid_size, hid_size, n_layers, dropout=dropout)
        self.global_encoder = Make_MLP(gl_size, hid_size, hid_size, n_layers, dropout=dropout)

        # GNN
        self.node_update = Make_MLP(2*hid_size, hid_size, hid_size, n_layers, dropout=dropout)
        self.global_update = Make_MLP(2*hid_size, hid_size, hid_size, n_layers, dropout=dropout)

        # decoder
        self.global_decoder = Make_MLP(hid_size, hid_size, hid_size, n_layers, dropout=dropout)
        self.classify = nn.Linear(hid_size, out_size)

        # attention
        self.multihead_attn = nn.MultiheadAttention(hid_size, num_heads, dropout=dropout, batch_first=True)
        self.queries = nn.Linear(hid_size, hid_size)
        self.keys = nn.Linear(hid_size, hid_size)
        self.values = nn.Linear(hid_size, hid_size)

    def forward(self, g, global_feats):
        """Classify a batched, padded graph ``g``.

        Requires g.ndata['padding_mask']; when g.ndata['w'] exists, node
        means are weighted by 'w' and normalized by the count of non-padded
        nodes per graph.
        """
        h = self.node_encoder(g.ndata['features'])
        g.ndata['h'] = h

        if not self.has_global:
            global_feats = g.batch_num_nodes()[:, None].to(torch.float)

        batch_num_nodes = None
        sum_weights = None
        if "w" in g.ndata:
            batch_indices = g.batch_num_nodes()
            # Find non-zero rows (non-padded nodes).
            non_padded_nodes_mask = torch.any(g.ndata['features'] != 0, dim=1)
            # Count non-padded nodes per graph in the batch.
            batch_num_nodes = []
            start_idx = 0
            for num_nodes in batch_indices:
                end_idx = start_idx + num_nodes
                non_padded_count = non_padded_nodes_mask[start_idx:end_idx].sum().item()
                batch_num_nodes.append(non_padded_count)
                start_idx = end_idx
            batch_num_nodes = torch.tensor(batch_num_nodes, device=g.ndata['features'].device)
            # BUG FIX: the divisor width was hard-coded to 64; use hid_size
            # so any model width works.
            sum_weights = batch_num_nodes[:, None].repeat(1, self.hid_size)
            # Replace the global feature with the true (non-padded) node count.
            global_feats = batch_num_nodes[:, None].to(torch.float)

        h_global = self.global_encoder(global_feats)

        h_original_shape = h.shape
        num_graphs = len(dgl.unbatch(g))
        # NOTE(review): assumes every graph in the batch is padded to the
        # same node count — confirm against the dataset collation.
        num_nodes = g.batch_num_nodes()[0].item()
        padding_mask = g.ndata['padding_mask'] > 0
        padding_mask = torch.reshape(padding_mask, (num_graphs, num_nodes))

        # Self-attention over the nodes of each graph (padded keys masked).
        h = g.ndata['h']
        query = self.queries(h)
        key = self.keys(h)
        value = self.values(h)
        query = torch.reshape(query, (num_graphs, num_nodes, h_original_shape[1]))
        key = torch.reshape(key, (num_graphs, num_nodes, h_original_shape[1]))
        value = torch.reshape(value, (num_graphs, num_nodes, h_original_shape[1]))
        h, _ = self.multihead_attn(query, key, value, key_padding_mask=padding_mask)
        h = torch.reshape(h, h_original_shape)

        h = self.node_update(torch.cat((h, broadcast_global_to_nodes(g, h_global)), dim=1))
        g.ndata['h'] = h
        if sum_weights is None:
            # ROBUSTNESS: the original unconditionally computed the weighted
            # mean and crashed (sum_nodes missing 'w' / division by None)
            # whenever g.ndata had no 'w'; fall back to a plain mean.
            mean_nodes = dgl.mean_nodes(g, 'h')
        else:
            mean_nodes = dgl.sum_nodes(g, 'h', 'w') / sum_weights
        h_global = self.global_update(torch.cat((h_global, mean_nodes), dim=1))
        h_global = self.global_decoder(h_global)
        return self.classify(h_global)
1226
-
1227
class Attention_Edge_Network(nn.Module):
    """Edge-network GNN where each processing step first refines node
    embeddings with multi-head self-attention over the padded batch."""

    def __init__(self, sample_graph, sample_global, hid_size, out_size,
                 n_layers, n_proc_steps, dropout=0, num_heads=1, **kwargs):
        super().__init__()
        print(f'Unused args while creating GCN: {kwargs}')
        self.n_layers = n_layers
        self.n_proc_steps = n_proc_steps
        self.layers = nn.ModuleList()
        self.has_global = sample_global.shape[1] != 0
        gl_size = sample_global.shape[1] if self.has_global else 1

        # encoder
        self.node_encoder = Make_MLP(sample_graph.ndata['features'].shape[1], hid_size, hid_size, n_layers, dropout=dropout)
        self.edge_encoder = Make_MLP(sample_graph.edata['features'].shape[1], hid_size, hid_size, n_layers, dropout=dropout)
        self.global_encoder = Make_MLP(gl_size, hid_size, hid_size, n_layers, dropout=dropout)

        # GNN
        self.node_update = Make_MLP(3*hid_size, hid_size, hid_size, n_layers, dropout=dropout)
        self.edge_update = Make_MLP(4*hid_size, hid_size, hid_size, n_layers, dropout=dropout)
        self.global_update = Make_MLP(3*hid_size, hid_size, hid_size, n_layers, dropout=dropout)

        # decoder
        self.global_decoder = Make_MLP(hid_size, hid_size, hid_size, n_layers, dropout=dropout)
        self.classify = nn.Linear(hid_size, out_size)

        # attention
        self.multihead_attn = nn.MultiheadAttention(hid_size, num_heads, dropout=dropout, batch_first=True)
        self.queries = nn.Linear(hid_size, hid_size)
        self.keys = nn.Linear(hid_size, hid_size)
        self.values = nn.Linear(hid_size, hid_size)

    def forward(self, g, global_feats):
        """Classify a batched, padded graph; requires g.ndata['padding_mask']
        and g.ndata['w'] (weighted node mean)."""
        h = self.node_encoder(g.ndata['features'])
        e = self.edge_encoder(g.edata['features'])
        g.ndata['h'] = h
        g.edata['e'] = e

        if not self.has_global:
            global_feats = g.batch_num_nodes()[:, None].to(torch.float)
        h_global = self.global_encoder(global_feats)

        h = g.ndata['h']
        h_original_shape = h.shape
        num_graphs = len(dgl.unbatch(g))
        # NOTE(review): assumes every graph in the batch is padded to the
        # same node count — confirm against the dataset collation.
        num_nodes = g.batch_num_nodes()[0].item()
        padding_mask = g.ndata['padding_mask'] > 0
        padding_mask = torch.reshape(padding_mask, (num_graphs, num_nodes))

        for i in range(self.n_proc_steps):
            # Self-attention over each graph's (padded) node set.
            h = g.ndata['h']
            query = self.queries(h)
            key = self.keys(h)
            value = self.values(h)
            query = torch.reshape(query, (num_graphs, num_nodes, h_original_shape[1]))
            key = torch.reshape(key, (num_graphs, num_nodes, h_original_shape[1]))
            value = torch.reshape(value, (num_graphs, num_nodes, h_original_shape[1]))
            h, _ = self.multihead_attn(query, key, value, key_padding_mask=padding_mask)
            h = torch.reshape(h, h_original_shape)
            # BUG FIX: the original computed the attention output and then
            # discarded it (never wrote it back before the edge/node update,
            # making the attention a dead computation every step). Write it
            # back so the message passing consumes the attended embeddings.
            g.ndata['h'] = h

            g.apply_edges(dgl.function.copy_u('h', 'm_u'))
            g.apply_edges(copy_v)
            g.edata['e'] = self.edge_update(torch.cat((g.edata['e'], g.edata['m_u'], g.edata['m_v'], broadcast_global_to_edges(g, h_global)), dim=1))
            g.update_all(dgl.function.copy_e('e', 'm'), dgl.function.sum('m', 'h_e'))
            g.ndata['h'] = self.node_update(torch.cat((g.ndata['h'], g.ndata['h_e'], broadcast_global_to_nodes(g, h_global)), dim=1))
            h_global = self.global_update(torch.cat((h_global, dgl.mean_nodes(g, 'h', 'w'), dgl.mean_edges(g, 'e')), dim=1))
        h_global = self.global_decoder(h_global)
        return self.classify(h_global)
1295
-
1296
class Attention_Unbatched(nn.Module):
    """Edge-network GNN with per-graph self-attention: each processing step
    unbatches the graph and attends over each graph's nodes separately
    (correct but slow — one attention call per graph per step)."""

    def __init__(self, sample_graph, sample_global, hid_size, out_size,
                 n_layers, n_proc_steps, dropout=0, num_heads=1, **kwargs):
        super().__init__()
        print(f'Unused args while creating GCN: {kwargs}')
        self.n_layers = n_layers
        self.n_proc_steps = n_proc_steps
        self.layers = nn.ModuleList()
        self.has_global = sample_global.shape[1] != 0
        gl_size = sample_global.shape[1] if self.has_global else 1

        # encoder
        self.node_encoder = Make_MLP(sample_graph.ndata['features'].shape[1], hid_size, hid_size, n_layers, dropout=dropout)
        self.edge_encoder = Make_MLP(sample_graph.edata['features'].shape[1], hid_size, hid_size, n_layers, dropout=dropout)
        self.global_encoder = Make_MLP(gl_size, hid_size, hid_size, n_layers, dropout=dropout)

        # GNN
        self.node_update = Make_MLP(3*hid_size, hid_size, hid_size, n_layers, dropout=dropout)
        self.edge_update = Make_MLP(4*hid_size, hid_size, hid_size, n_layers, dropout=dropout)
        self.global_update = Make_MLP(3*hid_size, hid_size, hid_size, n_layers, dropout=dropout)

        # decoder
        self.global_decoder = Make_MLP(hid_size, hid_size, hid_size, n_layers, dropout=dropout)
        self.classify = nn.Linear(hid_size, out_size)

        # attention
        # BUG FIX: the original hard-coded 1 head, silently ignoring the
        # num_heads argument; the default (1) keeps old behavior.
        self.multihead_attn = nn.MultiheadAttention(hid_size, num_heads, dropout=dropout)
        self.queries = nn.Linear(hid_size, hid_size)
        self.keys = nn.Linear(hid_size, hid_size)
        self.values = nn.Linear(hid_size, hid_size)

    def forward(self, g, global_feats):
        """Classify a batched graph; no padding mask needed since attention
        runs per unbatched graph."""
        h = self.node_encoder(g.ndata['features'])
        e = self.edge_encoder(g.edata['features'])
        g.ndata['h'] = h
        g.edata['e'] = e

        if not self.has_global:
            global_feats = g.batch_num_nodes()[:, None].to(torch.float)
        h_global = self.global_encoder(global_feats)

        for i in range(self.n_proc_steps):
            # Attend over each graph's own nodes, then re-batch.
            unbatched_g = dgl.unbatch(g)
            for graph in unbatched_g:
                h = graph.ndata['h']
                h, _ = self.multihead_attn(self.queries(h), self.keys(h), self.values(h))
                graph.ndata['h'] = h
            g = dgl.batch(unbatched_g)

            g.apply_edges(dgl.function.copy_u('h', 'm_u'))
            g.apply_edges(copy_v)
            g.edata['e'] = self.edge_update(torch.cat((g.edata['e'], g.edata['m_u'], g.edata['m_v'], broadcast_global_to_edges(g, h_global)), dim=1))
            g.update_all(dgl.function.copy_e('e', 'm'), dgl.function.sum('m', 'h_e'))
            g.ndata['h'] = self.node_update(torch.cat((g.ndata['h'], g.ndata['h_e'], broadcast_global_to_nodes(g, h_global)), dim=1))
            h_global = self.global_update(torch.cat((h_global, dgl.mean_nodes(g, 'h'), dgl.mean_edges(g, 'e')), dim=1))
        h_global = self.global_decoder(h_global)
        return self.classify(h_global)
1357
-
1358
class Transferred_Learning_Attention(nn.Module):
    """Attention head on top of a pretrained GNN's node/global encoders and
    global decoder, with per-section learning rates via ``parameters()``."""

    def __init__(self, pretraining_path, pretraining_model, sample_graph, sample_global,
                 hid_size, out_size, n_layers, n_proc_steps, num_heads,
                 dropout=0, learning_rate=0.0001, **kwargs):
        super().__init__()
        print(f'Unused args while creating GCN: {kwargs}')
        self.n_layers = n_layers
        self.n_proc_steps = n_proc_steps
        # Kept so the weighted-mean width tracks the model width instead of
        # the previously hard-coded 64.
        self.hid_size = hid_size
        self.layers = nn.ModuleList()
        self.has_global = sample_global.shape[1] != 0
        gl_size = sample_global.shape[1] if self.has_global else 1

        # Scalar, or dict with 'pretraining_lr' / 'attention_lr' keys.
        self.learning_rate = learning_rate

        self.pretraining_params = []
        self.attention_params = []

        # Rebuild the pretrained model, load its weights, drop its head.
        self.pretrained_model = utils.buildFromConfig(pretraining_model, {'sample_graph': sample_graph, 'sample_global': sample_global})
        checkpoint = torch.load(pretraining_path)
        self.pretrained_model.load_state_dict(checkpoint['model_state_dict'])
        pretrained_layers = list(self.pretrained_model.children())
        pretrained_layers = pretrained_layers[:-1]
        self.pretrained_model = nn.Sequential(*pretrained_layers)

        # Only the reused pretrained sections get the 'pretraining_lr'.
        self.pretraining_params.append(self.pretrained_model[1])
        self.pretraining_params.append(self.pretrained_model[3])
        self.pretraining_params.append(self.pretrained_model[7])

        # attention
        self.multihead_attn = nn.MultiheadAttention(hid_size, num_heads, dropout=dropout, batch_first=True)
        self.queries = nn.Linear(hid_size, hid_size)
        self.keys = nn.Linear(hid_size, hid_size)
        self.values = nn.Linear(hid_size, hid_size)

        self.node_update = Make_MLP(2*hid_size, hid_size, hid_size, n_layers, dropout=dropout)
        self.global_update = Make_MLP(2*hid_size, hid_size, hid_size, n_layers, dropout=dropout)

        self.classify = nn.Linear(pretraining_model['args']['hid_size'], out_size)

        self.attention_params.append(self.multihead_attn)
        self.attention_params.append(self.queries)
        self.attention_params.append(self.keys)
        self.attention_params.append(self.values)
        self.attention_params.append(self.classify)
        self.attention_params.append(self.node_update)
        self.attention_params.append(self.global_update)

    # --- Thin wrappers over the reused pretrained children. ---
    def TL_node_encoder(self, x):
        return self.pretrained_model[1](x)

    def TL_global_encoder(self, x):
        return self.pretrained_model[3](x)

    def TL_global_decoder(self, x):
        return self.pretrained_model[7](x)

    def forward(self, g, global_feats):
        """Classify a batched, padded graph ``g``.

        Requires g.ndata['padding_mask']; when g.ndata['w'] exists, node
        means are weighted by 'w' and normalized by the count of non-padded
        nodes per graph.
        """
        h = self.TL_node_encoder(g.ndata['features'])
        g.ndata['h'] = h

        if not self.has_global:
            global_feats = g.batch_num_nodes()[:, None].to(torch.float)

        batch_num_nodes = None
        sum_weights = None
        if "w" in g.ndata:
            batch_indices = g.batch_num_nodes()
            # Find non-zero rows (non-padded nodes).
            non_padded_nodes_mask = torch.any(g.ndata['features'] != 0, dim=1)
            # Count non-padded nodes per graph in the batch.
            batch_num_nodes = []
            start_idx = 0
            for num_nodes in batch_indices:
                end_idx = start_idx + num_nodes
                non_padded_count = non_padded_nodes_mask[start_idx:end_idx].sum().item()
                batch_num_nodes.append(non_padded_count)
                start_idx = end_idx
            batch_num_nodes = torch.tensor(batch_num_nodes, device=g.ndata['features'].device)
            # BUG FIX: the divisor width was hard-coded to 64; use hid_size
            # so any model width works.
            sum_weights = batch_num_nodes[:, None].repeat(1, self.hid_size)
            # Replace the global feature with the true (non-padded) node count.
            global_feats = batch_num_nodes[:, None].to(torch.float)

        h_global = self.TL_global_encoder(global_feats)

        h_original_shape = h.shape
        num_graphs = len(dgl.unbatch(g))
        # NOTE(review): assumes every graph in the batch is padded to the
        # same node count — confirm against the dataset collation.
        num_nodes = g.batch_num_nodes()[0].item()
        padding_mask = g.ndata['padding_mask'] > 0
        padding_mask = torch.reshape(padding_mask, (num_graphs, num_nodes))

        # Self-attention over the nodes of each graph (padded keys masked).
        h = g.ndata['h']
        query = self.queries(h)
        key = self.keys(h)
        value = self.values(h)
        query = torch.reshape(query, (num_graphs, num_nodes, h_original_shape[1]))
        key = torch.reshape(key, (num_graphs, num_nodes, h_original_shape[1]))
        value = torch.reshape(value, (num_graphs, num_nodes, h_original_shape[1]))
        h, _ = self.multihead_attn(query, key, value, key_padding_mask=padding_mask)
        h = torch.reshape(h, h_original_shape)

        h = self.node_update(torch.cat((h, broadcast_global_to_nodes(g, h_global)), dim=1))
        g.ndata['h'] = h
        if sum_weights is None:
            # ROBUSTNESS: the original unconditionally computed the weighted
            # mean and crashed (sum_nodes missing 'w' / division by None)
            # whenever g.ndata had no 'w'; fall back to a plain mean.
            mean_nodes = dgl.mean_nodes(g, 'h')
        else:
            mean_nodes = dgl.sum_nodes(g, 'h', 'w') / sum_weights
        h_global = self.global_update(torch.cat((h_global, mean_nodes), dim=1))
        h_global = self.TL_global_decoder(h_global)
        return self.classify(h_global)

    def parameters(self, recurse: bool = True):
        """Return optimizer parameter groups with separate learning rates for
        the pretrained ('pretraining_lr') and attention ('attention_lr')
        sections.

        NOTE: this override returns param-group dicts, not a parameter
        iterator — it is intended to be passed directly to an optimizer.
        """
        # BUG FIXES vs. the original: isinstance instead of `type(...) ==
        # dict`; .get() so a dict missing a key falls back instead of raising
        # KeyError; honor a scalar learning_rate (default 0.0001 unchanged).
        default_lr = self.learning_rate if isinstance(self.learning_rate, (int, float)) else 0.0001
        params = []
        for model_section in self.pretraining_params:
            lr = self.learning_rate.get("pretraining_lr") if isinstance(self.learning_rate, dict) else None
            params.append({'params': model_section.parameters(), 'lr': lr if lr else default_lr})
        for model_section in self.attention_params:
            lr = self.learning_rate.get("attention_lr") if isinstance(self.learning_rate, dict) else None
            params.append({'params': model_section.parameters(), 'lr': lr if lr else default_lr})
        return params
1483
-
1484
class Multimodel_Transferred_Learning(nn.Module):
    """Ensemble of (optionally frozen) pretrained GNN encoders.

    Each pretrained model produces a per-graph global embedding; the
    embeddings are concatenated and fed through a fresh MLP + linear
    classifier. Only the MLP/classifier (and, if unfrozen, the pretrained
    weights) are trained.
    """

    def __init__(self, pretraining_path, pretraining_model, sample_graph, sample_global,
                 hid_size, out_size, n_layers, n_proc_steps, dropout=0,
                 frozen_pretraining=True, learning_rate=None, **kwargs):
        super().__init__()
        print(f'Unused args while creating GCN: {kwargs}')
        self.n_layers = n_layers
        self.n_proc_steps = n_proc_steps
        self.layers = nn.ModuleList()
        self.has_global = sample_global.shape[1] != 0
        gl_size = sample_global.shape[1] if self.has_global else 1

        self.learning_rate = learning_rate
        input_size = 0  # summed hidden sizes of all pretrained models

        self.pretraining_params = []  # sections trained at the pretraining LR
        self.model_params = []        # sections trained at the model LR

        self.pretrained_models = []
        for model, path in zip(pretraining_model, pretraining_path):
            input_size += model['args']['hid_size']
            model = utils.buildFromConfig(model, {'sample_graph': sample_graph,
                                                 'sample_global': sample_global})

            checkpoint = torch.load(path)['model_state_dict']
            # Strip DataParallel's 'module.' prefix so keys match.
            new_state_dict = {k.replace('module.', ''): v for k, v in checkpoint.items()}
            model.load_state_dict(new_state_dict)
            # Drop the final (classification) child; keep the encoder stack.
            pretrained_layers = list(model.children())[:-1]
            model = nn.Sequential(*pretrained_layers)

            print(f"Freeze Pretraining = {frozen_pretraining}")
            if frozen_pretraining:
                for param in model.parameters():
                    param.requires_grad = False  # Freeze all layers
            self.pretraining_params.append(model)
            self.pretrained_models.append(model)

        print(f"len(pretrained_models) = {len(self.pretrained_models)}")
        print(f"input size = {input_size}")

        self.final_mlp = Make_MLP(input_size, hid_size, hid_size, n_layers, dropout=dropout)
        self.classify = nn.Linear(hid_size, out_size)

        self.model_params.append(self.final_mlp)
        self.model_params.append(self.classify)

    def _apply_pretrained_section(self, x, model_idx, section):
        """Run ``x`` through child ``section`` of pretrained model ``model_idx``.

        Pretrained checkpoints come in two layouts (sections at the top level,
        or nested one level deeper inside child 1); fall back to the nested
        layout when the flat one is absent. Consolidates seven previously
        copy-pasted TL_* walkers.
        """
        try:
            for layer in self.pretrained_models[model_idx][section]:
                x = layer(x)
            return x
        except (NotImplementedError, IndexError):
            for layer in self.pretrained_models[model_idx][1][section]:
                x = layer(x)
            return x

    def TL_node_encoder(self, x, model_idx):
        return self._apply_pretrained_section(x, model_idx, 1)

    def TL_edge_encoder(self, x, model_idx):
        return self._apply_pretrained_section(x, model_idx, 2)

    def TL_global_encoder(self, x, model_idx):
        return self._apply_pretrained_section(x, model_idx, 3)

    def TL_node_update(self, x, model_idx):
        return self._apply_pretrained_section(x, model_idx, 4)

    def TL_edge_update(self, x, model_idx):
        return self._apply_pretrained_section(x, model_idx, 5)

    def TL_global_update(self, x, model_idx):
        return self._apply_pretrained_section(x, model_idx, 6)

    def TL_global_decoder(self, x, model_idx):
        return self._apply_pretrained_section(x, model_idx, 7)

    def Pretrained_Output(self, g, model_idx, global_feats=None):
        """Full message-passing pass through pretrained model ``model_idx``.

        Returns the (un-decoded) per-graph global embedding.

        Fix: the original referenced ``global_feats`` without it being a
        parameter, which raised ``NameError`` whenever ``has_global`` was
        True; it is now an (optional, backward-compatible) argument.
        """
        h = self.TL_node_encoder(g.ndata['features'], model_idx)
        e = self.TL_edge_encoder(g.edata['features'], model_idx)
        g.ndata['h'] = h
        g.edata['e'] = e
        if not self.has_global:
            # Use node counts as a stand-in global feature.
            global_feats = g.batch_num_nodes()[:, None].to(torch.float)
        h_global = self.TL_global_encoder(global_feats, model_idx)
        for _ in range(self.n_proc_steps):
            g.apply_edges(dgl.function.copy_u('h', 'm_u'))
            g.apply_edges(copy_v)
            g.edata['e'] = self.TL_edge_update(
                torch.cat((g.edata['e'], g.edata['m_u'], g.edata['m_v'],
                           broadcast_global_to_edges(g, h_global)), dim=1), model_idx)
            g.update_all(dgl.function.copy_e('e', 'm'), dgl.function.sum('m', 'h_e'))
            g.ndata['h'] = self.TL_node_update(
                torch.cat((g.ndata['h'], g.ndata['h_e'],
                           broadcast_global_to_nodes(g, h_global)), dim=1), model_idx)
            h_global = self.TL_global_update(
                torch.cat((h_global, dgl.mean_nodes(g, 'h'), dgl.mean_edges(g, 'e')), dim=1), model_idx)
        # h_global = self.TL_global_decoder(h_global, model_idx)
        return h_global

    def forward(self, g, global_feats):
        """Concatenate every pretrained model's global embedding and classify."""
        h_global = []
        for i in range(len(self.pretrained_models)):
            # Clone so each pretrained pass sees untouched graph features.
            h_global.append(self.Pretrained_Output(g.clone(), i, global_feats))
        h_global = torch.cat(h_global, dim=1)
        return self.classify(self.final_mlp(h_global))

    def to(self, device):
        """Move pretrained models and trainable heads to ``device``."""
        for i in range(len(self.pretrained_models)):
            self.pretrained_models[i].to(device)
        self.classify.to(device)
        self.final_mlp.to(device)
        return self

    def parameters(self, recurse: bool = True):
        """Optimizer param groups: pretrained sections default to 1e-5,
        model sections to 1e-4, overridable via the ``learning_rate`` dict.

        Fix: ``dict.get`` instead of direct indexing, so a dict missing one
        key no longer raises ``KeyError``.
        """
        params = []
        for model_section in self.pretraining_params:
            lr = self.learning_rate.get("pretraining_lr") if isinstance(self.learning_rate, dict) else None
            params.append({'params': model_section.parameters(),
                           'lr': lr if lr else 0.00001})
        for model_section in self.model_params:
            lr = self.learning_rate.get("model_lr") if isinstance(self.learning_rate, dict) else None
            params.append({'params': model_section.parameters(),
                           'lr': lr if lr else 0.0001})
        return params
1649
-
1650
class MultiModel(nn.Module):
    """Trainable GNN plus an ensemble of (optionally frozen) pretrained GNNs.

    A fresh encoder/message-passing stack is trained from scratch; its global
    embedding is concatenated with each pretrained model's embedding before
    the final MLP + classifier.
    """

    def __init__(self, pretraining_path, pretraining_model, sample_graph, sample_global,
                 hid_size, out_size, n_layers, n_proc_steps, dropout=0,
                 frozen_pretraining=True, learning_rate=None, **kwargs):
        super().__init__()
        print(f'Unused args while creating GCN: {kwargs}')
        self.n_layers = n_layers
        self.n_proc_steps = n_proc_steps
        self.layers = nn.ModuleList()
        self.has_global = sample_global.shape[1] != 0
        gl_size = sample_global.shape[1] if self.has_global else 1

        self.learning_rate = learning_rate
        input_size = 0  # summed hidden sizes of the pretrained models

        self.model_params = []        # sections trained at the model LR
        self.pretraining_params = []  # sections trained at the pretraining LR

        self.pretrained_models = []
        for model, path in zip(pretraining_model, pretraining_path):
            input_size += model['args']['hid_size']
            model = utils.buildFromConfig(model, {'sample_graph': sample_graph,
                                                 'sample_global': sample_global})

            checkpoint = torch.load(path)['model_state_dict']
            # Strip DataParallel's 'module.' prefix so keys match.
            new_state_dict = {k.replace('module.', ''): v for k, v in checkpoint.items()}
            model.load_state_dict(new_state_dict)
            # Drop the final (classification) child; keep the encoder stack.
            pretrained_layers = list(model.children())[:-1]
            model = nn.Sequential(*pretrained_layers)

            print(f"Freeze Pretraining = {frozen_pretraining}")
            if frozen_pretraining:
                for param in model.parameters():
                    param.requires_grad = False  # Freeze all layers
            self.pretraining_params.append(model)
            self.pretrained_models.append(model)

        print(f"len(pretrained_models) = {len(self.pretrained_models)}")
        print(f"input size = {input_size}")

        # Trainable encoders
        self.node_encoder = Make_MLP(sample_graph.ndata['features'].shape[1], hid_size, hid_size, n_layers, dropout=dropout)
        self.edge_encoder = Make_MLP(sample_graph.edata['features'].shape[1], hid_size, hid_size, n_layers, dropout=dropout)
        self.global_encoder = Make_MLP(gl_size, hid_size, hid_size, n_layers, dropout=dropout)

        # Trainable message-passing updates
        self.node_update = Make_MLP(3 * hid_size, hid_size, hid_size, n_layers, dropout=dropout)
        self.edge_update = Make_MLP(4 * hid_size, hid_size, hid_size, n_layers, dropout=dropout)
        self.global_update = Make_MLP(3 * hid_size, hid_size, hid_size, n_layers, dropout=dropout)

        # Head: own embedding (hid_size) + all pretrained embeddings.
        self.final_mlp = Make_MLP(input_size + hid_size, hid_size, hid_size, n_layers, dropout=dropout)
        self.classify = nn.Linear(hid_size, out_size)

        self.model_params.append(self.final_mlp)
        self.model_params.append(self.classify)

    def _apply_pretrained_section(self, x, model_idx, section):
        """Run ``x`` through child ``section`` of pretrained model ``model_idx``.

        Falls back to the nested layout (child 1 wraps the sections) when the
        flat layout raises; consolidates seven copy-pasted TL_* walkers.
        """
        try:
            for layer in self.pretrained_models[model_idx][section]:
                x = layer(x)
            return x
        except (NotImplementedError, IndexError):
            for layer in self.pretrained_models[model_idx][1][section]:
                x = layer(x)
            return x

    def TL_node_encoder(self, x, model_idx):
        return self._apply_pretrained_section(x, model_idx, 1)

    def TL_edge_encoder(self, x, model_idx):
        return self._apply_pretrained_section(x, model_idx, 2)

    def TL_global_encoder(self, x, model_idx):
        return self._apply_pretrained_section(x, model_idx, 3)

    def TL_node_update(self, x, model_idx):
        return self._apply_pretrained_section(x, model_idx, 4)

    def TL_edge_update(self, x, model_idx):
        return self._apply_pretrained_section(x, model_idx, 5)

    def TL_global_update(self, x, model_idx):
        return self._apply_pretrained_section(x, model_idx, 6)

    def TL_global_decoder(self, x, model_idx):
        return self._apply_pretrained_section(x, model_idx, 7)

    def Pretrained_Output(self, g, model_idx, global_feats=None):
        """Full message-passing pass through pretrained model ``model_idx``.

        Fix: the original referenced ``global_feats`` without it being a
        parameter, raising ``NameError`` whenever ``has_global`` was True;
        it is now an optional, backward-compatible argument.
        """
        h = self.TL_node_encoder(g.ndata['features'], model_idx)
        e = self.TL_edge_encoder(g.edata['features'], model_idx)
        g.ndata['h'] = h
        g.edata['e'] = e
        if not self.has_global:
            global_feats = g.batch_num_nodes()[:, None].to(torch.float)
        h_global = self.TL_global_encoder(global_feats, model_idx)
        for _ in range(self.n_proc_steps):
            g.apply_edges(dgl.function.copy_u('h', 'm_u'))
            g.apply_edges(copy_v)
            g.edata['e'] = self.TL_edge_update(
                torch.cat((g.edata['e'], g.edata['m_u'], g.edata['m_v'],
                           broadcast_global_to_edges(g, h_global)), dim=1), model_idx)
            g.update_all(dgl.function.copy_e('e', 'm'), dgl.function.sum('m', 'h_e'))
            g.ndata['h'] = self.TL_node_update(
                torch.cat((g.ndata['h'], g.ndata['h_e'],
                           broadcast_global_to_nodes(g, h_global)), dim=1), model_idx)
            h_global = self.TL_global_update(
                torch.cat((h_global, dgl.mean_nodes(g, 'h'), dgl.mean_edges(g, 'e')), dim=1), model_idx)
        # h_global = self.TL_global_decoder(h_global, model_idx)
        return h_global

    def forward(self, g, global_feats):
        """Run the trainable GNN, append each pretrained embedding, classify."""
        h = self.node_encoder(g.ndata['features'])
        e = self.edge_encoder(g.edata['features'])
        g.ndata['h'] = h
        g.edata['e'] = e
        if not self.has_global:
            global_feats = g.batch_num_nodes()[:, None].to(torch.float)
        h_global = self.global_encoder(global_feats)
        for _ in range(self.n_proc_steps):
            g.apply_edges(dgl.function.copy_u('h', 'm_u'))
            g.apply_edges(copy_v)
            g.edata['e'] = self.edge_update(
                torch.cat((g.edata['e'], g.edata['m_u'], g.edata['m_v'],
                           broadcast_global_to_edges(g, h_global)), dim=1))
            g.update_all(dgl.function.copy_e('e', 'm'), dgl.function.sum('m', 'h_e'))
            g.ndata['h'] = self.node_update(
                torch.cat((g.ndata['h'], g.ndata['h_e'],
                           broadcast_global_to_nodes(g, h_global)), dim=1))
            h_global = self.global_update(
                torch.cat((h_global, dgl.mean_nodes(g, 'h'), dgl.mean_edges(g, 'e')), dim=1))
        h_global = [h_global]
        for i in range(len(self.pretrained_models)):
            # Clone so each pretrained pass sees untouched graph features.
            h_global.append(self.Pretrained_Output(g.clone(), i, global_feats))
        h_global = torch.cat(h_global, dim=1)
        return self.classify(self.final_mlp(h_global))

    def to(self, device):
        """Move pretrained models and all trainable sections to ``device``."""
        for i in range(len(self.pretrained_models)):
            self.pretrained_models[i].to(device)
        self.classify.to(device)
        self.final_mlp.to(device)
        self.node_encoder.to(device)
        self.edge_encoder.to(device)
        self.global_encoder.to(device)

        self.node_update.to(device)
        self.edge_update.to(device)
        self.global_update.to(device)
        return self

    def parameters(self, recurse: bool = True):
        """Optimizer param groups. ``learning_rate['pretraining_lr']`` is a
        per-model list; ``learning_rate['model_lr']`` a scalar. Defaults:
        1e-5 for pretrained sections, 1e-4 for model sections.

        Fix: ``dict.get`` instead of direct indexing, so a dict missing one
        key no longer raises ``KeyError``.
        """
        params = []
        for i, model_section in enumerate(self.pretraining_params):
            lrs = self.learning_rate.get("pretraining_lr") if isinstance(self.learning_rate, dict) else None
            if lrs:
                print(f"Pretraining LR = {lrs[i]}")
                params.append({'params': model_section.parameters(), 'lr': lrs[i]})
            else:
                print(f"Pretraining LR = 0.00001")
                params.append({'params': model_section.parameters(), 'lr': 0.00001})
        for model_section in self.model_params:
            lr = self.learning_rate.get("model_lr") if isinstance(self.learning_rate, dict) else None
            if lr:
                print(f"Model LR = {lr}")
                params.append({'params': model_section.parameters(), 'lr': lr})
            else:
                print(f"Model LR = 0.0001")
                params.append({'params': model_section.parameters(), 'lr': 0.0001})
        return params
1850
-
1851
class Clustering(nn.Module):
    """GNN encoder producing paired global embeddings for the original and
    augmented features of the same graph (for contrastive/clustering training).
    """

    def __init__(self, sample_graph, sample_global, hid_size, out_size,
                 n_layers, n_proc_steps, dropout=0, **kwargs):
        super().__init__()
        print(f'Unused args while creating GCN: {kwargs}')
        self.n_layers = n_layers
        self.n_proc_steps = n_proc_steps
        self.layers = nn.ModuleList()
        # sample_global may be an empty container; guard before .shape.
        if len(sample_global) == 0:
            self.has_global = False
        else:
            self.has_global = sample_global.shape[1] != 0
        gl_size = sample_global.shape[1] if self.has_global else 1

        # Encoders
        self.node_encoder = Make_MLP(sample_graph.ndata['features'].shape[1], hid_size, hid_size, n_layers, dropout=dropout)
        self.edge_encoder = Make_MLP(sample_graph.edata['features'].shape[1], hid_size, hid_size, n_layers, dropout=dropout)
        self.global_encoder = Make_MLP(gl_size, hid_size, hid_size, n_layers, dropout=dropout)

        # Message-passing updates
        self.node_update = Make_MLP(3 * hid_size, hid_size, hid_size, n_layers, dropout=dropout)
        self.edge_update = Make_MLP(4 * hid_size, hid_size, hid_size, n_layers, dropout=dropout)
        self.global_update = Make_MLP(3 * hid_size, hid_size, hid_size, n_layers, dropout=dropout)

        # Decoder
        self.global_decoder = Make_MLP(hid_size, hid_size, out_size, n_layers, dropout=dropout)

    def model_forward(self, g, global_feats, features='features'):
        """One full encode → message-pass → decode pass using the node/edge
        field named ``features`` ('features' or 'augmented_features')."""
        h = self.node_encoder(g.ndata[features])
        e = self.edge_encoder(g.edata[features])

        g.ndata['h'] = h
        g.edata['e'] = e
        if not self.has_global:
            global_feats = g.batch_num_nodes()[:, None].to(torch.float)

        batch_num_nodes = None
        sum_weights = None
        if "w" in g.ndata:
            batch_indices = g.batch_num_nodes()
            # Find non-zero rows (non-padded nodes).
            non_padded_nodes_mask = torch.any(g.ndata[features] != 0, dim=1)
            # Count real nodes per graph according to the batch boundaries.
            batch_num_nodes = []
            start_idx = 0
            for num_nodes in batch_indices:
                end_idx = start_idx + num_nodes
                batch_num_nodes.append(non_padded_nodes_mask[start_idx:end_idx].sum().item())
                start_idx = end_idx
            batch_num_nodes = torch.tensor(batch_num_nodes, device=g.ndata[features].device)
            # NOTE(review): 64 presumably equals hid_size — confirm before
            # changing the hidden width.
            sum_weights = batch_num_nodes[:, None].repeat(1, 64)
            global_feats = batch_num_nodes[:, None].to(torch.float)

        h_global = self.global_encoder(global_feats)
        for _ in range(self.n_proc_steps):
            g.apply_edges(dgl.function.copy_u('h', 'm_u'))
            g.apply_edges(copy_v)
            g.edata['e'] = self.edge_update(
                torch.cat((g.edata['e'], g.edata['m_u'], g.edata['m_v'],
                           broadcast_global_to_edges(g, h_global)), dim=1))
            g.update_all(dgl.function.copy_e('e', 'm'), dgl.function.sum('m', 'h_e'))
            g.ndata['h'] = self.node_update(
                torch.cat((g.ndata['h'], g.ndata['h_e'],
                           broadcast_global_to_nodes(g, h_global)), dim=1))
            if "w" in g.ndata:
                # Weighted mean over non-padded nodes only.
                mean_nodes = dgl.sum_nodes(g, 'h', 'w') / sum_weights
                h_global = self.global_update(
                    torch.cat((h_global, mean_nodes, dgl.mean_edges(g, 'e')), dim=1))
            else:
                h_global = self.global_update(
                    torch.cat((h_global, dgl.mean_nodes(g, 'h'), dgl.mean_edges(g, 'e')), dim=1))
        h_global = self.global_decoder(h_global)
        return h_global

    def forward(self, g, global_feats):
        """Concatenated embeddings of the original and augmented views."""
        h_global = self.model_forward(g, global_feats, 'features')
        h_global_augmented = self.model_forward(g, global_feats, 'augmented_features')
        return torch.cat((h_global, h_global_augmented), dim=1)

    def representation(self, g, global_feats):
        """Both view embeddings separately plus their concatenation."""
        h_global = self.model_forward(g, global_feats, 'features')
        h_global_augmented = self.model_forward(g, global_feats, 'augmented_features')
        return h_global, h_global_augmented, torch.cat((h_global, h_global_augmented), dim=1)

    def __str__(self):
        """Dump all Linear-layer weights for debugging; returns ''."""
        layer_names = ["node_encoder", "edge_encoder", "global_encoder",
                       "node_update", "edge_update", "global_update", "global_decoder"]

        layers = [self.node_encoder, self.edge_encoder, self.global_encoder,
                  self.node_update, self.edge_update, self.global_update, self.global_decoder]

        for name, section in zip(layer_names, layers):
            print(name)
            for layer in section.children():
                if isinstance(layer, nn.Linear):
                    print(layer.state_dict())

        # Fix: Clustering defines no `classify` attribute, so the original
        # unconditional access raised AttributeError; guard it.
        if hasattr(self, 'classify'):
            print("classify")
            print(self.classify.weight)
        return ""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
models/__pycache__/GCN.cpython-38.pyc DELETED
Binary file (57 kB)
 
models/__pycache__/loss.cpython-38.pyc DELETED
Binary file (11.4 kB)
 
models/loss.py DELETED
@@ -1,311 +0,0 @@
1
- from torch import nn
2
- import torch
3
- from root_gnn_base import utils
4
- import numpy as np
5
-
6
- class MaskedLoss():
7
- def __init__(self, mask = []):
8
- self.mask = mask
9
-
10
- def make_mask(self, targets):
11
- mask = torch.ones_like(targets[:,0])
12
- for m in self.mask:
13
- if m['op'] == 'eq':
14
- mask[targets[:,m['idx']] == m['val']] = 0
15
- elif m['op'] == 'gt':
16
- mask[targets[:,m['idx']] > m['val']] = 0
17
- elif m['op'] == 'lt':
18
- mask[targets[:,m['idx']] < m['val']] = 0
19
- elif m['op'] == 'ge':
20
- mask[targets[:,m['idx']] >= m['val']] = 0
21
- elif m['op'] == 'le':
22
- mask[targets[:,m['idx']] <= m['val']] = 0
23
- elif m['op'] == 'ne':
24
- mask[targets[:,m['idx']] != m['val']] = 0
25
- else:
26
- raise ValueError(f'Unknown mask op {m["op"]}')
27
- return mask == 1
28
-
29
- class MaskedL1Loss(MaskedLoss):
30
- def __init__(self, mask = [], index = 0):
31
- super().__init__(mask)
32
- self.index = index
33
- self.loss = nn.L1Loss()
34
-
35
- def __call__(self, logits, targets):
36
- mask = self.make_mask(targets)
37
- return self.loss(logits[mask], targets[mask][:,self.index])
38
-
39
- class BCEWithLogitsLoss():
40
- def __init__(self, weight=None, reduction='mean'):
41
- self.loss = nn.BCEWithLogitsLoss(weight=weight, reduction=reduction)
42
-
43
- def __call__(self, logits, targets):
44
- return self.loss(logits[:,0], targets.float())
45
-
46
- class MultiScore():
47
- def __init__(self, scores):
48
- self. score_fcns = []
49
- self.start_idx = []
50
- self.end_idx = []
51
- for score in scores:
52
- self.score_fcns.append(utils.buildFromConfig(score))
53
- self.start_idx.append(score['start_idx'])
54
- self.end_idx.append(score['end_idx'])
55
-
56
- def __call__(self, last_layer):
57
- scores = []
58
- for i in range(len(self.score_fcns)):
59
- scores.append(self.score_fcns[i](last_layer[:, self.start_idx[i]:self.end_idx[i]]))
60
- return torch.cat(scores, dim=1)
61
-
62
- class MultiLoss():
63
- def __init__(self, losses):
64
- self.loss_fcns = []
65
- self.label_start_idx = []
66
- self.label_end_idx = []
67
- self.output_start_idx = []
68
- self.output_end_idx = []
69
- self.weights = []
70
- self.label_types = []
71
- for loss in losses:
72
- self.loss_fcns.append(utils.buildFromConfig(loss))
73
- self.label_start_idx.append(loss['label_start_idx'])
74
- self.label_end_idx.append(loss['label_end_idx'])
75
- self.output_start_idx.append(loss['output_start_idx'])
76
- self.output_end_idx.append(loss['output_end_idx'])
77
- self.weights.append(loss.get('weight', 1.0))
78
- self.label_types.append(loss.get('label_type', 'float'))
79
-
80
- def __call__(self, logits, targets):
81
- loss = 0
82
- # print(logits.shape, targets.shape)
83
- for i in range(len(self.loss_fcns)):
84
- if self.label_types[i] == 'int':
85
- # print('loss', i, self.label_start_idx[i], self.label_end_idx[i], self.output_start_idx[i], self.output_end_idx[i])
86
- # print(logits[:, self.output_start_idx[i]:self.output_end_idx[i]].shape, targets[:, self.label_start_idx[i]].shape)
87
- loss += self.weights[i] * self.loss_fcns[i](logits[:, self.output_start_idx[i]:self.output_end_idx[i]], targets[:, self.label_start_idx[i]].to(int))
88
- elif self.label_end_idx[i] - self.label_start_idx[i] == 1:
89
- loss += self.weights[i] * self.loss_fcns[i](logits[:, self.output_start_idx[i]:self.output_end_idx[i]], targets[:, self.label_start_idx[i]])
90
- else:
91
- # print('loos', i, self.label_start_idx[i], self.label_end_idx[i], self.output_start_idx[i], self.output_end_idx[i])
92
- # print(logits[:, self.output_start_idx[i]:self.output_end_idx[i]].shape, targets[:, self.label_start_idx[i]:self.label_end_idx[i]].shape)
93
- loss += self.weights[i] * self.loss_fcns[i](logits[:, self.output_start_idx[i]:self.output_end_idx[i]], targets[:, self.label_start_idx[i]:self.label_end_idx[i]])
94
- return loss
95
-
96
- class AdvLoss():
97
- def __init__(self, loss, adv_loss, adv_weight=1.0):
98
- self.loss_fcn = utils.buildFromConfig(loss)
99
- self.adv_loss_fcn = utils.buildFromConfig(adv_loss)
100
- self.adv_weight = adv_weight
101
-
102
- def __call__(self, logits, targets):
103
- mask = targets[:,0] == 0
104
- loss = self.loss_fcn(logits[:,0], targets[:,0])
105
- adv_loss = self.adv_loss_fcn(logits[mask][:,1], targets[mask])
106
- return loss - self.adv_weight * adv_loss
107
-
108
- class MassWindowAdvLoss(AdvLoss):
109
- def __call__(self, logits, targets):
110
- mask = (targets[:,0] == 0) & (targets[:,1] > 5) & (targets[:,1] < 25)
111
- print(mask, mask.shape, mask.sum())
112
- loss = self.loss_fcn(logits[:,0], targets[:,0])
113
- print(loss)
114
- adv_loss = self.adv_loss_fcn(logits[mask][:,1], targets[mask][:,1])
115
- print(adv_loss)
116
- return loss - self.adv_weight * adv_loss
117
-
118
- class KDELoss(MaskedLoss):
119
- def __init__(self, mask = [], index = 0):
120
- self.index = index
121
- super().__init__(mask)
122
-
123
- def __call__(self, logits, targets):
124
- mask = self.make_mask(targets)
125
- logits = logits[mask]
126
- targets = targets[mask][:,self.index]
127
- N = logits.shape[0]
128
- masses = targets / torch.sqrt(torch.mean(targets**2))
129
- scores = logits[:,0] / torch.sqrt(torch.mean(logits**2))
130
-
131
- factor_2d = (1.0*N) ** (-2/6)
132
- covs = (factor_2d * torch.var(masses), factor_2d * torch.var(scores))
133
-
134
- m_diffs = torch.unsqueeze(masses, 1) - torch.unsqueeze(masses, 0)
135
- s_diffs = torch.unsqueeze(scores, 1) - torch.unsqueeze(scores, 0)
136
-
137
- ymm = torch.exp(- (m_diffs**2) / (4 * covs[0]))
138
- yss = torch.exp(- (s_diffs**2) / (4 * covs[1]))
139
-
140
- integral_rho_2d_rho_2d = torch.einsum('ij,ij->', ymm, yss)
141
- integral_rho_1d_rho_1d = torch.einsum('ij,kl->', ymm, yss)
142
- integral_rho_2d_rho_1d = torch.einsum('ij,ik->', ymm, yss)
143
- raw_integral = integral_rho_2d_rho_2d - 2 * integral_rho_2d_rho_1d / N + integral_rho_1d_rho_1d / N**2
144
- return raw_integral / (4 * torch.pi * N**2)
145
-
146
- class MultiLabelLoss():
147
- def __init__(self, label_names, label_types, label_weights = None):
148
- self.loss_fcn = []
149
- if (label_weights):
150
- self.weights = torch.tensor(label_weights)
151
- else:
152
- self.weights = torch.ones(len(label_types))
153
- for type in label_types:
154
- if (type == "r"):
155
- self.loss_fcn.append(torch.nn.MSELoss(reduce=False))
156
- elif (type == "c"):
157
- self.loss_fcn.append(torch.nn.BCEWithLogitsLoss())
158
- print(f"self.weights = {self.weights}")
159
-
160
- def __call__(self, logits, targets):
161
- targets = targets.float()
162
- loss = torch.zeros(len(logits[:, 0]), device = logits.get_device())
163
- for i in range(len(self.loss_fcn)):
164
- loss += self.weights[i] * self.loss_fcn[i](logits[:, i], targets[:, i])
165
- return torch.mean(loss)
166
-
167
-
168
- class MultiLabelFinish():
169
- def __init__(self, label_names, label_types):
170
- self.finish_fcn = []
171
- for type in label_types:
172
- if (type == "r"):
173
- self.finish_fcn.append(None)
174
- elif (type == "c"):
175
- self.finish_fcn.append(torch.special.expit)
176
-
177
- def __call__(self, logits):
178
- for i in range(len(self.finish_fcn)):
179
- if (self.finish_fcn[i]):
180
- logits[:, i] = self.finish_fcn[i](logits[:, i].to(torch.long))
181
- return logits
182
-
183
- class ContrastiveClusterLoss():
184
- def __init__(self, k=10, temperature=1, alpha=1):
185
- self.k = k
186
- self.temperature = temperature
187
- self.alpha = alpha
188
-
189
- def __call__(self, logits, targets):
190
- targets = targets.float()
191
- logits_combined = logits.float()
192
-
193
- hid_size = int(len(logits[0]) / 2)
194
-
195
- logits = normalize_embeddings(logits_combined[:, :hid_size])
196
- logits_augmented = normalize_embeddings(logits_combined[:, hid_size:])
197
-
198
- contrastive = contrastive_loss(logits, logits_augmented, self.temperature)
199
- clustering, _ = clustering_loss(logits, self.k)
200
-
201
- variance_loss = variance_regularization(logits) + variance_regularization(logits_augmented)
202
-
203
- return torch.mean(contrastive + clustering + self.alpha * variance_loss)
204
-
205
- class ContrastiveClusterFinish():
206
- def __init__(self, k = 10, temperature = 1, max_cluster_iterations = 10):
207
- self.k = k
208
- self.temperature = temperature
209
- self.max_cluster_iterations = max_cluster_iterations
210
-
211
- print(f"ContrastiveClusterFinish: k = {k}, temperature = {temperature}")
212
-
213
- def __call__(self, logits):
214
- logits_combined = logits.float()
215
-
216
- hid_size = int(len(logits[0]) / 2)
217
-
218
- logits = logits_combined[:, :hid_size]
219
- logits_augmented = logits_combined[:, hid_size:]
220
-
221
- contrastive = contrastive_loss(logits, logits_augmented, self.temperature)
222
- clustering, _ = clustering_loss(logits, self.k, self.max_cluster_iterations)
223
- variance = variance_regularization(logits) + variance_regularization(logits_augmented)
224
-
225
- return contrastive, clustering, variance
226
-
227
- def s(z_i, z_j):
228
- z_i = torch.tensor(z_i) if not isinstance(z_i, torch.Tensor) else z_i
229
- z_j = torch.tensor(z_j) if not isinstance(z_j, torch.Tensor) else z_j
230
-
231
- return torch.cdist(z_i, z_j, p=2)
232
- # dot_product = torch.dot(z_i, z_j)
233
- # norm_i = torch.linalg.norm(z_i)
234
- # norm_j = torch.linalg.norm(z_j)
235
-
236
- # return dot_product / (norm_i * norm_j)
237
-
238
- def contrastive_loss(logits, logits_augmented, temperature=1, margin=1.0):
239
- logits = torch.tensor(logits) if not isinstance(logits, torch.Tensor) else logits
240
- logits_augmented = torch.tensor(logits_augmented) if not isinstance(logits_augmented, torch.Tensor) else logits_augmented
241
-
242
- z = torch.cat((logits, logits_augmented), dim=0)
243
- similarity_matrix = torch.mm(z, z.t()) / temperature
244
- norms = torch.linalg.norm(z, dim=1)
245
- norm_matrix = torch.ger(norms, norms)
246
- similarity_matrix = similarity_matrix / norm_matrix
247
- mask = torch.eye(similarity_matrix.size(0), dtype=torch.bool)
248
-
249
- loss = 0
250
- for k in range(len(logits)):
251
- numerator = torch.exp(similarity_matrix[k, k + len(logits)])
252
- denominator = torch.sum(torch.exp(similarity_matrix[k, ~mask[k]]))
253
-
254
- loss += -torch.log(numerator / denominator)
255
-
256
- return loss
257
-
258
-
259
- def clustering_loss(logits, k=10, max_iterations=10):
260
- # Step 1: Initialize cluster means
261
- indices = torch.randperm(logits.size(0))[:k]
262
- cluster_means = logits[indices]
263
-
264
- prev_assignments = None
265
- assignment_history = []
266
- iteration = 0
267
-
268
- while iteration < max_iterations:
269
- iteration += 1
270
-
271
- # Step 2: Assign each data point to the nearest cluster mean
272
- distances = torch.cdist(logits, cluster_means, p=2) # Compute distances between logits and cluster means
273
- cluster_assignments = torch.argmin(distances, dim=1) # Assign each point to the nearest cluster mean
274
-
275
- # Check for convergence: if assignments do not change, break the loop
276
- if prev_assignments is not None and torch.equal(cluster_assignments, prev_assignments):
277
- break
278
-
279
- # Check for cycles: if assignments have been seen before, break the loop
280
- if any(torch.equal(cluster_assignments, prev) for prev in assignment_history):
281
- break
282
-
283
- assignment_history.append(cluster_assignments.clone())
284
- prev_assignments = cluster_assignments.clone()
285
-
286
- # Step 3: Update cluster means based on assignments
287
- new_cluster_means = torch.zeros_like(cluster_means)
288
- for i in range(k):
289
- assigned_points = logits[cluster_assignments == i]
290
- if assigned_points.size(0) > 0:
291
- new_cluster_means[i] = assigned_points.mean(dim=0)
292
- else:
293
- # If no points are assigned to the cluster, reinitialize the mean randomly
294
- new_cluster_means[i] = logits[torch.randint(0, logits.size(0), (1,)).item()]
295
- cluster_means = new_cluster_means
296
-
297
- # Step 4: Compute the clustering loss
298
- distances = torch.cdist(logits, cluster_means, p=2)
299
- min_distances = torch.min(distances, dim=1)[0]
300
- loss = torch.sum(min_distances ** 2)
301
-
302
- return loss, cluster_means
303
-
304
- def normalize_embeddings(embeddings):
305
- return embeddings / embeddings.norm(dim=1, keepdim=True)
306
-
307
- def variance_regularization(embeddings):
308
- mean_embedding = embeddings.mean(dim=0)
309
- variance = ((embeddings - mean_embedding) ** 2).mean()
310
- return variance
311
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
root_gnn_base/batched_dataset.py DELETED
@@ -1,190 +0,0 @@
1
- from dgl.dataloading import GraphDataLoader
2
- from torch.utils.data.sampler import SubsetRandomSampler
3
- from torch.utils.data.sampler import SequentialSampler
4
- from dgl.data import DGLDataset
5
- import torch
6
- import time
7
- import os
8
- import dgl
9
- from root_gnn_base import utils
10
-
11
- def GetBatchedLoader(dataset, batch_size, mask_fn = None, drop_last=True, **kwargs):
12
- if mask_fn == None:
13
- mask_fn = lambda x: torch.ones(len(x), dtype=torch.bool)
14
- dloader = GraphDataLoader(dataset, sampler=SubsetRandomSampler(torch.arange(len(dataset))[mask_fn(dataset)]), batch_size=batch_size, drop_last=drop_last, num_workers = 0)
15
- return dloader
16
-
17
- #Dataset which contains prebatched shuffled graphs. Cannot be saved to disk, else batching info is lost.
18
- class PreBatchedDataset(DGLDataset):
19
- def __init__(self, start_dataset, batch_size, mask_fn = None, drop_last=True, save_to_disk = True, suffix = '', chunks = 1, chunkno = -1, shuffle = True, padding_mode = 'NONE', **kwargs):
20
- print(f'Unused kwargs: {kwargs}')
21
- self.start_dataset = start_dataset
22
- self.start_dataset.load()
23
-
24
- self.batch_size = batch_size
25
- self.chunks = chunks
26
- self.chunkno = chunkno
27
- self.mask_fn = mask_fn
28
- self.drop_last = drop_last
29
- self.graphs = []
30
- self.label = []
31
- self.padding_mode = padding_mode
32
- self.save_to_disk = save_to_disk
33
- self.shuffle = shuffle
34
- self.suffix = suffix
35
- self.current_chunk = None
36
- self.current_chunk_idx = -1
37
- super().__init__(name = start_dataset.name + '_prebatched_padded', save_dir=start_dataset.save_dir)
38
-
39
- def process(self):
40
- first = 0
41
- last = len(self.start_dataset)
42
- if self.chunks > 1 and self.chunkno >= 0:
43
- first = int(self.chunkno / self.chunks * len(self.start_dataset))
44
- last = int((self.chunkno + 1) / self.chunks * len(self.start_dataset))
45
- print(f'Processing chunk {self.chunkno} of {self.chunks} from {first} to {last} of {len(self.start_dataset)}')
46
- mask = torch.logical_and(torch.logical_and(self.mask_fn(self.start_dataset), torch.arange(len(self.start_dataset)) >= first), torch.arange(len(self.start_dataset)) < last)
47
- if self.shuffle:
48
- dloader = GraphDataLoader(self.start_dataset, sampler=SubsetRandomSampler(torch.arange(len(self.start_dataset))[mask]), batch_size=self.batch_size, drop_last=self.drop_last)
49
- else: #Only don't shuffle if we're doing inference. Then we want all of the events anyways?
50
- dloader = GraphDataLoader(self.start_dataset, sampler=SequentialSampler(self.start_dataset), batch_size=self.batch_size, drop_last=self.drop_last)
51
- self.graphs = []
52
- self.labels = []
53
- self.tracking = []
54
- self.globals = []
55
- self.batch_num_nodes = []
56
- self.batch_num_edges = []
57
- max_edges = 0
58
- max_nodes = 0
59
- load_batch_start = time.time()
60
- for batch, label, tracking, global_feat in dloader:
61
- if batch.num_edges() > max_edges:
62
- max_edges = batch.num_edges()
63
- if batch.num_nodes() > max_nodes:
64
- max_nodes = batch.num_nodes()
65
- self.graphs.append(batch)
66
- self.labels.append(label)
67
- self.tracking.append(tracking)
68
- self.globals.append(global_feat)
69
- load_batch_end = time.time()
70
- print(f'Loaded {len(self.graphs)} batches in {load_batch_end - load_batch_start} seconds')
71
- if self.padding_mode == 'STEPS':
72
- pad_node, pad_edge = utils.pad_size(self.batch_size, max_edges, max_nodes)
73
- elif self.padding_mode == 'FIXED':
74
- print('Padding to fixed size. This is currently hardcoded.')
75
- pad_node = 16000
76
- pad_edge = 104000
77
- elif self.padding_mode == 'NONE':
78
- pad_node = 0
79
- pad_edge = 0
80
- else:
81
- pad_node = 0
82
- pad_edge = 0
83
- print(f'Max edges: {max_edges}, Max nodes: {max_nodes}, Padding to {pad_edge} edges and {pad_node} nodes')
84
- pad_start = time.time()
85
- if self.padding_mode == 'NODE':
86
- for i in range(len(self.graphs)):
87
- unbatched_g = dgl.unbatch(self.graphs[i])
88
- max_num_nodes = max(g.number_of_nodes() for g in unbatched_g)
89
- self.graphs[i] = utils.pad_batch_num_nodes(self.graphs[i], max_num_nodes)
90
- self.batch_num_nodes.append(self.graphs[i].batch_num_nodes())
91
- self.batch_num_edges.append(self.graphs[i].batch_num_edges())
92
- else:
93
- for i in range(len(self.graphs)):
94
- self.graphs[i] = utils.pad_batch(self.graphs[i], pad_edge, pad_node)
95
- self.batch_num_nodes.append(self.graphs[i].batch_num_nodes())
96
- self.batch_num_edges.append(self.graphs[i].batch_num_edges())
97
- pad_end = time.time()
98
- print(f'Padded {len(self.graphs)} batches in {pad_end - pad_start} seconds')
99
-
100
- def save(self):
101
- if not self.save_to_disk:
102
- return
103
- graph_path = os.path.join(self.save_dir, f'{self.name}_{self.chunkno}_{self.suffix}.bin')
104
- print(f'Saving dataset to {graph_path}')
105
- if len(self.graphs) == 0:
106
- return
107
- dgl.save_graphs(str(graph_path), self.graphs, {'labels': torch.stack(self.labels), 'batch_num_nodes': torch.stack(self.batch_num_nodes), 'batch_num_edges': torch.stack(self.batch_num_edges), 'tracking': torch.stack(self.tracking), 'globals': torch.stack(self.globals)})
108
-
109
- def has_cache(self):
110
- if not self.save_to_disk:
111
- return False
112
- for ch in range(self.chunks):
113
- graph_path = os.path.join(self.save_dir, f'{self.name}_{ch}_{self.suffix}.bin')
114
- if not os.path.exists(graph_path):
115
- print(f'Cache file {graph_path} does not exist, not loading from cache.')
116
- return False
117
- return True
118
-
119
- def load(self):
120
- if not self.save_to_disk:
121
- return
122
- self.graphs = []
123
- label_chunks = []
124
- tracking_chunks = []
125
- global_chunks = []
126
- for ch in range(self.chunks):
127
- graph_path = os.path.join(self.save_dir, f'{self.name}_{ch}_{self.suffix}.bin')
128
- print(f'Loading dataset from {graph_path}')
129
- graphs, label_dict = dgl.load_graphs(graph_path)
130
- label_chunks.append(label_dict['labels'])
131
- tracking_chunks.append(label_dict['tracking'])
132
- global_chunks.append(label_dict['globals'])
133
- for g, bnn, bne in zip(graphs, label_dict['batch_num_nodes'], label_dict['batch_num_edges']):
134
- g.set_batch_num_nodes(bnn)
135
- g.set_batch_num_edges(bne)
136
- self.graphs.extend(graphs)
137
- self.labels = torch.cat(label_chunks)
138
- self.tracking = torch.cat(tracking_chunks)
139
- self.globals = torch.cat(global_chunks)
140
-
141
- def __getitem__(self, idx):
142
- return self.graphs[idx], self.labels[idx], self.tracking[idx], self.globals[idx]
143
-
144
- def __len__(self):
145
- return len(self.graphs)
146
-
147
- #Dataset which contains prebatched shuffled graphs. Cannot be saved to disk, else batching info is lost.
148
- class LazyPreBatchedDataset(PreBatchedDataset):
149
- def __init__(self, **kwargs):
150
- # print(f'Unused kwargs: {kwargs}')
151
- self.current_chunk = None
152
- self.current_chunk_idx = -10
153
- self.label_chunks = []
154
- super().__init__(**kwargs)
155
-
156
- def load(self):
157
- if not self.save_to_disk:
158
- return
159
- self.label_chunks = []
160
- for ch in range(self.chunks):
161
- graph_path = os.path.join(self.save_dir, f'{self.name}_{ch}_{self.suffix}.bin')
162
- print(f'Loading dataset from {graph_path}')
163
- label_dict = dgl.data.graph_serialize.load_labels_v2(graph_path)
164
- self.label_chunks.append(label_dict)
165
-
166
- def __getitem__(self, idx):
167
- chunk_idx = -1
168
- sum = 0
169
- ev_idx = -999
170
- for i in range(len(self.label_chunks)):
171
- count = len(self.label_chunks[i]['labels'])
172
- if idx < sum + count:
173
- chunk_idx = i
174
- ev_idx = idx - sum
175
- break
176
- sum += count
177
- if chunk_idx != self.current_chunk_idx:
178
- # print(f"rank {self.rank} getting data from {self.name}_{chunk_idx}_{self.suffix}.bin")
179
- self.current_chunk, _ = dgl.load_graphs(os.path.join(self.save_dir, f'{self.name}_{chunk_idx}_{self.suffix}.bin'))
180
- self.current_chunk_idx = chunk_idx
181
- g = self.current_chunk[ev_idx]
182
- g.set_batch_num_nodes(self.label_chunks[chunk_idx]['batch_num_nodes'][ev_idx])
183
- g.set_batch_num_edges(self.label_chunks[chunk_idx]['batch_num_edges'][ev_idx])
184
- return g, self.label_chunks[chunk_idx]['labels'][ev_idx], self.label_chunks[chunk_idx]['tracking'][ev_idx], self.label_chunks[chunk_idx]['globals'][ev_idx]
185
-
186
- def __len__(self):
187
- l = 0
188
- for chunk in self.label_chunks:
189
- l += len(chunk['labels'])
190
- return l
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
root_gnn_base/custom_scheduler.py DELETED
@@ -1,565 +0,0 @@
1
- import types
2
- import math
3
- import torch
4
- from torch import inf
5
- from functools import wraps, partial
6
- import warnings
7
- import weakref
8
- from collections import Counter
9
- from bisect import bisect_right
10
-
11
- from models import GCN
12
-
13
-
14
-
15
-
16
- ### Code from: https://pytorch.org/docs/stable/_modules/torch/optim/lr_scheduler.html#ReduceLROnPlateau
17
-
18
- Optimizer = torch.optim.Optimizer
19
-
20
- __all__ = ['LambdaLR', 'MultiplicativeLR', 'StepLR', 'MultiStepLR', 'ConstantLR', 'LinearLR',
21
- 'ExponentialLR', 'SequentialLR', 'CosineAnnealingLR', 'ChainedScheduler', 'ReduceLROnPlateau',
22
- 'CyclicLR', 'CosineAnnealingWarmRestarts', 'OneCycleLR', 'PolynomialLR', 'LRScheduler']
23
-
24
- EPOCH_DEPRECATION_WARNING = (
25
- "The epoch parameter in `scheduler.step()` was not necessary and is being "
26
- "deprecated where possible. Please use `scheduler.step()` to step the "
27
- "scheduler. During the deprecation, if epoch is different from None, the "
28
- "closed form is used instead of the new chainable form, where available. "
29
- "Please open an issue if you are unable to replicate your use case: "
30
- "https://github.com/pytorch/pytorch/issues/new/choose."
31
- )
32
-
33
-
34
- def update_LR(opt, lr):
35
- for param_group in opt.param_groups:
36
- param_group['lr'] = lr
37
-
38
- def print_LR(opt):
39
- for param_group in opt.param_groups:
40
- print(f"LR = {param_group['lr']}")
41
-
42
- def _check_verbose_deprecated_warning(verbose):
43
- """Raises a warning when verbose is not the default value."""
44
- if verbose != "deprecated":
45
- warnings.warn("The verbose parameter is deprecated. Please use get_last_lr() "
46
- "to access the learning rate.", UserWarning)
47
- return verbose
48
- return False
49
-
50
- class LRScheduler:
51
-
52
- def __init__(self, optimizer, last_epoch=-1, verbose="deprecated"):
53
-
54
- # Attach optimizer
55
- if not isinstance(optimizer, Optimizer):
56
- raise TypeError(f'{type(optimizer).__name__} is not an Optimizer')
57
- self.optimizer = optimizer
58
-
59
- # Initialize epoch and base learning rates
60
- if last_epoch == -1:
61
- for group in optimizer.param_groups:
62
- group.setdefault('initial_lr', group['lr'])
63
- else:
64
- for i, group in enumerate(optimizer.param_groups):
65
- if 'initial_lr' not in group:
66
- raise KeyError("param 'initial_lr' is not specified "
67
- f"in param_groups[{i}] when resuming an optimizer")
68
- self.base_lrs = [group['initial_lr'] for group in optimizer.param_groups]
69
- self.last_epoch = last_epoch
70
-
71
- # Following https://github.com/pytorch/pytorch/issues/20124
72
- # We would like to ensure that `lr_scheduler.step()` is called after
73
- # `optimizer.step()`
74
- def with_counter(method):
75
- if getattr(method, '_with_counter', False):
76
- # `optimizer.step()` has already been replaced, return.
77
- return method
78
-
79
- # Keep a weak reference to the optimizer instance to prevent
80
- # cyclic references.
81
- instance_ref = weakref.ref(method.__self__)
82
- # Get the unbound method for the same purpose.
83
- func = method.__func__
84
- cls = instance_ref().__class__
85
- del method
86
-
87
- @wraps(func)
88
- def wrapper(*args, **kwargs):
89
- instance = instance_ref()
90
- instance._step_count += 1
91
- wrapped = func.__get__(instance, cls)
92
- return wrapped(*args, **kwargs)
93
-
94
- # Note that the returned function here is no longer a bound method,
95
- # so attributes like `__func__` and `__self__` no longer exist.
96
- wrapper._with_counter = True
97
- return wrapper
98
-
99
- self.optimizer.step = with_counter(self.optimizer.step)
100
- self.verbose = _check_verbose_deprecated_warning(verbose)
101
-
102
- self._initial_step()
103
-
104
- def _initial_step(self):
105
- """Initialize step counts and performs a step"""
106
- self.optimizer._step_count = 0
107
- self._step_count = 0
108
- self.step()
109
-
110
- def state_dict(self):
111
- """Returns the state of the scheduler as a :class:`dict`.
112
-
113
- It contains an entry for every variable in self.__dict__ which
114
- is not the optimizer.
115
- """
116
- return {key: value for key, value in self.__dict__.items() if key != 'optimizer'}
117
-
118
- def load_state_dict(self, state_dict):
119
- """Loads the schedulers state.
120
-
121
- Args:
122
- state_dict (dict): scheduler state. Should be an object returned
123
- from a call to :meth:`state_dict`.
124
- """
125
- self.__dict__.update(state_dict)
126
-
127
- def get_last_lr(self):
128
- """ Return last computed learning rate by current scheduler.
129
- """
130
- return self._last_lr
131
-
132
- def get_lr(self):
133
- # Compute learning rate using chainable form of the scheduler
134
- raise NotImplementedError
135
-
136
- def print_lr(self, is_verbose, group, lr, epoch=None):
137
- """Display the current learning rate.
138
- """
139
- if is_verbose:
140
- if epoch is None:
141
- print(f'Adjusting learning rate of group {group} to {lr:.4e}.')
142
- else:
143
- epoch_str = ("%.2f" if isinstance(epoch, float) else
144
- "%.5d") % epoch
145
- print(f'Epoch {epoch_str}: adjusting learning rate of group {group} to {lr:.4e}.')
146
-
147
-
148
- def step(self, epoch=None):
149
- # Raise a warning if old pattern is detected
150
- # https://github.com/pytorch/pytorch/issues/20124
151
- if self._step_count == 1:
152
- if not hasattr(self.optimizer.step, "_with_counter"):
153
- warnings.warn("Seems like `optimizer.step()` has been overridden after learning rate scheduler "
154
- "initialization. Please, make sure to call `optimizer.step()` before "
155
- "`lr_scheduler.step()`. See more details at "
156
- "https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate", UserWarning)
157
-
158
- # Just check if there were two first lr_scheduler.step() calls before optimizer.step()
159
- elif self.optimizer._step_count < 1:
160
- warnings.warn("Detected call of `lr_scheduler.step()` before `optimizer.step()`. "
161
- "In PyTorch 1.1.0 and later, you should call them in the opposite order: "
162
- "`optimizer.step()` before `lr_scheduler.step()`. Failure to do this "
163
- "will result in PyTorch skipping the first value of the learning rate schedule. "
164
- "See more details at "
165
- "https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate", UserWarning)
166
- self._step_count += 1
167
-
168
- with _enable_get_lr_call(self):
169
- if epoch is None:
170
- self.last_epoch += 1
171
- values = self.get_lr()
172
- else:
173
- warnings.warn(EPOCH_DEPRECATION_WARNING, UserWarning)
174
- self.last_epoch = epoch
175
- if hasattr(self, "_get_closed_form_lr"):
176
- values = self._get_closed_form_lr()
177
- else:
178
- values = self.get_lr()
179
-
180
- for i, data in enumerate(zip(self.optimizer.param_groups, values)):
181
- param_group, lr = data
182
- param_group['lr'] = lr
183
-
184
- self._last_lr = [group['lr'] for group in self.optimizer.param_groups]
185
-
186
-
187
- # Including _LRScheduler for backwards compatibility
188
- # Subclass instead of assign because we want __name__ of _LRScheduler to be _LRScheduler (assigning would make it LRScheduler).
189
- class _LRScheduler(LRScheduler):
190
- pass
191
-
192
-
193
- class _enable_get_lr_call:
194
-
195
- def __init__(self, o):
196
- self.o = o
197
-
198
- def __enter__(self):
199
- self.o._get_lr_called_within_step = True
200
- return self
201
-
202
- def __exit__(self, type, value, traceback):
203
- self.o._get_lr_called_within_step = False
204
-
205
-
206
- class Dynamic_LR(LRScheduler):
207
- """Reduce learning rate when a metric has stopped improving.
208
- Models often benefit from reducing the learning rate by a factor
209
- of 2-10 once learning stagnates. This scheduler reads a metrics
210
- quantity and if no improvement is seen for a 'patience' number
211
- of epochs, the learning rate is reduced.
212
-
213
- Args:
214
- optimizer (Optimizer): Wrapped optimizer.
215
- mode (str): One of `min`, `max`. In `min` mode, lr will
216
- be reduced when the quantity monitored has stopped
217
- decreasing; in `max` mode it will be reduced when the
218
- quantity monitored has stopped increasing. Default: 'min'.
219
- factor (float): Factor by which the learning rate will be
220
- reduced. new_lr = lr * factor. Default: 0.1.
221
- patience (int): Number of epochs with no improvement after
222
- which learning rate will be reduced. For example, if
223
- `patience = 2`, then we will ignore the first 2 epochs
224
- with no improvement, and will only decrease the LR after the
225
- 3rd epoch if the loss still hasn't improved then.
226
- Default: 10.
227
- threshold (float): Threshold for measuring the new optimum,
228
- to only focus on significant changes. Default: 1e-4.
229
- threshold_mode (str): One of `rel`, `abs`. In `rel` mode,
230
- dynamic_threshold = best * ( 1 + threshold ) in 'max'
231
- mode or best * ( 1 - threshold ) in `min` mode.
232
- In `abs` mode, dynamic_threshold = best + threshold in
233
- `max` mode or best - threshold in `min` mode. Default: 'rel'.
234
- cooldown (int): Number of epochs to wait before resuming
235
- normal operation after lr has been reduced. Default: 0.
236
- min_lr (float or list): A scalar or a list of scalars. A
237
- lower bound on the learning rate of all param groups
238
- or each group respectively. Default: 0.
239
- eps (float): Minimal decay applied to lr. If the difference
240
- between new and old lr is smaller than eps, the update is
241
- ignored. Default: 1e-8.
242
- verbose (bool): If ``True``, prints a message to stdout for
243
- each update. Default: ``False``.
244
-
245
- .. deprecated:: 2.2
246
- ``verbose`` is deprecated. Please use ``get_last_lr()`` to access the
247
- learning rate.
248
-
249
- Example:
250
- >>> # xdoctest: +SKIP
251
- >>> optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9)
252
- >>> scheduler = ReduceLROnPlateau(optimizer, 'min')
253
- >>> for epoch in range(10):
254
- >>> train(...)
255
- >>> val_loss = validate(...)
256
- >>> # Note that step should be called after validate()
257
- >>> scheduler.step(val_loss)
258
- """
259
-
260
- def __init__(self, optimizer, mode = 'max', factor=0.1, patience=10,
261
- plateau_var = "test_auc",
262
- threshold=1e-4, threshold_mode='rel', cooldown=0,
263
- min_lr=0, max_lr=1e-4, eps=1e-8, verbose=False):
264
-
265
- """
266
- if factor >= 1.0:
267
- raise ValueError('Factor should be < 1.0.')
268
- """
269
- self.factor = factor
270
-
271
- # Attach optimizer
272
- if not isinstance(optimizer, Optimizer):
273
- raise TypeError(f'{type(optimizer).__name__} is not an Optimizer')
274
- self.optimizer = optimizer
275
-
276
- if isinstance(min_lr, (list, tuple)):
277
- if len(min_lr) != len(optimizer.param_groups):
278
- raise ValueError(f"expected {len(optimizer.param_groups)} min_lrs, got {len(min_lr)}")
279
- self.min_lrs = list(min_lr)
280
- self.max_lrs = list(max_lr)
281
- else:
282
- self.min_lrs = [min_lr] * len(optimizer.param_groups)
283
- self.max_lrs = [max_lr] * len(optimizer.param_groups)
284
-
285
- self.patience = patience
286
- self.plateau_var = plateau_var
287
-
288
- self.verbose = verbose
289
- self.cooldown = cooldown
290
- self.cooldown_counter = 0
291
- self.mode = mode
292
- self.threshold = threshold
293
- self.threshold_mode = threshold_mode
294
- self.best = None
295
- self.num_bad_epochs = None
296
- self.mode_worse = None # the worse value for the chosen mode
297
- self.eps = eps
298
- self.last_epoch = 0
299
- self._last_lr = [group['lr'] for group in self.optimizer.param_groups]
300
- self._init_is_better(mode=mode, threshold=threshold,
301
- threshold_mode=threshold_mode)
302
- self._reset()
303
-
304
- def _reset(self):
305
- """Resets num_bad_epochs counter and cooldown counter."""
306
- self.best = self.mode_worse
307
- self.cooldown_counter = 0
308
- self.num_bad_epochs = 0
309
-
310
- def step(self, model, metrics, epoch=None):
311
- # convert `metrics` to float, in case it's a zero-dim Tensor
312
- current = float(metrics[self.plateau_var])
313
- if epoch is None:
314
- epoch = self.last_epoch + 1
315
- else:
316
- warnings.warn(EPOCH_DEPRECATION_WARNING, UserWarning)
317
- self.last_epoch = epoch
318
-
319
- if self.is_better(current, self.best):
320
- if(self.verbose):
321
- print("Model is improving!")
322
- self.best = current
323
- self.num_bad_epochs = 0
324
- else:
325
- if(self.verbose):
326
- print(f"Model is not improving :( best = {self.best}, current = {current}")
327
- self.num_bad_epochs += 1
328
-
329
- if self.in_cooldown:
330
- self.cooldown_counter -= 1
331
- self.num_bad_epochs = 0 # ignore any bad epochs in cooldown
332
-
333
- if self.num_bad_epochs > self.patience:
334
- self._reduce_lr(epoch)
335
- self.cooldown_counter = self.cooldown
336
- self.num_bad_epochs = 0
337
-
338
- self._last_lr = [group['lr'] for group in self.optimizer.param_groups]
339
-
340
- def _reduce_lr(self, epoch):
341
- print("Adjusting Learning Rate")
342
- self._reset()
343
- for i, param_group in enumerate(self.optimizer.param_groups):
344
- old_lr = float(param_group['lr'])
345
- new_lr = max(old_lr * self.factor, self.min_lrs[i])
346
- new_lr = min(new_lr, self.max_lrs[i])
347
- if abs(old_lr - new_lr) > self.eps:
348
- param_group['lr'] = new_lr
349
-
350
- def get_last_lr(self):
351
- return self._last_lr
352
- @property
353
- def in_cooldown(self):
354
- return self.cooldown_counter > 0
355
-
356
- def is_better(self, a, best):
357
- if self.mode == 'min' and self.threshold_mode == 'rel':
358
- rel_epsilon = 1. - self.threshold
359
- return a < best * rel_epsilon
360
-
361
- elif self.mode == 'min' and self.threshold_mode == 'abs':
362
- return a < best - self.threshold
363
-
364
- elif self.mode == 'max' and self.threshold_mode == 'rel':
365
- rel_epsilon = self.threshold + 1.
366
- return a > best * rel_epsilon
367
-
368
- else: # mode == 'max' and epsilon_mode == 'abs':
369
- return a > best + self.threshold
370
-
371
- def _init_is_better(self, mode, threshold, threshold_mode):
372
- if mode not in {'min', 'max'}:
373
- raise ValueError('mode ' + mode + ' is unknown!')
374
- if threshold_mode not in {'rel', 'abs'}:
375
- raise ValueError('threshold mode ' + threshold_mode + ' is unknown!')
376
-
377
- if mode == 'min':
378
- self.mode_worse = inf
379
- else: # mode == 'max':
380
- self.mode_worse = -inf
381
-
382
- self.mode = mode
383
- self.threshold = threshold
384
- self.threshold_mode = threshold_mode
385
-
386
- def state_dict(self):
387
- return {key: value for key, value in self.__dict__.items() if key != 'optimizer'}
388
-
389
- def load_state_dict(self, state_dict):
390
- self.__dict__.update(state_dict)
391
- self._init_is_better(mode=self.mode, threshold=self.threshold, threshold_mode=self.threshold_mode)
392
-
393
- class Action_On_Plateau():
394
-
395
- def __init__(self, mode = 'max', patience=10,
396
- plateau_var = "test_auc",
397
- threshold=1e-4, threshold_mode='rel', cooldown=0,
398
- eps=1e-8, verbose=False):
399
-
400
- self.patience = patience
401
- self.plateau_var = plateau_var
402
-
403
- self.verbose = verbose
404
- self.cooldown = cooldown
405
- self.cooldown_counter = 0
406
- self.mode = mode
407
- self.threshold = threshold
408
- self.threshold_mode = threshold_mode
409
- self.best = None
410
- self.num_bad_epochs = None
411
- self.mode_worse = None # the worse value for the chosen mode
412
- self.eps = eps
413
- self.last_epoch = 0
414
- self._init_is_better(mode=mode, threshold=threshold,
415
- threshold_mode=threshold_mode)
416
- self._reset()
417
-
418
- def _reset(self):
419
- """Resets num_bad_epochs counter and cooldown counter."""
420
- self.best = self.mode_worse
421
- self.cooldown_counter = 0
422
- self.num_bad_epochs = 0
423
-
424
- def step(self, model, metrics, epoch=None):
425
- # convert `metrics` to float, in case it's a zero-dim Tensor
426
- current = float(metrics[self.plateau_var])
427
- if epoch is None:
428
- epoch = self.last_epoch + 1
429
- else:
430
- warnings.warn(EPOCH_DEPRECATION_WARNING, UserWarning)
431
- self.last_epoch = epoch
432
-
433
- if self.is_better(current, self.best):
434
- if(self.verbose):
435
- print("Model is improving!")
436
- self.best = current
437
- self.num_bad_epochs = 0
438
- else:
439
- if(self.verbose):
440
- print(f"Model is not improving :( best = {self.best}, current = {current}")
441
- self.num_bad_epochs += 1
442
-
443
- if self.in_cooldown:
444
- self.cooldown_counter -= 1
445
- self.num_bad_epochs = 0 # ignore any bad epochs in cooldown
446
-
447
- if self.num_bad_epochs > self.patience:
448
- self.action(model, metrics, epoch)
449
-
450
- def action(self, model, metrics, epoch=None):
451
- if(self.verbose):
452
- print("Doing my action")
453
-
454
- @property
455
- def in_cooldown(self):
456
- return self.cooldown_counter > 0
457
-
458
- def is_better(self, a, best):
459
- if self.mode == 'min' and self.threshold_mode == 'rel':
460
- rel_epsilon = 1. - self.threshold
461
- return a < best * rel_epsilon
462
-
463
- elif self.mode == 'min' and self.threshold_mode == 'abs':
464
- return a < best - self.threshold
465
-
466
- elif self.mode == 'max' and self.threshold_mode == 'rel':
467
- rel_epsilon = self.threshold + 1.
468
- return a > best * rel_epsilon
469
-
470
- else: # mode == 'max' and epsilon_mode == 'abs':
471
- return a > best + self.threshold
472
-
473
- def _init_is_better(self, mode, threshold, threshold_mode):
474
- if mode not in {'min', 'max'}:
475
- raise ValueError('mode ' + mode + ' is unknown!')
476
- if threshold_mode not in {'rel', 'abs'}:
477
- raise ValueError('threshold mode ' + threshold_mode + ' is unknown!')
478
-
479
- if mode == 'min':
480
- self.mode_worse = inf
481
- else: # mode == 'max':
482
- self.mode_worse = -inf
483
-
484
- self.mode = mode
485
- self.threshold = threshold
486
- self.threshold_mode = threshold_mode
487
-
488
- class Partial_Reset(Action_On_Plateau):
489
-
490
- def __init__(self, mode='max', patience=10, plateau_var="test_auc",
491
- threshold=0.0001, threshold_mode='rel', cooldown=0,
492
- eps=1e-8, verbose=False):
493
-
494
- super().__init__(mode, patience, plateau_var, threshold,
495
- threshold_mode, cooldown, eps, verbose)
496
-
497
- def action(self, model, metrics, epoch=None):
498
- print("Partial Reset!!")
499
- GCN.partial_reset(model)
500
- self._reset()
501
- self.cooldown_counter = self.cooldown
502
- self.num_bad_epochs = 0
503
-
504
-
505
- class Full_Reset(Action_On_Plateau):
506
-
507
- def __init__(self, mode='max', patience=10, plateau_var="test_auc",
508
- threshold=0.0001, threshold_mode='rel', cooldown=0,
509
- eps=1e-8, verbose=False):
510
-
511
- super().__init__(mode, patience, plateau_var, threshold,
512
- threshold_mode, cooldown, eps, verbose)
513
-
514
- def action(self, model, metrics, epoch=None):
515
- print("Full Reset!!")
516
- GCN.full_reset(model)
517
- self._reset()
518
- self.cooldown_counter = self.cooldown
519
- self.num_bad_epochs = 0
520
-
521
- class Dynamic_LR_AND_Partial_Reset():
522
- def __init__(self, optimizer, mode = 'max', factor=0.1, patience=10,
523
- plateau_var = "test_auc", reset_patience=None, reset_plateau_var=None,
524
- threshold=1e-4, threshold_mode='rel', cooldown=0,
525
- min_lr=0, max_lr=1e-4, eps=1e-8, verbose=False):
526
-
527
- if (reset_patience == None):
528
- reset_patience = patience
529
- if(reset_plateau_var == None):
530
- reset_plateau_var = plateau_var
531
-
532
- self.dynamic_lr = Dynamic_LR(optimizer, mode=mode, factor=factor, patience = patience,
533
- plateau_var=plateau_var, threshold=threshold, threshold_mode =threshold_mode,
534
- cooldown=cooldown, min_lr=min_lr, max_lr=max_lr, eps=eps, verbose=verbose)
535
-
536
- self.partial_reset = Partial_Reset(mode=mode, patience=reset_patience, plateau_var=reset_plateau_var,
537
- threshold=threshold, threshold_mode=threshold_mode, cooldown=cooldown,
538
- eps=eps)
539
-
540
- def step(self, model, metrics, epoch=None):
541
- self.dynamic_lr.step(model=model, metrics=metrics, epoch=epoch)
542
- self.partial_reset.step(model=model, metrics=metrics, epoch=epoch)
543
-
544
- class Dynamic_LR_AND_Full_Reset():
545
- def __init__(self, optimizer, mode = 'max', factor=0.1, patience=10,
546
- plateau_var = "test_auc", reset_patience=None, reset_plateau_var=None,
547
- threshold=1e-4, threshold_mode='rel', cooldown=0,
548
- min_lr=0, max_lr=1e-4, eps=1e-8, verbose=False):
549
-
550
- if (reset_patience == None):
551
- reset_patience = patience
552
- if(reset_plateau_var == None):
553
- reset_plateau_var = plateau_var
554
-
555
- self.dynamic_lr = Dynamic_LR(optimizer, mode=mode, factor=factor, patience = patience,
556
- plateau_var=plateau_var, threshold=threshold, threshold_mode =threshold_mode,
557
- cooldown=cooldown, min_lr=min_lr, max_lr=max_lr, eps=eps, verbose=verbose)
558
-
559
- self.full_reset = Full_Reset(mode=mode, patience=reset_patience, plateau_var=reset_plateau_var,
560
- threshold=threshold, threshold_mode=threshold_mode, cooldown=cooldown,
561
- eps=eps)
562
-
563
- def step(self, model, metrics, epoch=None):
564
- self.dynamic_lr.step(model=model, metrics=metrics, epoch=epoch)
565
- self.full_reset.step(model=model, metrics=metrics, epoch=epoch)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
root_gnn_base/dataset.py DELETED
@@ -1,685 +0,0 @@
1
- from dgl.data import DGLDataset
2
- import dgl
3
- import ROOT
4
- import torch
5
- import os
6
- import glob
7
- import time
8
- import numpy as np
9
- from root_gnn_base import utils
10
-
11
- def node_features_from_tree(ch, node_branch_names, node_branch_types, node_feature_scales):
12
- lengths = []
13
- for branch, node_type in zip(node_branch_names[0], node_branch_types):
14
- if node_type == 'single':
15
- lengths.append(1)
16
- elif node_type == 'vector':
17
- lengths.append(len(getattr(ch, branch)))
18
- else:
19
- print('Unknown node branch type: {}'.format(node_type))
20
- features = []
21
- for node_feat in node_branch_names:
22
- if node_feat == 'CALC_E':
23
- features.append(features[0]*torch.cosh(features[1]))
24
- continue
25
- elif node_feat == 'NODE_TYPE':
26
- feat = []
27
- for i, length in enumerate(lengths):
28
- feat.extend([i,]*length)
29
- features.append(torch.tensor(feat))
30
- continue
31
- feat = []
32
- itype = 0
33
- for length, branch, node_type in zip(lengths, node_feat, node_branch_types):
34
- if isinstance(branch, (int, float, complex)):
35
- feat.extend([branch,]*length)
36
- elif branch == 'CALC_E':
37
- this_type_starts_at = sum(lengths[:itype])
38
- this_type_ends_at = sum(lengths[:itype+1])
39
- feat.extend(features[0][this_type_starts_at:this_type_ends_at]*torch.cosh(features[1][this_type_starts_at:this_type_ends_at]))
40
- elif node_type == 'single':
41
- feat.append(getattr(ch, branch))
42
- elif node_type == 'vector':
43
- feat.extend(getattr(ch, branch))
44
- itype += 1
45
- features.append(torch.tensor(feat))
46
- return torch.stack(features, dim=1) * node_feature_scales, lengths
47
-
48
- def full_connected_graph(n_nodes, self_loops=True):
49
- senders = []
50
- receivers = []
51
- senders = np.arange(n_nodes*n_nodes) // n_nodes
52
- receivers = np.arange(n_nodes*n_nodes) % n_nodes
53
- if not self_loops and n_nodes > 1:
54
- mask = senders != receivers
55
- senders = senders[mask]
56
- receivers = receivers[mask]
57
- return dgl.graph((senders, receivers))
58
-
59
- def check_selection(ch, selection):
60
- var, cut, op = selection
61
- if op == '>':
62
- return getattr(ch, var) > cut
63
- elif op == '<':
64
- return getattr(ch, var) < cut
65
- elif op == '==':
66
- return getattr(ch, var) == cut
67
-
68
- def check_selections(ch, selections):
69
- for selection in selections:
70
- if not check_selection(ch, selection):
71
- return False
72
- return True
73
-
74
- #Base dataset class for making graphs from ROOT ntuples.
75
- class RootDataset(DGLDataset):
76
- def __init__(self, name=None, raw_dir=None, save_dir=None, label=1, file_names = '*.root', node_branch_names=None, node_branch_types=None, node_feature_scales=None,
77
- selections=[], save=True, tree_name = 'nominal_Loose', fold_var = 'eventNumber', weight_var = None, chunks = 1, process_chunks = None, global_features = [], tracking_info = [], **kwargs):
78
- print(f'Unused args while creating RootDataset: {kwargs}')
79
- self.label = label
80
- self.counts = []
81
- self.selections = selections
82
- self.save_to_disk = save
83
- self.file_names = file_names
84
- self.node_branch_names = node_branch_names
85
- self.node_branch_types = node_branch_types
86
- self.node_feature_scales = torch.tensor([float(sf) for sf in node_feature_scales])
87
- self.tree_name = tree_name
88
- self.fold_var = fold_var
89
- self.tracking_info = tracking_info
90
- self.tracking_info.insert(0, fold_var)
91
- if weight_var == None:
92
- weight_var = 1
93
- self.tracking_info.insert(1, weight_var)
94
- self.global_features = global_features
95
- self.chunks = chunks
96
- self.process_chunks = process_chunks
97
- if self.process_chunks is None:
98
- self.process_chunks = [i for i in range(self.chunks)]
99
- self.times = [0, 0]
100
- super().__init__(name=name, raw_dir=raw_dir, save_dir=save_dir)
101
-
102
- def get_list_of_branches(self):
103
- branches = []
104
- for feat in self.node_branch_names:
105
- if isinstance(feat, list):
106
- for branch in feat:
107
- if branch == 'CALC_E':
108
- continue
109
- if isinstance(branch, str):
110
- branches.append(branch)
111
- for feat in self.global_features:
112
- if isinstance(feat, str):
113
- branches.append(feat)
114
- for feat in self.tracking_info:
115
- if isinstance(feat, str):
116
- branches.append(feat)
117
- for selection in self.selections:
118
- branches.append(selection[0])
119
- return branches
120
-
121
- def make_graph(self, ch):
122
- t1 = time.time()
123
- features, _ = node_features_from_tree(ch, self.node_branch_names, self.node_branch_types, self.node_feature_scales)
124
- features = features[features[:,0] != 0]
125
- t2 = time.time()
126
- g = full_connected_graph(features.shape[0], self_loops=False)
127
- g.ndata['features'] = features
128
- t3 = time.time()
129
- self.times[0] += t2 - t1
130
- self.times[1] += t3 - t2
131
- return g
132
-
133
- def process(self):
134
- times = [0, 0, 0]
135
- oldtime = time.time()
136
- if isinstance(self.file_names, str):
137
- self.files = glob.glob(os.path.join(self.raw_dir, self.file_names))
138
- else:
139
- self.files = []
140
- for file_name in self.file_names:
141
- self.files.extend(glob.glob(os.path.join(self.raw_dir, file_name)))
142
- self.chain = ROOT.TChain(self.tree_name)
143
-
144
- if len(self.files) == 0:
145
- print('No files found in {}'.format(os.path.join(self.raw_dir, self.file_names)))
146
- for file in self.files:
147
- utils.set_timeout(60*2)
148
- self.chain.Add(file)
149
- utils.unset_timeout()
150
- branches = self.get_list_of_branches()
151
- self.chain.SetBranchStatus('*', 0)
152
- for branch in branches:
153
- self.chain.SetBranchStatus(branch, 1)
154
- newtime = time.time()
155
- times[0] += newtime - oldtime
156
- chunks = np.array_split(np.arange(self.chain.GetEntries()), self.chunks)
157
- chunks = [chunk for i, chunk in enumerate(chunks) if i in self.process_chunks]
158
-
159
- self.graph_chunks = []
160
- self.label_chunks = []
161
- self.tracking_chunks = []
162
- self.global_chunks = []
163
- chunk_id = -1
164
- for chunk in chunks:
165
- chunk_id += 1
166
- graphs = []
167
- labels = []
168
- tracking = []
169
- globals = []
170
- for ientry in chunk:
171
- if (ientry % 10000 == 0):
172
- print('Processing event {}/{}'.format(ientry, self.chain.GetEntries()), flush=True)
173
- self.chain.GetEntry(ientry)
174
- passed = True
175
- for selection in self.selections:
176
- if not check_selection(self.chain, selection):
177
- passed = False
178
- continue
179
- oldtime = newtime
180
- newtime = time.time()
181
- times[1] += newtime - oldtime
182
- if passed:
183
- graphs.append(self.make_graph(self.chain))
184
- labels.append( self.label )
185
- tracking.append(torch.zeros(len(self.tracking_info), dtype=torch.double))
186
- globals.append(torch.zeros(len(self.global_features)))
187
- for i_ti, tr_branch in enumerate(self.tracking_info):
188
- if isinstance(tr_branch, str):
189
- tracking[-1][i_ti] = getattr(self.chain, tr_branch)
190
- else:
191
- tracking[-1][i_ti] = tr_branch
192
- for i_gl, gl_branch in enumerate(self.global_features):
193
- globals[-1][i_gl] = getattr(self.chain, gl_branch)
194
- oldtime = newtime
195
- newtime = time.time()
196
- times[2] += newtime - oldtime
197
-
198
- labels = torch.tensor(labels)
199
- tracking = torch.stack(tracking)
200
- globals = torch.stack(globals)
201
-
202
- # self.labels = labels
203
- # self.tracking = tracking
204
- # self.global_features = globals
205
- # self.graphs = graphs
206
-
207
- self.save_chunk(chunk_id, graphs, labels, tracking, globals)
208
-
209
- return
210
- self.graphs = self.graph_chunks[0]
211
- for chunk in self.graph_chunks[1:]:
212
- self.graphs += chunk
213
- self.labels = torch.cat(self.label_chunks)
214
- self.tracking = torch.cat(self.tracking_chunks)
215
- self.global_features = torch.cat(self.global_chunks)
216
- print('Time spent: Creating TChain: {}s, Getting Entries and Selection: {}s, Graph Creation: {}s'.format(*times))
217
- print('Time spent in node_features_from_tree: {}s, full_connected_graph: {}s'.format(*self.times))
218
-
219
- def save(self):
220
- """save the graph list and the labels"""
221
- if not self.save_to_disk:
222
- return
223
- graph_path = os.path.join(self.save_dir, self.name + '.bin')
224
- if self.chunks == 1:
225
- # print(len(self.graphs))
226
- # print(len(self.labels))
227
- # print(len(self.tracking))
228
- # print(len(self.globals))
229
- print(f'Saving dataset to {os.path.join(self.save_dir, self.name + ".bin")}')
230
- dgl.save_graphs(str(graph_path), self.graphs, {'labels': torch.tensor(self.labels), 'tracking': torch.tensor(self.tracking), 'global': torch.tensor(self.global_features)})
231
- else:
232
- print(len(self.graph_chunks))
233
- for i in range(len(self.process_chunks)):
234
- print(f'Saving dataset to {os.path.join(self.save_dir, self.name + f"_{self.process_chunks[i]}.bin")}')
235
- dgl.save_graphs(str(graph_path).replace('.bin', f'_{self.process_chunks[i]}.bin'), self.graph_chunks[i], {'labels': self.label_chunks[i], 'tracking': self.tracking_chunks[i], 'global': self.global_chunks[i]})
236
-
237
- def save_chunk(self, chunk_id, graphs, labels, tracking, globals):
238
- if not self.save_to_disk:
239
- return
240
- graph_path = os.path.join(self.save_dir, self.name + '.bin')
241
- print(f'Saving dataset to {os.path.join(self.save_dir, self.name + f"_{self.process_chunks[chunk_id]}.bin")}')
242
- dgl.save_graphs(str(graph_path).replace('.bin', f'_{self.process_chunks[chunk_id]}.bin'), graphs, {'labels': labels, 'tracking': tracking, 'global': globals})
243
-
244
- def has_cache(self):
245
- print(f'Checking for cache of {self.name}')
246
- if not self.save_to_disk:
247
- print('Skipping load.')
248
- return False
249
- if self.chunks == 1:
250
- graph_path = os.path.join(self.save_dir, self.name + '.bin')
251
- return os.path.exists(graph_path)
252
- else:
253
- for i in range(len(self.process_chunks)):
254
- graph_path = os.path.join(self.save_dir, self.name + f'_{self.process_chunks[i]}.bin')
255
- if not os.path.exists(graph_path):
256
- print(f'File {graph_path} does not exist, processing.')
257
- return False
258
- return True
259
-
260
- def load(self):
261
- if self.chunks == 1:
262
- print(f'Loading dataset from {os.path.join(self.save_dir, self.name + ".bin")}')
263
- graphs, label_dict = dgl.load_graphs(os.path.join(self.save_dir, self.name + '.bin'))
264
- self.graphs = graphs
265
- self.labels = label_dict['labels']
266
- self.tracking = label_dict['tracking']
267
- self.global_features = label_dict['global']
268
- else:
269
- self.graphs = []
270
- self.labels = []
271
- self.tracking = []
272
- self.global_features = []
273
- for i in range(self.chunks):
274
- try:
275
- print(f'Loading dataset from {os.path.join(self.save_dir, self.name + f"_{self.process_chunks[i]}.bin")}')
276
- graphs, label = dgl.load_graphs(os.path.join(self.save_dir, self.name + f'_{self.process_chunks[i]}.bin'))
277
- self.graphs.extend(graphs)
278
- self.labels.append(label['labels'])
279
- self.tracking.append(label['tracking'])
280
- self.global_features.append(label['global'])
281
- except Exception as e:
282
- print(e)
283
- self.labels = torch.cat(self.labels)
284
- self.tracking = torch.cat(self.tracking)
285
- self.global_features = torch.cat(self.global_features)
286
-
287
- def __getitem__(self, idx):
288
- return self.graphs[idx], self.labels[idx], self.tracking[idx], self.global_features[idx]
289
-
290
- def __len__(self):
291
- return len(self.graphs)
292
-
293
- #Dataset with edge features added (deta, dphi, dR)
294
- class EdgeDataset(RootDataset):
295
- def make_graph(self, ch):
296
- g = super().make_graph(ch)
297
- u, v = g.edges()
298
- deta = g.ndata['features'][u, 1] - g.ndata['features'][v, 1]
299
- dphi = g.ndata['features'][u, 2] - g.ndata['features'][v, 2]
300
- dphi = torch.where(dphi > np.pi, dphi - 2*np.pi, dphi)
301
- dphi = torch.where(dphi < -np.pi, dphi + 2*np.pi, dphi)
302
- dR = torch.sqrt(deta**2 + dphi**2)
303
- g.edata['features'] = torch.stack([deta, dphi, dR], dim=1)
304
- return g
305
-
306
- class tHbbEdgeDataset(RootDataset):
307
- def __init__(self, exclude_branches=None, **kwargs):
308
- self.exclude_branches = exclude_branches
309
- super().__init__(**kwargs)
310
-
311
- def get_list_of_branches(self):
312
- br = super().get_list_of_branches()
313
- for sector in self.exclude_branches:
314
- if sector == None:
315
- continue
316
- for excl in sector:
317
- if type(excl) == str:
318
- br.append(excl)
319
- return br
320
-
321
- def make_graph(self, ch):
322
- features, lengths = node_features_from_tree(ch, self.node_branch_names, self.node_branch_types, self.node_feature_scales)
323
-
324
- include_mask = torch.ones(features.shape[0], dtype=torch.bool)
325
- node_idx = 0
326
- for sector, length in zip(self.exclude_branches, lengths):
327
- if sector == None:
328
- node_idx += length
329
- continue
330
- for excl in sector:
331
- if type(excl) == int:
332
- include_mask[excl + node_idx] = False
333
- elif type(excl) == str:
334
- include_mask[getattr(self.chain, excl) + node_idx] = False
335
- g = full_connected_graph(features[include_mask].shape[0], self_loops=False)
336
- g.ndata['features'] = features[include_mask]
337
-
338
- u, v = g.edges()
339
- deta = g.ndata['features'][u, 1] - g.ndata['features'][v, 1]
340
- dphi = g.ndata['features'][u, 2] - g.ndata['features'][v, 2]
341
- dphi = torch.where(dphi > np.pi, dphi - 2*np.pi, dphi)
342
- dphi = torch.where(dphi < -np.pi, dphi + 2*np.pi, dphi)
343
- dR = torch.sqrt(deta**2 + dphi**2)
344
- g.edata['features'] = torch.stack([deta, dphi, dR], dim=1)
345
- return g
346
-
347
- class LazyDataset(EdgeDataset):
348
- def __init__(self, buffer_size = 2, **kwargs):
349
- self.buffer = [None,] * buffer_size
350
- self.buffer_ptr = 0
351
- self.get_item_calls = 0
352
- self.buffer_indices = [-1,] * buffer_size
353
- super().__init__(**kwargs)
354
-
355
- def __getitem__(self, idx):
356
- self.get_item_calls += 1
357
- chunk_idx = -1
358
- sum = 0
359
- ev_idx = -999
360
- for i, count in enumerate(self.counts):
361
- sum += count
362
- if idx < sum:
363
- chunk_idx = i
364
- ev_idx = idx - sum + count
365
- break
366
- buf_idx = self.buffer_get(chunk_idx)
367
- if ev_idx >= len(self.buffer[buf_idx][0]):
368
- print(f'Getting event {ev_idx} from chunk {chunk_idx} from buffer {buf_idx}. Calls: {self.get_item_calls}')
369
- print(len(self.buffer))
370
- print(self.counts)
371
- print(len(self.buffer[buf_idx][0]))
372
- return self.buffer[buf_idx][0][ev_idx], self.buffer[buf_idx][1]['labels'][ev_idx], self.buffer[buf_idx][1]['tracking'][ev_idx], self.buffer[buf_idx][1]['global'][ev_idx]
373
-
374
- def buffer_get(self, buffer_idx):
375
- if buffer_idx in self.buffer_indices:
376
- for i in range(len(self.buffer)):
377
- if self.buffer_indices[i] == buffer_idx:
378
- return i
379
- else:
380
- print(f'Loading dataset from {os.path.join(self.save_dir, self.name + f"_{buffer_idx}.bin")}', flush=True)
381
- self.buffer_ptr = (self.buffer_ptr + 1) % len(self.buffer)
382
- self.buffer[self.buffer_ptr] = dgl.load_graphs(os.path.join(self.save_dir, self.name + f'_{buffer_idx}.bin'))
383
- self.buffer_indices[self.buffer_ptr] = buffer_idx
384
- return self.buffer_ptr
385
-
386
- def load(self):
387
- self.counts = []
388
- self.tracking = []
389
- try:
390
- for i in range(self.chunks):
391
- print(f'Loading dataset from {os.path.join(self.save_dir, self.name + f"_{self.process_chunks[i]}.bin")}')
392
- l = dgl.data.graph_serialize.load_labels_v2(os.path.join(self.save_dir, self.name + f'_{self.process_chunks[i]}.bin'))
393
- self.counts.append(len(l['tracking']))
394
- self.tracking.append(l['tracking'])
395
- self.tracking = torch.cat(self.tracking)
396
- except Exception as e:
397
- print(e)
398
-
399
- def __len__(self):
400
- return sum(self.counts)
401
-
402
- class MultiLabelDataset(EdgeDataset):
403
- def __init__(self, **kwargs):
404
- super().__init__(**kwargs)
405
-
406
- def get_list_of_branches(self):
407
- br = super().get_list_of_branches()
408
- for l in self.label:
409
- if isinstance(l, str):
410
- br.append(l)
411
- if isinstance(l, dict):
412
- br.append(l['branch'])
413
- return br
414
-
415
- def get_label(self, ch):
416
- label = []
417
- for l in self.label:
418
- if isinstance(l, str):
419
- label.append((getattr(ch, l)))
420
- if isinstance(l, dict):
421
- label.append(getattr(ch, l['branch'])*float(l['scale']))
422
- if isinstance(l, float) or isinstance(l, int):
423
- label.append(l)
424
-
425
- return torch.tensor(label)
426
-
427
- def process(self):
428
- times = [0, 0, 0]
429
- oldtime = time.time()
430
- if isinstance(self.file_names, str):
431
- self.files = glob.glob(os.path.join(self.raw_dir, self.file_names))
432
- else:
433
- self.files = []
434
- for file_name in self.file_names:
435
- self.files.extend(glob.glob(os.path.join(self.raw_dir, file_name)))
436
- self.chain = ROOT.TChain(self.tree_name)
437
- if len(self.files) == 0:
438
- print('No files found in {}'.format(os.path.join(self.raw_dir, self.file_names)))
439
- for file in self.files:
440
- utils.set_timeout(60*2)
441
- self.chain.Add(file)
442
- utils.unset_timeout()
443
- branches = self.get_list_of_branches()
444
- self.chain.SetBranchStatus('*', 0)
445
- for branch in branches:
446
- self.chain.SetBranchStatus(branch, 1)
447
- newtime = time.time()
448
- times[0] += newtime - oldtime
449
- chunks = np.array_split(np.arange(self.chain.GetEntries()), self.chunks)
450
- chunks = [chunk for i, chunk in enumerate(chunks) if i in self.process_chunks]
451
- self.graph_chunks = []
452
- self.label_chunks = []
453
- self.tracking_chunks = []
454
- self.global_chunks = []
455
- chunk_id = -1
456
- for chunk in chunks:
457
- chunk_id += 1
458
- graphs = []
459
- labels = []
460
- tracking = []
461
- globals = []
462
- for ientry in chunk:
463
- if (ientry % 10000 == 0):
464
- print('Processing event {}/{}'.format(ientry, self.chain.GetEntries()), flush=True)
465
- self.chain.GetEntry(ientry)
466
- passed = True
467
- for selection in self.selections:
468
- if not check_selection(self.chain, selection):
469
- passed = False
470
- continue
471
- oldtime = newtime
472
- newtime = time.time()
473
- times[1] += newtime - oldtime
474
- if passed:
475
- graphs.append(self.make_graph(self.chain))
476
- labels.append(self.get_label(self.chain))
477
- tracking.append(torch.zeros(len(self.tracking_info), dtype=torch.double))
478
- globals.append(torch.zeros(len(self.global_features)))
479
- for i_ti, tr_branch in enumerate(self.tracking_info):
480
- if isinstance(tr_branch, str):
481
- tracking[-1][i_ti] = getattr(self.chain, tr_branch)
482
- else:
483
- tracking[-1][i_ti] = tr_branch
484
- for i_gl, gl_branch in enumerate(self.global_features):
485
- globals[-1][i_gl] = getattr(self.chain, gl_branch)
486
- oldtime = newtime
487
- newtime = time.time()
488
- times[2] += newtime - oldtime
489
-
490
- labels = torch.stack(labels)
491
- self.save_chunk(chunk_id, graphs, labels, torch.stack(tracking), torch.stack(globals))
492
- # self.graph_chunks.append(graphs)
493
- # self.label_chunks.append(labels)
494
- # self.tracking_chunks.append(torch.stack(tracking))
495
- # self.global_chunks.append(torch.stack(globals))
496
- # self.counts.append(len(graphs))
497
- return
498
- self.graphs = self.graph_chunks[0]
499
- for chunk in self.graph_chunks[1:]:
500
- self.graphs += chunk
501
-
502
- self.labels = torch.cat(self.label_chunks)
503
- self.tracking = torch.cat(self.tracking_chunks)
504
- self.global_features = torch.cat(self.global_chunks)
505
- print('Time spent: Creating TChain: {}s, Getting Entries and Selection: {}s, Graph Creation: {}s'.format(*times))
506
- print('Time spent in node_features_from_tree: {}s, full_connected_graph: {}s'.format(*self.times))
507
-
508
class LazyMultiLabelDataset(MultiLabelDataset, LazyDataset):
    """Multi-label dataset with lazy, buffered chunk loading.

    Combines MultiLabelDataset's labeling scheme with LazyDataset's
    chunk buffering (``buffer_size`` chunks kept in memory at once).
    """
    def __init__(self, buffer_size = 2, **kwargs):
        # NOTE(review): deliberately calls LazyDataset.__init__ directly instead
        # of super(), presumably to control which base runs initialization in
        # the MRO — confirm before converting to super().
        LazyDataset.__init__(self, buffer_size=buffer_size, **kwargs)
511
-
512
class MultiLabeltHbbDataset(MultiLabelDataset, tHbbEdgeDataset):
    """tHbb edge dataset with multi-label targets.

    Extends the base branch list so that string entries listed in
    ``self.exclude_branches`` are still read from the tree.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def get_list_of_branches(self):
        """Return the base branch list plus any string branch names found in
        ``self.exclude_branches`` sectors (``None`` sectors are skipped).
        """
        branches = super().get_list_of_branches()
        for sector in self.exclude_branches:
            # Fixed: identity comparison for None (was `== None`).
            if sector is None:
                continue
            for excl in sector:
                # Fixed: isinstance instead of `type(excl) == str`.
                if isinstance(excl, str):
                    branches.append(excl)
        return branches
525
-
526
-
527
class AugmentedDataset(RootDataset):
    """RootDataset that attaches an augmented copy of each graph's node and
    edge features: a shared random phi rotation, independent eta/phi sign
    reflections, per-object detector-resolution noise, and a recomputed MET
    node, all stored under the 'augmented_features' keys.
    """

    def __init__(self, seed = 2, feature_index = None, node_mapping = None, **kwargs):
        """
        Parameters
        ----------
        seed : int
            Seed for numpy's global RNG (all augmentations are random).
        feature_index : dict or None
            Maps feature name -> column in the node-feature matrix.
        node_mapping : dict or None
            Maps object-type name -> integer node_type code.
        """
        self.seed = seed
        np.random.seed(seed)
        # Fixed: caller-supplied feature_index/node_mapping were previously
        # discarded — the attribute was only assigned in the default branch.
        if feature_index is None:
            feature_index = {"pt": 0, "eta": 1, "phi": 2, "energy": 3, "btag": 4, "charge": 5, "node_type": 6}
        self.feature_index = feature_index
        if node_mapping is None:
            node_mapping = {"jet": 0, "ele": 1, "mu": 2, "ph": 3, "MET": 4}
        self.node_mapping = node_mapping
        super().__init__(**kwargs)

    def detector_noise(self, node_features):
        """Return additive per-node noise sampled from eta/pt(energy)-dependent
        resolution parameterizations.

        Jets, electrons and muons are smeared in pt; photons in energy.
        Entries outside the parameterized |eta| ranges get zero noise.
        """
        noise = np.zeros_like(node_features)

        node_types = node_features[:, self.feature_index["node_type"]]
        pts = node_features[:, self.feature_index["pt"]]
        etas = node_features[:, self.feature_index["eta"]]
        energies = node_features[:, self.feature_index["energy"]]

        # Jets: pt resolution by |eta| band; pt <= 0.1 or |eta| > 2.5 unsmeared.
        jet_mask = (node_types == self.node_mapping["jet"])
        jet_pts = pts[jet_mask]
        jet_etas = etas[jet_mask]
        if jet_mask.sum() > 0:
            jet_resolutions = np.where(
                jet_pts <= 0.1, 0.0,
                np.where(
                    np.abs(jet_etas) <= 0.5, np.sqrt(0.06**2 + jet_pts**2 * 1.3e-3**2),
                    np.where(
                        np.abs(jet_etas) <= 1.5, np.sqrt(0.10**2 + jet_pts**2 * 1.7e-3**2),
                        np.where(
                            np.abs(jet_etas) <= 2.5, np.sqrt(0.25**2 + jet_pts**2 * 3.1e-3**2),
                            0.0
                        )
                    )
                )
            )
            noise[jet_mask, self.feature_index["pt"]] = np.random.normal(loc=0.0, scale=jet_resolutions)

        # Electrons: pt resolution by |eta| band.
        ele_mask = (node_types == self.node_mapping["ele"])
        ele_pts = pts[ele_mask]
        ele_etas = etas[ele_mask]
        if ele_mask.sum() > 0:
            ele_resolutions = np.where(
                np.abs(ele_etas) <= 0.5, np.sqrt(0.03**2 + ele_pts**2 * 1.3e-3**2),
                np.where(
                    np.abs(ele_etas) <= 1.5, np.sqrt(0.05**2 + ele_pts**2 * 1.7e-3**2),
                    np.where(
                        np.abs(ele_etas) <= 2.5, np.sqrt(0.15**2 + ele_pts**2 * 3.1e-3**2),
                        0.0
                    )
                )
            )
            noise[ele_mask, self.feature_index["pt"]] = np.random.normal(loc=0.0, scale=ele_resolutions)

        # Muons: pt resolution by |eta| band.
        mu_mask = (node_types == self.node_mapping["mu"])
        mu_pts = pts[mu_mask]
        mu_etas = etas[mu_mask]
        if mu_mask.sum() > 0:
            mu_resolutions = np.where(
                np.abs(mu_etas) <= 0.5, np.sqrt(0.01**2 + mu_pts**2 * 1.0e-4**2),
                np.where(
                    np.abs(mu_etas) <= 1.5, np.sqrt(0.015**2 + mu_pts**2 * 1.5e-4**2),
                    np.where(
                        np.abs(mu_etas) <= 2.5, np.sqrt(0.025**2 + mu_pts**2 * 3.5e-4**2),
                        0.0
                    )
                )
            )
            noise[mu_mask, self.feature_index["pt"]] = np.random.normal(loc=0.0, scale=mu_resolutions)

        # Photons: energy (not pt) is smeared, by |eta| band.
        ph_mask = (node_types == self.node_mapping["ph"])
        ph_etas = etas[ph_mask]
        ph_energies = energies[ph_mask]
        if ph_mask.sum() > 0:
            ph_resolutions = np.where(
                np.abs(ph_etas) <= 3.2, np.sqrt(ph_energies**2 * 0.0017**2 + ph_energies * 0.101**2),
                np.where(
                    np.abs(ph_etas) <= 4.9, np.sqrt(ph_energies**2 * 0.0350**2 + ph_energies * 0.285**2),
                    0.0
                )
            )
            noise[ph_mask, self.feature_index["energy"]] = np.random.normal(loc=0.0, scale=ph_resolutions)
        return noise

    def make_graph(self, ch):
        """Build the base graph, then add augmented node/edge feature copies."""
        g = super().make_graph(ch)

        # Fixed: a plain `g.ndata['augmented_features'] = g.ndata['features']`
        # aliased the same tensor, so the in-place rotation/reflection below
        # also corrupted the original features (and therefore the unaugmented
        # edge features computed from them). Clone to keep them independent.
        g.ndata['augmented_features'] = g.ndata['features'].clone()

        pt_index = self.feature_index["pt"]
        eta_index = self.feature_index["eta"]
        phi_index = self.feature_index["phi"]

        # Rotation: one shared delta_phi per event, result wrapped to [-pi, pi).
        delta_phi = np.random.uniform(low=-np.pi, high=np.pi)
        g.ndata['augmented_features'][:, phi_index] = (g.ndata['augmented_features'][:, phi_index] + delta_phi + np.pi) % (2 * np.pi) - np.pi

        # Reflections: independently flip the signs of eta and phi.
        eta_reflection = np.random.choice([-1, 1])
        phi_reflection = np.random.choice([-1, 1])
        g.ndata['augmented_features'][:, eta_index] = g.ndata['augmented_features'][:, eta_index] * eta_reflection
        g.ndata['augmented_features'][:, phi_index] = g.ndata['augmented_features'][:, phi_index] * phi_reflection

        # Detector noise: additive smearing from detector_noise().
        noise = self.detector_noise(g.ndata['augmented_features'])
        g.ndata['augmented_features'] = g.ndata['augmented_features'] + noise

        # Recompute MET (assumed to be the last node — TODO confirm ordering)
        # from the vector sum of the smeared objects' transverse momenta.
        if g.ndata['augmented_features'][-1][self.feature_index["node_type"]] == self.node_mapping["MET"]:
            sum_px = 0
            sum_py = 0
            for i in range(len(g.ndata['augmented_features']) - 1):
                pt = g.ndata['augmented_features'][i][pt_index]
                phi = g.ndata['augmented_features'][i][phi_index]
                sum_px += pt * np.cos(phi)
                sum_py += pt * np.sin(phi)
            g.ndata['augmented_features'][-1][pt_index] = np.sqrt(sum_px**2 + sum_py**2)

        u, v = g.edges()

        def _edge_features(node_feats):
            # (deta, dphi wrapped to [-pi, pi], dR) per edge.
            deta = node_feats[u, eta_index] - node_feats[v, eta_index]
            dphi = node_feats[u, phi_index] - node_feats[v, phi_index]
            dphi = torch.where(dphi > np.pi, dphi - 2*np.pi, dphi)
            dphi = torch.where(dphi < -np.pi, dphi + 2*np.pi, dphi)
            dR = torch.sqrt(deta**2 + dphi**2)
            return torch.stack([deta, dphi, dR], dim=1)

        g.edata['features'] = _edge_features(g.ndata['features'])
        g.edata['augmented_features'] = _edge_features(g.ndata['augmented_features'])

        return g
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
root_gnn_base/photon_ID_dataset.py DELETED
@@ -1,44 +0,0 @@
1
- from root_gnn_base import dataset
2
- import dgl
3
- import torch
4
- import numpy as np
5
-
6
def radius_graph(features, radii, self_loops=False):
    """Build a DGL graph connecting node pairs whose separation along every
    constrained feature column is below the given radius.

    Parameters
    ----------
    features : tensor/array of shape (n_nodes, n_features)
    radii : dict
        Maps feature column index -> maximum |difference| allowed for an edge.
    self_loops : bool
        Keep i->i edges (ensures isolated nodes still appear in the graph).
    """
    n_nodes = features.shape[0]
    # Start from the fully connected sender/receiver index lists.
    # (Fixed: removed dead `senders = []` / `receivers = []` initializations.)
    senders = np.arange(n_nodes*n_nodes) // n_nodes
    receivers = np.arange(n_nodes*n_nodes) % n_nodes
    if not self_loops and n_nodes > 1:
        keep = senders != receivers
        senders = senders[keep]
        receivers = receivers[keep]
    # Successively prune pairs that are too far apart along each feature.
    for k, r in radii.items():
        d = features[senders, k] - features[receivers, k]
        keep = np.abs(d) < r
        senders = senders[keep]
        receivers = receivers[keep]
    # NOTE(review): num_nodes is not forced, so a node with no incident edge is
    # dropped — callers rely on self_loops=True to keep every node. Confirm
    # before passing num_nodes=n_nodes here.
    return dgl.graph((senders, receivers))
22
-
23
class PhotonIDDataset(dataset.LazyMultiLabelDataset):
    """Lazy multi-label dataset whose per-event graphs connect calorimeter
    cells lying within configurable eta/phi radii (and adjacent layers)."""

    def __init__(self, eta_radius, phi_radius, **kwargs):
        self.eta_radius = eta_radius
        self.phi_radius = phi_radius
        super().__init__(**kwargs)

    def make_graph(self, ch):
        """Build one event graph from tree entry ``ch``."""
        features, _ = dataset.node_features_from_tree(
            ch, self.node_branch_names, self.node_branch_types, self.node_feature_scales)
        features = features[features[:, 0] != 0]
        # Columns: 1 = eta, 2 = phi, 6 = layer (radius 1.1 links adjacent layers).
        # Self loops ensure the last cell is included even if disconnected.
        g = radius_graph(features, {1: self.eta_radius, 2: self.phi_radius, 6: 1.1}, self_loops=True)
        g.ndata['features'] = features
        src, dst = g.edges()
        deta = features[src, 1] - features[dst, 1]
        dphi = features[src, 2] - features[dst, 2]
        # Wrap the phi difference into [-pi, pi].
        dphi = torch.where(dphi > np.pi, dphi - 2*np.pi, dphi)
        dphi = torch.where(dphi < -np.pi, dphi + 2*np.pi, dphi)
        dR = torch.sqrt(deta**2 + dphi**2)
        dx = features[src, 3] - features[dst, 3]
        dy = features[src, 4] - features[dst, 4]
        dz = features[src, 5] - features[dst, 5]
        g.edata['features'] = torch.stack([deta, dphi, dR, dx, dy, dz], dim=1)
        return g
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
root_gnn_base/similarity.py DELETED
@@ -1,158 +0,0 @@
1
- import numpy as np
2
- import scipy
3
- from sklearn.decomposition import PCA
4
- from sklearn.metrics.pairwise import cosine_similarity
5
- from sklearn.metrics.pairwise import euclidean_distances
6
- from sklearn.preprocessing import StandardScaler
7
-
8
- from scipy.stats import wasserstein_distance
9
-
10
def cka(rep_a, rep_b, size=None):
    """Linear Centered Kernel Alignment (CKA) between two representation
    matrices.

    Parameters
    ----------
    rep_a, rep_b : np.ndarray
        Representations of shape (n_samples, n_features_a/b).
    size : int, optional
        If given and smaller than n_samples, CKA is computed on that many
        randomly chosen rows (same rows from both matrices).

    Returns
    -------
    float
        CKA similarity in [0, 1]; 0 if either centered Gram matrix is zero.
    """
    n_samples = rep_a.shape[0]
    if size is not None and size < n_samples:
        picked = np.random.choice(n_samples, size, replace=False)
        rep_a = rep_a[picked]
        rep_b = rep_b[picked]

    def _centered_gram(x):
        """Linear-kernel Gram matrix, doubly centered (H K H with H = I - 1/n)."""
        gram = x @ x.T
        n = gram.shape[0]
        centering = np.eye(n) - np.ones((n, n)) / n
        return centering @ gram @ centering

    gram_a = _centered_gram(rep_a)
    gram_b = _centered_gram(rep_b)

    hsic = np.sum(gram_a * gram_b)
    norm = np.sqrt(np.sum(gram_a**2) * np.sum(gram_b**2))
    return hsic / norm if norm != 0 else 0
58
-
59
def cca(X, Y, size = None, num_components=10):
    """
    Perform Canonical Correlation Analysis (CCA) between two datasets and
    return the mean canonical correlation over the leading components.

    Parameters:
    X : np.ndarray
        First dataset, shape (n_samples, n_features_X).
    Y : np.ndarray
        Second dataset, shape (n_samples, n_features_Y).
    size : int, optional
        Number of randomly sampled rows to use; None uses all samples.
    num_components : int
        Number of leading canonical correlations to average.

    Returns:
    float
        Mean of the first `num_components` canonical correlations,
        each clipped to [0, 1].
    """

    # If sample size is specified, randomly sample a subset of the data.
    if size is not None and size < X.shape[0]:
        indices = np.random.choice(X.shape[0], size, replace=False)
        X = X[indices]
        Y = Y[indices]

    # Standardize both datasets (mean = 0, variance = 1).
    scaler_X = StandardScaler()
    scaler_Y = StandardScaler()
    X = scaler_X.fit_transform(X)
    Y = scaler_Y.fit_transform(Y)

    # Covariance matrices.
    C_XX = np.cov(X, rowvar=False)
    C_YY = np.cov(Y, rowvar=False)
    # Cross-covariance block of the joint covariance.
    C_XY = np.cov(X, Y, rowvar=False)[:X.shape[1], X.shape[1]:]

    # Regularization term to avoid singular matrices.
    reg = 1e-6
    inv_C_XX = np.linalg.inv(C_XX + reg * np.eye(C_XX.shape[0]))
    inv_C_YY = np.linalg.inv(C_YY + reg * np.eye(C_YY.shape[0]))

    # Generalized eigenvalue problems for the two sides of the CCA.
    A = inv_C_XX @ C_XY @ inv_C_YY @ C_XY.T
    B = inv_C_YY @ C_XY.T @ inv_C_XX @ C_XY

    # Eigen-decomposition; eigenvalues are squared canonical correlations.
    eigvals_X, eigvecs_X = np.linalg.eigh(A)
    eigvals_Y, eigvecs_Y = np.linalg.eigh(B)

    # Sort in descending order so the leading components come first.
    idx_X = np.argsort(eigvals_X)[::-1]
    idx_Y = np.argsort(eigvals_Y)[::-1]
    eigvecs_X = eigvecs_X[:, idx_X]
    eigvecs_Y = eigvecs_Y[:, idx_Y]

    # Canonical correlations (sqrt of eigenvalues, constrained to [0, 1]).
    corrs = np.sqrt(np.clip(eigvals_X[:num_components], 0, 1))

    # Fixed: removed the unreachable `return w_X, w_Y, corrs` that followed
    # this statement; the function has always returned the mean correlation.
    return np.mean(corrs)
128
-
129
def pca(X, Y, size=1000, n_components=3, bins=30):
    """Similarity score in [0, 1] between two representations, based on the
    Wasserstein distance between histograms of their leading PCA components.

    Parameters:
    X, Y : np.ndarray of shape (n_samples, n_features)
    size : int, optional
        Number of randomly sampled rows to use (None uses all samples).
        Fixed: this parameter was previously accepted but ignored; it now
        subsamples like the sibling `cka`/`cca` helpers.
    n_components : int
        Number of PCA components compared.
    bins : int
        Number of shared histogram bins.
    """
    # Step 0: optional row subsampling (same rows from both matrices).
    if size is not None and size < X.shape[0]:
        indices = np.random.choice(X.shape[0], size, replace=False)
        X = X[indices]
        Y = Y[indices]

    # Step 1: project each dataset onto its own leading components.
    pca_X = PCA(n_components=n_components)
    X_pca = pca_X.fit_transform(X)
    pca_Y = PCA(n_components=n_components)
    Y_pca = pca_Y.fit_transform(Y)

    # Step 2: common bin edges spanning both projections.
    min_value = min(X_pca.min(), Y_pca.min())
    max_value = max(X_pca.max(), Y_pca.max())
    bin_edges = np.linspace(min_value, max_value, bins + 1)

    # Step 3: per-component normalized histograms on the shared bins.
    histograms_X = [np.histogram(X_pca[:, i], bins=bin_edges, density=True)[0] for i in range(n_components)]
    histograms_Y = [np.histogram(Y_pca[:, i], bins=bin_edges, density=True)[0] for i in range(n_components)]

    # Step 4: sum the Wasserstein distances between corresponding histograms.
    total_distance = sum(
        wasserstein_distance(histograms_X[i], histograms_Y[i])
        for i in range(n_components)
    )

    # Step 5: normalize into a similarity score.
    # NOTE(review): max_distance = 1.0 is a placeholder normalization; replace
    # with a dataset-appropriate maximum if scores saturate at 0.
    max_distance = 1.0
    similarity_score = 1 - (total_distance / max_distance)
    return max(0, min(1, similarity_score))  # clamp to [0, 1]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
root_gnn_base/uproot_dataset.py DELETED
@@ -1,54 +0,0 @@
1
- from root_gnn_base import dataset
2
- import torch
3
- import uproot
4
- import glob
5
- import os
6
- import awkward as ak
7
- import numpy as np
8
- import time
9
-
10
def node_features_from_ak(ch, node_branch_names, node_branch_types, node_feature_scales):
    """Assemble scaled per-node feature arrays from an awkward-array chain.

    node_branch_names is indexed [feature][node_type]: each entry is either a
    branch name (str), a constant (int/float), or one of the sentinels
    'CALC_E' / 'NODE_TYPE' applying to the whole feature row.
    Returns an awkward array; per the inline note its axis order at the end
    is (feature, event, node).
    """
    node_types = []
    # Number of node types is taken from the first feature's entry list.
    n_types = len(node_branch_names[0])
    for i in range(n_types):
        features = []
        branch_type = node_branch_types[i]
        for j in range(len(node_branch_names)):
            if node_branch_names[j] == 'CALC_E':
                # Derived energy: assumes feature 0 is pt and feature 1 is eta
                # (E = pt * cosh(eta)) — TODO confirm column convention.
                features.append(features[0] * np.cosh(features[1]))
            elif node_branch_names[j] == 'NODE_TYPE':
                # Constant node-type code for this object type.
                features.append(ak.full_like(features[0], i))
            elif isinstance(node_branch_names[j][i], str):
                # Read the named branch from the chain.
                features.append(ch[node_branch_names[j][i]])
            elif isinstance(node_branch_names[j][i], (int, float)):
                # Broadcast a constant to the shape of the first feature.
                features.append(ak.full_like(features[0], node_branch_names[j][i]))
        if branch_type == 'single':
            # Scalar-per-event branches get a singleton node axis.
            features = [f[:,np.newaxis] for f in features]
        node_types.append(ak.Array(features))
    node_features = ak.concatenate(node_types, axis=2) * node_feature_scales #axis order at this point is (feature, event, node)
    return node_features
30
-
31
class UprootDataset(dataset.RootDataset):
    """RootDataset variant that loads events with uproot/awkward instead of a
    ROOT TChain, building one fully connected graph per event."""

    def process(self):
        """Load all matching files, build per-event graphs with node features,
        and assemble the (label, fold) tensor; prints stage timings."""
        starttime = time.time()
        self.files = glob.glob(os.path.join(self.raw_dir, self.file_names))
        branches = self.get_list_of_branches()
        # Concatenate the named tree across all files into one awkward record.
        self.chain = uproot.concatenate([f + ':' + self.tree_name for f in self.files], branches, num_workers=4)
        node_features = node_features_from_ak(self.chain, self.node_branch_names, self.node_branch_types, self.node_feature_scales)
        loadtime = time.time()
        n_nodes = ak.num(node_features[0], axis=1) #number of nodes for each event
        ftime = time.time()
        self.graphs = [dataset.full_connected_graph(n, False) for n in n_nodes]
        itime = time.time()
        for i in range(len(self.graphs)):
            if i % 10000 == 0:
                print(f'Processing event {i}/{len(self.graphs)}')
            # Transpose (feature, node) -> (node, feature) for this event.
            self.graphs[i].ndata['features'] = torch.transpose(torch.tensor(node_features[:,i,:]),0,1).to(torch.float)
        # NOTE(review): self.label is read on the right-hand side and then
        # overwritten with a (n_events, 2) tensor of (class label, fold index)
        # — the scalar class label must be set before process() runs; confirm.
        self.label = torch.stack([torch.full((len(self.graphs),),torch.tensor(self.label)), torch.tensor(ak.values_astype(self.chain[self.fold_var], np.int64))], dim=1)
        gtime = time.time()
        print()
        print(f'load time: {loadtime - starttime} s')
        print(f'feature time: {ftime - loadtime} s')
        print(f'graph time: {itime - ftime} s')
        print(f'graph data time: {gtime - itime} s')
54
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
root_gnn_base/utils.py DELETED
@@ -1,307 +0,0 @@
1
- import importlib
2
- import yaml
3
- import os
4
- import torch
5
- import numpy as np
6
- import matplotlib.pyplot as plt
7
- import dgl
8
- import signal
9
-
10
def buildFromConfig(conf, run_time_args=None):
    """Instantiate ``conf['class']`` from ``conf['module']`` with
    ``conf['args']`` merged with ``run_time_args``.

    Returns the new instance, or None (after printing a warning) when the
    config names no module.
    """
    # Fixed: mutable default argument ({}) replaced with a None sentinel.
    if run_time_args is None:
        run_time_args = {}
    if 'module' not in conf:
        print('No module specified in config. Returning None.')
        return None
    module = importlib.import_module(conf['module'])
    cls = getattr(module, conf['class'])
    return cls(**conf['args'], **run_time_args)
17
-
18
def cycler(iterable):
    """Endlessly re-iterate *iterable*, restarting from the start each pass.

    The source is re-iterated rather than cached, so it should be a
    re-iterable container (a one-shot iterator would be exhausted after the
    first pass).
    """
    while True:
        yield from iterable
23
-
24
def include_config(conf):
    """Merge every YAML file listed under conf['include'] into conf, in place.

    The 'include' key is removed afterwards. Later includes overwrite keys
    from earlier ones (plain dict.update semantics, no recursion).
    """
    if 'include' not in conf:
        return
    for path in conf['include']:
        with open(path) as fh:
            conf.update(yaml.load(fh, Loader=yaml.FullLoader))
    del conf['include']
30
-
31
def load_config(config_file):
    """Load a YAML config file and resolve its 'include' directives."""
    with open(config_file) as fh:
        conf = yaml.load(fh, Loader=yaml.FullLoader)
    include_config(conf)
    return conf
36
-
37
- #Timeout function from https://stackoverflow.com/questions/492519/timeout-on-a-function-call
38
#Timeout function from https://stackoverflow.com/questions/492519/timeout-on-a-function-call
class TimeoutException(Exception):
    """Raised by timeout_handler when a SIGALRM fires before the guarded call finishes."""
    pass
40
-
41
def timeout_handler(signum, frame):
    """SIGALRM handler: abort the current call by raising TimeoutException."""
    raise TimeoutException()
43
-
44
def set_timeout(timeout):
    """Arm a SIGALRM that raises TimeoutException after `timeout` seconds.

    Installs timeout_handler as the process-wide SIGALRM handler; pair with
    unset_timeout() to cancel and restore the default handler.
    """
    signal.signal(signal.SIGALRM, timeout_handler)
    signal.alarm(timeout)
47
-
48
def unset_timeout():
    """Cancel any pending alarm and restore the default SIGALRM handler."""
    signal.alarm(0)
    signal.signal(signal.SIGALRM, signal.SIG_DFL)
51
-
52
def make_padding_graph(batch, pad_nodes, pad_edges):
    """Append a zero-featured padding graph of `pad_nodes` nodes and
    `pad_edges` edges to `batch`, mirroring its ndata/edata keys.

    Edges are laid out deterministically (sender i*pad_nodes..., receiver
    shifted by one so edges are not self-loops); if the requested sizes are
    inconsistent, edges are repeated as needed with a warning.
    """
    # NOTE(review): these two empty-list assignments are dead code — both
    # names are immediately reassigned below.
    senders = []
    receivers = []
    senders = torch.arange(0,pad_edges) // pad_nodes
    receivers = torch.arange(1,pad_edges+1) % pad_nodes
    if pad_nodes < 0 or pad_edges < 0 or pad_edges > pad_nodes * pad_nodes / 2:
        print('Batch is larger than padding size or e > n^2/2. Repeating edges as necessary.')
        print(f'Batch nodes: {batch.num_nodes()}, Batch edges: {batch.num_edges()}, Padding nodes: {pad_nodes}, Padding edges: {pad_edges}')
        # Wrap sender indices back into range so the graph stays valid.
        senders = senders % pad_nodes
    padg = dgl.graph((senders[:pad_edges], receivers[:pad_edges]), num_nodes = pad_nodes)
    # Zero-fill every node/edge feature the real batch carries.
    for k in batch.ndata.keys():
        padg.ndata[k] = torch.zeros( (pad_nodes, batch.ndata[k].shape[1]) )
    for k in batch.edata.keys():
        padg.edata[k] = torch.zeros( (pad_edges, batch.edata[k].shape[1]) )
    return dgl.batch([batch, padg.to(batch.device)])
67
-
68
def pad_size(graphs, edges, nodes, edge_per_graph=3, node_per_graph=14):
    """Round `nodes`/`edges` up to the next multiple of the per-batch step.

    The step is (per-graph count) * (graphs per batch); counts already on a
    multiple are still bumped to the next one.
    """
    node_step = node_per_graph * graphs
    edge_step = edge_per_graph * graphs
    pad_nodes = (nodes // node_step + 1) * node_step
    pad_edges = (edges // edge_step + 1) * edge_step
    return pad_nodes, pad_edges
72
-
73
def pad_batch_to_step_per_graph(batch, edge_per_graph=3, node_per_graph=14):
    """Pad `batch` by a node/edge count derived from the per-graph step size."""
    # Number of graphs in the batch.
    n_graphs = batch.batch_num_nodes().shape[0]
    # NOTE(review): (x + step) % step reduces to x % step, so this pads by the
    # current remainder rather than up to the next multiple; padding to a
    # multiple would be (-x) % step. Confirm which behavior is intended.
    pad_nodes = (batch.num_nodes() + node_per_graph * n_graphs) % int(n_graphs * node_per_graph)
    pad_edges = (batch.num_edges() + edge_per_graph * n_graphs) % int(n_graphs * edge_per_graph)
    return make_padding_graph(batch, pad_nodes, pad_edges)
78
-
79
def pad_batch(batch, edges = 104000, nodes = 16000):
    """Pad `batch` up to fixed total node/edge counts via make_padding_graph.

    Passing edges == nodes == 0 disables padding and returns the batch as-is.
    """
    if edges == 0 and nodes == 0:
        return batch
    # Fixed: removed the dead `pad_nodes = 0` / `pad_edges = 0` assignments
    # that were immediately overwritten.
    pad_nodes = nodes - batch.num_nodes()
    pad_edges = edges - batch.num_edges()
    return make_padding_graph(batch, pad_nodes, pad_edges)
87
-
88
def pad_batch_num_nodes(batch, max_num_nodes, hid_size = 64):
    """Pad every graph in `batch` to exactly `max_num_nodes` nodes.

    Added nodes are isolated and zero-featured. Also attaches per-node data:
    'padding_mask' (bool, True for all-zero feature rows) and 'w'
    (hid_size-wide weights, zeroed for padding nodes).
    """
    print(f"Padding each graph to have {max_num_nodes} nodes")

    unbatched = dgl.unbatch(batch)
    for g in unbatched:
        num_nodes_to_add = max_num_nodes - g.number_of_nodes()
        if num_nodes_to_add > 0:
            g.add_nodes(num_nodes_to_add)  # added nodes get zero-filled features

    batch = dgl.batch(unbatched)

    feats = batch.ndata['features']
    # A node counts as padding iff its feature row is entirely zero — same
    # criterion as before, but vectorized instead of a Python loop over rows.
    # (A genuine all-zero physics node is also flagged, as previously.)
    padding_mask = torch.count_nonzero(feats, dim=1) == 0
    global_update_weights = torch.ones((feats.shape[0], hid_size))
    global_update_weights[padding_mask] = 0

    batch.ndata['w'] = global_update_weights
    batch.ndata['padding_mask'] = padding_mask

    return batch
111
-
112
-
113
def fold_selection(fold_config, sample):
    """Build a predicate selecting events belonging to the configured fold(s).

    fold_config['n_folds'] is the total fold count; fold_config[sample] is
    either a single fold index (int) or a list of fold indices. The returned
    callable maps a dataset-like object with a `tracking` tensor (fold key in
    column 0) to a boolean mask.

    Raises ValueError for any other fold-option type.
    """
    n_folds = fold_config['n_folds']
    folds_opt = fold_config[sample]
    # Note: type() checks kept (not isinstance) so bool is still rejected.
    # Fixed: removed the unused `folds = []` local and debug prints.
    if type(folds_opt) == int:
        return lambda x: x.tracking[:, 0] % n_folds == folds_opt
    elif type(folds_opt) == list:
        # Event passes when its fold matches exactly one of the listed folds.
        return lambda x: sum(x.tracking[:, 0] % n_folds == f for f in folds_opt) == 1
    else:
        raise ValueError("Invalid fold selection option with type {}".format(type(folds_opt)))
126
-
127
def fold_selection_name(fold_config, sample):
    """Return a short string tag identifying a fold selection,
    e.g. 'n_5_f_2' or 'n_5_f_0_3'."""
    n_folds = fold_config['n_folds']
    folds_opt = fold_config[sample]
    if type(folds_opt) == int:
        return f'n_{n_folds}_f_{folds_opt}'
    if type(folds_opt) == list:
        fold_tag = '_'.join(str(f) for f in folds_opt)
        return f'n_{n_folds}_f_{fold_tag}'
    raise ValueError(f'Invalid fold selection option with type {type(folds_opt)}')
136
-
137
- #Return the index and checkpoint of the last epoch.
138
#Return the index and checkpoint of the last epoch.
def get_last_epoch(config, max_ep = -1, device = None):
    """Scan for consecutive `model_epoch_{ep}.pt` files and load the last one.

    Returns (last_epoch, checkpoint); (-1, None) when no checkpoint exists.
    """
    last_epoch = -1
    checkpoint = None
    if max_ep < 0:
        max_ep = config['Training']['epochs']
    for ep in range(max_ep):
        ckpt_path = os.path.join(config['Training_Directory'], f'model_epoch_{ep}.pt')
        if not os.path.exists(ckpt_path):
            print(f'Epoch {ep} not found. Stopping at epoch {last_epoch}')
            print('File not found: ', ckpt_path)
            break
        last_epoch = ep
    if last_epoch >= 0:
        checkpoint = torch.load(os.path.join(config['Training_Directory'], f'model_epoch_{last_epoch}.pt'), map_location=device)
    return last_epoch, checkpoint
153
-
154
- #Return the index and checkpoint of the last epoch.
155
#Return the index and checkpoint of the last epoch.
def get_specific_epoch(config, target_epoch, device = None, from_ryan = False):
    """Find the last consecutive checkpoint up to `target_epoch` and load it.

    When `from_ryan` is set, checkpoints are read from the shared CFS
    directory instead of the local training directory.
    Returns (last_epoch, checkpoint); (-1, None) when nothing is found.
    """
    # Fixed: the from_ryan/local branches duplicated the whole scan loop;
    # collapse them into a single path prefix.
    prefix = '/global/cfs/cdirs/atlas/berobert/root_gnn_dgl/' if from_ryan else ''
    last_epoch = -1
    checkpoint = None
    for ep in range(target_epoch + 1):
        path = os.path.join(prefix + config['Training_Directory'], f'model_epoch_{ep}.pt')
        if os.path.exists(path):
            last_epoch = ep
        else:
            print(f'Epoch {ep} not found. Stopping at epoch {last_epoch}')
            print('File not found: ', path)
            break
    if last_epoch >= 0:
        # Preserve the original load-path construction exactly (the from_ryan
        # branch concatenates the prefix before the join).
        if from_ryan:
            load_path = prefix + os.path.join(config['Training_Directory'], f'model_epoch_{last_epoch}.pt')
        else:
            load_path = os.path.join(config['Training_Directory'], f'model_epoch_{last_epoch}.pt')
        checkpoint = torch.load(load_path, map_location=device)
    return last_epoch, checkpoint
179
-
180
- #Convert training logs into dict for plotting.
181
#Convert training logs into dict for plotting.
def read_log(config):
    """Parse `<Training_Directory>/training.log` into {label: np.ndarray}.

    Only lines containing 'Epoch' are kept; each line is pipe-delimited into
    `label value` fields, with labels taken from the first kept line.
    """
    with open(config['Training_Directory'] + '/training.log', 'r') as fh:
        epoch_lines = [ln for ln in fh.readlines() if 'Epoch' in ln]
    labels = [field.split()[0] for field in epoch_lines[0].split('|')]
    log = {label: np.zeros(len(epoch_lines)) for label in labels}
    for i, line in enumerate(epoch_lines):
        for field in line.split('|'):
            tokens = field.split()
            log[tokens[0]][i] = float(tokens[1])
    return log
196
-
197
- #Plot training logs.
198
#Plot training logs.
def plot_log(log, output_file):
    """Render a 2x2 summary figure (cumulative time, losses, accuracy, AUC)
    from a log dict produced by read_log, and save it to `output_file`."""
    fig, ax = plt.subplots(2, 2, figsize=(10,10))
    #Time

    # Per-epoch times are cumulated into total wall-clock time.
    ax[0][0].plot(log['Epoch'], np.cumsum(log['Time']), label='Time')
    ax[0][0].set_xlabel('Epoch')
    ax[0][0].set_ylabel('Time (s)')
    ax[0][0].legend()

    # Disabled alternative panel: learning-rate curve in the same slot.
    """
    ax[0][0].plot(log['Epoch'], log['LR'], label='Learning Rate')
    ax[0][0].set_xlabel('Epoch')
    ax[0][0].set_ylabel('Learning Rate')
    ax[0][0].set_yscale('log')
    ax[0][0].legend()
    """

    #Loss
    ax[0][1].plot(log['Epoch'], log['Loss'], label='Train Loss')
    ax[0][1].plot(log['Epoch'], log['Test_Loss'], label='Test Loss')
    ax[0][1].set_xlabel('Epoch')
    ax[0][1].set_ylabel('Loss')
    ax[0][1].legend()

    #Accuracy
    ax[1][0].plot(log['Epoch'], log['Accuracy'], label='Test Accuracy')
    ax[1][0].set_xlabel('Epoch')
    ax[1][0].set_ylabel('Accuracy')
    # NOTE(review): hard-coded y-range clips accuracies outside [0.44, 0.56].
    ax[1][0].set_ylim((0.44, 0.56))
    ax[1][0].legend()

    #AUC
    ax[1][1].plot(log['Epoch'], log['Test_AUC'], label='Test AUC')
    ax[1][1].set_xlabel('Epoch')
    ax[1][1].set_ylabel('AUC')
    ax[1][1].legend()

    fig.savefig(output_file)
236
-
237
class EarlyStop():
    """Track a validation metric across epochs and flag when it stalls.

    After `patience` consecutive updates without an improvement of more than
    `threshold` (minimizing when mode='min', maximizing when mode='max'),
    `should_stop` becomes True.
    """

    def __init__(self, patience=15, threshold=1e-8, mode='min'):
        self.patience = patience
        self.threshold = threshold
        self.mode = mode
        self.count = 0
        self.current_best = np.inf if mode == 'min' else -np.inf
        self.should_stop = False

    def update(self, value):
        """Record a new metric value and update the stall counter."""
        if self.mode == 'min':  # minimizing a loss
            if value < self.current_best - self.threshold:
                self.current_best, self.count = value, 0
            else:
                self.count += 1
        elif self.mode == 'max':  # maximizing a metric
            if value > self.current_best + self.threshold:
                self.current_best, self.count = value, 0
            else:
                self.count += 1
        # Trip the stop flag once patience is exhausted.
        if self.count >= self.patience:
            self.should_stop = True

    def reset(self):
        """Clear the counter, best value, and stop flag."""
        self.count = 0
        self.current_best = np.inf if self.mode == 'min' else -np.inf
        self.should_stop = False

    def to_str(self):
        """Human-readable multi-line status summary."""
        mode_name = 'Minimize' if self.mode == 'min' else 'Maximize'
        lines = [
            "EarlyStop Status:",
            f"  Mode: {mode_name}",
            f"  Patience: {self.patience}",
            f"  Threshold: {self.threshold:.3e}",
            f"  Current Best: {self.current_best:.6f}",
            f"  Consecutive Epochs Without Improvement: {self.count}",
            f"  Stopping Triggered: {'Yes' if self.should_stop else 'No'}",
        ]
        return "\n".join(lines)

    def to_dict(self):
        """Serializable snapshot of the full state (for checkpointing)."""
        return {
            'patience': self.patience,
            'threshold': self.threshold,
            'mode': self.mode,
            'count': self.count,
            'current_best': self.current_best,
            'should_stop': self.should_stop,
        }

    @classmethod
    def load_from_dict(cls, state_dict):
        """Rebuild an instance from a to_dict() snapshot."""
        instance = cls(
            patience=state_dict['patience'],
            threshold=state_dict['threshold'],
            mode=state_dict['mode'],
        )
        instance.count = state_dict['count']
        instance.current_best = state_dict['current_best']
        instance.should_stop = state_dict['should_stop']
        return instance
303
-
304
-
305
def graph_augmentation(graph):
    """Placeholder augmentation hook: currently only logs and returns None.

    The `graph` argument is accepted but not yet used.
    """
    print("Augmenting Graph")
    return
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
scripts/find_free_port.py DELETED
@@ -1,12 +0,0 @@
1
# find_free_port.py
def find_free_port():
    """Bind an ephemeral TCP port, release it, and return its number as a string."""
    import socket
    from contextlib import closing

    with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as sock:
        sock.bind(('', 0))
        # Allow immediate reuse so the caller can bind the reported port.
        sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        return str(sock.getsockname()[1])

if __name__ == "__main__":
    print(find_free_port())
 
 
 
 
 
 
 
 
 
 
 
 
 
scripts/inference.py DELETED
@@ -1,289 +0,0 @@
1
- import sys
2
- import os
3
- file_path = os.getcwd()
4
- sys.path.append(file_path)
5
-
6
- import argparse
7
- import yaml
8
-
9
- import torch
10
- import dgl
11
- from dgl.data import DGLDataset
12
- from dgl.dataloading import GraphDataLoader
13
- from torch.utils.data import SubsetRandomSampler, SequentialSampler
14
-
15
-
16
- def my_error_handler(level, abort, location, msg):
17
- # Log the error message to a file instead of printing
18
- with open("error_log.txt", "a") as log_file:
19
- log_file.write(f"Error in {location}: {msg}\n")
20
-
21
- # Optionally, print the error message to the console
22
- # print(f"Error in {location}: {msg}")
23
-
24
- # Decide whether to abort based on the error level
25
- if abort:
26
- raise RuntimeError(f"Fatal error in {location}: {msg}")
27
-
28
- class CustomPreBatchedDataset(DGLDataset):
29
- def __init__(self, start_dataset, batch_size, mask_fn=None, drop_last=False, shuffle=False, **kwargs):
30
- self.start_dataset = start_dataset
31
- self.batch_size = batch_size
32
- self.mask_fn = mask_fn or (lambda x: torch.ones(len(x), dtype=torch.bool))
33
- self.drop_last = drop_last
34
- self.shuffle = shuffle
35
- super().__init__(name=start_dataset.name + '_custom_prebatched', save_dir=start_dataset.save_dir)
36
-
37
- def process(self):
38
- mask = self.mask_fn(self.start_dataset)
39
- indices = torch.arange(len(self.start_dataset))[mask]
40
- print(f"Number of elements after masking: {len(indices)}") # Debugging print
41
-
42
- if self.shuffle:
43
- sampler = SubsetRandomSampler(indices)
44
- else:
45
- sampler = SequentialSampler(indices)
46
-
47
- self.dataloader = GraphDataLoader(
48
- self.start_dataset,
49
- sampler=sampler,
50
- batch_size=self.batch_size,
51
- drop_last=self.drop_last
52
- )
53
- print(f"Batch size set in DataLoader: {self.batch_size}") # Debugging print
54
-
55
- def __getitem__(self, idx):
56
- if isinstance(idx, int):
57
- idx = [idx]
58
- sampler = SequentialSampler(idx)
59
- dloader = GraphDataLoader(self.start_dataset, sampler=sampler, batch_size=self.batch_size, drop_last=False)
60
- return next(iter(dloader))
61
-
62
- def __len__(self):
63
- return len(self.start_dataset)
64
-
65
- def include_config(conf):
66
- if 'include' in conf:
67
- for i in conf['include']:
68
- with open(i) as f:
69
- conf.update(yaml.load(f, Loader=yaml.FullLoader))
70
- del conf['include']
71
-
72
- def load_config(config_file):
73
- with open(config_file) as f:
74
- conf = yaml.load(f, Loader=yaml.FullLoader)
75
- include_config(conf)
76
- return conf
77
-
78
- def main():
79
- parser = argparse.ArgumentParser()
80
- add_arg = parser.add_argument
81
- add_arg('--config', type=str, required=True)
82
- add_arg('--target', type=str, required=True)
83
- add_arg('--destination', type=str, default='')
84
- add_arg('--chunkno', type=int, default=0)
85
- add_arg('--chunks', type=int, default=1)
86
- add_arg('--write', action='store_true')
87
- add_arg('--ckpt', type=int, default=-1)
88
- add_arg('--clobber', action='store_true')
89
- add_arg('--tree', type=str, default='')
90
- add_arg('--branch_name', type=str, default='score')
91
- args = parser.parse_args()
92
-
93
- config = load_config(args.config)
94
- if args.destination == '':
95
- args.destination = os.path.join(config['Training_Directory'], 'inference/', os.path.split(args.target)[1])
96
- else:
97
- args.destination = args.destination
98
- if not args.write:
99
- args.destination = args.destination.replace('.root', '') + f'_chunk{args.chunkno}.npz'
100
-
101
- if os.path.exists(args.destination):
102
- print(f'File {args.destination} already exists.')
103
- if args.clobber:
104
- print('Clobbering.')
105
- else:
106
- print('Exiting.')
107
- return
108
- else:
109
- print(f'Writing to {args.destination}')
110
-
111
- import time
112
- start = time.time()
113
- import ROOT
114
- import torch
115
- from array import array
116
- import numpy as np
117
- from root_gnn_base import batched_dataset as dataset
118
- from root_gnn_base import utils
119
- end = time.time()
120
- print('Imports finished in {:.2f} seconds'.format(end - start))
121
-
122
- start = time.time()
123
- dset_config = config['Datasets'][list(config['Datasets'].keys())[0]]
124
- if dset_config['class'] == 'LazyDataset':
125
- dset_config['class'] = 'EdgeDataset'
126
- elif dset_config['class'] == 'LazyMultiLabelDataset':
127
- dset_config['class'] = 'MultiLabelDataset'
128
- elif dset_config['class'] == 'PhotonIDDataset':
129
- dset_config['class'] = 'UnlazyPhotonIDDataset'
130
- elif dset_config['class'] == 'kNNDataset':
131
- dset_config['class'] = 'UnlazyKNNDataset'
132
- dset_config['args']['raw_dir'] = os.path.split(args.target)[0]
133
- dset_config['args']['file_names'] = os.path.split(args.target)[1]
134
- dset_config['args']['save'] = False
135
- dset_config['args']['chunks'] = args.chunks
136
- dset_config['args']['process_chunks'] = [args.chunkno,]
137
- dset_config['args']['selections'] = []
138
-
139
- dset_config['args']['save_dir'] = os.path.dirname(args.destination)
140
-
141
- if args.tree != '':
142
- dset_config['args']['tree_name'] = args.tree
143
-
144
- device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
145
-
146
- dstart = time.time()
147
- dset = utils.buildFromConfig(dset_config)
148
- dend = time.time()
149
- print('Dataset finished in {:.2f} seconds'.format(dend - dstart))
150
-
151
- print(dset)
152
-
153
- batch_size = config['Training']['batch_size']
154
- lstart = time.time()
155
- loader = CustomPreBatchedDataset(dset, batch_size)
156
- loader.process()
157
- # loader = dataset.PreBatchedDataset(dset, batch_size, shuffle=False, drop_last=False, save_to_disk=False, chunks = 1, num_workers=0)
158
- lend = time.time()
159
- print('Loader finished in {:.2f} seconds'.format(lend - lstart))
160
- sample_graph, _, _, global_sample = loader[0]
161
-
162
- print('dset length =', len(dset))
163
- print('loader length =', len(loader))
164
-
165
- model = utils.buildFromConfig(config['Model'], {'sample_graph' : sample_graph, 'sample_global': global_sample}).to(device)
166
- if args.ckpt < 0:
167
- ep, checkpoint = utils.get_last_epoch(config, args.ckpt, device=device)
168
- else:
169
- ep, checkpoint = utils.get_specific_epoch(config, args.ckpt, device=device)
170
- #Bad filler for models which were compiled. Have to remove this prefix.
171
- mds_copy = {}
172
- for key in checkpoint['model_state_dict'].keys():
173
- newkey = key.replace('module.', '')
174
- newkey = newkey.replace('_orig_mod.', '')
175
- mds_copy[newkey] = checkpoint['model_state_dict'][key]
176
- model.load_state_dict(mds_copy)
177
- model.eval()
178
-
179
- end = time.time()
180
- print('Model and dataset finished in {:.2f} seconds'.format(end - start))
181
- print('Starting inference')
182
- start = time.time()
183
-
184
- finish_fn = torch.nn.Sigmoid()
185
- if 'Loss' in config:
186
- finish_fn = utils.buildFromConfig(config['Loss']['finish'])
187
-
188
- scores = []
189
- labels = []
190
- tracking_info = []
191
- ibatch = 0
192
-
193
- for batch, label, track, globals in loader.dataloader:
194
- batch = batch.to(device)
195
- pred = model(batch, globals.to(device))
196
- ibatch += 1
197
- # scores.append(finish_fn(pred).detach().cpu().numpy())
198
- if (finish_fn.__class__.__name__ == "ContrastiveClusterFinish"):
199
- scores.append(pred.detach().cpu().numpy())
200
- else:
201
- scores.append(finish_fn(pred).detach().cpu().numpy())
202
- labels.append(label.detach().cpu().numpy())
203
- tracking_info.append(track.detach().cpu().numpy())
204
-
205
- # for batch, label, track, globals in loader:
206
- # batch = batch.to(device)
207
- # pred = model(batch, globals.to(device))
208
- # print(f'Batch size: {batch.batch_size if hasattr(batch, "batch_size") else "Unavailable"}')
209
- # print(f'Prediction shape: {pred.shape}')
210
- # ibatch += 1
211
- # scores.append(finish_fn(pred).detach().cpu().numpy())
212
- # labels.append(label.detach().cpu().numpy())
213
- # tracking_info.append(track.detach().cpu().numpy())
214
- # exit()
215
-
216
- score_size = scores[0].shape[1]
217
- scores = np.concatenate(scores)
218
- labels = np.concatenate(labels)
219
- tracking_info = np.concatenate(tracking_info)
220
- end = time.time()
221
-
222
- print('Inference finished in {:.2f} seconds'.format(end - start))
223
-
224
- if args.write:
225
- # ROOT.SetErrorHandler(my_error_handler)
226
- ROOT.gErrorIgnoreLevel = ROOT.kFatal
227
- # ROOT.gSystem.RedirectOutput("/dev/null", "w")
228
-
229
- # Open the original ROOT file
230
- infile = ROOT.TFile.Open(args.target)
231
- tree = infile.Get(dset_config['args']['tree_name'])
232
-
233
- # Create the destination directory if it doesn't exist
234
- os.makedirs(os.path.split(args.destination)[0], exist_ok=True)
235
-
236
- # Create a new ROOT file to write the modified tree
237
- outfile = ROOT.TFile.Open(args.destination, 'RECREATE')
238
-
239
- # Clone the original tree, including data
240
- outtree = tree.CloneTree(0) # Clone all entries
241
-
242
- # Determine if scores is a list of single values or vectors
243
- from ROOT import std
244
- if isinstance(scores[0], (list, tuple, np.ndarray)): # Check if scores contains vectors
245
- # Create a new branch for scores as a vector of floats
246
- scores_branch_vec = std.vector('float')()
247
- outtree.Branch(args.branch_name, scores_branch_vec)
248
- is_vector = True
249
- else: # Scores contains single values
250
- # Create a new branch for scores as a single float
251
- score_branch_arr = array('f', [0])
252
- outtree.Branch(args.branch_name, score_branch_arr, f'{args.branch_name}/F')
253
- is_vector = False
254
-
255
- # Write scores to the new branch
256
- print(f'Writing {len(scores)} scores to tree')
257
-
258
- for i in range(tree.GetEntries()):
259
- tree.GetEntry(i)
260
-
261
- if is_vector:
262
- # Clear the vector
263
- scores_branch_vec.clear()
264
-
265
- # Add all elements from scores[i] to the vector
266
- for value in scores[i]:
267
- scores_branch_vec.push_back(float(value)) # Use push_back to add elements one by one
268
- else:
269
- # Fill the score branch with the current single score
270
- score_branch_arr[0] = float(scores[i]) # Ensure the value is a float
271
-
272
- # Fill the output tree with all branches, including the new scores branch
273
- outtree.Fill()
274
-
275
- # Write the modified tree to the new file
276
- print(f'Writing to file {args.destination}')
277
- print(f'Input entries: {tree.GetEntries()}, Output entries: {outtree.GetEntries()}')
278
- outtree.Write()
279
- outfile.Close()
280
- infile.Close()
281
- else:
282
- os.makedirs(os.path.split(args.destination)[0], exist_ok=True)
283
- np.savez(args.destination, scores=scores, labels=labels, tracking_info=tracking_info)
284
-
285
- if __name__ == '__main__':
286
- main()
287
-
288
-
289
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
scripts/prep_data.py DELETED
@@ -1,43 +0,0 @@
1
- import sys
2
- import os
3
- file_path = os.getcwd()
4
- sys.path.append(file_path)
5
-
6
- import root_gnn_base.utils as utils
7
- import argparse
8
- from root_gnn_base.batched_dataset import PreBatchedDataset
9
- from root_gnn_base.batched_dataset import LazyPreBatchedDataset
10
-
11
- def main():
12
- parser = argparse.ArgumentParser()
13
- add_arg = parser.add_argument
14
- add_arg('--config', type=str, required=True)
15
- add_arg('--dataset', type=str, required=True)
16
- add_arg('--chunk', type=int, default=0)
17
- add_arg('--shuffle_mode', action='store_true', help='Shuffle the dataset before training.')
18
- args = parser.parse_args()
19
-
20
- config = utils.load_config(args.config)
21
- dset_config = config['Datasets'][args.dataset]
22
- batch_size = config['Training']['batch_size']
23
- if not args.shuffle_mode:
24
- dset = utils.buildFromConfig(dset_config, {'process_chunks': [args.chunk,]})
25
- else:
26
- dset = utils.buildFromConfig(dset_config)
27
- if 'batch_size' in dset_config:
28
- batch_size = dset_config['batch_size']
29
-
30
- shuffle_chunks = dset_config.get('shuffle_chunks', 10)
31
- padding_mode = dset_config.get('padding_mode', 'STEPS')
32
- fold_conf = dset_config["folding"]
33
- print(f"shuffle_chunks = {shuffle_chunks}, args.chunk = {args.chunk}, padding_mode = {padding_mode}")
34
- if dset_config["class"] == "LazyMultiLabelDataset":
35
- LazyPreBatchedDataset(start_dataset = dset, batch_size = batch_size, mask_fn = utils.fold_selection(fold_conf, "train"), suffix = utils.fold_selection_name(fold_conf, "train"), chunks = shuffle_chunks, chunkno = args.chunk, padding_mode = padding_mode)
36
- LazyPreBatchedDataset(start_dataset = dset, batch_size = batch_size, mask_fn = utils.fold_selection(fold_conf, "test"), suffix = utils.fold_selection_name(fold_conf, 'test'), chunks = shuffle_chunks, chunkno = args.chunk, padding_mode = padding_mode)
37
-
38
- else:
39
- PreBatchedDataset(dset, batch_size, utils.fold_selection(fold_conf, "train"), suffix = utils.fold_selection_name(fold_conf, "train"), chunks = shuffle_chunks, chunkno = args.chunk, padding_mode = padding_mode)
40
- PreBatchedDataset(dset, batch_size, utils.fold_selection(fold_conf, "test"), suffix = utils.fold_selection_name(fold_conf, 'test'), chunks = shuffle_chunks, chunkno = args.chunk, padding_mode = padding_mode)
41
-
42
- if __name__ == "__main__":
43
- main()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
scripts/training_script.py DELETED
@@ -1,755 +0,0 @@
1
- import argparse
2
- import time
3
- import datetime
4
- import yaml
5
- import os
6
-
7
- start_time = time.time()
8
-
9
- import dgl
10
- import torch
11
- import torch.nn as nn
12
-
13
- import sys
14
- file_path = os.getcwd()
15
- sys.path.append(file_path)
16
-
17
- import root_gnn_base.batched_dataset as datasets
18
- from root_gnn_base import utils
19
- import root_gnn_base.custom_scheduler as lr_utils
20
- from models import GCN
21
-
22
- import numpy as np
23
- from sklearn.metrics import roc_auc_score
24
- import resource
25
- import gc
26
-
27
- import torch.distributed as dist
28
- import torch.multiprocessing as mp
29
- from torch.utils.data.distributed import DistributedSampler
30
- from torch.nn.parallel import DistributedDataParallel as DDP
31
-
32
- print("import time: {:.4f} s".format(time.time() - start_time))
33
-
34
- def mem():
35
- print(f'Current memory usage: {resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1024 / 1024} GB')
36
-
37
- def gpu_mem():
38
- print()
39
- print('GPU Memory Usage:')
40
- sum = 0
41
- # for obj in gc.get_objects():
42
- # try:
43
- # if torch.is_tensor(obj) or (hasattr(obj, 'data') and torch.is_tensor(obj.data)):
44
- # print(obj.numel() if len(obj.size()) > 0 else 0, type(obj), obj.size())
45
- # sum += obj.numel() if len(obj.size()) > 0 else 0
46
- # except:
47
- # pass
48
- print(f'Current GPU memory usage: {torch.cuda.memory_allocated() / 1024 / 1024 / 1024} GB')
49
- print(f'Current GPU cache usage: {torch.cuda.memory_cached() / 1024 / 1024 / 1024} GB')
50
- print(f'Current GPU max memory usage: {torch.cuda.max_memory_allocated() / 1024 / 1024 / 1024} GB')
51
- print(f'Current GPU max cache usage: {torch.cuda.max_memory_cached() / 1024 / 1024 / 1024} GB')
52
- print(f'Numel in current tensors: {sum}')
53
- mem()
54
-
55
-
56
- ## epoch stores the epoch number I want to evaluate the model at
57
- def evaluate(val_loaders, model, config, device, epoch = -1):
58
- print("Evaluating")
59
-
60
- if (epoch != -1) :
61
- print(f"Evalulating at epoch {epoch}")
62
- last_ep, checkpoint = utils.get_specific_epoch(config, epoch, from_ryan=False)
63
- print(f"Evaluating at epoch = {last_ep}")
64
- else:
65
- starting_epoch = 0
66
- last_ep, checkpoint = utils.get_last_epoch(config)
67
-
68
- if checkpoint != None:
69
- ep = last_ep
70
- state_dict = checkpoint['model_state_dict']
71
- new_state_dict = {}
72
- for k, v in state_dict.items():
73
- new_key = k.replace('module.', '')
74
- new_state_dict[new_key] = v
75
- model.load_state_dict(new_state_dict)
76
- starting_epoch = checkpoint['epoch'] + 1
77
- print(f"Loaded epoch {checkpoint['epoch']} from checkpoint")
78
-
79
- if 'Loss' not in config:
80
- loss_fcn = nn.BCEWithLogitsLoss()
81
- else:
82
- loss_fcn = utils.buildFromConfig(config['Loss'])
83
- if len(val_loaders) == 0:
84
- return "No validation data"
85
- start = time.time()
86
- scores = []
87
- labels = []
88
- weights = []
89
- before_decoder = []
90
- after_decoder = []
91
- tracking = []
92
-
93
- batch_size = config["Training"]["batch_size"]
94
-
95
- batch_limit = int(np.ceil(1e5 / batch_size))
96
-
97
- model.eval()
98
- with torch.no_grad():
99
- for loader in val_loaders:
100
- batch_count = 0
101
- for batch, label, track, global_feats in loader:
102
- #Don't use compiled model for testing since we can't control the batch size.
103
- #We could before, but it assumes each dataset has the same number of batches...
104
- before_global_decoder, after_global_decoder, after_classify = model.representation(batch.to(device), global_feats.to(device))
105
-
106
- scores.append(after_classify.to("cpu"))
107
- before_decoder.append(before_global_decoder.to("cpu"))
108
- after_decoder.append(after_global_decoder.to("cpu"))
109
- labels.append(label.to("cpu"))
110
- weights.append(track[:,1].to("cpu"))
111
- tracking.append(track.to("cpu"))
112
-
113
- batch_count += 1
114
- if batch_count >= batch_limit:
115
- break
116
-
117
- if scores == []: #If validation set is empty.
118
- return
119
- logits = torch.concatenate(scores)
120
- scores = torch.sigmoid(logits)
121
- labels = torch.concatenate(labels)
122
- weights = torch.concatenate(weights)
123
- before_decoder = torch.concatenate(before_decoder)
124
- after_decoder = torch.concatenate(after_decoder)
125
- tracking = torch.concatenate(tracking)
126
-
127
- logits = logits.to("cpu").numpy()
128
- scores = scores.to("cpu").numpy()
129
- labels = labels.to("cpu").numpy()
130
- before_decoder = before_decoder.to("cpu").numpy()
131
- after_decoder = after_decoder.to("cpu").numpy()
132
- tracking = tracking.to("cpu").numpy()
133
-
134
- # Save the NumPy arrays to a .npz file
135
- outfile = f"{config['Training_Directory']}/evaluation_{epoch}.npz"
136
-
137
- np.savez(outfile, logits=logits, scores=scores, labels=labels, before_decoder=before_decoder, after_decoder=after_decoder, tracking=tracking)
138
-
139
- print(f"saved scores to {outfile}")
140
- return
141
-
142
-
143
- def train(train_loaders, test_loaders, model, device, config, args, rank):
144
- nocompile = args.nocompile
145
- restart = args.restart
146
- # define train/val samples, loss function and optimizer
147
- if 'Loss' not in config:
148
- loss_fcn = nn.BCEWithLogitsLoss()
149
- finish_fn = torch.nn.Sigmoid()
150
- else:
151
- loss_fcn = utils.buildFromConfig(config['Loss'])
152
- finish_fn = utils.buildFromConfig(config['Loss']['finish'])
153
-
154
- optimizer = torch.optim.Adam(model.parameters(), lr=config['Training']['learning_rate'])
155
- if 'gamma' in config['Training']:
156
- gamma = config['Training']['gamma']
157
- else:
158
- gamma = 1
159
-
160
- if 'dynamic_lr' in config['Training']:
161
- factor = config['Training']['dynamic_lr']['factor']
162
- patience = config['Training']['dynamic_lr']['patience']
163
- else:
164
- factor = 1
165
- patience = 1
166
-
167
- early_termination = utils.EarlyStop()
168
- if 'early_termination' in config['Training']:
169
- early_termination.patience = config['Training']['early_termination']['patience']
170
- early_termination.threshold = config['Training']['early_termination']['threshold']
171
- early_termination.mode = config['Training']['early_termination']['mode']
172
-
173
- scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma = gamma)
174
- #scheduler_reset = custom_scheduler.Dynamic_LR(optimizer, 'max', factor = factor, patience = patience)
175
- custom_scheduler = None
176
- if ('custom_scheduler' in config['Training']):
177
- run_time_args = {}
178
- scheduler_class = config['Training']['custom_scheduler']['class']
179
- if (scheduler_class == 'Dynamic_LR' or
180
- scheduler_class == 'Dynamic_LR_AND_Partial_Reset' or
181
- scheduler_class == 'Dynamic_LR_AND_Full_Reset'):
182
-
183
- run_time_args={'optimizer': optimizer}
184
-
185
- custom_scheduler = utils.buildFromConfig(config['Training']['custom_scheduler'], run_time_args=run_time_args)
186
-
187
- starting_epoch = 0
188
- if not restart:
189
- last_ep, checkpoint = utils.get_last_epoch(config)
190
- if checkpoint != None:
191
- ep = starting_epoch - 1
192
- if nocompile:
193
- new_state_dict = {}
194
- for k, v in checkpoint['model_state_dict'].items():
195
- new_key = k.replace('module.', '')
196
- new_state_dict[new_key] = v
197
- checkpoint['model_state_dict'] = new_state_dict
198
- if (args.multinode or args.multigpu):
199
- new_state_dict = {}
200
- for k, v in checkpoint['model_state_dict'].items():
201
- new_key = 'module.' + k
202
- new_state_dict[new_key] = v
203
- checkpoint['model_state_dict'] = new_state_dict
204
- model.load_state_dict(checkpoint['model_state_dict'])
205
- else:
206
- model._orig_mod.load_state_dict(checkpoint['model_state_dict'])
207
- optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
208
- starting_epoch = checkpoint['epoch'] + 1
209
- if 'early_stop' in checkpoint:
210
- early_termination = utils.EarlyStop.load_from_dict(checkpoint['early_stop'])
211
- print(early_termination.to_str())
212
- print("EarlyStop state restored successfully.")
213
- if early_termination.should_stop:
214
- print(f"Early Termination at Epoch {epoch}")
215
- return
216
- else:
217
- print("'early_stop' not found in checkpoint. Initializing a new EarlyStop instance.")
218
- early_termination = utils.EarlyStop()
219
- print(f"Loaded epoch {checkpoint['epoch']} from checkpoint")
220
- log = open(config['Training_Directory'] + '/training.log', 'a', buffering=1)
221
- else:
222
- log = open(config['Training_Directory'] + '/training.log', 'w', buffering=1)
223
-
224
- train_cyclers = []
225
- for loader in train_loaders:
226
- train_cyclers.append(utils.cycler((loader)))
227
-
228
- if args.savecache:
229
- max_batch = [None,] * len(train_loaders)
230
- for dset_i, loader in enumerate(train_loaders):
231
- mbs = 0
232
- for batch_i, batch in enumerate(loader):
233
- if batch[0].num_nodes() > mbs:
234
- mbs = batch[0].num_nodes()
235
- max_batch[dset_i] = batch[0]
236
- print(f'Max batch size for dataset {dset_i}: {mbs}')
237
- big_batch = dgl.batch(max_batch).to(device)
238
- with torch.no_grad():
239
- model(big_batch)
240
-
241
- cumulative_times = [0,0,0,0,0]
242
- log.write(f'Training {config["Training_Name"]} {datetime.datetime.now()} \n')
243
- print(f"Starting training for {config['Training']['epochs']} epochs")
244
-
245
- if hasattr(train_loaders[0].dataset, 'padding_mode'):
246
- is_padded = train_loaders[0].dataset.padding_mode != 'NONE'
247
- if (train_loaders[0].dataset.padding_mode == 'NODE'):
248
- is_padded = False
249
- else:
250
- is_padded = False
251
-
252
- lr_utils.print_LR(optimizer)
253
-
254
- # torch.save({
255
- # 'epoch': 0,
256
- # 'model_state_dict': model.state_dict(),
257
- # 'optimizer_state_dict': optimizer.state_dict(),
258
- # }, os.path.join(config['Training_Directory'], f"model_epoch_{0}.pt"))
259
- # exit()
260
-
261
-
262
- # training loop
263
- # gpu_mem()
264
- for epoch in range(starting_epoch, config['Training']['epochs']):
265
- start = time.time()
266
- run = start
267
- if (args.multigpu or args.multinode):
268
- dist.barrier()
269
- if (epoch == 2):
270
- # torch.cuda.cudart().cudaProfilerStart()
271
- pass
272
-
273
- # training
274
- model.train()
275
- ibatch = 0
276
- total_loss = 0
277
- for batched_graph, labels, _, global_feats in train_loaders[0]:
278
- # # need to fix padded case
279
- # if is_padded:
280
- # tglobals.append(torch.zeros(1, len(global_feats[0])))
281
-
282
- batch_start = time.time()
283
- logits = torch.tensor([])
284
- tlabels = torch.tensor([])
285
- batch_lengths = []
286
- for cycler in train_cyclers:
287
- graph, label, _, global_feats = next(cycler)
288
- graph = graph.to(device)
289
- label = label.to(device)
290
- global_feats = global_feats.to(device)
291
- if is_padded: #Padding the globals to match padded graphs.
292
- global_feats = torch.concatenate((global_feats, torch.zeros(1, len(global_feats[0])).to(device)))
293
- load = time.time()
294
- if (len(logits) == 0):
295
- logits = model(graph, global_feats)
296
- tlabels = label
297
- else:
298
- logits = torch.concatenate((logits, model(graph, global_feats)), dim=0)
299
- tlabels = torch.concatenate((tlabels, label), dim=0)
300
- batch_lengths.append(logits.shape[0] - 1)
301
-
302
- if is_padded:
303
- keepmask = torch.full_like(logits[:,0], True, dtype=torch.bool)
304
- keepmask[batch_lengths] = False
305
- logits = logits[keepmask]
306
- tlabels = tlabels.to(torch.float)
307
- if logits.shape[1] == 1 and loss_fcn.__class__.__name__ == 'BCEWithLogitsLoss':
308
- logits = logits[:,0]
309
- tlabels = tlabels.to(torch.float)
310
- if loss_fcn.__class__.__name__ == 'CrossEntropyLoss':
311
- tlabels = tlabels.to(torch.long)
312
- loss = loss_fcn(logits, tlabels.to(device)) # changed logits from logits[:,0] and left labels as int for multiclass. Does this break binary? Yes.
313
- optimizer.zero_grad()
314
- loss.backward()
315
- optimizer.step()
316
- total_loss += loss.detach().cpu().item()
317
- ibatch += 1
318
- cumulative_times[0] += batch_start - run
319
- cumulative_times[1] += load - batch_start
320
- run = time.time()
321
- cumulative_times[2] += run - load
322
- if ibatch % 1000 == 0:
323
- print(f'Batch {ibatch} out of {len(train_loaders[0])}', end='\r')
324
- # gpu_mem()
325
-
326
- if (args.multigpu):
327
- print(f'Rank {rank} Epoch Done.')
328
- elif (args.multinode):
329
- print(f'Rank {args.global_rank} Epoch Done.')
330
- else:
331
- print("Epoch Done.")
332
- # validation
333
-
334
- scores = []
335
- labels = []
336
- weights = []
337
- model.eval()
338
- with torch.no_grad():
339
- for loader in test_loaders:
340
- for batch, label, track, global_feats in loader:
341
- #Don't use compiled model for testing since we can't control the batch size.
342
- #We could before, but it assumes each dataset has the same number of batches...
343
- if is_padded:
344
- global_feats = torch.cat([global_feats, torch.zeros(1, len(global_feats[0]))])
345
- if nocompile:
346
- batch_scores = model(batch.to(device), global_feats.to(device))
347
- else:
348
- batch_scores = model._orig_mod(batch.to(device), global_feats.to(device))
349
- if is_padded:
350
- scores.append(batch_scores[:-1,:])
351
- else:
352
- scores.append(batch_scores)
353
- labels.append(label)
354
- weights.append(track[:,1])
355
- eval_end = time.time()
356
- cumulative_times[3] += eval_end - run
357
-
358
- if scores == []: #If validation set is empty.
359
- continue
360
- logits = torch.concatenate(scores).to(device)
361
- labels = torch.concatenate(labels).to(device)
362
- weights = torch.concatenate(weights).to(device)
363
-
364
- if (args.multigpu or args.multinode):
365
- gathered_logits = [torch.zeros_like(logits) for _ in range(dist.get_world_size())]
366
- gathered_labels = [torch.zeros_like(labels) for _ in range(dist.get_world_size())]
367
- gathered_weights = [torch.zeros_like(weights) for _ in range(dist.get_world_size())]
368
-
369
- if (args.multigpu or args.multinode):
370
- dist.barrier()
371
- if (args.multigpu and rank != 0) or (args.multinode and args.global_rank != 0):
372
- dist.gather(logits, dst=0)
373
- dist.gather(labels, dst=0)
374
- dist.gather(weights, dst=0)
375
- continue
376
- else:
377
- dist.gather(logits, gather_list=gathered_logits)
378
- dist.gather(labels, gather_list=gathered_labels)
379
- dist.gather(weights, gather_list=gathered_weights)
380
-
381
- logits = torch.concatenate(gathered_logits)
382
- labels = torch.concatenate(gathered_labels)
383
- weights = torch.concatenate(gathered_weights)
384
-
385
- wgt_mask = weights > 0
386
-
387
- print(f"Num batches trained = {ibatch}")
388
-
389
- #Note: This section is a bit ugly. Very conditional. Should maybe config defined behavior?
390
- if (loss_fcn.__class__.__name__ == "ContrastiveClusterLoss"):
391
- scores = logits
392
- preds = scores
393
- accuracy = 0
394
- test_auc = 0
395
- acc = 0
396
- contrastive_cluster_loss = finish_fn(logits)
397
-
398
- elif (loss_fcn.__class__.__name__ == "MultiLabelLoss"):
399
- scores = finish_fn(logits)
400
- preds = torch.round(scores)
401
- multilabel_accuracy = []
402
- threshold = 0.1 # 10% threshold
403
-
404
- for i in range(len(labels[0])):
405
- # accurate_count = torch.sum(torch.abs(preds[:, i].to("cpu") - labels[:, i].to("cpu")) / labels[:, i].to("cpu") <= threshold)
406
- # multilabel_accruacy.append(accurate_count / len(labels))
407
- multilabel_accuracy.append(torch.sum(preds[:, i].to("cpu") == labels[:, i].to("cpu")) / len(labels))
408
- test_auc = 0
409
- acc = np.mean(multilabel_accuracy)
410
-
411
- elif logits.shape[1] == 1 and loss_fcn.__class__.__name__ == 'BCEWithLogitsLoss': #Proxy for binary classification.
412
- test_auc = 0
413
- acc = 0
414
- logits = logits[:,0]
415
- scores = finish_fn(logits)
416
- labels =labels.to(torch.float)
417
- preds = scores > 0.5
418
- test_auc = roc_auc_score(labels[wgt_mask].to("cpu") == 1, scores[wgt_mask].to("cpu"), sample_weight=weights[wgt_mask].to("cpu"))
419
- acc = torch.sum(preds.to("cpu") == labels.to("cpu")) / len(labels)
420
-
421
- elif logits.shape[1] == 1 and loss_fcn.__class__.__name__ == 'MSELoss':
422
- logits = logits[:,0]
423
- scores = finish_fn(logits)
424
- labels = labels.to(torch.float)
425
- acc = 0
426
- test_auc = 0
427
-
428
- else:
429
- preds = torch.argmax(logits, dim=1)
430
- scores = finish_fn(logits)
431
- if labels.dim() == 1: #Multi-class
432
- acc = torch.sum(preds.to("cpu") == labels.to("cpu")) / len(labels) #TODO: Make each class weighted equally?
433
-
434
- labels = labels.to("cpu")
435
- weights = weights.to("cpu")
436
- logits = logits.to("cpu")
437
- wgt_mask = wgt_mask.to("cpu")
438
-
439
- labels_onehot = np.zeros((len(labels), len(scores[0])))
440
- labels_onehot[np.arange(len(labels)), labels] = 1
441
-
442
- try:
443
- #test_auc = roc_auc_score(labels[wgt_mask].to("cpu") == 1, scores[wgt_mask].to("cpu"), multi_class='ovr', sample_weight=weights[wgt_mask].to("cpu"))
444
- test_auc = roc_auc_score(labels_onehot[wgt_mask], scores[wgt_mask].to("cpu"), multi_class='ovr', sample_weight=weights[wgt_mask].to("cpu"))
445
- except ValueError:
446
- test_auc = np.nan
447
- else: #Multi-loss
448
- acc = torch.sum(preds.to("cpu") == labels[:,0].to("cpu")) / len(labels)
449
- try:
450
- test_auc = roc_auc_score(labels[:,0][wgt_mask].to("cpu") == 1, scores[wgt_mask].to("cpu"), multi_class='ovr', sample_weight=weights[wgt_mask].to("cpu"))
451
- except ValueError:
452
- test_auc = np.nan
453
-
454
-
455
- # print(f"logits = {logits[:10]}")
456
- # print(f"preds = {preds[:2]}")
457
- # print(f"labels = {labels[:10]}")
458
-
459
- # print(f"len(Unique logits) = {len(torch.unique(logits))}")
460
- # print(f"Average of labels = {torch.mean(labels)}")
461
- # print(f"unique logits = {torch.unique(logits)[0]:.4f}, {torch.unique(logits)[-1]:.4f}")
462
-
463
-
464
- if (loss_fcn.__class__.__name__ == "MultiLabelLoss"):
465
- multilabel_log_str = "MultiLabel_Accuracy "
466
- for accuracy in multilabel_accuracy:
467
- multilabel_log_str += f" | {accuracy:.4f}"
468
- log.write(multilabel_log_str + '\n')
469
- print(multilabel_log_str, flush=True)
470
- elif (loss_fcn.__class__.__name__ == "ContrastiveClusterLoss"):
471
- contrastive_cluster_log_str = "ContrastiveClusterLoss "
472
- contrastive_cluster_log_str += f"Contrastive Loss: {contrastive_cluster_loss[0]:.4f}, Clustering Loss: {contrastive_cluster_loss[1]:.4f}, Variance Loss: {contrastive_cluster_loss[2]:.4f}"
473
- log.write(contrastive_cluster_log_str + '\n')
474
- print(contrastive_cluster_log_str, flush=True)
475
-
476
- # test_loss = loss_fcn(logits, labels.to(device))
477
- test_loss = loss_fcn(logits, labels)
478
- end = time.time()
479
- log_str = "Epoch {:05d} | LR {:.4e} | Loss {:.4f} | Accuracy {:.4f} | Test_Loss {:.4f} | Test_AUC {:.4f} | Time {:.4f} s".format(
480
- epoch, optimizer.param_groups[0]['lr'], total_loss/ibatch, acc, test_loss, test_auc, end - start
481
- )
482
- log.write(log_str + '\n')
483
- print(log_str, flush=True)
484
-
485
- state_dict = model.state_dict()
486
- if not nocompile:
487
- state_dict = model._orig_mod.state_dict()
488
-
489
- new_state_dict = {}
490
- for k, v in state_dict.items():
491
- new_key = k.replace('module.', '')
492
- new_state_dict[new_key] = v
493
- state_dict = new_state_dict
494
-
495
- # print('Testing done')
496
- # gpu_mem()
497
-
498
- if epoch == 2:
499
- # torch.cuda.cudart().cudaProfilerStop()
500
- pass
501
-
502
- torch.save({
503
- 'epoch': epoch,
504
- 'model_state_dict': state_dict,
505
- 'optimizer_state_dict': optimizer.state_dict(),
506
- 'early_stop': early_termination.to_dict()
507
- }, os.path.join(config['Training_Directory'], f"model_epoch_{epoch}.pt"))
508
- np.savez(os.path.join(config['Training_Directory'], f'model_epoch_{epoch}.npz'), scores=scores.to("cpu"), labels=labels.to("cpu"))
509
- save_end = time.time()
510
- cumulative_times[4] += save_end - eval_end
511
-
512
- early_termination.update(test_loss)
513
- if early_termination.should_stop:
514
- log_str = f"Early Termination at Epoch {epoch}"
515
- log.write(log_str + "\n")
516
- print(log_str)
517
- log_str = early_termination.to_str()
518
- log.write(log_str + "\n")
519
- print(log_str)
520
- break
521
-
522
- if (custom_scheduler):
523
- custom_scheduler.step(model, {'test_auc':test_auc})
524
- scheduler.step()
525
-
526
- print(f"Load: {cumulative_times[0]:.4f} s")
527
- print(f"Batch: {cumulative_times[1]:.4f} s")
528
- print(f"Train: {cumulative_times[2]:.4f} s")
529
- print(f"Eval: {cumulative_times[3]:.4f} s")
530
- print(f"Save: {cumulative_times[4]:.4f} s")
531
- log.close()
532
-
533
def find_free_port():
    """Return an OS-assigned free TCP port number as a string.

    Binds an ephemeral socket to port 0 so the kernel picks an unused port,
    reads the chosen port back, and releases the socket.

    BUGFIX: SO_REUSEADDR is now set *before* bind() — setting it after bind,
    as the original did, has no effect on the address selection.

    NOTE(review): there is an inherent TOCTOU race — the port is free at
    return time but could be grabbed by another process before the caller
    (the DDP rendezvous) binds it.
    """
    import socket
    from contextlib import closing

    with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as s:
        # Must precede bind(): REUSEADDR influences bind-time address reuse.
        s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        s.bind(('', 0))
        return str(s.getsockname()[1])
541
-
542
def init_process_group(world_size, rank, port):
    """Join the NCCL process group via an env:// rendezvous on localhost.

    Parameters
    ----------
    world_size : total number of participating processes.
    rank : this process's rank within the group.
    port : TCP port (as a string) the master listens on.
    """
    # init_method='env://' reads the rendezvous endpoint from these variables.
    os.environ['MASTER_ADDR'] = 'localhost'
    os.environ['MASTER_PORT'] = port

    dist.init_process_group(
        backend="nccl",  # NCCL for GPU collectives (gloo would be the CPU fallback)
        init_method='env://',
        rank=rank,
        world_size=world_size,
        # Generous join timeout so slow-starting ranks don't abort the group.
        timeout=datetime.timedelta(seconds=300),
    )
554
-
555
def main(rank=0, args=None, world_size=1, port=24500, seed=12345):
    """Entry point for one training process.

    Loads the YAML config, builds train/test/validation loaders for every
    dataset in the config, constructs the model (optionally ``torch.compile``d
    and/or DDP-wrapped), and runs training — or plotting / evaluation when the
    corresponding CLI flags are set.

    Parameters
    ----------
    rank : local process rank (GPU index) for distributed runs.
    args : parsed CLI namespace (see the argparse setup under ``__main__``).
    world_size : total number of distributed processes.
    port : rendezvous port for single-node multi-GPU runs.
    seed : seed forwarded to the model builder.
    """
    # Load config file
    config = utils.load_config(args.config)

    if (args.directory):
        print(f"New training directory: { config['Training_Directory'] + args.directory}")
        config['Training_Directory'] = config['Training_Directory'] + args.directory

    if not os.path.exists(config['Training_Directory']):
        os.makedirs(config['Training_Directory'], exist_ok=True)
    # Snapshot the effective config next to the training outputs.
    with open(config['Training_Directory'] + '/config.yaml', 'w') as f:
        yaml.dump(config, f)
    batch_size = config["Training"]["batch_size"]

    if(args.plot):
        # Plot-only mode: render the existing training log and exit.
        rl = utils.read_log(config)
        utils.plot_log(rl, config['Training_Directory'] + '/training.png')
        print('Log at ' + config['Training_Directory'] + '/training.log')
        print('Plotted at ' + config['Training_Directory'] + '/training.png')
        exit()

    if (args.multigpu):
        print(f"Setting up multigpu")
        start_time = time.time()
        init_process_group(world_size, rank, port)
        print("multigpu setup time: {:.4f} s".format(time.time() - start_time))
        device = torch.device(f'cuda:{rank}')
        # BUGFIX: bare torch.cuda.device(...) is a context manager and a
        # no-op when not used in a `with`; set_device actually selects the GPU.
        torch.cuda.set_device(device)
    elif (args.multinode):
        device = torch.device(f'cuda:{rank}')
        torch.cuda.set_device(device)  # BUGFIX: was a no-op torch.cuda.device(...)
        print(f"global rank = {args.global_rank}, local rank = {rank}, device = {device}")
    else:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    if (args.cpu):
        print(f"Using CPU")
        device = "cpu"

    train_loaders = []
    test_loaders = []
    val_loaders = []
    load_start = time.time()

    # Allow TF32 matmuls for speed on Ampere+ GPUs.
    torch.backends.cuda.matmul.allow_tf32 = True

    ldr_type = datasets.LazyPreBatchedDataset if args.lazy else datasets.PreBatchedDataset

    # Load datasets.
    # BUGFIX: use `args` (the namespace passed in), not the module-global
    # `pargs` — that global is not defined in children spawned by mp.spawn,
    # so the original crashed with NameError under --multigpu.
    if (args.statistics):
        args.statistics = int(args.statistics)
        print(f"Training Dataset Size: {args.statistics}")
        num_batches = int(np.ceil(args.statistics / batch_size))
        np.random.seed(args.seed)

    for dset_conf in config["Datasets"]:
        dset = utils.buildFromConfig(config["Datasets"][dset_conf])
        if 'batch_size' in config["Datasets"][dset_conf]:
            # Per-dataset override of the global batch size.
            batch_size = config["Datasets"][dset_conf]['batch_size']
        fold_conf = config["Datasets"][dset_conf]["folding"]
        shuffle_chunks = config["Datasets"][dset_conf].get("shuffle_chunks", 10)
        padding_mode = config["Datasets"][dset_conf].get("padding_mode", "STEPS")
        mask_fn = utils.fold_selection(fold_conf, "train")
        if args.preshuffle:
            ldr = ldr_type(start_dataset=dset, batch_size=batch_size, mask_fn=mask_fn, suffix = utils.fold_selection_name(fold_conf, 'train'), chunks = shuffle_chunks, padding_mode = padding_mode)
            # Sample graph / globals used later to size the model's input layers.
            gsamp, _, _, global_samp = ldr[0]
            sampler = None

            if (args.statistics):
                # Reduced-statistics run: subsample a fixed number of batches.
                sampler = np.random.choice(range(len(ldr)), size=num_batches)

            if (args.multigpu):
                sampler = DistributedSampler(ldr, num_replicas=world_size, rank=rank, shuffle=False, drop_last=True)
            if (args.multinode):
                sampler = DistributedSampler(ldr, num_replicas=world_size, rank=args.global_rank, shuffle=False, drop_last=True)
            # batch_size=None: the dataset already yields pre-batched graphs.
            train_loaders.append(torch.utils.data.DataLoader(ldr, batch_size = None, num_workers = 0, sampler = sampler))
            sampler = None
            ldr = ldr_type(start_dataset=dset, batch_size=batch_size, mask_fn=mask_fn, suffix = utils.fold_selection_name(fold_conf, 'test'), chunks = shuffle_chunks, padding_mode = padding_mode)
            if (args.multigpu):
                sampler = DistributedSampler(ldr, num_replicas=world_size, rank=rank, shuffle=False, drop_last=True)
            if (args.multinode):
                sampler = DistributedSampler(ldr, num_replicas=world_size, rank=args.global_rank, shuffle=False, drop_last=True)

            test_loaders.append(torch.utils.data.DataLoader(ldr, batch_size = None, num_workers = 0, sampler=sampler))

            if "validation" in fold_conf:
                val_loaders.append(torch.utils.data.DataLoader((ldr_type(start_dataset=dset, batch_size=batch_size, mask_fn=utils.fold_selection(fold_conf, "validation"), suffix = utils.fold_selection_name(fold_conf, 'validation'), chunks = shuffle_chunks, padding_mode = padding_mode, rank=rank, world_size=1)), batch_size = None, num_workers = 0, sampler = sampler))
            else:
                print("No validation set for dataset ", dset_conf)
        else:
            train_loaders.append(datasets.GetBatchedLoader(dset, batch_size, utils.fold_selection(fold_conf, "train")))
            gsamp, _, _, global_samp = dset[0]
            test_loaders.append(datasets.GetBatchedLoader(dset, batch_size, utils.fold_selection(fold_conf, "test")))
            if "validation" in fold_conf:
                val_loaders.append(datasets.GetBatchedLoader(dset, batch_size, utils.fold_selection(fold_conf, "validation")))
            else:
                print("No validation set for dataset ", dset_conf)

    load_end = time.time()
    print("Load time: {:.4f} s".format(load_end - load_start))
    model = utils.buildFromConfig(config["Model"], {'sample_graph': gsamp, 'sample_global': global_samp, 'seed': seed}).to(device)
    if not args.nocompile:
        model = torch.compile(model)
    # BUGFIX: the original wrapped in DDP under two independent ifs, which
    # would double-wrap the model if both flags were ever set; wrap once.
    if args.multigpu or args.multinode:
        print(f"Trying to create DDP model")
        start_time = time.time()
        model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[device])
        print("model creation time: {:.4f} s".format(time.time() - start_time))

    # NOTE(review): after torch.compile / DDP wrapping this type check can
    # no longer match; kept as-is to preserve behavior — confirm intent.
    if(type(model) == GCN.Clustering):
        print("clustering")

    if args.evaluate is not None:
        # Evaluation-only mode: run the test loaders at the given epoch and exit.
        evaluate(test_loaders, model, config, device, args.evaluate)
        exit()

    # model training
    print("Training...")
    gpu_mem()
    train(train_loaders, test_loaders, model, device, config, args, rank)
714
-
715
if __name__ == "__main__":
    # Handle CLI arguments
    parser = argparse.ArgumentParser()
    add_arg = parser.add_argument
    add_arg("--config", type=str, help="Config file.", required=True)
    add_arg("--restart", action="store_true", help="Restart training from scratch.")
    add_arg("--preshuffle", action="store_true", help="Shuffle data before training.")
    add_arg("--lazy", action="store_true", help="Lazy loading of data.")
    add_arg("--nocompile", action="store_true", help="Disable JIT compilation.")
    add_arg("--evaluate", type = int, help="Skip training and go to evaluation.")
    add_arg("--plot", action="store_true", help="Plot training logs.")
    add_arg("--multigpu", action="store_true", help="Use multiple GPUs.")
    add_arg("--multinode", action="store_true", help="Use multiple nodes.")
    add_arg("--savecache", action="store_true", help="")
    add_arg("--cpu", action="store_true", help="Uses the cpu only")
    add_arg("--statistics", type=float, help="Size of training data")
    add_arg("--directory", type=str, help="Append to Training Directory")
    add_arg("--seed", type=int, default=2, help="Sets random seed")

    pargs = parser.parse_args()

    # BUGFIX: the three launch modes are mutually exclusive.  The original
    # used two independent `if`s with an `else` bound only to the second, so
    # a --multigpu run fell through after mp.spawn returned and trained a
    # second time, single-process, on the parent.
    if pargs.multigpu:
        # Single-node multi-GPU: one spawned process per GPU, each calling
        # main(rank, pargs, world_size=4, port).
        port = find_free_port()
        torch.backends.cudnn.enabled = False
        mp.spawn(main, args=(pargs, 4, port), nprocs=4, join=True)
    elif pargs.multinode:
        # torchrun-style launch: ranks and world size come from the environment.
        global_rank = int(os.environ["RANK"])
        local_rank = int(os.environ["LOCAL_RANK"])
        world_size = int(os.environ["WORLD_SIZE"])
        print(f"global_rank = {global_rank}, local_rank = {local_rank}, world_size = {world_size}")

        dist.init_process_group(backend="nccl")
        torch.backends.cudnn.enabled = False

        pargs.global_rank = global_rank

        main(rank = local_rank, args=pargs, world_size=world_size)
    else:
        # Plain single-process run.
        main(0, pargs)
754
-
755
-