import torch
import torch.nn.functional as F
from torch_geometric.nn import GATv2Conv, global_mean_pool, global_max_pool
from torch.nn import Linear, Dropout

import config


class HeteroGAT(torch.nn.Module):
    """Two-layer GATv2 graph classifier with edge-type features.

    Architecture:
        GATv2Conv(768 -> hidden_dim, 4 heads, concat) -> ELU -> Dropout
        GATv2Conv(hidden_dim * 4 -> hidden_dim, 1 head) -> ELU -> Dropout
        [global max pool || global mean pool] -> Linear -> ReLU -> Dropout -> Linear

    Returns raw logits; the caller is expected to apply softmax if
    probabilities are needed.
    """

    def __init__(self, hidden_dim=256, out_channels=5,
                 num_edge_types=config.NUM_EDGE_TYPES):
        """Build the model.

        Args:
            hidden_dim: Width of each attention head in layer 1 and the
                output width of layer 2.
            out_channels: Number of output classes (logit dimension).
            num_edge_types: Dimensionality of the one-hot/numeric edge
                feature vector, used as ``edge_dim`` for both GAT layers.
                Defaults to ``config.NUM_EDGE_TYPES``.
        """
        super().__init__()

        # Node features are 768-d (e.g. transformer embeddings).
        input_dim = 768
        heads = 4
        edge_dim = num_edge_types

        # Layer 1: multi-head attention with concatenated head outputs,
        # so its output width is hidden_dim * heads.
        self.gat1 = GATv2Conv(
            input_dim,
            hidden_dim,
            heads=heads,
            edge_dim=edge_dim,
            concat=True,
        )

        # Layer 2: single head (concat=False averages, but with one head
        # it simply yields hidden_dim features) — must match the weights
        # produced by training, which used heads=1 here.
        self.gat2 = GATv2Conv(
            hidden_dim * heads,
            hidden_dim,
            heads=1,
            edge_dim=edge_dim,
            concat=False,
        )

        # Classifier head. fc1 takes hidden_dim * 2 because the graph
        # readout concatenates global max pooling and global mean pooling.
        self.fc1 = Linear(hidden_dim * 2, hidden_dim)
        self.fc2 = Linear(hidden_dim, out_channels)
        self.dropout = Dropout(p=0.5)

    def forward(self, x, edge_index, edge_attr, batch):
        """Compute class logits for a batch of graphs.

        Args:
            x: Node feature matrix, shape ``[num_nodes, 768]``.
            edge_index: COO edge indices, shape ``[2, num_edges]``.
            edge_attr: Edge features, shape ``[num_edges, num_edge_types]``.
            batch: Graph-membership vector assigning each node to a graph,
                shape ``[num_nodes]``.

        Returns:
            Raw logits of shape ``[num_graphs, out_channels]``.
        """
        # Layer 1: attention over neighbors conditioned on edge features.
        x = self.gat1(x, edge_index, edge_attr=edge_attr)
        x = F.elu(x)
        # Standard feature dropout (a no-op in eval mode); edge dropout
        # is intentionally not used at inference time.
        x = self.dropout(x)

        # Layer 2.
        x = self.gat2(x, edge_index, edge_attr=edge_attr)
        x = F.elu(x)
        x = self.dropout(x)

        # Graph-level readout: concatenate max- and mean-pooled node states.
        x_max = global_max_pool(x, batch)
        x_mean = global_mean_pool(x, batch)
        x = torch.cat([x_max, x_mean], dim=1)

        # MLP classifier head.
        x = self.fc1(x)
        x = F.relu(x)
        x = self.dropout(x)
        x = self.fc2(x)

        # Raw logits — softmax is applied downstream (e.g. in main.py).
        return x