I am trying to train a GNN model (GAT) for a node regression task, where each node has exactly one input feature and one output feature.
I am using the following GAT model
import torch.nn as nn

"""
GAT: Graph Attention Network
Graph Attention Networks (Veličković et al., ICLR 2018)
https://arxiv.org/abs/1710.10903
"""
from gat_layer import GATLayer
from mlp_readout_layer import MLPReadout


class GATNet(nn.Module):
    def __init__(self, net_params):
        super().__init__()
        in_dim_node = net_params['in_dim']  # node_dim (feat is an integer)
        hidden_dim = net_params['hidden_dim']
        out_dim = net_params['out_dim']
        n_classes = net_params['n_classes']
        num_heads = net_params['n_heads']
        in_feat_dropout = net_params['in_feat_dropout']
        dropout = net_params['dropout']
        n_layers = net_params['L']
        self.readout = net_params['readout']
        self.batch_norm = net_params['batch_norm']
        self.residual = net_params['residual']
        self.dropout = dropout
        self.n_classes = n_classes
        self.device = net_params['device']

        self.embedding_h = nn.Embedding(in_dim_node, hidden_dim * num_heads)  # node feat is an integer
        self.in_feat_dropout = nn.Dropout(in_feat_dropout)
        self.layers = nn.ModuleList([GATLayer(hidden_dim * num_heads, hidden_dim, num_heads,
                                              dropout, self.batch_norm, self.residual) for _ in range(n_layers - 1)])
        self.layers.append(GATLayer(hidden_dim * num_heads, out_dim, 1, dropout, self.batch_norm, self.residual))
        self.MLP_layer = MLPReadout(out_dim, n_classes)

    def forward(self, g, h):
        # input embedding
        h = self.embedding_h(h)
        h = self.in_feat_dropout(h)

        # GAT layers
        for conv in self.layers:
            h = conv(g, h)

        # output
        h_out = self.MLP_layer(h)
        return h_out
The gat_layer module referenced by "from gat_layer import GATLayer":
import torch.nn as nn
import torch.nn.functional as F
from dgl.nn.pytorch import GATConv

"""
GAT: Graph Attention Network
Graph Attention Networks (Veličković et al., ICLR 2018)
https://arxiv.org/abs/1710.10903
"""


class GATLayer(nn.Module):
    """
    Parameters
    ----------
    in_dim :
        Number of input features.
    out_dim :
        Number of output features.
    num_heads : int
        Number of heads in Multi-Head Attention.
    dropout :
        Required for dropout of attn and feat in GATConv.
    batch_norm :
        Boolean flag for the batch_norm layer.
    residual :
        If True, use a residual connection inside this layer. Default: ``False``.
    activation : callable activation function/layer or None, optional
        If not None, applies an activation function to the updated node features.

    Using the DGL builtin GATConv by default:
    https://github.com/graphdeeplearning/benchmarking-gnns/commit/206e888ecc0f8d941c54e061d5dffcc7ae2142fc
    """

    def __init__(self, in_dim, out_dim, num_heads, dropout, batch_norm, residual=False, activation=F.elu):
        super().__init__()
        self.residual = residual
        self.activation = activation
        self.batch_norm = batch_norm

        if in_dim != (out_dim * num_heads):
            self.residual = False

        self.gatconv = GATConv(in_dim, out_dim, num_heads, dropout, dropout)

        if self.batch_norm:
            self.batchnorm_h = nn.BatchNorm1d(out_dim * num_heads)

    def forward(self, g, h):
        h_in = h  # for residual connection

        h = self.gatconv(g, h).flatten(1)

        if self.batch_norm:
            h = self.batchnorm_h(h)

        if self.activation:
            h = self.activation(h)

        if self.residual:
            h = h_in + h  # residual connection

        return h
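For reference, DGL's built-in GATConv returns per-head outputs of shape (N, num_heads, out_feats), which is why the layer flattens them to (N, num_heads * out_feats) before batch norm. A minimal shape check (toy graph and sizes chosen just for illustration):

import torch
import dgl
from dgl.nn.pytorch import GATConv

# Toy graph with 4 nodes in a cycle, so every node has in-degree 1 (illustrative only).
g = dgl.graph(([0, 1, 2, 3], [1, 2, 3, 0]))
feat = torch.randn(4, 8)                  # (N, in_dim)

conv = GATConv(in_feats=8, out_feats=2, num_heads=4)
out = conv(g, feat)                       # (N, num_heads, out_feats)
print(out.shape, out.flatten(1).shape)    # torch.Size([4, 4, 2]) torch.Size([4, 8])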
The mlp_readout_layer module referenced by "from mlp_readout_layer import MLPReadout":
import torch.nn as nn
import torch.nn.functional as F


class MLPReadout(nn.Module):
    def __init__(self, input_dim, output_dim, L=2):  # L = number of hidden layers
        super().__init__()
        list_FC_layers = [nn.Linear(input_dim // 2 ** l, input_dim // 2 ** (l + 1), bias=True) for l in range(L)]
        list_FC_layers.append(nn.Linear(input_dim // 2 ** L, output_dim, bias=True))
        self.FC_layers = nn.ModuleList(list_FC_layers)
        self.L = L

    def forward(self, x):
        y = x
        for l in range(self.L):
            y = self.FC_layers[l](y)
            y = F.relu(y)
        y = self.FC_layers[self.L](y)
        return y
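Since each hidden layer halves the width via integer division (input_dim // 2 ** l), it is worth noting what sizes this readout builds for a given input_dim; with a very small input_dim the hidden widths collapse to zero. A quick illustration (plain Python, assuming the default L=2 from the constructor):

# Illustrative only: the (in, out) sizes of the Linear layers MLPReadout would create.
for in_dim in (8, 4, 1):
    sizes = [(in_dim // 2 ** l, in_dim // 2 ** (l + 1)) for l in range(2)]
    sizes.append((in_dim // 2 ** 2, 1))
    print(in_dim, sizes)
# 8 -> [(8, 4), (4, 2), (2, 1)]
# 4 -> [(4, 2), (2, 1), (1, 1)]
# 1 -> [(1, 0), (0, 0), (0, 1)]   # degenerate: Linear layers with zero-width features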
I defined my own data as DGL graph objects (all sharing the same graph structure, but with different node feature values), and I wrote a function to convert each DGLGraph into tensors that are fed to the data loader:
from torch.utils.data import Dataset, DataLoader


def preprocess_graph_data(graph_data):
    # Extract node features and target values from the DGLGraph
    node_features = graph_data.ndata['x']  # 'x' contains the node features
    target_values = graph_data.ndata['y']  # 'y' contains the target values

    # Ensure data consistency (e.g., data type conversion)
    node_features = node_features.long()
    target_values = target_values.long()

    return node_features, target_values


class GraphDataDataset(Dataset):
    def __init__(self, graph_data_list):
        self.graph_data_list = graph_data_list

    def __len__(self):
        return len(self.graph_data_list)

    def __getitem__(self, idx):
        # Preprocess the graph data for this index
        x, y = preprocess_graph_data(self.graph_data_list[idx])
        return x, y


# Create data loaders
train_dataset = GraphDataDataset(train_data)
train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True)

val_dataset = GraphDataDataset(val_data)
val_loader = DataLoader(val_dataset, batch_size=8, shuffle=True)

test_dataset = GraphDataDataset(test_data)
test_loader = DataLoader(test_dataset, batch_size=8, shuffle=True)
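Note that with this setup the DataLoader only batches the (x, y) tensors; the DGLGraph structure itself never reaches the loader. If the goal is to feed batched graphs to the GAT layers, one common pattern is to merge graphs with dgl.batch in a custom collate_fn. A sketch of that alternative (assuming train_data is a list of DGLGraph objects with 'x' and 'y' in ndata; this is not what my current code does):

import dgl
from torch.utils.data import DataLoader

def collate_graphs(graph_list):
    # Merge several DGLGraphs into one batched graph; ndata is concatenated along the node axis.
    bg = dgl.batch(graph_list)
    return bg, bg.ndata['x'], bg.ndata['y']

# Hypothetical usage: the dataset would return DGLGraph objects directly.
# loader = DataLoader(train_data, batch_size=8, shuffle=True, collate_fn=collate_graphs)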
My training loop is the following
net_params = {
    'in_dim': 1,        # 1 input feature per node
    'hidden_dim': 8,
    'out_dim': 1,       # 1 output feature per node
    'n_classes': 1,
    'n_heads': 4,
    'in_feat_dropout': 0.2,
    'dropout': 0.2,
    'L': 2,
    'readout': 'sum',
    'batch_norm': True,
    'residual': True,
    'device': torch.device('cuda' if torch.cuda.is_available() else 'cpu')
}

model = GATNet(net_params)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
criterion = nn.MSELoss()
epochs = 100

# Training loop
for epoch in range(epochs):
    model.train()
    total_loss = 0.0

    for batch_idx, (graph_data_batch, target_batch) in enumerate(train_loader):
        optimizer.zero_grad()

        # Get the node features and target values for the batch of graph data objects.
        x_batch = graph_data_batch
        y_batch = target_batch

        # Make predictions on the batch of graph data objects.
        pred = model(graph_data_batch, x_batch)
        loss = criterion(pred, y_batch)

        loss.backward()
        optimizer.step()
        total_loss += loss.item()

        # Print the loss.
        print('Epoch {} Batch {}: Loss {}'.format(epoch, batch_idx, loss.item()))

    # Calculate the average training loss
    avg_loss = total_loss / len(train_loader)
I get the following error
    pred = model(graph_data_batch, x_batch)
  File "/Users/torch/nn/functional.py", in embedding
    return torch.embedding(weight, input, padding_idx, scale_grad_by_freq, sparse)
IndexError: index out of range in self
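For what it's worth, this seems to be the error nn.Embedding raises whenever it receives an index outside [0, num_embeddings); with in_dim = 1 the embedding table has a single row, so any node feature cast to a long value other than 0 would trigger it. A minimal reproduction (illustrative only, not my actual data):

import torch
import torch.nn as nn

emb = nn.Embedding(num_embeddings=1, embedding_dim=8)
emb(torch.tensor([0]))   # works: index 0 is the only valid row
emb(torch.tensor([3]))   # IndexError: index out of range in self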
Any ideas how to solve this? I can also share the full code where the exception is triggered if that would help.