Hi, I have a GCN layer defined as below. I’m processing a large graph (~300k entities and ~700k edges) and run out of GPU memory. While checking GPU usage line by line, I noticed that the propagate function allocates a large amount of memory that is not freed after returning to the main training loop. I also noticed that a tensor of dimension [#nodes, #edges] was allocated during the call to update_all. Does this tensor really need to be kept alive after the messages have been propagated?
Any suggestions or tips would be appreciated.
class GCNLayer(RGCNLayer):
    """Relation-weighted GCN layer.

    Each edge message is the source node's features projected by a shared
    weight matrix and scaled by a learned per-relation scalar
    (``weight_rel``). Aggregation is a sum over incoming edges, followed by
    multiplication with a per-node normalization stored in ``nodes.data['norm']``.
    """

    def __init__(self, in_feat, out_feat, num_rels, num_bases, bias=True,
                 activation=None, self_loop=False, dropout=0.2, bert=False,
                 bert_trainable=False):
        # BUG FIX: the original called super(WGCNLayer, self).__init__ —
        # a leftover from renaming the class. Zero-argument super() always
        # resolves to the correct class.
        super().__init__(in_feat, out_feat, bias,
                         activation, self_loop=self_loop,
                         dropout=dropout, bert=bert,
                         bert_trainable=bert_trainable)
        self.num_rels = num_rels
        self.in_feat = in_feat
        self.out_feat = out_feat
        # Shared projection applied to every source node's features.
        self.weight = Parameter(torch.FloatTensor(self.in_feat, self.out_feat))
        # One learned scalar per relation type (padding relation 0 fixed at 0).
        self.weight_rel = torch.nn.Embedding(self.num_rels, 1, padding_idx=0)
        self.bn = torch.nn.BatchNorm1d(self.out_feat)
        xavier_normal_(self.weight.data)

    def msg_func(self, edges):
        """Compute per-edge messages from source node features.

        NOTE(review): kept for backward compatibility, but this Python UDF
        is the memory hot spot — DGL must materialize an [E, out_feat]
        message tensor (and do the matmul per edge) when a UDF is used.
        ``propagate`` below avoids it via built-in message functions.
        """
        edge_types = edges.data['type'].squeeze()
        alpha = self.weight_rel(edge_types)           # [E, 1] relation scalars
        node = torch.mm(edges.src['h'], self.weight)  # [E, out_feat]
        msg = alpha.expand_as(node) * node
        return {'msg': msg}

    def propagate(self, g):
        """Propagate messages with DGL built-ins to minimize peak memory.

        Mathematically identical to update_all(msg_func, sum): since the
        projection W is shared across edges, we compute h @ W once per NODE
        (O(N * out_feat)) instead of once per EDGE (O(E * out_feat)), and
        let DGL's fused u_mul_e/sum kernel aggregate without materializing
        the per-edge message tensor that caused the OOM.
        """
        g.ndata['h'] = torch.mm(g.ndata['h'], self.weight)
        # Per-edge relation scalar, shape [E, 1]; broadcasts against h.
        g.edata['w'] = self.weight_rel(g.edata['type'].squeeze()).view(-1, 1)
        g.update_all(fn.u_mul_e('h', 'w', 'msg'),
                     fn.sum(msg='msg', out='h'),
                     self.apply_func)
        # Drop the temporary edge feature so it is freed promptly.
        g.edata.pop('w')

    def apply_func(self, nodes):
        """Apply per-node normalization after aggregation."""
        return {'h': nodes.data['h'] * nodes.data['norm']}