@BarclayII
Thanks for the help, that seems to work now. I have another question about how to use this EdgeDataLoader to train.
I have been following the Link Prediction docs and have set up my model like so:
class TestModel(nn.Module):
# here we have a model that first computes the representation and then predicts the scores for the edges
def __init__(self, in_features, hidden_features, out_features, rel_names):
super().__init__()
self.sage = TestRGCN(in_features, hidden_features, out_features, rel_names)
self.pred = HeteroScorePredictor()
def forward(self, g, neg_g, blocks, etype):
h = self.sage(g, blocks)
return self.pred(g, h, etype), self.pred(neg_g, h, etype)
class TestRGCN(nn.Module):
def __init__(self, in_feats, hid_feats, out_feats, rel_names, n_layers=2, dropout=0.25, activation=None):
super(FakeNewsRGCN, self).__init__()
self.conv1 = dglnn.HeteroGraphConv({
rel : dglnn.GraphConv(in_feats, hid_feats, norm='right')
for rel in rel_names
})
self.conv2 = dglnn.HeteroGraphConv({
rel : dglnn.GraphConv(hid_feats, out_feats, norm='right')
for rel in rel_names
})
def forward(self, blocks, x):
x = self.conv1(blocks[0], x)
x = self.conv2(blocks[1], x)
return h
And then I am training it like above:
source_feats = g.nodes['source'].data['source_embedding']
user_feats = g.nodes['user'].data['user_embedding']
node_features = {'source': source_feats, 'user': user_feats}
# I'm not sure exactly what in_features value should be here since source_feats and user_feats have different dimensions
model = TestModel(in_features=800, hidden_features=512, out_features=256, rel_names=g.etypes)
...
for input_nodes, positive_graph, negative_graph, blocks in dataloader:
blocks = [b.to(torch.device('cuda')) for b in blocks]
positive_graph = positive_graph.to(torch.device('cuda'))
negative_graph = negative_graph.to(torch.device('cuda'))
input_features = blocks[0].srcdata['features']
pos_score, neg_score = model(positive_graph, negative_graph, input_features, ('source', 'has_follower', 'user'))
loss = compute_loss(pos_score, neg_score)
When running this, I am getting a key error at blocks in the forward operation of TestRGCN:
x = self.conv1(blocks[0], x)
File "pathlib/python3.6/site-packages/dgl/heterograph.py", line 1968, in __getitem__
raise DGLError('Invalid key "{}". Must be one of the edge types.'.format(orig_key))
dgl._ffi.base.DGLError: Invalid key "0". Must be one of the edge types.
I’m not sure what the issue is here. When I print blocks, I can see there are two of them in there and they have some data. I’m also not sure what my input_feats value should be since I have two input features.
[Block(num_src_nodes={'source': 188, 'user': 60},
num_dst_nodes={'source': 188, 'user': 60},
num_edges={('source', 'has_follower', 'user'): 271},
metagraph=[('source', 'user', 'has_follower')]), Block(num_src_nodes={'source': 188, 'user': 60},
num_dst_nodes={'source': 10, 'user': 60},
num_edges={('source', 'has_follower', 'user'): 271},
metagraph=[('source', 'user', 'has_follower')])]