version:
dgl 0.5.2
torch 1.6
Minimal example to reproduce:
# Toy heterograph: a single node type 'a' joined by two relations whose edge
# lists are the reverse of each other.
edge_lists = {
    ('a', 'etype_1', 'a'): ([0, 1, 2], [1, 2, 3]),
    ('a', 'etype_2', 'a'): ([1, 2, 3], [0, 1, 2]),
}
hg = dgl.heterograph(edge_lists)

# Full-neighbor sampler built with a first argument of 1 and return_eids=True.
sampler = dgl.dataloading.MultiLayerFullNeighborSampler(1, return_eids=True)

# Mini-batches are seeded from node 1 of type 'a' only.
seed_nodes = {'a': [1]}
collator = dgl.dataloading.NodeCollator(hg, seed_nodes, sampler)

dataloader = torch.utils.data.DataLoader(
    collator.dataset,
    collate_fn=collator.collate,
    batch_size=1024,
    shuffle=True,
    drop_last=False,
    num_workers=32,
)

# One mini-batch is enough: the call on the first block is the one under
# investigation, so the loop stops right after it.
for step, (input_nodes, output_nodes, blocks) in enumerate(dataloader):
    first_block = blocks[0]
    dgl.metapath_reachable_graph(first_block, metapath=['etype_2'])
    break
The code above reproduces the following error: `DGLError: The given number of nodes of node type a must be larger than the max ID in the data, but got 1 and 2.`
Does anyone know how to deal with this? I am really confused.
The problem seems to be caused by `g.adj(etype=etype, scipy_fmt='csr', transpose=True)`,
which will return a matrix with shape `(len(src), len(dst))`. `adj.shape[1]` is then the number of dst nodes (not the number of nodes of type `a`), which causes the error.
def metapath_reachable_graph(g, metapath):
    """Return the graph of node pairs connected by ``metapath`` in ``g``.

    The adjacency matrices of the edge types listed in ``metapath`` are
    multiplied in order.  Every non-zero entry ``(u, v)`` of the product
    becomes one edge ``u -> v`` of type ``'_E'`` in the returned graph.

    Parameters
    ----------
    g
        Input graph.  It must provide ``adj``, ``to_canonical_etype``,
        ``nodes``, ``idtype`` and ``device``.
    metapath
        Non-empty sequence of edge types of ``g`` (the first and last entries
        are indexed to determine the end-point node types).

    Returns
    -------
    The graph built by ``convert.heterograph`` with a single edge type
    ``(srctype, '_E', dsttype)``, on the same ``idtype`` and ``device`` as
    ``g``, with the node features of ``g`` copied over for the end-point
    node types.
    """
    # Chain the per-edge-type adjacency matrices; starting from the scalar 1
    # makes the first multiplication yield the first matrix unchanged.
    adj = 1
    for etype in metapath:
        adj = adj * g.adj(etype=etype, scipy_fmt='csr', transpose=True)
    # Only reachability matters, not the number of paths: binarize.
    adj = (adj != 0).tocsr()

    srctype = g.to_canonical_etype(metapath[0])[0]
    dsttype = g.to_canonical_etype(metapath[-1])[2]

    if srctype == dsttype:
        # Both ends share one node type, so a single node count must cover
        # the row IDs *and* the column IDs.  Writing both shapes under the
        # same dict key would silently keep only ``adj.shape[1]``, which is
        # too small whenever the adjacency is not square (e.g. a sampled
        # block with more source than destination nodes).  For a square
        # adjacency ``max`` yields the same value as before.
        num_nodes = {srctype: max(adj.shape)}
    else:
        num_nodes = {srctype: adj.shape[0], dsttype: adj.shape[1]}

    new_g = convert.heterograph({(srctype, '_E', dsttype): adj.nonzero()},
                                num_nodes,
                                idtype=g.idtype, device=g.device)

    # copy srcnode features
    # NOTE(review): assumes g.nodes[srctype].data holds one row per node
    # counted above -- confirm for block inputs.
    new_g.nodes[srctype].data.update(g.nodes[srctype].data)
    # copy dstnode features (already done above when both types coincide)
    if srctype != dsttype:
        new_g.nodes[dsttype].data.update(g.nodes[dsttype].data)

    return new_g