Hi, this is my attempt at the edge-type-irrelevant neighbor sampler.
class MultiLayerNeighborSampler(dgl.dataloading.BlockSampler):
def __init__(self, fanouts, g):
super().__init__(len(fanouts), return_eids=False)
self.fanouts = fanouts
self.homo_g = dgl.to_homogeneous(g)
self.mapping = self.get_mapping() # Map heterogenous node ids to homogenous node ids using _ID and _TYPE
def get_mapping(self):
...
return mapping
def sample_frontier(self, block_id, g, seed_nodes):
'''
seed_nodes are in the heterogenous format
We convert them to homogenous and then sample neighbors
and convert the frontier back to hetero
'''
seed_nodes = ... # convert to homogeneous ids using self.mapping
fanout = self.fanouts[block_id]
if fanout is None:
frontier = dgl.in_subgraph(self.homo_g, seed_nodes)
else:
frontier = dgl.sampling.sample_neighbors(self.homo_g, seed_nodes, fanout)
# Convert back to hetero
frontier = dgl.to_heterogeneous(frontier, g.ntypes, g.etypes)
return frontier
I get the following error during the dgl.sampling.sample_neighbors()
call -
Traceback (most recent call last):
File "hetero.py", line 660, in <module>
train(args, devices)
File "hetero.py", line 525, in train
run(0, n_gpus, args, devices, data)
File "hetero.py", line 412, in run
for step, (input_nodes, pos_graph, neg_graph, blocks) in enumerate(train_dataloader):
File "/home/ubuntu/anaconda3/envs/new/lib/python3.6/site-packages/dgl/dataloading/pytorch/__init__.py", line 173, in __next__
result_ = next(self.iter_)
File "/home/ubuntu/anaconda3/envs/new/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 435, in __next__
data = self._next_data()
File "/home/ubuntu/anaconda3/envs/new/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 1085, in _next_data
return self._process_data(data)
File "/home/ubuntu/anaconda3/envs/new/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 1111, in _process_data
data.reraise()
File "/home/ubuntu/anaconda3/envs/new/lib/python3.6/site-packages/torch/_utils.py", line 428, in reraise
raise self.exc_type(msg)
dgl._ffi.base.DGLError: Caught DGLError in DataLoader worker process 0.
Original Traceback (most recent call last):
File "/home/ubuntu/anaconda3/envs/new/lib/python3.6/site-packages/torch/utils/data/_utils/worker.py", line 198, in _worker_loop
data = fetcher.fetch(index)
File "/home/ubuntu/anaconda3/envs/new/lib/python3.6/site-packages/torch/utils/data/_utils/fetch.py", line 47, in fetch
return self.collate_fn(data)
File "/home/ubuntu/anaconda3/envs/new/lib/python3.6/site-packages/dgl/dataloading/pytorch/__init__.py", line 136, in collate
result = super().collate(items)
File "/home/ubuntu/anaconda3/envs/new/lib/python3.6/site-packages/dgl/dataloading/dataloader.py", line 720, in collate
return self._collate_with_negative_sampling(items)
File "/home/ubuntu/anaconda3/envs/new/lib/python3.6/site-packages/dgl/dataloading/dataloader.py", line 676, in _collate_with_negative_sampling
self.g_sampling, seed_nodes, exclude_eids=exclude_eids)
File "/home/ubuntu/anaconda3/envs/new/lib/python3.6/site-packages/dgl/dataloading/dataloader.py", line 253, in sample_blocks
block = transform.to_block(frontier, seed_nodes)
File "/home/ubuntu/anaconda3/envs/new/lib/python3.6/site-packages/dgl/transform.py", line 1829, in to_block
g._graph, dst_node_ids_nd, include_dst_in_src)
File "dgl/_ffi/_cython/./function.pxi", line 287, in dgl._ffi._cy3.core.FunctionBase.__call__
File "dgl/_ffi/_cython/./function.pxi", line 222, in dgl._ffi._cy3.core.FuncCall
File "dgl/_ffi/_cython/./function.pxi", line 211, in dgl._ffi._cy3.core.FuncCall3
File "dgl/_ffi/_cython/./base.pxi", line 155, in dgl._ffi._cy3.core.CALL
dgl._ffi.base.DGLError: [08:24:55] /opt/dgl/src/graph/transform/to_bipartite.cc:95: Check failed: new_dst.Ptr<IdType>()[i] != -1 (-1 vs. -1) : Node 12547 does not exist in `rhs_nodes`. Argument `rhs_nodes` must contain all the edge destination nodes.
Stack trace:
[bt] (0) /home/ubuntu/anaconda3/envs/new/lib/python3.6/site-packages/dgl/libdgl.so(dmlc::LogMessageFatal::~LogMessageFatal()+0x4f) [0x7f3682f3c0ff]
[bt] (1) /home/ubuntu/anaconda3/envs/new/lib/python3.6/site-packages/dgl/libdgl.so(+0xd4ecc0) [0x7f368373fcc0]
[bt] (2) /home/ubuntu/anaconda3/envs/new/lib/python3.6/site-packages/dgl/libdgl.so(std::tuple<std::shared_ptr<dgl::BaseHeteroGraph>, std::vector<dgl::runtime::NDArray, std::allocator<dgl::runtime::NDArray> >, std::vector<dgl::runtime::NDArray, std::allocator<dgl::runtime::NDArray> > > dgl::transform::ToBlock<(DLDeviceType)1, long>(std::shared_ptr<dgl::BaseHeteroGraph>, std::vector<dgl::runtime::NDArray, std::allocator<dgl::runtime::NDArray> > const&, bool)+0x3a) [0x7f36837409ba]
[bt] (3) /home/ubuntu/anaconda3/envs/new/lib/python3.6/site-packages/dgl/libdgl.so(+0xd50281) [0x7f3683741281]
[bt] (4) /home/ubuntu/anaconda3/envs/new/lib/python3.6/site-packages/dgl/libdgl.so(+0xd50ce4) [0x7f3683741ce4]
[bt] (5) /home/ubuntu/anaconda3/envs/new/lib/python3.6/site-packages/dgl/libdgl.so(DGLFuncCall+0x48) [0x7f3683628a98]
[bt] (6) /home/ubuntu/anaconda3/envs/new/lib/python3.6/site-packages/dgl/_ffi/_cy3/core.cpython-36m-x86_64-linux-gnu.so(+0x16873) [0x7f367f8ff873]
[bt] (7) /home/ubuntu/anaconda3/envs/new/lib/python3.6/site-packages/dgl/_ffi/_cy3/core.cpython-36m-x86_64-linux-gnu.so(+0x16b9b) [0x7f367f8ffb9b]
[bt] (8) python(_PyObject_FastCallDict+0x8b) [0x55c97f46e0bb]
This happens only if I change the Neighbor sample to the one above. This is the only change to an otherwise heterogeneous GNN implementation. It works normally if I use any of dgls samplers.
Could you help me figure this one out? Thanks a lot