Hi again. This way keeps giving me memory issues. Code:
def mini_batch_prediction(g:dgl.heterograph, model:torch.nn.Module, device:torch.device):
eid_dict = {canonical_etype: torch.arange(g.num_edges(canonical_etype[1]), dtype=torch.int64) for canonical_etype in g.canonical_etypes}
neighbor_sampler = dgl.dataloading.MultiLayerFullNeighborSampler(2)
negative_sampler=dgl.dataloading.negative_sampler.Uniform(1)
dataloader = dgl.dataloading.EdgeDataLoader(g, eid_dict, neighbor_sampler,
negative_sampler=negative_sampler,
batch_size=32,
shuffle=True,
drop_last=False,
num_workers=0)
post_new_feats = []
user_new_feats = []
for input_nodes, positive_graph, negative_graph, blocks in dataloader:
blocks = [b.to(torch.device('cuda')) for b in blocks]
post_features = blocks[0].srcdata['feats']['post'].to(device)
user_features = blocks[0].srcdata['feats']['user'].to(device)
input_features = {'post':post_features, 'user': user_features}
x = model.rgcn(blocks, input_features)
user_feats = x['user']
user_idxes = blocks[-1].dstdata[dgl.NID]['user']
for i, user in enumerate(user_idxes):
user_new_feats.append((user, user_feats[i]))
Error:
Device: cuda:0
/opt/conda/lib/python3.7/site-packages/dgl/base.py:45: DGLWarning: EdgeDataLoader directly taking a BlockSampler will be deprecated and it will not support feature prefetching. Please use dgl.dataloading.as_edge_prediction_sampler to wrap it.
return warnings.warn(message, category=category, stacklevel=1)
Traceback (most recent call last):
File "main.py", line 85, in <module>
mini_batch_inference=True)
File "main.py", line 66, in main
preds = mini_batch_prediction(graph, model, device)
File "/code/dgl_recommendator/inference.py", line 117, in mini_batch_prediction
x = model.rgcn(blocks, input_features)
File "/opt/conda/lib/python3.7/site-packages/torch/nn/modules/module.py", line 1110, in _call_impl
return forward_call(*input, **kwargs)
File "/code/dgl_recommendator/models.py", line 61, in forward
x = self.conv1(blocks[0], x)
File "/opt/conda/lib/python3.7/site-packages/torch/nn/modules/module.py", line 1110, in _call_impl
return forward_call(*input, **kwargs)
File "/opt/conda/lib/python3.7/site-packages/dgl/nn/pytorch/hetero.py", line 178, in forward
**mod_kwargs.get(etype, {}))
File "/opt/conda/lib/python3.7/site-packages/torch/nn/modules/module.py", line 1110, in _call_impl
return forward_call(*input, **kwargs)
File "/opt/conda/lib/python3.7/site-packages/dgl/nn/pytorch/conv/graphconv.py", line 423, in forward
graph.update_all(aggregate_fn, fn.sum(msg='m', out='h'))
File "/opt/conda/lib/python3.7/site-packages/dgl/heterograph.py", line 4876, in update_all
ndata = core.message_passing(g, message_func, reduce_func, apply_node_func)
File "/opt/conda/lib/python3.7/site-packages/dgl/core.py", line 357, in message_passing
ndata = invoke_gspmm(g, mfunc, rfunc)
File "/opt/conda/lib/python3.7/site-packages/dgl/core.py", line 332, in invoke_gspmm
z = op(graph, x)
File "/opt/conda/lib/python3.7/site-packages/dgl/ops/spmm.py", line 189, in func
return gspmm(g, 'copy_lhs', reduce_op, x, None)
File "/opt/conda/lib/python3.7/site-packages/dgl/ops/spmm.py", line 77, in gspmm
lhs_data, rhs_data)
File "/opt/conda/lib/python3.7/site-packages/dgl/backend/pytorch/sparse.py", line 757, in gspmm
return GSpMM.apply(gidx, op, reduce_op, lhs_data, rhs_data)
File "/opt/conda/lib/python3.7/site-packages/torch/cuda/amp/autocast_mode.py", line 118, in decorate_fwd
return fwd(*args, **kwargs)
File "/opt/conda/lib/python3.7/site-packages/dgl/backend/pytorch/sparse.py", line 126, in forward
out, (argX, argY) = _gspmm(gidx, op, reduce_op, X, Y)
File "/opt/conda/lib/python3.7/site-packages/dgl/sparse.py", line 233, in _gspmm
arg_e_nd)
File "dgl/_ffi/_cython/./function.pxi", line 287, in dgl._ffi._cy3.core.FunctionBase.__call__
File "dgl/_ffi/_cython/./function.pxi", line 232, in dgl._ffi._cy3.core.FuncCall
File "dgl/_ffi/_cython/./base.pxi", line 155, in dgl._ffi._cy3.core.CALL
dgl._ffi.base.DGLError: [10:14:22] /opt/dgl/src/runtime/cuda/cuda_device_api.cc:97: Check failed: e == cudaSuccess || e == cudaErrorCudartUnloading: CUDA: out of memory
Stack trace:
[bt] (0) /opt/conda/lib/python3.7/site-packages/dgl/libdgl.so(dmlc::LogMessageFatal::~LogMessageFatal()+0x4f) [0x7f5e5a28149f]
[bt] (1) /opt/conda/lib/python3.7/site-packages/dgl/libdgl.so(dgl::runtime::CUDADeviceAPI::AllocDataSpace(DLContext, unsigned long, unsigned long, DLDataType)+0x108) [0x7f5e5a759748]
[bt] (2) /opt/conda/lib/python3.7/site-packages/dgl/libdgl.so(dgl::runtime::NDArray::Empty(std::vector<long, std::allocator<long> >, DLDataType, DLContext)+0x351) [0x7f5e5a5c7a71]
[bt] (3) /opt/conda/lib/python3.7/site-packages/dgl/libdgl.so(dgl::aten::NewIdArray(long, DLContext, unsigned char)+0x6d) [0x7f5e5a252b4d]
[bt] (4) /opt/conda/lib/python3.7/site-packages/dgl/libdgl.so(dgl::runtime::NDArray dgl::aten::impl::Range<(DLDeviceType)2, long>(long, long, DLContext)+0x9a) [0x7f5e5a77407a]
[bt] (5) /opt/conda/lib/python3.7/site-packages/dgl/libdgl.so(dgl::aten::Range(long, long, unsigned char, DLContext)+0x1fd) [0x7f5e5a252edd]
[bt] (6) /opt/conda/lib/python3.7/site-packages/dgl/libdgl.so(std::pair<dgl::runtime::NDArray, dgl::runtime::NDArray> dgl::aten::impl::Sort<(DLDeviceType)2, long>(dgl::runtime::NDArray, int)+0x50) [0x7f5e5a7823b0]
[bt] (7) /opt/conda/lib/python3.7/site-packages/dgl/libdgl.so(dgl::aten::Sort(dgl::runtime::NDArray, int)+0x21a) [0x7f5e5a26575a]
[bt] (8) /opt/conda/lib/python3.7/site-packages/dgl/libdgl.so(void dgl::aten::impl::COOSort_<(DLDeviceType)2, long>(dgl::aten::COOMatrix*, bool)+0x5b) [0x7f5e5a78caeb]
Isn’t there any other way to do this in a less memory consuming way?