Hello,
For the last few days I have been struggling with an error that I cannot understand. The error occurs only when running on the GPU; it does not appear when running on the CPU. I run the code in a conda environment. The error is as follows:
Traceback (most recent call last):
File "main_COLLAB_edge_classification.py", line 585, in <module>
main()
File "main_COLLAB_edge_classification.py", line 580, in main
train_val_pipeline(MODEL_NAME, dataset, params, net_params, dirs)
File "main_COLLAB_edge_classification.py", line 315, in train_val_pipeline
epoch_train_loss, optimizer, train_loader, val_loader, test_loader = train_epoch(model, optimizer, device, graph, train_edges, params['batch_size'], epoch, dataset, 4, monet_pseudo)
File "E:\link-prediction-V2\benchmarking\train\train_COLLAB_drnl_edge_classification.py", line 62, in train_epoch_sparse
for subgs, _ in train_loader:
File "F:\Aga\conda\envs\bench\lib\site-packages\dgl\dataloading\dataloader.py", line 512, in __next__
self._next_non_threaded() if not self.use_thread else self._next_threaded()
File "F:\Aga\conda\envs\bench\lib\site-packages\dgl\dataloading\dataloader.py", line 507, in _next_threaded
exception.reraise()
File "F:\Aga\conda\envs\bench\lib\site-packages\dgl\utils\exception.py", line 57, in reraise
raise exception
dgl._ffi.base.DGLError: Caught DGLError in prefetcher.
Original Traceback (most recent call last):
File "F:\Aga\conda\envs\bench\lib\site-packages\dgl\dataloading\dataloader.py", line 380, in _prefetcher_entry
batch, feats, stream_event = _prefetch(batch, dataloader, stream)
File "F:\Aga\conda\envs\bench\lib\site-packages\dgl\dataloading\dataloader.py", line 338, in _prefetch
batch = recursive_apply(batch, _record_stream, current_stream)
File "F:\Aga\conda\envs\bench\lib\site-packages\dgl\utils\internal.py", line 1038, in recursive_apply
return [recursive_apply(v, fn, *args, **kwargs) for v in data]
File "F:\Aga\conda\envs\bench\lib\site-packages\dgl\utils\internal.py", line 1038, in <listcomp>
return [recursive_apply(v, fn, *args, **kwargs) for v in data]
File "F:\Aga\conda\envs\bench\lib\site-packages\dgl\utils\internal.py", line 1040, in recursive_apply
return fn(data, *args, **kwargs)
File "F:\Aga\conda\envs\bench\lib\site-packages\dgl\dataloading\dataloader.py", line 307, in _record_stream
x.record_stream(stream)
File "F:\Aga\conda\envs\bench\lib\site-packages\dgl\heterograph.py", line 5605, in record_stream
self._graph.record_stream(stream)
File "F:\Aga\conda\envs\bench\lib\site-packages\dgl\heterograph_index.py", line 290, in record_stream
return _CAPI_DGLHeteroRecordStream(self, to_dgl_stream_handle(stream))
File "F:\Aga\conda\envs\bench\lib\site-packages\dgl\_ffi\_ctypes\function.py", line 188, in __call__
check_call(_LIB.DGLFuncCall(
File "F:\Aga\conda\envs\bench\lib\site-packages\dgl\_ffi\base.py", line 65, in check_call
raise DGLError(py_str(_LIB.DGLGetLastError()))
dgl._ffi.base.DGLError: [12:52:54] C:\Users\Administrator\dgl-0.5\src\runtime\ndarray.cc:284: Check failed: td->IsAvailable(): RecordStream only works when TensorAdaptor is available.