In recent weeks I was running my code on CPU with a toy graph, but after refactoring it to run on a larger graph using a GPU, I get the following error:

```
RuntimeError: Tensor for 'out' is on CPU, Tensor for argument #1 'self' is on CPU, but expected them to be on GPU (while checking arguments for addmm)
```

The error is raised at the ReLU call in the `forward` method of the following class:

```
class PongConv(nn.Module):
    """Graph convolution used in the Pong model.

    For every destination node, computes a weighted average of its source
    neighbors' embeddings (weights taken from the edge feature ``'weight'``,
    normalized per destination node) and combines it with the destination
    node's own embedding through two linear projections followed by ReLU.

    NOTE(review): the ``nn.Linear`` sub-modules live on whatever device this
    module is on. The reported ``addmm`` CPU-vs-GPU RuntimeError occurs when
    the graph/features are on the GPU but the module was never moved — call
    ``model.to(device)`` (and ``graph.to(device)``) before ``forward``.
    """

    def __init__(self, src_dim: int, dest_dim: int):
        """
        :param src_dim: embedding width of source nodes (also the output width).
        :param dest_dim: embedding width of destination nodes.
        """
        super().__init__()
        # Projects the aggregated neighbor average (src_dim wide).
        self.linear_src = nn.Linear(in_features=src_dim, out_features=src_dim, bias=True)
        # Projects the destination node's own embedding (dest_dim wide).
        self.linear_dst = nn.Linear(in_features=dest_dim, out_features=src_dim, bias=True)

    def forward(self, graph: dgl.DGLGraph,
                node_features: Tuple[torch.FloatTensor, torch.FloatTensor]) -> torch.FloatTensor:
        """
        :param graph: (possibly bipartite) DGL graph with an edge feature ``'weight'``.
        :param node_features: ``(src_features, dst_features)`` tensors of widths
            ``src_dim`` and ``dest_dim`` respectively.
        :return: per-destination-node embeddings of width ``src_dim``.
        """
        with graph.local_scope():
            src_features, dst_features = node_features
            graph.srcdata['h'] = src_features
            graph.dstdata['h'] = dst_features
            # Optimized weighted average: first the per-destination weight sum,
            # then each edge weight divided by that sum.
            graph.update_all(fn.copy_e('weight', 'm'), fn.sum('m', 'sum_weight'))
            graph.apply_edges(fn.e_div_v('weight', 'sum_weight', 'normalized_weight'))
            # Weighted average of source-neighbor embeddings (src_dim wide).
            graph.update_all(message_func=fn.u_mul_e('h', 'normalized_weight', 'h_ngh'),
                             reduce_func=fn.sum('h_ngh', 'neighbors_avg'))
            # BUG FIX: the two projections were fed the wrong inputs.
            # linear_src expects src_dim features — that is the neighbor average;
            # linear_dst expects dest_dim features — that is the node's own
            # embedding. The original code only ran when src_dim == dest_dim.
            result = F.relu(
                torch.add(
                    self.linear_src(graph.dstdata['neighbors_avg']),
                    self.linear_dst(graph.dstdata['h'])
                )
            )
            return result
```

I think the issue is with the `nn.Linear` layers, but I have no idea how to solve it. Does anyone have any idea why this is happening?

I’m using PyTorch and CUDA 11.0.