In recent weeks I was running my code on CPU with a toy graph, but after refactoring it to run on a larger graph using a GPU, I get the following error:

```
RuntimeError: Tensor for 'out' is on CPU, Tensor for argument #1 'self' is on CPU, but expected them to be on GPU (while checking arguments for addmm)
```

The error is raised at the ReLU call in the `forward` method of the following class:

```
class PongConv(nn.Module):
    """Graph convolution used in the Pong model.

    For every destination node, computes a weighted average of its source
    neighbors' embeddings (weights taken from the edge feature ``'weight'``,
    normalized per destination node) and combines it with the destination
    node's own embedding through two linear projections followed by ReLU.

    NOTE(review): the ``nn.Linear`` sub-modules live on whatever device this
    module is on. The reported ``addmm`` CPU-vs-GPU RuntimeError occurs when
    the graph/features are on the GPU but the module was never moved — call
    ``model.to(device)`` (and ``graph.to(device)``) before ``forward``.
    """

    def __init__(self, src_dim: int, dest_dim: int):
        """
        :param src_dim: embedding width of source nodes (also the output width).
        :param dest_dim: embedding width of destination nodes.
        """
        super().__init__()
        # Projects the aggregated neighbor average (src_dim wide).
        self.linear_src = nn.Linear(in_features=src_dim, out_features=src_dim, bias=True)
        # Projects the destination node's own embedding (dest_dim wide).
        self.linear_dst = nn.Linear(in_features=dest_dim, out_features=src_dim, bias=True)

    def forward(self, graph: dgl.DGLGraph,
                node_features: Tuple[torch.FloatTensor, torch.FloatTensor]) -> torch.FloatTensor:
        """
        :param graph: (possibly bipartite) DGL graph with an edge feature ``'weight'``.
        :param node_features: ``(src_features, dst_features)`` tensors of widths
            ``src_dim`` and ``dest_dim`` respectively.
        :return: per-destination-node embeddings of width ``src_dim``.
        """
        with graph.local_scope():
            src_features, dst_features = node_features
            graph.srcdata['h'] = src_features
            graph.dstdata['h'] = dst_features
            # Optimized weighted average: first the per-destination weight sum,
            # then each edge weight divided by that sum.
            graph.update_all(fn.copy_e('weight', 'm'), fn.sum('m', 'sum_weight'))
            graph.apply_edges(fn.e_div_v('weight', 'sum_weight', 'normalized_weight'))
            # Weighted average of source-neighbor embeddings (src_dim wide).
            graph.update_all(message_func=fn.u_mul_e('h', 'normalized_weight', 'h_ngh'),
                             reduce_func=fn.sum('h_ngh', 'neighbors_avg'))
            # BUG FIX: the two projections were fed the wrong inputs.
            # linear_src expects src_dim features — that is the neighbor average;
            # linear_dst expects dest_dim features — that is the node's own
            # embedding. The original code only ran when src_dim == dest_dim.
            result = F.relu(
                torch.add(
                    self.linear_src(graph.dstdata['neighbors_avg']),
                    self.linear_dst(graph.dstdata['h'])
                )
            )
            return result
```

I think the issue is with the `nn.Linear` layers, but I have no idea how to solve it. Does anyone have any idea why this is happening?

I’m using PyTorch and CUDA 11.0.