An error when executing autograd

Hi. I have a problem when I execute autograd in a GNN model built with DGL.

import dgl
import dgl.function as fn
import torch
from torch import nn
import numpy as np

src = [0, 0, 0, 0, 0, 1, 2, 3, 3, 4, 5, 6, 7, 7, 8, 9]
dst = [0, 1, 2, 3, 9, 6, 6, 3, 5, 7, 4, 9, 0, 1, 1, 2]
u = np.concatenate([src, dst])
v = np.concatenate([dst, src])
G = dgl.DGLGraph((u, v))
G.ndata['x'] = torch.randn(10, 4)
G.ndata['d'] = torch.randn(10, 4)

class net(nn.Module):
    def __init__(self):
        super(net, self).__init__()
        self.lin = nn.Linear(4, 1)
        
    def message_func(self, edges):
        feat = edges.dst['x'].to(device)
        message = self.lin(feat)
        return {'m': message}
    
    def forward(self, G, feat):
        graph = G.local_var()
        graph.srcdata['h'] = feat
        graph.update_all(self.message_func, fn.sum(msg='m', out='h'))
        rst = graph.dstdata['h']
        out = torch.sum(rst)
        d = - torch.autograd.grad(out.sum(), feat, create_graph=True)[0]
        return out, d

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = net().to(device)
feat = G.ndata['x'].to(device)
feat.requires_grad = True
out, d = model(G, feat)

I want to calculate the derivative of out with respect to the input, i.e. G.ndata['x']. With torch.device('cpu') the code runs fine, but with torch.device('cuda') I get the following error:

---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
<ipython-input-4-e56cb4569601> in <module>
      4 feat = G.ndata['x'].to(device)
      5 feat.requires_grad = True
----> 6 model(G, feat)

~/anaconda3/lib/python3.6/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
    530             result = self._slow_forward(*input, **kwargs)
    531         else:
--> 532             result = self.forward(*input, **kwargs)
    533         for hook in self._forward_hooks.values():
    534             hook_result = hook(self, input, result)

<ipython-input-2-6274bd0ee471> in forward(self, G, feat)
     23         rst = graph.dstdata['h']
     24         out = torch.sum(rst)
---> 25         d = - torch.autograd.grad(out.sum(), feat, create_graph=True)[0]
     26         return out, d

~/anaconda3/lib/python3.6/site-packages/torch/autograd/__init__.py in grad(outputs, inputs, grad_outputs, retain_graph, create_graph, only_inputs, allow_unused)
    155     return Variable._execution_engine.run_backward(
    156         outputs, grad_outputs, retain_graph, create_graph,
--> 157         inputs, allow_unused)
    158 
    159 

RuntimeError: One of the differentiated Tensors appears to not have been used in the graph. Set allow_unused=True if this is the desired behavior.

If I set allow_unused=True, the derivative comes back as None. This usually happens when feat isn't a leaf tensor, but if I print feat.is_leaf it returns True.
I tried to reproduce this error with plain PyTorch, without DGL, but I can't:

import torch
from torch import nn

class net(nn.Module):
    def __init__(self):
        super(net, self).__init__()
        self.lin = nn.Linear(4, 1)
        
    def forward(self, x):
        out = self.lin(x)
        d = torch.autograd.grad(out.sum(), x, create_graph=True)
        return out, d

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = net().to(device)

x = torch.randn(10, 4).to(device)
x.requires_grad = True

out, d = model(x)

which runs without any error.
So what's wrong with the first code? Thank you.

This is because you store the input feat tensor under the key 'h', but the message function reads 'x'. Changing 'x' to 'h' solves the problem.
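
For future readers, here is a minimal sketch of that fix, keeping everything else from the question unchanged (the only edit is reading 'h' instead of 'x' inside message_func; G and feat are built exactly as in the original snippet):

import dgl.function as fn
import torch
from torch import nn

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

class net(nn.Module):
    def __init__(self):
        super(net, self).__init__()
        self.lin = nn.Linear(4, 1)

    def message_func(self, edges):
        # Read 'h', the field that forward() fills with the differentiable
        # input feat, instead of the graph's pre-stored 'x' feature.
        feat = edges.dst['h'].to(device)
        message = self.lin(feat)
        return {'m': message}

    def forward(self, G, feat):
        graph = G.local_var()
        graph.srcdata['h'] = feat  # feat enters the graph under the key 'h' ...
        graph.update_all(self.message_func, fn.sum(msg='m', out='h'))
        rst = graph.dstdata['h']
        out = torch.sum(rst)
        # ... so autograd can now trace out back to feat.
        d = - torch.autograd.grad(out.sum(), feat, create_graph=True)[0]
        return out, d

In the original code, 'x' was the feature already stored in G.ndata and was never connected to feat in the autograd graph, so grad correctly reported feat as unused. On CPU it most likely worked only because .to('cpu') on a CPU tensor returns the same tensor object, so G.ndata['x'] and feat were one and the same.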

Yep, it works. Thank you very much.