I am trying to do Link Prediction with heterographs. Earlier, there was an error which I resolved after adding reverse edges but now I am getting this error, saying
DGLError: [11:11:38] /opt/dgl/include/dgl/./aten/coo.h:115: Check failed: (row)->ctx == (col)->ctx: Expected col to have the same device context as row(cuda:0). But got cpu:0.
I have loaded both my data and graph to my GPU so I’m really not sure why it is happening. Even checked all the new data in the Negative Graph function and they’re all on Cuda as well, so there shouldn’t be a problem. Can anybody help me with this?
Graph Dimensions:
Graph(num_nodes={‘customer’: 74845, ‘product’: 1835},
num_edges={(‘customer’, ‘customerBoughtProducts’, ‘product’): 266617, (‘product’, ‘customerBoughtProductsBack’, ‘customer’): 266617},
metagraph=[(‘customer’, ‘product’, ‘customerBoughtProducts’), (‘product’, ‘customer’, ‘customerBoughtProductsBack’)])
GNN Code:
class HeteroDotProductPredictor(nn.Module):
def forward(self, graph, h, etype):
with graph.local_scope():
graph.ndata['h'] = h
graph.apply_edges(fn.u_dot_v('h', 'h', 'score'), etype=etype)
return graph.edges[etype].data['score']
class RGCN(nn.Module):
def __init__(self, in_feats, hid_feats, out_feats, rel_names):
super().__init__()
self.conv1 = dglnn.HeteroGraphConv({
rel: dglnn.GraphConv(in_feats, hid_feats)
for rel in rel_names}, aggregate='sum')
self.conv2 = dglnn.HeteroGraphConv({
rel: dglnn.GraphConv(hid_feats, out_feats)
for rel in rel_names}, aggregate='sum')
def forward(self, graph, inputs):
h = self.conv1(graph, inputs)
h = {k: F.relu(v) for k, v in h.items()}
h = self.conv2(graph, h)
return h
def construct_negative_graph(graph, k, etype):
utype, _, vtype = etype
src, dst = graph.edges(etype=etype)
neg_src = src.repeat_interleave(k)
neg_dst = torch.randint(0, graph.num_nodes(vtype), (len(src) * k,))
return dgl.heterograph(
{etype: (neg_src, neg_dst)},
num_nodes_dict={ntype: graph.num_nodes(ntype) for ntype in graph.ntypes})
def compute_loss(pos_score, neg_score):
n_edges = pos_score.shape[0]
return (1 - pos_score.unsqueeze(1) + neg_score.view(n_edges, -1)).clamp(min=0).mean()
class Model(nn.Module):
def __init__(self, in_features, hidden_features, out_features, rel_names):
super().__init__()
self.sage = RGCN(in_features, hidden_features, out_features, rel_names)
self.pred = HeteroDotProductPredictor()
def forward(self, g, neg_g, x, etype):
h = self.sage(g, x)
return self.pred(g, h, etype), self.pred(neg_g, h, type)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
k = 5
graph = linkpred_graph
graph = graph.to(device)
model = Model(512, 1024, 256, graph.etypes)
model.to(device)
user_feats = graph.nodes['customer'].data['embedding'].float()
item_feats = graph.nodes['product'].data['embedding'].float()
node_features = {'customer': user_feats, 'product': item_feats}
opt = torch.optim.Adam(model.parameters())
for epoch in range(10):
negative_graph = construct_negative_graph(graph, k, ('customer', 'customerBoughtProducts', 'product'))
pos_score, neg_score = model(graph, negative_graph, node_features, ('customer', 'customerBoughtProducts', 'product'))
loss = compute_loss(pos_score, neg_score)
opt.zero_grad()
loss.backward()
opt.step()
print(loss.item())
Error Stacktrace:
DGLError Traceback (most recent call last)
<ipython-input-40-e86228f4d586> in <module>
13
14 for epoch in range(10):
---> 15 negative_graph = construct_negative_graph(graph, k, ('customer', 'customerBoughtProducts', 'product'))
16 pos_score, neg_score = model(graph, negative_graph, node_features, ('customer', 'customerBoughtProducts', 'product'))
17 loss = compute_loss(pos_score, neg_score)
<ipython-input-39-59df3828f303> in construct_negative_graph(graph, k, etype)
32 return dgl.heterograph(
33 {etype: (neg_src, neg_dst)},
---> 34 num_nodes_dict={ntype: graph.num_nodes(ntype) for ntype in graph.ntypes})
35
36 def compute_loss(pos_score, neg_score):
~/anaconda3/envs/tensorflow2_p36/lib/python3.6/site-packages/dgl/convert.py in heterograph(data_dict, num_nodes_dict, idtype, device)
371 sparse_fmt, arrays = node_tensor_dict[(srctype, etype, dsttype)]
372 g = create_from_edges(sparse_fmt, arrays, srctype, etype, dsttype,
--> 373 num_nodes_dict[srctype], num_nodes_dict[dsttype])
374 rel_graphs.append(g)
375
~/anaconda3/envs/tensorflow2_p36/lib/python3.6/site-packages/dgl/convert.py in create_from_edges(sparse_fmt, arrays, utype, etype, vtype, urange, vrange, row_sorted, col_sorted)
1672 hgidx = heterograph_index.create_unitgraph_from_coo(
1673 num_ntypes, urange, vrange, u, v, ['coo', 'csr', 'csc'],
-> 1674 row_sorted, col_sorted)
1675 else: # 'csr' or 'csc'
1676 indptr, indices, eids = arrays
~/anaconda3/envs/tensorflow2_p36/lib/python3.6/site-packages/dgl/heterograph_index.py in create_unitgraph_from_coo(num_ntypes, num_src, num_dst, row, col, formats, row_sorted, col_sorted)
1143 int(num_ntypes), int(num_src), int(num_dst),
1144 F.to_dgl_nd(row), F.to_dgl_nd(col),
-> 1145 formats, row_sorted, col_sorted)
1146
1147 def create_unitgraph_from_csr(num_ntypes, num_src, num_dst, indptr, indices, edge_ids,
dgl/_ffi/_cython/./function.pxi in dgl._ffi._cy3.core.FunctionBase.__call__()
dgl/_ffi/_cython/./function.pxi in dgl._ffi._cy3.core.FuncCall()
dgl/_ffi/_cython/./base.pxi in dgl._ffi._cy3.core.CALL()
DGLError: [11:11:38] /opt/dgl/include/dgl/./aten/coo.h:115: Check failed: (row)->ctx == (col)->ctx: Expected col to have the same device context as row(cuda:0). But got cpu:0.
Stack trace:
[bt] (0) /home/ec2-user/anaconda3/envs/tensorflow2_p36/lib/python3.6/site-packages/dgl/libdgl.so(dmlc::LogMessageFatal::~LogMessageFatal()+0x4f) [0x7f79a28aee0f]
[bt] (1) /home/ec2-user/anaconda3/envs/tensorflow2_p36/lib/python3.6/site-packages/dgl/libdgl.so(dgl::aten::COOMatrix::CheckValidity() const+0x254) [0x7f79a28b6d64]
[bt] (2) /home/ec2-user/anaconda3/envs/tensorflow2_p36/lib/python3.6/site-packages/dgl/libdgl.so(dgl::UnitGraph::COO::COO(std::shared_ptr<dgl::GraphInterface>, long, long, dgl::runtime::NDArray, dgl::runtime::NDArray, bool, bool)+0x336) [0x7f79a2d51c16]
[bt] (3) /home/ec2-user/anaconda3/envs/tensorflow2_p36/lib/python3.6/site-packages/dgl/libdgl.so(dgl::UnitGraph::CreateFromCOO(long, long, long, dgl::runtime::NDArray, dgl::runtime::NDArray, bool, bool, unsigned char)+0x13a) [0x7f79a2d3e6fa]
[bt] (4) /home/ec2-user/anaconda3/envs/tensorflow2_p36/lib/python3.6/site-packages/dgl/libdgl.so(dgl::CreateFromCOO(long, long, long, dgl::runtime::NDArray, dgl::runtime::NDArray, bool, bool, unsigned char)+0x6e) [0x7f79a2c11b5e]
[bt] (5) /home/ec2-user/anaconda3/envs/tensorflow2_p36/lib/python3.6/site-packages/dgl/libdgl.so(+0x6b44d6) [0x7f79a2c574d6]
[bt] (6) /home/ec2-user/anaconda3/envs/tensorflow2_p36/lib/python3.6/site-packages/dgl/libdgl.so(+0x6b4754) [0x7f79a2c57754]
[bt] (7) /home/ec2-user/anaconda3/envs/tensorflow2_p36/lib/python3.6/site-packages/dgl/libdgl.so(DGLFuncCall+0x48) [0x7f79a2bd4358]
[bt] (8) /home/ec2-user/anaconda3/envs/tensorflow2_p36/lib/python3.6/site-packages/dgl/_ffi/_cy3/core.cpython-36m-x86_64-linux-gnu.so(+0x165ba) [0x7f79901725ba]