Hello,
I’m trying to train a Link Prediction model on a Heterograph and use custom negative edges. The negative edges are only for a certain edge type. Everything else should be trained as normal.
I created a custom negative sampler, shown below. I pass in `ids_to_push`, which is a dict that maps a source node ID to the IDs of the nodes that should be used as its negative destinations. If the edge type is `user_follows`, this dict is indexed by the source node ID and one of that node's negative destinations is chosen at random.
"""Negative samplers"""
from collections.abc import Mapping
from dgl import backend as F
from dgl.dataloading.negative_sampler import _BaseNegativeSampler
import torch
import random
class _BaseNegativeSamplerCustom(object):
    """Base class for negative samplers that work per canonical edge type.

    Subclasses implement :meth:`_generate`; calling the sampler dispatches to
    it once for a single-edge-type graph, or once per edge type when ``eids``
    is a mapping.
    """

    def _generate(self, g, eids, canonical_etype):
        """Return the negative pairs for ``eids`` of ``canonical_etype``.

        Must be overridden by subclasses.

        Raises
        ------
        NotImplementedError
            Always, in this base class.
        """
        raise NotImplementedError

    def __call__(self, g, eids):
        """Generate negative pairs for the given positive edge IDs.

        Parameters
        ----------
        g
            Graph object; must provide ``etypes``, ``canonical_etypes`` and
            ``to_canonical_etype``.
        eids
            Either a mapping from edge type to edge IDs, or the edge IDs
            themselves when ``g`` has exactly one edge type.

        Returns
        -------
        dict or object
            For a mapping input, a dict keyed by canonical edge type whose
            values are the results of ``_generate``; otherwise the single
            result of ``_generate``.

        Raises
        ------
        AssertionError
            If ``eids`` is not a mapping and ``g`` has more than one edge type.
        """
        if isinstance(eids, Mapping):
            # Normalise keys first so _generate always sees canonical triplets.
            eids = {g.to_canonical_etype(k): v for k, v in eids.items()}
            return {k: self._generate(g, v, k) for k, v in eids.items()}

        # Raised explicitly (rather than via ``assert``) so the check is not
        # stripped under ``python -O``; the exception type is unchanged.
        if len(g.etypes) != 1:
            raise AssertionError(
                'please specify a dict of etypes and ids for graphs with multiple edge types')
        return self._generate(g, eids, g.canonical_etypes[0])
class Uniform_Unless_Given(_BaseNegativeSamplerCustom):
    """Uniform negative sampler with optional per-source destination overrides.

    For every positive edge, ``k`` negative pairs are produced that keep the
    edge's source node.  Destinations are drawn uniformly at random from the
    destination node type, except for ``'user_follows'`` edges whose source ID
    is a key of ``ids_to_push``: those get a destination chosen at random from
    ``ids_to_push[source_id]``.

    Parameters
    ----------
    k : int
        Number of negative pairs generated per positive edge.
    ids_to_push : dict, optional
        Maps a source node ID to a non-empty sequence of destination node IDs
        to use as negatives for that source.  ``None`` disables overrides.
    """

    def __init__(self, k, ids_to_push=None):
        self.k = k
        self.ids_to_push = ids_to_push

    def _generate(self, g, eids, canonical_etype):
        """Return ``(src, dst)`` negative pairs for ``eids`` of ``canonical_etype``.

        Both tensors have ``len(eids) * k`` entries and share the dtype and
        device of ``eids``.
        """
        _, c_etype, vtype = canonical_etype
        dtype = F.dtype(eids)
        ctx = F.context(eids)
        # k negative examples for each positive edge.
        shape = (F.shape(eids)[0] * self.k,)

        # Keep the positive edges' sources, repeated k times each.
        src, _ = g.find_edges(eids, etype=canonical_etype)
        src = F.repeat(src, self.k, 0)
        dst = F.randint(shape, dtype, ctx, 0, g.number_of_nodes(vtype))

        # The override condition does not depend on the individual edge, so
        # skip the per-element Python loop entirely when it cannot apply.
        if not self.ids_to_push or c_etype != 'user_follows':
            return src, dst

        overrides = self.ids_to_push
        dst_ids = dst.tolist()
        for i, src_id in enumerate(src.tolist()):
            if src_id not in overrides:
                continue
            candidates = overrides[src_id]
            # An empty candidate list keeps the uniform draw instead of
            # letting random.choice raise IndexError.
            if len(candidates) > 0:
                dst_ids[i] = int(random.choice(candidates))

        # Rebuild with the dtype/device of ``eids`` so ``dst`` stays consistent
        # with ``src`` (torch.LongTensor would force CPU int64).
        return src, torch.tensor(dst_ids, dtype=dtype, device=ctx)
I then train this model for link prediction in the normal way as discussed in the docs:
# 5 negatives per positive edge; `negative_samples` is passed as `ids_to_push`.
negative_sampler_to_use = Uniform_Unless_Given(5, negative_samples)

# Train on every edge of every relation.  The full canonical triplet is passed
# to num_edges because a bare relation name is ambiguous when two canonical
# edge types share it.
# NOTE(review): edge counts come from `g` but the loader below is given
# `overall_graph._g[0]` - confirm these refer to the same graph.
train_eid_dict = {
    canonical_etype: torch.arange(g.num_edges(canonical_etype), dtype=torch.int64)
    for canonical_etype in g.canonical_etypes
}

dataloader_lp_for_communities = dgl.dataloading.EdgeDataLoader(
    overall_graph._g[0],
    train_eid_dict,
    sampler,
    negative_sampler=negative_sampler_to_use,
    batch_size=args.batch_size,
    shuffle=True,
    drop_last=False,
    pin_memory=True,
    num_workers=args.num_workers,
)
When running it, I get an error in my Score Predictor function:
class HeteroScorePredictor(nn.Module):
    """Score every edge of a graph by the dot product of its endpoint features."""

    def forward(self, edge_subgraph, x):
        """Compute a ``'score'`` edge feature for each non-empty edge type.

        Parameters
        ----------
        edge_subgraph
            Graph whose edges are scored.
        x
            Node features; assigned to ``edge_subgraph.ndata['h']``.

        Returns
        -------
        The value of ``edge_subgraph.edata['score']``.

        Raises
        ------
        KeyError
            If an edge type with at least one edge has an endpoint node type
            without an ``'h'`` feature after ``x`` is assigned.
        """
        with edge_subgraph.local_scope():
            edge_subgraph.ndata['h'] = x
            for etype in edge_subgraph.canonical_etypes:
                if edge_subgraph.num_edges(etype) <= 0:
                    continue
                src_type, _, dst_type = etype
                # Fail early and name the node type; otherwise the same
                # KeyError surfaces deep inside apply_edges as just 'h'.
                for ntype in (src_type, dst_type):
                    if 'h' not in edge_subgraph.nodes[ntype].data:
                        raise KeyError(
                            f"node type {ntype!r} has no 'h' feature but is an "
                            f"endpoint of edge type {etype!r}; the features "
                            f"passed as x must cover this node type")
                edge_subgraph.apply_edges(
                    dgl.function.u_dot_v('h', 'h', 'score'), etype=etype)
            # Read inside the scope: features set here are dropped on exit.
            return edge_subgraph.edata['score']
Here is the error:
Traceback (most recent call last):
File "/home_directory/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
self.run()
File "/home_directory/lib/python3.6/multiprocessing/process.py", line 93, in run
self._target(*self._args, **self._kwargs)
File "model_training.py", line 5189, in running_code
pos_score, neg_score = model(blocks, node_features, g=positive_graph, neg_g=negative_graph)
File "/home_directory/lib/python3.6/site-packages/torch/nn/modules/module.py", line 550, in __call__
result = self.forward(*input, **kwargs)
File "/home_directory/lib/python3.6/site-packages/torch/nn/parallel/distributed.py", line 458, in forward
output = self.module(*inputs[0], **kwargs[0])
File "/home_directory/lib/python3.6/site-packages/torch/nn/modules/module.py", line 550, in __call__
result = self.forward(*input, **kwargs)
File "model_architecture.py", line 338, in forward
pos_score = self.pred(g, x)
File "/home_directory/lib/python3.6/site-packages/torch/nn/modules/module.py", line 550, in __call__
result = self.forward(*input, **kwargs)
File "model_architecture.py", line 320, in forward
edge_subgraph.apply_edges(dgl.function.u_dot_v('h', 'h', 'score'), etype=etype)
File "/home_directory/lib/python3.6/site-packages/dgl/heterograph.py", line 4423, in apply_edges
edata = core.invoke_gsddmm(g, func)
File "/home_directory/lib/python3.6/site-packages/dgl/core.py", line 239, in invoke_gsddmm
y = alldata[func.rhs][func.rhs_field]
File "/home_directory/lib/python3.6/site-packages/dgl/view.py", line 66, in __getitem__
return self._graph._get_n_repr(self._ntid, self._nodes)[key]
File "/home_directory/lib/python3.6/site-packages/dgl/frame.py", line 393, in __getitem__
return self._columns[name].data
KeyError: 'h'
I’m not sure what the problem is here, and I’m not sure how to debug it.
Thanks in advance for the help!