To_block in negative sampling

Hello :slight_smile:

I created a sampling function to use negative sampling loss.

However, the following error appeared in a specific sample.

“Expect number of features to match number of nodes(len(u)). Got 4 and 3 instead.”

For example: tensor([29, 0, 2, 0]), tensor([31, 5, 31, 2])

I think the error occurs only when the same node is sampled more than once in a batch (not sure).

Here is my code.

class NegativeSamplingDataset(Dataset):
    """Dataset yielding one ``(seed, positive, negative)`` node triple per node.

    Indexing with a node ID returns that node as the seed, one randomly chosen
    successor of it as the positive item, and one randomly chosen node that is
    not a successor of it as the negative item.
    """

    def __init__(self, g: dgl.DGLGraph):
        # Only a reference to the graph is kept; sampling happens per item.
        self.g = g

    def __len__(self):
        # One dataset item per graph node.
        return self.g.number_of_nodes()

    def __getitem__(self, seed):
        """Return ``(seed, pos_item, neg_item)`` as tensors for node ``seed``."""
        seed = torch.tensor(seed)
        neighbours = self.g.successors(seed)

        # Negative candidates are all nodes minus the seed's successors. The
        # seed itself stays a candidate unless it is one of its own successors.
        non_neighbours = setdiff1d(self.g.nodes(), neighbours)

        # The negative is drawn before the positive, so the order in which the
        # random generator is consumed is unchanged.
        # NOTE(review): `choice`/`setdiff1d` are presumably the NumPy functions
        # -- confirm against the file's imports.
        neg_item = torch.tensor(choice(non_neighbours, 1)[0])
        pos_item = torch.tensor(choice(neighbours, 1)[0])
        return seed, pos_item, neg_item

class MinibatchSampler(object):
    """Collate callable that turns a batch of node triples into DGL blocks.

    ``sample`` is meant to be passed as ``collate_fn`` to a ``DataLoader``
    whose dataset yields ``(seed, pos_item, neg_item)`` tensors. For each of
    the three node sets it builds one block per entry of ``num_fanouts``.
    """

    def __init__(self, g, num_fanouts):
        """
        Parameters
        ----------
        g : graph object exposing ``to_networkx()``; it is rebuilt through
            ``dgl.graph`` and kept as the full graph to sample from.
        num_fanouts : list of fanouts on each layer.
        """
        self.g = dgl.graph(g.to_networkx())  # full graph
        self.num_fanouts = num_fanouts

    @staticmethod
    def _unique_keep_order(nodes):
        """Drop repeated node IDs from a 1-D tensor, keeping first occurrences."""
        # dict.fromkeys preserves insertion order, so a batch without
        # duplicates comes back unchanged.
        kept = list(dict.fromkeys(nodes.tolist()))
        return torch.tensor(kept, dtype=nodes.dtype, device=nodes.device)

    def sample(self, batch):
        """Build the target / positive / negative block lists for one batch.

        Parameters
        ----------
        batch : sequence of ``(seed, pos_item, neg_item)`` tensor triples.

        Returns
        -------
        tuple
            ``(target_blocks, pos_blocks, neg_blocks)``; each is a list of
            blocks ordered from the input layer to the output layer.

        Notes
        -----
        A shuffled batch can contain the same node more than once (for
        example seeds ``[29, 0, 2, 0]``). ``dgl.to_block`` fails on repeated
        destination nodes with "Expect number of features to match number of
        nodes", so each node set is de-duplicated first. When duplicates were
        removed, the output rows of the last block no longer line up
        one-to-one with the batch positions.
        NOTE(review): callers presumably need ``blocks[-1].dstdata[dgl.NID]``
        to map rows back to batch entries -- confirm against the loss code.
        """
        seeds, pos_items, neg_items = zip(*batch)

        seeds = self._unique_keep_order(torch.stack(seeds))
        pos_items = self._unique_keep_order(torch.stack(pos_items))
        neg_items = self._unique_keep_order(torch.stack(neg_items))

        target_blocks = []
        pos_blocks = []
        neg_blocks = []

        # Walk the layers from the output side towards the input side.
        for fanout in reversed(self.num_fanouts):
            if fanout >= self.g.number_of_nodes():
                # Fanout covers the whole graph: return a subgraph containing
                # all the in-edges of the seed nodes instead of sampling.
                sampled_graph = dgl.in_subgraph(self.g, seeds)
                pos_sampled_graph = dgl.in_subgraph(self.g, pos_items)
                neg_sampled_graph = dgl.in_subgraph(self.g, neg_items)
            else:
                sampled_graph = dgl.sampling.sample_neighbors(self.g, seeds, fanout)
                pos_sampled_graph = dgl.sampling.sample_neighbors(self.g, pos_items, fanout)
                neg_sampled_graph = dgl.sampling.sample_neighbors(self.g, neg_items, fanout)

            sampled_block = dgl.to_block(sampled_graph, seeds)
            pos_sampled_block = dgl.to_block(pos_sampled_graph, pos_items)
            neg_sampled_block = dgl.to_block(neg_sampled_graph, neg_items)

            # The source nodes of this layer's blocks become the seeds of the
            # next (shallower) layer.
            seeds = sampled_block.srcdata[dgl.NID]
            pos_items = pos_sampled_block.srcdata[dgl.NID]
            neg_items = neg_sampled_block.srcdata[dgl.NID]

            # Prepend so the final lists run from input layer to output layer.
            target_blocks.insert(0, sampled_block)
            pos_blocks.insert(0, pos_sampled_block)
            neg_blocks.insert(0, neg_sampled_block)

        return target_blocks, pos_blocks, neg_blocks

# Wire the triple dataset and the block sampler into a DataLoader; the
# sampler's `sample` method collates each batch of four triples.
dataset = NegativeSamplingDataset(g)
sampler = MinibatchSampler(g, [2, 2])
train_dataloader = DataLoader(
    dataset,
    batch_size=4,
    shuffle=True,
    collate_fn=sampler.sample,
)
# Fetch a single batch -- this is the call on which the reported error
# appeared for specific samples.
next(iter(train_dataloader))

The function in which the error occurs is “dgl.to_block(----)”.

:joy: :joy: :joy: Help me, please…

Why do you want to do neighbor sampling for negative sampling? For negative sampling, I think you can simply sample random nodes uniformly?

Can you provide a code snippet such that I can run the code myself to reproduce the error?