To_block in negative sampling

yeji · July 17, 2020, 8:19am

Hello

I created a sampling function to use negative sampling loss.

However, the following error appeared in a specific sample.

“Expect number of features to match number of nodes(len(u)). Got 4 and 3 instead.”

for example, tensor([29, 0, 2, 0]) , tensor([31,5, 31, 2])

I think there is an error only if the same node is sampled. (not sure)

Here is my code.

===================================================================
class NegativeSamplingDataset(Dataset):

def __init__(self, g: dgl.DGLGraph):
    self.g = g

def __len__(self):
    return self.g.number_of_nodes()

def __getitem__(self, seed):
    seed = torch.tensor(seed)
    pos_item = self.g.successors(seed)
    neg_item = torch.tensor(choice(setdiff1d(self.g.nodes(), pos_item), 1)[0])
    pos_item = torch.tensor(choice(pos_item, 1)[0])
    return seed, pos_item, neg_item

class MinibatchSampler(object):

def __init__(self, g, num_fanouts):

    """

    num_fanouts : list of fanouts on each layer.

    """

    self.g = dgl.graph(g.to_networkx()) # full graph

    

    self.num_fanouts = num_fanouts

    # g.readonly()

    # self.neg_sampler = EdgeSampler(self.g, 1) 

    

def sample(self, batch):

    seeds, pos_items, neg_items = zip(*batch)

    seeds = torch.stack(seeds)

    print(seeds)

    pos_items = torch.stack(pos_items)

    print(pos_items)

    neg_items = torch.stack(neg_items)      

    print(neg_items) 

    target_blocks = []

    pos_blocks = []

    neg_blocks = []

    for fanout in reversed(self.num_fanouts):

        if fanout >= self.g.number_of_nodes():

            sampled_graph = dgl.in_subgraph(self.g, seeds) # reture a subgraph contained all the edges of the seed node

            pos_sampled_graph = dgl.in_subgraph(self.g, pos_items)

            neg_sampled_graph = dgl.in_subgraph(self.g, neg_items)

       
        else:

            sampled_graph = dgl.sampling.sample_neighbors(self.g, seeds, fanout)

            pos_sampled_graph = dgl.sampling.sample_neighbors(self.g, pos_items, fanout)

            neg_sampled_graph = dgl.sampling.sample_neighbors(self.g, neg_items, fanout)

            print("sampled_graph")

            print(sampled_graph)

            print("pos_sampled_graph")

            print(pos_sampled_graph)

            print("neg_sampled_graph")

            print(neg_sampled_graph)

        sampled_block = dgl.to_block(sampled_graph, seeds)

        print("sampled_block")

        pos_sampled_block = dgl.to_block(pos_sampled_graph, pos_items)

        print("pos_sampled_block")

        neg_sampled_block = dgl.to_block(neg_sampled_graph, neg_items)

        seeds = sampled_block.srcdata[dgl.NID] 

        pos_items = pos_sampled_block.srcdata[dgl.NID]

        neg_items = neg_sampled_block.srcdata[dgl.NID]


        target_blocks.insert(0, sampled_block)

        pos_blocks.insert(0, pos_sampled_block)

        neg_blocks.insert(0, neg_sampled_block)

    return target_blocks, pos_blocks, neg_blocks

===============================================================
dataset = NegativeSamplingDataset(g)
sampler = MinibatchSampler(g, [2,2])
train_dataloader = DataLoader(dataset, batch_size=4, collate_fn=sampler.sample, shuffle=True)
next(iter(train_dataloader)) # error appeared in a specific sample!!!

the function for which the error ooccurs is “dgl.to_block(----)”.

Help me, please…

mufeili · July 17, 2020, 9:29am

Why do you want to do neighbor sampling for negative sampling? For negative sampling, I think you can simply sample random nodes uniformly?

mufeili · July 20, 2020, 7:20am

Can you provide a code snippet such that I can run the code myself to reproduce the error?