Hello
I created a sampling function to use negative sampling loss.
However, the following error appeared in a specific sample.
“Expect number of features to match number of nodes(len(u)). Got 4 and 3 instead.”
for example, tensor([29, 0, 2, 0]) , tensor([31,5, 31, 2])
I think there is an error only if the same node is sampled. (not sure)
Here is my code.
===================================================================
class NegativeSamplingDataset(Dataset):
def __init__(self, g: dgl.DGLGraph):
self.g = g
def __len__(self):
return self.g.number_of_nodes()
def __getitem__(self, seed):
seed = torch.tensor(seed)
pos_item = self.g.successors(seed)
neg_item = torch.tensor(choice(setdiff1d(self.g.nodes(), pos_item), 1)[0])
pos_item = torch.tensor(choice(pos_item, 1)[0])
return seed, pos_item, neg_item
class MinibatchSampler(object):
def __init__(self, g, num_fanouts):
"""
num_fanouts : list of fanouts on each layer.
"""
self.g = dgl.graph(g.to_networkx()) # full graph
self.num_fanouts = num_fanouts
# g.readonly()
# self.neg_sampler = EdgeSampler(self.g, 1)
def sample(self, batch):
seeds, pos_items, neg_items = zip(*batch)
seeds = torch.stack(seeds)
print(seeds)
pos_items = torch.stack(pos_items)
print(pos_items)
neg_items = torch.stack(neg_items)
print(neg_items)
target_blocks = []
pos_blocks = []
neg_blocks = []
for fanout in reversed(self.num_fanouts):
if fanout >= self.g.number_of_nodes():
sampled_graph = dgl.in_subgraph(self.g, seeds) # reture a subgraph contained all the edges of the seed node
pos_sampled_graph = dgl.in_subgraph(self.g, pos_items)
neg_sampled_graph = dgl.in_subgraph(self.g, neg_items)
else:
sampled_graph = dgl.sampling.sample_neighbors(self.g, seeds, fanout)
pos_sampled_graph = dgl.sampling.sample_neighbors(self.g, pos_items, fanout)
neg_sampled_graph = dgl.sampling.sample_neighbors(self.g, neg_items, fanout)
print("sampled_graph")
print(sampled_graph)
print("pos_sampled_graph")
print(pos_sampled_graph)
print("neg_sampled_graph")
print(neg_sampled_graph)
sampled_block = dgl.to_block(sampled_graph, seeds)
print("sampled_block")
pos_sampled_block = dgl.to_block(pos_sampled_graph, pos_items)
print("pos_sampled_block")
neg_sampled_block = dgl.to_block(neg_sampled_graph, neg_items)
seeds = sampled_block.srcdata[dgl.NID]
pos_items = pos_sampled_block.srcdata[dgl.NID]
neg_items = neg_sampled_block.srcdata[dgl.NID]
target_blocks.insert(0, sampled_block)
pos_blocks.insert(0, pos_sampled_block)
neg_blocks.insert(0, neg_sampled_block)
return target_blocks, pos_blocks, neg_blocks
===============================================================
dataset = NegativeSamplingDataset(g)
sampler = MinibatchSampler(g, [2,2])
train_dataloader = DataLoader(dataset, batch_size=4, collate_fn=sampler.sample, shuffle=True)
next(iter(train_dataloader)) # error appeared in a specific sample!!!
the function for which the error ooccurs is “dgl.to_block(----)”.
Help me, please…