I found information leakage in EdgeDataLoader. How can I avoid this problem?
In detail, it seems that EdgeDataLoader does not drop the edges to be predicted during block/subgraph sampling.
Code:
# Minimal reproduction: build a tiny heterograph, draw one edge mini-batch
# with EdgeDataLoader, and print the sampled blocks next to the positive graph
# so the two can be compared.
import torch
import dgl
from tqdm import tqdm

# Seed DGL's random number generator so the run is repeatable.
dgl.seed(0)

# Every relation is stored together with its reverse relation: edge i of
# 'liked_by' / 'sent_by' connects the same node pair as edge i of
# 'like' / 'send', in the opposite direction.
data_dict = {
    ('user', 'like', 'post'): (torch.tensor([0, 0, 2]), torch.tensor([0, 1, 2])),
    ('post', 'liked_by', 'user'): (torch.tensor([0, 1, 2]), torch.tensor([0, 0, 2])),
    ('user', 'send', 'post'): (torch.tensor([1, 1, 3]), torch.tensor([1, 2, 0])),
    ('post', 'sent_by', 'user'): (torch.tensor([1, 2, 0]), torch.tensor([1, 1, 3])),
}
g = dgl.heterograph(data_dict)

# All 'like' edges are used as the edges to predict.
target_eids = g.edges(etype=('user', 'like', 'post'), form='eid')
val_eid_dict = {('user', 'like', 'post'): target_eids}

# Two sampling layers, so each mini-batch comes with two blocks.
sampler = dgl.dataloading.MultiLayerFullNeighborSampler(2)

# NOTE(review): no `exclude` argument is passed here, and the printed blocks
# still contain the 'like' / 'liked_by' edges of the pair being predicted.
# DGL's EdgeDataLoader documents `exclude='reverse_types'` together with
# `reverse_etypes={...}` for removing the target edges and their reverse
# edges from the sampled neighbourhood -- TODO confirm against the installed
# DGL version. It is intentionally left unset so this script keeps
# reproducing the reported behaviour.
val_dataloader = dgl.dataloading.EdgeDataLoader(
    g,
    val_eid_dict,
    sampler,
    negative_sampler=dgl.dataloading.negative_sampler.Uniform(1),
    batch_size=1,
    shuffle=False,
    drop_last=False,
    num_workers=1,
)

# Only the first mini-batch is inspected; the loop variables stay bound after
# the `break` and are printed below.
for input_nodes, positive_graph, negative_graph, blocks in tqdm(val_dataloader):
    break

print(input_nodes)
print(blocks)
# For each sampled block: node IDs, edge IDs, then the (src, dst) tensors of
# every edge type on a single line.
for block in blocks:
    print(block.ndata)
    print(block.edata)
    print(*(block.edges(etype=name) for name in ('like', 'liked_by', 'send', 'sent_by')))
print(positive_graph)
print(positive_graph.edata)
print(positive_graph.ndata)
Output:
{'post': tensor([0, 1]), 'user': tensor([0, 3, 1])}
[Block(num_src_nodes={'post': 2, 'user': 3},
num_dst_nodes={'post': 2, 'user': 2},
num_edges={('post', 'liked_by', 'user'): 2, ('post', 'sent_by', 'user'): 1, ('user', 'like', 'post'): 2, ('user', 'send', 'post'): 2},
metagraph=[('post', 'user', 'liked_by'), ('post', 'user', 'sent_by'), ('user', 'post', 'like'), ('user', 'post', 'send')]), Block(num_src_nodes={'post': 2, 'user': 2},
num_dst_nodes={'post': 1, 'user': 1},
num_edges={('post', 'liked_by', 'user'): 2, ('post', 'sent_by', 'user'): 0, ('user', 'like', 'post'): 1, ('user', 'send', 'post'): 1},
metagraph=[('post', 'user', 'liked_by'), ('post', 'user', 'sent_by'), ('user', 'post', 'like'), ('user', 'post', 'send')])]
defaultdict(<class 'dict'>, {'_ID': {'post': tensor([0, 1]), 'user': tensor([0, 3, 1])}})
defaultdict(<class 'dict'>, {'_ID': {('post', 'liked_by', 'user'): tensor([0, 1]), ('post', 'sent_by', 'user'): tensor([0]), ('user', 'like', 'post'): tensor([0, 1]), ('user', 'send', 'post'): tensor([0, 1])}})
(tensor([0, 0]), tensor([0, 1])) (tensor([0, 1]), tensor([0, 0])) (tensor([1, 2]), tensor([0, 1])) (tensor([0]), tensor([1]))
defaultdict(<class 'dict'>, {'_ID': {'post': tensor([0, 1]), 'user': tensor([0, 3])}})
defaultdict(<class 'dict'>, {'_ID': {('post', 'liked_by', 'user'): tensor([0, 1]), ('post', 'sent_by', 'user'): tensor([], dtype=torch.int64), ('user', 'like', 'post'): tensor([0]), ('user', 'send', 'post'): tensor([0])}})
(tensor([0]), tensor([0])) (tensor([0, 1]), tensor([0, 0])) (tensor([1]), tensor([0])) (tensor([], dtype=torch.int64), tensor([], dtype=torch.int64))
Graph(num_nodes={'post': 1, 'user': 1},
num_edges={('post', 'liked_by', 'user'): 0, ('post', 'sent_by', 'user'): 0, ('user', 'like', 'post'): 1, ('user', 'send', 'post'): 0},
metagraph=[('post', 'user', 'liked_by'), ('post', 'user', 'sent_by'), ('user', 'post', 'like'), ('user', 'post', 'send')])
defaultdict(<class 'dict'>, {'_ID': {('post', 'liked_by', 'user'): tensor([], dtype=torch.int64), ('post', 'sent_by', 'user'): tensor([], dtype=torch.int64), ('user', 'like', 'post'): tensor([0]), ('user', 'send', 'post'): tensor([], dtype=torch.int64)}})
defaultdict(<class 'dict'>, {'_ID': {'post': tensor([0]), 'user': tensor([0])}})
In the above case, I’d like to predict the edge `user0 like post0`; however, that edge still exists in the sampled blocks. In other words, any model learns the representations of `user0` and `post0` with the relation to be predicted preserved.