Hi, I have some questions about the link prediction task, extending the discussion here. I successfully trained on a single graph by following the tutorial. Now I want to train on batches of graphs, where the negative sampling needs to be done per batch:
import torch
import dgl
import dgl.function as fn
import torch.nn.functional as F
from dgl.nn.pytorch import HeteroGraphConv
from dgl.nn.pytorch.conv import GraphConv
class HeteroDotProductPredictor(torch.nn.Module):
    def forward(self, graph, h, etype):
        with graph.local_scope():
            graph.ndata['h'] = h['face']  # node type "face"
            graph.apply_edges(fn.u_dot_v('h', 'h', 'score'), etype=etype)
            return graph.edges[etype].data['score']
class Model(torch.nn.Module):
    def __init__(self, in_features, hidden_features, out_features, rel_names):
        super().__init__()
        self.sage = GNN(in_features, hidden_features, out_features, rel_names)
        self.pred = HeteroDotProductPredictor()

    def forward(self, g, neg_g, x, etype):
        h = self.sage(g, x)
        return self.pred(g, h, etype), self.pred(neg_g, h, etype)
class GNN(torch.nn.Module):
    def __init__(self, in_feats, hid_feats, out_feats, rel_names):
        super().__init__()
        self.conv1 = HeteroGraphConv({
            rel: GraphConv(in_feats, hid_feats)
            for rel in rel_names}, aggregate='mean')
        self.conv2 = HeteroGraphConv({
            rel: GraphConv(hid_feats, out_feats)
            for rel in rel_names}, aggregate='mean')

    def forward(self, graph, inputs):
        # inputs: dict mapping node type to node feature tensor
        h = self.conv1(graph, inputs)
        h = {k: F.relu(v) for k, v in h.items()}
        h = self.conv2(graph, h)
        return h
def compute_loss(pos_score, neg_score):
    # Margin loss: mean over max(0, 1 - pos + neg), with the negative
    # scores reshaped to k negatives per positive edge
    n_edges = pos_score.shape[0]
    return (1 - pos_score + neg_score.view(n_edges, -1)).clamp(min=0).mean()
def construct_negative_graph(graph, k, etype):
    utype, _, vtype = etype
    src, dst = graph.edges(etype=etype)
    neg_src = src.repeat_interleave(k)
    neg_dst = torch.randint(0, graph.num_nodes(vtype), (len(src) * k,))
    return dgl.heterograph(
        {etype: (neg_src, neg_dst)},
        num_nodes_dict={ntype: graph.num_nodes(ntype) for ntype in graph.ntypes})
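# NOTE: this samples destinations uniformly over *all* vtype nodes, which is
# fine for a single graph but, on a batched graph, would draw negatives that
# cross the boundaries between the component graphs in the batch.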
# train, val, test = 700, 100, 100 dgl graphs
dataloader = dgl.dataloading.GraphDataLoader(
    train, batch_size=128, shuffle=True, drop_last=False, num_workers=4)
k = 30
model = Model(train[0].ndata["x"].shape[1], 20, 1, train[0].etypes)
opt = torch.optim.Adam(model.parameters())
for epoch in range(2):
    running_loss = 0.
    for i, hetero_graph in enumerate(dataloader):
        node_features = hetero_graph.nodes["face"].data["x"].float()
        node_features = {"face": node_features}
        # TODO: negative sampling on batch
        negative_graph = construct_negative_graph(hetero_graph, k, ("face", "type3", "face"))
        pos_score, neg_score = model(hetero_graph, negative_graph, node_features,
                                     ("face", "type3", "face"))
        loss = compute_loss(pos_score, neg_score)
        opt.zero_grad()
        loss.backward()
        opt.step()
        print(f"iter {i} loss {loss.item()}")
        running_loss += loss.item()
    print(f'{epoch + 1} loss: {running_loss / len(dataloader):.5f}')
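And here is a rough, untested sketch of a batch-aware variant I have in mind for the TODO above. The function name `construct_negative_graph_batched` is mine, and the offset arithmetic built on `batch_num_nodes()`/`batch_num_edges()` is a guess I have not verified; it assumes the nodes of each component graph occupy a contiguous ID range inside the batched graph:

def construct_negative_graph_batched(graph, k, etype):
    # Untested sketch: per-graph negative sampling on a batched heterograph,
    # so each negative destination is drawn only from the source edge's own
    # component graph.
    utype, _, vtype = etype
    src, dst = graph.edges(etype=etype)
    num_nodes_per_graph = graph.batch_num_nodes(vtype)   # shape (B,)
    num_edges_per_graph = graph.batch_num_edges(etype)   # shape (B,)
    # Node-ID offset of each component graph within the batch.
    offsets = torch.cat([torch.zeros(1, dtype=torch.long),
                         num_nodes_per_graph.cumsum(0)[:-1]])
    # For every positive edge: the offset and node count of its graph.
    edge_offsets = offsets.repeat_interleave(num_edges_per_graph)
    edge_sizes = num_nodes_per_graph.repeat_interleave(num_edges_per_graph)
    neg_src = src.repeat_interleave(k)
    # Uniform destination restricted to the edge's own component graph.
    rand = torch.rand(len(src) * k)
    sizes = edge_sizes.repeat_interleave(k).float()
    neg_dst = edge_offsets.repeat_interleave(k) + (rand * sizes).long()
    return dgl.heterograph(
        {etype: (neg_src, neg_dst)},
        num_nodes_dict={ntype: graph.num_nodes(ntype) for ntype in graph.ntypes})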
My questions are:
- How should I define `construct_negative_graph()` so that it works on the batched graphs produced by the `GraphDataLoader`? (A rough attempt is sketched above.) In my case I don't need neighbor sampling, since the graphs are relatively small; I only need the negative sampling.
- How should I extend the training loop to train on the other edge types? My graphs have a single node type and multiple edge types. Should I construct positive/negative graphs for each edge type individually and sum up the losses during training? For example (a more concrete sketch follows the example):
pos_score1, neg_score1 = #... neg sampling for edge_type_1
pos_score2, neg_score2 = #... neg sampling for edge_type_2
pos_score3, neg_score3 = #... neg sampling for edge_type_3
loss1 = compute_loss(pos_score1, neg_score1)
loss2 = compute_loss(pos_score2, neg_score2)
loss3 = compute_loss(pos_score3, neg_score3)
loss = loss1 + loss2 + loss3
opt.zero_grad()
loss.backward()
opt.step()
...
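To make the second question concrete, this is roughly what I mean, reusing the functions defined above; the edge-type names "type1" and "type2" are placeholders for my actual types:

etypes = [("face", "type1", "face"),
          ("face", "type2", "face"),
          ("face", "type3", "face")]
loss = 0.0
for etype in etypes:
    # One negative graph per edge type; the per-type losses are summed
    # so a single backward pass covers all edge types.
    negative_graph = construct_negative_graph(hetero_graph, k, etype)
    pos_score, neg_score = model(hetero_graph, negative_graph, node_features, etype)
    loss = loss + compute_loss(pos_score, neg_score)
opt.zero_grad()
loss.backward()
opt.step()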
Thank you!