Using DGL, I found the following implementation of RGCN. It's copied from https://github.com/dmlc/dgl/tree/master/examples/pytorch/hgt:
import dgl
import dgl.function as fn
import torch
import torch.nn as nn
import torch.nn.functional as F
class HeteroRGCNLayer(nn.Module):
    def __init__(self, in_size, out_size, etypes):
        super(HeteroRGCNLayer, self).__init__()
        # A separate weight matrix W_r for each relation
        self.weight = nn.ModuleDict({
            name: nn.Linear(in_size, out_size) for name in etypes
        })

    def forward(self, G, feat_dict):
        # The input is a dictionary of node features, one entry per node type
        funcs = {}
        for srctype, etype, dsttype in G.canonical_etypes:
            # Compute W_r * h for the source nodes of this relation
            Wh = self.weight[etype](feat_dict[srctype])
            # Save it in the graph for message passing
            G.nodes[srctype].data['Wh_%s' % etype] = Wh
            # Specify the per-relation message and reduce functions.
            # Every relation writes its result to the same destination
            # field 'h', which is what the cross-type reducer aggregates.
            funcs[etype] = (fn.copy_u('Wh_%s' % etype, 'm'), fn.mean('m', 'h'))
        # Trigger message passing for all relation types at once.
        # The first argument maps each relation to its message passing functions.
        # The second is the cross-type reducer: one of "sum", "max",
        # "min", "mean" or "stack".
        G.multi_update_all(funcs, 'sum')
        # Return the updated node feature dictionary
        return {ntype: G.nodes[ntype].data['h'] for ntype in G.ntypes}
class HeteroRGCN(nn.Module):
    def __init__(self, G, in_size, hidden_size, out_size):
        super(HeteroRGCN, self).__init__()
        # Create a two-layer network
        self.layer1 = HeteroRGCNLayer(in_size, hidden_size, G.etypes)
        self.layer2 = HeteroRGCNLayer(hidden_size, out_size, G.etypes)

    def forward(self, G, out_key):
        input_dict = {ntype: G.nodes[ntype].data['inp'] for ntype in G.ntypes}
        h_dict = self.layer1(G, input_dict)
        h_dict = {k: F.leaky_relu(h) for k, h in h_dict.items()}
        h_dict = self.layer2(G, h_dict)
        # Return the logits of the requested node type only
        return h_dict[out_key]
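To check my understanding of the layer, here is a minimal toy run I put together myself (the node and edge type names below are made up, not from the ACM data):

g = dgl.heterograph({
    ('user', 'follows', 'user'): ([0, 1], [1, 2]),
    ('user', 'plays', 'game'): ([0, 1, 2], [0, 0, 1]),
})
layer = HeteroRGCNLayer(in_size=4, out_size=2, etypes=g.etypes)
feats = {'user': torch.randn(g.number_of_nodes('user'), 4),
         'game': torch.randn(g.number_of_nodes('game'), 4)}
out = layer(g, feats)  # {'user': tensor of shape (3, 2), 'game': tensor of shape (2, 2)}

If I read it right, each relation applies its own linear map and mean-aggregates incoming messages, and the 'sum' cross-type reducer combines relations that share a destination node type.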
import argparse
import urllib.request

import numpy as np
import scipy.io
import torch
import torch.nn as nn
import torch.nn.functional as F
import dgl

from model import *

torch.manual_seed(0)
data_url = 'https://data.dgl.ai/dataset/ACM.mat'
data_file_path = '/tmp/ACM.mat'
urllib.request.urlretrieve(data_url, data_file_path)
data = scipy.io.loadmat(data_file_path)
parser = argparse.ArgumentParser(description='Training RGCN on the ACM dataset')
parser.add_argument('--n_epoch', type=int, default=200)
parser.add_argument('--n_hid', type=int, default=256)
parser.add_argument('--n_inp', type=int, default=256)
parser.add_argument('--clip', type=float, default=1.0)
parser.add_argument('--max_lr', type=float, default=1e-3)
args = parser.parse_args()
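Assuming the two snippets are saved as model.py and train.py (the file names are my guess from the 'from model import *' line), the script would be run along these lines:

python train.py --n_epoch 200 --n_hid 256 --max_lr 1e-3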
def get_n_params(model):
    # Count the total number of parameters in the model
    pp = 0
    for p in list(model.parameters()):
        n = 1
        for s in list(p.size()):
            n = n * s
        pp += n
    return pp
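As far as I can tell, this helper just counts parameters, so it should be equivalent to the one-liner below (my own simplification):

sum(p.numel() for p in model.parameters())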
def train(model, G):
    best_val_acc = torch.tensor(0)
    best_test_acc = torch.tensor(0)
    for epoch in np.arange(args.n_epoch) + 1:
        model.train()
        logits = model(G, 'paper')
        # The loss is computed only for the labeled (training) nodes.
        loss = F.cross_entropy(logits[train_idx], labels[train_idx].to(device))
        optimizer.zero_grad()
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip)
        optimizer.step()
        scheduler.step()
        if epoch % 5 == 0:
            model.eval()
            with torch.no_grad():
                logits = model(G, 'paper')
            pred = logits.argmax(1).cpu()
            train_acc = (pred[train_idx] == labels[train_idx]).float().mean()
            val_acc = (pred[val_idx] == labels[val_idx]).float().mean()
            test_acc = (pred[test_idx] == labels[test_idx]).float().mean()
            if best_val_acc < val_acc:
                best_val_acc = val_acc
                best_test_acc = test_acc
            print('Epoch: %d LR: %.5f Loss %.4f, Train Acc %.4f, Val Acc %.4f (Best %.4f), Test Acc %.4f (Best %.4f)' % (
                epoch,
                optimizer.param_groups[0]['lr'],
                loss.item(),
                train_acc.item(),
                val_acc.item(),
                best_val_acc.item(),
                test_acc.item(),
                best_test_acc.item(),
            ))
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
G = dgl.heterograph({
    ('paper', 'written-by', 'author'): data['PvsA'].nonzero(),
    ('author', 'writing', 'paper'): data['PvsA'].transpose().nonzero(),
    ('paper', 'citing', 'paper'): data['PvsP'].nonzero(),
    ('paper', 'cited', 'paper'): data['PvsP'].transpose().nonzero(),
    ('paper', 'is-about', 'subject'): data['PvsL'].nonzero(),
    ('subject', 'has', 'paper'): data['PvsL'].transpose().nonzero(),
})
print(G)
pvc = data['PvsC'].tocsr()
p_selected = pvc.tocoo()
# generate labels
labels = pvc.indices
labels = torch.tensor(labels).long()
# generate train/val/test split
pid = p_selected.row
shuffle = np.random.permutation(pid)
train_idx = torch.tensor(shuffle[0:800]).long()
val_idx = torch.tensor(shuffle[800:900]).long()
test_idx = torch.tensor(shuffle[900:]).long()
# node_dict, edge_dict and the per-edge 'id' features below are used by the
# HGT model from the same example directory; HeteroRGCN itself never reads them.
node_dict = {}
edge_dict = {}
for ntype in G.ntypes:
    node_dict[ntype] = len(node_dict)
for etype in G.etypes:
    edge_dict[etype] = len(edge_dict)
    G.edges[etype].data['id'] = torch.ones(G.number_of_edges(etype), dtype=torch.long) * edge_dict[etype]
# Randomly initialize input features for every node type
for ntype in G.ntypes:
    emb = nn.Parameter(torch.Tensor(G.number_of_nodes(ntype), args.n_inp), requires_grad=False)
    nn.init.xavier_uniform_(emb)
    G.nodes[ntype].data['inp'] = emb
G = G.to(device)
model = HeteroRGCN(G,
                   in_size=args.n_inp,
                   hidden_size=args.n_hid,
                   out_size=labels.max().item() + 1).to(device)
optimizer = torch.optim.AdamW(model.parameters())
# The scheduler is stepped once per epoch, so total_steps matches n_epoch
scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer, total_steps=args.n_epoch, max_lr=args.max_lr)
print('Training RGCN with #param: %d' % (get_n_params(model)))
train(model, G)
From the training code, I gather that this model was written to predict labels for the unlabeled nodes in the graph.
I'm quite new to working with graphs and graph neural networks, and I was wondering if anyone could point me to resources that explain how to rewrite this as a node regression model.
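From what I have read so far, my untested guess is that regression mainly means a single continuous output per node and a regression loss instead of cross-entropy, something like the sketch below ('targets' is a hypothetical per-paper float tensor I would have to supply myself):

model = HeteroRGCN(G, in_size=args.n_inp, hidden_size=args.n_hid,
                   out_size=1).to(device)   # one scalar output per node
pred = model(G, 'paper').squeeze(-1)        # shape: (num_papers,)
loss = F.mse_loss(pred[train_idx], targets[train_idx].to(device))

Is that roughly the right direction, and is there anything graph-specific I would be missing?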