First of all, I am a beginner in the field of graph neural networks. The purpose of my code is to realize the classification of heterogeneous graphs, where there exists a different number of nodes and edges per graph, and, of course, a different type of nodes and edges appearing in each graph.
I used my own dataset, and currently did data processing to generate a DGL heterogeneous graph dataset containing all heterogeneous graphs, the dataset contains heterogeneous graphs and labeled binary groups.
Now I try to train with the official documented RGCN model classification code for heterogeneous graphs, but I get the following error: dgl.ffi.base.DGLError: [19:08:58] C:\Users\Administrator\DGL_scripts\release\win-64\dgl\src\ graph. /heterograph.h:67: Check failed: meta_graph->HasVertex(vtype): Invalid vertex type: 1.
Part of the training code:
def collate(samples):
graphs, labels = map(list, zip(*samples))
return dgl.batch(graphs), torch.tensor(labels)
class RGCN(nn.Module):
def __init__(self, in_feats, hid_feats, out_feats, rel_names):
super().__init__()
self.conv1 = dglnn.HeteroGraphConv({
rel: dglnn.GraphConv(in_feats, hid_feats)
for rel in rel_names}, aggregate='sum')
self.conv2 = dglnn.HeteroGraphConv({
rel: dglnn.GraphConv(hid_feats, out_feats)
for rel in rel_names}, aggregate='sum')
def forward(self, graph, inputs):
h = self.conv1(graph, inputs)
h = {k: F.relu(v) for k, v in h.items()}
h = self.conv2(graph, h)
return h
class HeteroClassifier(nn.Module):
def __init__(self, in_dim, hidden_dim, n_classes, rel_names):
super().__init__()
self.rgcn = RGCN(in_dim, hidden_dim, hidden_dim, rel_names)
self.classify = nn.Linear(hidden_dim, n_classes)
def forward(self, g):
h = g.ndata['feat']
h = self.rgcn(g, h)
with g.local_scope():
g.ndata['h'] = h
hg = 0
for ntype in g.ntypes:
hg = hg + dgl.mean_nodes(g, 'h', ntype=ntype)
return self.classify(hg)
with open("./data/raw_trace_label_list.txt", "r") as traceLabelFile:
result = traceLabelFile.read().splitlines()
for index_label, item_label in enumerate(result):
MyDataset.append((MyGraph[index_label], torch.tensor([int(item_label)])))
print(MyDataset)
dataloader = GraphDataLoader(
MyDataset,
batch_size=1024,
drop_last=False,
shuffle=True,
collate_fn=collate)
etypes = ['excall', 'incall', 'ethertrans', 'tokentrans', 'suicide', 'create']
model = HeteroClassifier(10, 20, 5, etypes)
opt = torch.optim.Adam(model.parameters())
for epoch in range(20):
for batched_graph, labels in dataloader:
logits = model(batched_graph)
loss = F.cross_entropy(logits, labels)
opt.zero_grad()
loss.backward()
opt.step()
print(loss.item())
Each heterograph is of the following form:
(Graph(num_nodes={‘eoa’: 3},
num_edges={(‘eoa’, ‘excall’, ‘eoa’): 1, (‘eoa’, ‘incall’, ‘eoa’): 2},
metagraph=[(‘eoa’, ‘eoa’, ‘excall’), (‘eoa’, ‘eoa’, ‘incall’)]), tensor([11]))