Need help: Heterogenous graph classification error

Srini · January 9, 2024, 2:58am

Hello,
I am working on heterogeneous graph classification on a list of graphs ( len=1000). I am running into
following error. I used the code provided in DGL graph classification user guide.

ERROR************
RuntimeError Traceback (most recent call last)
in <cell line: 49>()
49 for epoch in range(20):
50 for batched_graph, batched_labels in train_dataloader:
—> 51 logits = model(batched_graph)
52 loss = F.cross_entropy(logits, batched_labels)
53 opt.zero_grad()

11 frames
/usr/local/lib/python3.10/dist-packages/dgl/nn/pytorch/conv/graphconv.py in forward(self, graph, feat, weight, edge_weight)
458 rst = graph.dstdata[“h”]
459 if weight is not None:
→ 460 rst = th.matmul(rst, weight)
461
462 if self._norm in [“right”, “both”]:

RuntimeError: mat1 and mat2 shapes cannot be multiplied (456x1 and 27x64)

**Graph information used is as follows:
Graphs: 1000
nodes= A,B,C,D
Node A has 1 feature
Node A has 1 feature
Node C has 20 feature
Node D has 5 feature
Total features in nodes is 27
canonical_etypes= [(‘A’, ‘part of’, ‘D’), (‘C’, ‘closer’, ‘A’), (‘B’, ‘familiar’, ‘D’), (‘B’, ‘resides’, ‘C’), (‘B’, ‘likes’, ‘A’)]

Code data loader**********

from dgl.dataloading import GraphDataLoader
from torch.utils.data.sampler import SubsetRandomSampler
num_examples = len(dataset)
num_train = int(num_examples * 0.8)
train_sampler = SubsetRandomSampler(torch.arange(num_train))
test_sampler = SubsetRandomSampler(torch.arange(num_train, num_examples))
train_dataloader = GraphDataLoader(
dataset, sampler=train_sampler, batch_size=38, drop_last=False
)
test_dataloader = GraphDataLoader(
dataset, sampler=test_sampler, batch_size=2, drop_last=False
)

Model/Train**********
import dgl.nn.pytorch as dglnn
import torch.nn as nn
import torch.nn.functional as F

class RGCN(nn.Module):

def __init__(self, in_feats, hid_feats, out_feats, rel_names):

    super().__init__()
    self.conv1 = dglnn.HeteroGraphConv({
        rel: dglnn.GraphConv(in_feats, hid_feats)
        for rel in rel_names}, aggregate='sum')

    self.conv2 = dglnn.HeteroGraphConv({
        rel: dglnn.GraphConv(hid_feats, out_feats)
        for rel in rel_names}, aggregate='sum')

def forward(self, graph, inputs):
    # inputs is features of nodes
    h = self.conv1(graph, inputs)
    h = {k: F.relu(v) for k, v in h.items()}
    h = self.conv2(graph, h)
    return h

class HeteroClassifier(nn.Module):

def __init__(self, in_dim, hidden_dim, n_classes, rel_names):
    super().__init__()
    self.rgcn = RGCN(in_dim, hidden_dim, hidden_dim, rel_names)
    self.classify = nn.Linear(hidden_dim, n_classes)

def forward(self, g):
    h = g.ndata['feat']
    h = self.rgcn(g, h)
    with g.local_scope():
        g.ndata['h'] = h

        # Calculate graph representation by average readout.
        hg = 0
        for ntype in g.ntypes:
            hg = hg + dgl.mean_nodes(g, 'h', ntype=ntype)
        return self.classify(hg)

#in_feats: total number of node features in nodes A,B,C,D is 27
#hid_feats: embedding size assumed at 64
#out_feats: Binary classification used 2
etypes= [(‘A’, ‘part of’, ‘D’), (‘C’, ‘part of’, ‘A’), (‘B’, ‘familiar’, ‘D’), (‘B’, ‘resides’, ‘C’), (‘B’, ‘likes’, ‘A’)]

model = HeteroClassifier(27, 64, 2, etypes)
opt = torch.optim.Adam(model.parameters())

for epoch in range(20):
for batched_graph, batched_labels in train_dataloader:
logits = model(batched_graph)
loss = F.cross_entropy(logits, batched_labels)
opt.zero_grad()
loss.backward()
opt.step()

czkkkkkk · January 11, 2024, 1:58am

Hi @Srini, could you have some syntax highlight on your code? It is a bit hard to read. You may use ```python``` to wrap them.

Srini · January 11, 2024, 5:23am

Thank you for the response, i wrapped the code as suggested.

#Graph information used is as follows:
#Graphs: 1000
#nodes= A,B,C,D
#Node A has 1 feature
#Node A has 1 feature
#Node C has 20 feature
#Node D has 5 feature
#Total features in nodes (A,B,C,D) is 27
#canonical_etypes= [(‘A’, ‘part of’, ‘D’), (‘C’, ‘closer’, ‘A’), (‘B’, ‘familiar’, ‘D’), (‘B’, ‘resides’, ‘C’), (‘B’, ‘likes’, ‘A’)]

#code for data loader

from dgl.dataloading import GraphDataLoader
from torch.utils.data.sampler import SubsetRandomSampler

num_examples = len(dataset)
num_train = int(num_examples * 0.8)
train_sampler = SubsetRandomSampler(torch.arange(num_train))
test_sampler = SubsetRandomSampler(torch.arange(num_train, num_examples))
train_dataloader = GraphDataLoader(
dataset, sampler=train_sampler, batch_size=38, drop_last=False)
test_dataloader = GraphDataLoader(
dataset, sampler=test_sampler, batch_size=2, drop_last=False)


#Model/Train

import dgl.nn.pytorch as dglnn
import torch.nn as nn
import torch.nn.functional as F

class RGCN(nn.Module):

def __init__(self, in_feats, hid_feats, out_feats, rel_names):
    super().__init__()
    self.conv1 = dglnn.HeteroGraphConv({  rel: dglnn.GraphConv(in_feats, hid_feats)
        for rel in rel_names}, aggregate='sum')
    self.conv2 = dglnn.HeteroGraphConv({ rel: dglnn.GraphConv(hid_feats, out_feats)
        for rel in rel_names}, aggregate='sum')

def forward(self, graph, inputs):

    # inputs is features of nodes
    h = self.conv1(graph, inputs)
    h = {k: F.relu(v) for k, v in h.items()}
    h = self.conv2(graph, h)
    return h

class HeteroClassifier(nn.Module):

  def __init__(self, in_dim, hidden_dim, n_classes, rel_names):
      super().__init__()
      self.rgcn = RGCN(in_dim, hidden_dim, hidden_dim, rel_names)
      self.classify = nn.Linear(hidden_dim, n_classes)

def forward(self, g):
    h = g.ndata['feat']
    h = self.rgcn(g, h)
    with g.local_scope():
        g.ndata['h'] = h

        # Calculate graph representation by average readout.
        hg = 0

        for ntype in g.ntypes:
            hg = hg + dgl.mean_nodes(g, 'h', ntype=ntype)
        return self.classify(hg)

#in_feats: total number of node features in nodes A,B,C,D is 27
#hid_feats: embedding size assumed at 64
#out_feats: Binary classification used 2

etypes= [(‘A’, ‘part of’, ‘D’), (‘C’, ‘part of’, ‘A’), (‘B’, ‘familiar’, ‘D’), (‘B’, ‘resides’, ‘C’), (‘B’, ‘likes’, ‘A’)]

model = HeteroClassifier(27, 64, 2, etypes)
opt = torch.optim.Adam(model.parameters())

for epoch in range(20):
  for batched_graph, batched_labels in train_dataloader:
    logits = model(batched_graph)
    loss = F.cross_entropy(logits, batched_labels)
    opt.zero_grad()
    loss.backward()
    opt.step()

czkkkkkk · January 18, 2024, 2:29am

Looks like you mass up heterogeneous features and feature dimensions. You said total number of features is 27 but in fact, the node feature for a node type is only for this type. Node features across different types cannot be added up.

system · February 17, 2024, 2:29am

This topic was automatically closed 30 days after the last reply. New replies are no longer allowed.