Need help: Heterogeneous graph classification error

Hello,
I am working on heterogeneous graph classification on a list of 1000 graphs. I am running into the
following error. I used the code provided in the DGL graph classification user guide.

ERROR************
RuntimeError Traceback (most recent call last)
in <cell line: 49>()
49 for epoch in range(20):
50 for batched_graph, batched_labels in train_dataloader:
—> 51 logits = model(batched_graph)
52 loss = F.cross_entropy(logits, batched_labels)
53 opt.zero_grad()

11 frames
/usr/local/lib/python3.10/dist-packages/dgl/nn/pytorch/conv/graphconv.py in forward(self, graph, feat, weight, edge_weight)
458 rst = graph.dstdata[“h”]
459 if weight is not None:
→ 460 rst = th.matmul(rst, weight)
461
462 if self._norm in [“right”, “both”]:

RuntimeError: mat1 and mat2 shapes cannot be multiplied (456x1 and 27x64)

**Graph information used is as follows:
Graphs: 1000
nodes= A,B,C,D
Node A has 1 feature
Node B has 1 feature
Node C has 20 feature
Node D has 5 feature
Total features in nodes is 27
canonical_etypes= [(‘A’, ‘part of’, ‘D’), (‘C’, ‘closer’, ‘A’), (‘B’, ‘familiar’, ‘D’), (‘B’, ‘resides’, ‘C’), (‘B’, ‘likes’, ‘A’)]

Code data loader**********

from dgl.dataloading import GraphDataLoader
from torch.utils.data.sampler import SubsetRandomSampler
num_examples = len(dataset)
num_train = int(num_examples * 0.8)
train_sampler = SubsetRandomSampler(torch.arange(num_train))
test_sampler = SubsetRandomSampler(torch.arange(num_train, num_examples))
train_dataloader = GraphDataLoader(
dataset, sampler=train_sampler, batch_size=38, drop_last=False
)
test_dataloader = GraphDataLoader(
dataset, sampler=test_sampler, batch_size=2, drop_last=False
)

Model/Train**********
import dgl.nn.pytorch as dglnn
import torch.nn as nn
import torch.nn.functional as F

class RGCN(nn.Module):

def __init__(self, in_feats, hid_feats, out_feats, rel_names):

    super().__init__()
    self.conv1 = dglnn.HeteroGraphConv({
        rel: dglnn.GraphConv(in_feats, hid_feats)
        for rel in rel_names}, aggregate='sum')

    self.conv2 = dglnn.HeteroGraphConv({
        rel: dglnn.GraphConv(hid_feats, out_feats)
        for rel in rel_names}, aggregate='sum')

def forward(self, graph, inputs):
    # inputs is features of nodes
    h = self.conv1(graph, inputs)
    h = {k: F.relu(v) for k, v in h.items()}
    h = self.conv2(graph, h)
    return h

class HeteroClassifier(nn.Module):

def __init__(self, in_dim, hidden_dim, n_classes, rel_names):
    super().__init__()
    self.rgcn = RGCN(in_dim, hidden_dim, hidden_dim, rel_names)
    self.classify = nn.Linear(hidden_dim, n_classes)

def forward(self, g):
    h = g.ndata['feat']
    h = self.rgcn(g, h)
    with g.local_scope():
        g.ndata['h'] = h

        # Calculate graph representation by average readout.
        hg = 0
        for ntype in g.ntypes:
            hg = hg + dgl.mean_nodes(g, 'h', ntype=ntype)
        return self.classify(hg)

#in_feats: total number of node features in nodes A,B,C,D is 27
#hid_feats: embedding size assumed at 64
#out_feats: Binary classification used 2
etypes= [(‘A’, ‘part of’, ‘D’), (‘C’, ‘part of’, ‘A’), (‘B’, ‘familiar’, ‘D’), (‘B’, ‘resides’, ‘C’), (‘B’, ‘likes’, ‘A’)]

model = HeteroClassifier(27, 64, 2, etypes)
opt = torch.optim.Adam(model.parameters())

for epoch in range(20):
for batched_graph, batched_labels in train_dataloader:
logits = model(batched_graph)
loss = F.cross_entropy(logits, batched_labels)
opt.zero_grad()
loss.backward()
opt.step()

Hi @Srini, could you have some syntax highlight on your code? It is a bit hard to read. You may use ```python``` to wrap them.

Thank you for the response, i wrapped the code as suggested.

#Graph information used is as follows:
#Graphs: 1000
#nodes= A,B,C,D
#Node A has 1 feature
#Node B has 1 feature
#Node C has 20 feature
#Node D has 5 feature
#Total features in nodes (A,B,C,D) is 27
#canonical_etypes= [(‘A’, ‘part of’, ‘D’), (‘C’, ‘closer’, ‘A’), (‘B’, ‘familiar’, ‘D’), (‘B’, ‘resides’, ‘C’), (‘B’, ‘likes’, ‘A’)]

#code for data loader

from dgl.dataloading import GraphDataLoader
from torch.utils.data.sampler import SubsetRandomSampler

num_examples = len(dataset)
num_train = int(num_examples * 0.8)
train_sampler = SubsetRandomSampler(torch.arange(num_train))
test_sampler = SubsetRandomSampler(torch.arange(num_train, num_examples))
train_dataloader = GraphDataLoader(
dataset, sampler=train_sampler, batch_size=38, drop_last=False)
test_dataloader = GraphDataLoader(
dataset, sampler=test_sampler, batch_size=2, drop_last=False)


#Model/Train

import dgl.nn.pytorch as dglnn
import torch.nn as nn
import torch.nn.functional as F

class RGCN(nn.Module):

def __init__(self, in_feats, hid_feats, out_feats, rel_names):
    super().__init__()
    self.conv1 = dglnn.HeteroGraphConv({  rel: dglnn.GraphConv(in_feats, hid_feats)
        for rel in rel_names}, aggregate='sum')
    self.conv2 = dglnn.HeteroGraphConv({ rel: dglnn.GraphConv(hid_feats, out_feats)
        for rel in rel_names}, aggregate='sum')

def forward(self, graph, inputs):

    # inputs is features of nodes
    h = self.conv1(graph, inputs)
    h = {k: F.relu(v) for k, v in h.items()}
    h = self.conv2(graph, h)
    return h

class HeteroClassifier(nn.Module):

  def __init__(self, in_dim, hidden_dim, n_classes, rel_names):
      super().__init__()
      self.rgcn = RGCN(in_dim, hidden_dim, hidden_dim, rel_names)
      self.classify = nn.Linear(hidden_dim, n_classes)

def forward(self, g):
    h = g.ndata['feat']
    h = self.rgcn(g, h)
    with g.local_scope():
        g.ndata['h'] = h

        # Calculate graph representation by average readout.
        hg = 0

        for ntype in g.ntypes:
            hg = hg + dgl.mean_nodes(g, 'h', ntype=ntype)
        return self.classify(hg)

#in_feats: total number of node features in nodes A,B,C,D is 27
#hid_feats: embedding size assumed at 64
#out_feats: Binary classification used 2

etypes= [(‘A’, ‘part of’, ‘D’), (‘C’, ‘part of’, ‘A’), (‘B’, ‘familiar’, ‘D’), (‘B’, ‘resides’, ‘C’), (‘B’, ‘likes’, ‘A’)]

model = HeteroClassifier(27, 64, 2, etypes)
opt = torch.optim.Adam(model.parameters())

for epoch in range(20):
  for batched_graph, batched_labels in train_dataloader:
    logits = model(batched_graph)
    loss = F.cross_entropy(logits, batched_labels)
    opt.zero_grad()
    loss.backward()
    opt.step()

Looks like you mixed up heterogeneous features and feature dimensions. You said the total number of features is 27, but in fact the node features for a node type apply only to that type. Node feature dimensions across different types cannot simply be added up.

This topic was automatically closed 30 days after the last reply. New replies are no longer allowed.