Building a Graphormer Model

I am trying to build a graphormer model with GraphormerLayers. This is how the model looks and the class file for reference. But, I’m facing issue when trying to write the forward function. I’m not sure how to use the attr_bias or mask variables. I’ve checked couple of blogs and repos, but still couldn’t get a clear idea. Can someone please help with this ?

import numpy as np
import torch as th
import dgl
from dgl.nn import NNConv, GraphormerLayer
from dgl.nn import nn

class Graphormer(th.nn.Module):
    def __init__(self, gnn_layers, num_feats, n_classes, hidden, num_edge_feats, activation, num_heads, final_activation, dropout):
        super(Graphormer, self).__init__()
        self._gnn_layers = gnn_layers
        self._num_feats = num_feats
        self._n_classes = n_classes
        self._num_hiden_features = hidden
        self.activation = activation
        self._num_edge_feats = num_edge_feats
        self._final_activation = final_activation
        self._num_heads = num_heads
        self.dropout = dropout


    def build_model(self):
        self.layers = nn.ModuleList()
        # input to hidden
        i2h = self.build_input_layer()
        # hidden to hidden
        for i in range(self._gnn_layers - 2):
            h2h = self.build_hidden_layer(i)
        # hidden to output
        h2o = self.build_output_layer()

    def build_input_layer(self):
        print('Building an INPUT  layer of {}x{}'.format(self._num_feats, self._num_hiden_features[0]))
        return GraphormerLayer(self._num_feats, self._num_hiden_features[0], self._num_heads, self.dropout, activation=self.activation)

    def build_hidden_layer(self, i):
        print('Building an HIDDEN  layer of {}x{}'.format(self._num_hiden_features[i], self._num_hiden_features[i+1]))
        return GraphormerLayer(self._num_hiden_features[i], self._num_hiden_features[i+1], self._num_heads, self.dropout, activation=self.activation)

    def build_output_layer(self):
        print('Building an OUTPUT  layer of {}x{}'.format(self._num_hiden_features[-1], self._n_classes))
        return GraphormerLayer(self._num_hiden_features[-1], self._n_classes, self._num_heads, self.dropout, activation=self._final_activation)

    def edge_function(f_in, f_out):
        a = int(f_in*0.666 + f_out*0.334)
        b = int(f_in*0.334 + f_out*0.666)
        return th.nn.Sequential(
            th.nn.Linear(f_in, a),
            th.nn.Linear(a, b),
            th.nn.Linear(b, f_out)

    def set_g(self, g):
        self.g = g
        for l in range(self._gnn_layers):
            self.layers[l].g = g

    def forward(self, graph, feat, efeat):

        # x = self.input_layer(graph)

        a, b = feat.edges()
        feat_t = th.stack([a, b], dim=0)
        x = graph

        for idx, layer in enumerate(self.layers):
            x = layer(x, efeat, feat)
            x = x.flatten(1)            
            x = self.activation(x)
            print(idx, ' iter done')
            print('before: ', x.shape)
            # x = x.reshape(1, -1, self._num_hiden_features[idx])
            # print('after: ', x.shape)

        # x = self.output_layer(x)
        if self._final_activation is not None:
            logits = self._final_activation(x)
            logits = x

        return logits

Right now the shape of the input tensor doesn’t change after each layer. The model looks as follows:

  (activation): ELU()
  (final_activation): ReLU()
  (gnn_object): Graphormer(
    (activation): ELU()
    (_final_activation): ReLU()
    (layers): ModuleList(
      (0): GraphormerLayer(
        (attn): BiasedMHA(
          (q_proj): Linear(in_features=42, out_features=42, bias=True)
          (k_proj): Linear(in_features=42, out_features=42, bias=True)
          (v_proj): Linear(in_features=42, out_features=42, bias=True)
          (out_proj): Linear(in_features=42, out_features=42, bias=True)
          (dropout): Dropout(p=0.1, inplace=False)
        (ffn): Sequential(
          (0): Linear(in_features=42, out_features=42, bias=True)
          (1): ELU()
          (2): Dropout(p=0.1, inplace=False)
          (3): Linear(in_features=42, out_features=42, bias=True)
          (4): Dropout(p=0.1, inplace=False)
        (dropout): Dropout(p=0.1, inplace=False)
        (attn_layer_norm): LayerNorm((42,), eps=1e-05, elementwise_affine=True)
        (ffn_layer_norm): LayerNorm((42,), eps=1e-05, elementwise_affine=True)
      (1): GraphormerLayer(
        (attn): BiasedMHA(
          (q_proj): Linear(in_features=42, out_features=42, bias=True)
          (k_proj): Linear(in_features=42, out_features=42, bias=True)
          (v_proj): Linear(in_features=42, out_features=42, bias=True)
          (out_proj): Linear(in_features=42, out_features=42, bias=True)
          (dropout): Dropout(p=0.1, inplace=False)
        (ffn): Sequential(
          (0): Linear(in_features=42, out_features=35, bias=True)
          (1): ELU()
          (2): Dropout(p=0.1, inplace=False)
          (3): Linear(in_features=35, out_features=42, bias=True)
          (4): Dropout(p=0.1, inplace=False)
        (dropout): Dropout(p=0.1, inplace=False)
        (attn_layer_norm): LayerNorm((42,), eps=1e-05, elementwise_affine=True)
        (ffn_layer_norm): LayerNorm((42,), eps=1e-05, elementwise_affine=True)
      (2): GraphormerLayer(
        (attn): BiasedMHA(
          (q_proj): Linear(in_features=35, out_features=35, bias=True)
          (k_proj): Linear(in_features=35, out_features=35, bias=True)
          (v_proj): Linear(in_features=35, out_features=35, bias=True)
          (out_proj): Linear(in_features=35, out_features=35, bias=True)
          (dropout): Dropout(p=0.1, inplace=False)
        (ffn): Sequential(
          (0): Linear(in_features=35, out_features=28, bias=True)
          (1): ELU()
          (2): Dropout(p=0.1, inplace=False)
          (3): Linear(in_features=28, out_features=35, bias=True)
          (4): Dropout(p=0.1, inplace=False)
        (dropout): Dropout(p=0.1, inplace=False)
        (attn_layer_norm): LayerNorm((35,), eps=1e-05, elementwise_affine=True)
        (ffn_layer_norm): LayerNorm((35,), eps=1e-05, elementwise_affine=True)
      (3): GraphormerLayer(
        (attn): BiasedMHA(
          (q_proj): Linear(in_features=28, out_features=28, bias=True)
          (k_proj): Linear(in_features=28, out_features=28, bias=True)
          (v_proj): Linear(in_features=28, out_features=28, bias=True)
          (out_proj): Linear(in_features=28, out_features=28, bias=True)
          (dropout): Dropout(p=0.1, inplace=False)
        (ffn): Sequential(
          (0): Linear(in_features=28, out_features=14, bias=True)
          (1): ELU()
          (2): Dropout(p=0.1, inplace=False)
          (3): Linear(in_features=14, out_features=28, bias=True)
          (4): Dropout(p=0.1, inplace=False)
        (dropout): Dropout(p=0.1, inplace=False)
        (attn_layer_norm): LayerNorm((28,), eps=1e-05, elementwise_affine=True)
        (ffn_layer_norm): LayerNorm((28,), eps=1e-05, elementwise_affine=True)
      (4): GraphormerLayer(
        (attn): BiasedMHA(
          (q_proj): Linear(in_features=7, out_features=7, bias=True)
          (k_proj): Linear(in_features=7, out_features=7, bias=True)
          (v_proj): Linear(in_features=7, out_features=7, bias=True)
          (out_proj): Linear(in_features=7, out_features=7, bias=True)
          (dropout): Dropout(p=0.1, inplace=False)
        (ffn): Sequential(
          (0): Linear(in_features=7, out_features=2, bias=True)
          (1): ReLU()
          (2): Dropout(p=0.1, inplace=False)
          (3): Linear(in_features=2, out_features=7, bias=True)
          (4): Dropout(p=0.1, inplace=False)
        (dropout): Dropout(p=0.1, inplace=False)
        (attn_layer_norm): LayerNorm((7,), eps=1e-05, elementwise_affine=True)
        (ffn_layer_norm): LayerNorm((7,), eps=1e-05, elementwise_affine=True)

This is what the logs in forward call and error looks like:

0  iter done
after:  torch. Size([1, 5793, 42])
1  iter done
after:  torch. Size([1, 5793, 42])

RuntimeError: mat1 and mat2 shapes cannot be multiplied (5793x42 and 35x35)

In GraphormerLayer, the second argument hidden_size means the hidden size of feed forward layers in the Graphormer layer. The output feature size will be the same with the input feature size. So in your model, the mismatch happens at the forward from layer 1 (size 42) to layer 2 (size 35).

This topic was automatically closed 30 days after the last reply. New replies are no longer allowed.