Hi! I'm currently working on deploying a node classification model built with GAT to our service.
Our GAT model consists of 4 message-passing layers, as shown below (3 hidden layers plus 1 output projection). I trained the model on the full graph.
Since we cannot fit our huge graph into memory on the inference server, I'm wondering whether the following approach would be valid:
- Fetch the 4-hop neighborhood subgraph of a target node from our GraphDB
- Run inference on this subgraph (see the sketch right after this list)
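For concreteness, here is a minimal sketch of what I have in mind. It assumes DGL's dgl.khop_in_subgraph is available and re-applies the self-loop preprocessing from the DGL GAT example; in production the subgraph would come from the GraphDB instead of being extracted in-process:

import dgl
import torch

@torch.no_grad()
def predict(model, g, feats, target_node):
    model.eval()
    # 4-hop in-neighborhood of the target node, matching the 4 message-passing layers
    sg, inv = dgl.khop_in_subgraph(g, target_node, k=4)
    # same self-loop preprocessing as in the DGL GAT example
    sg = dgl.add_self_loop(dgl.remove_self_loop(sg))
    # map the subgraph's relabeled nodes back to rows of the full feature matrix
    logits = model(sg, feats[sg.ndata[dgl.NID]])
    return logits[inv].argmax(dim=-1)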
Does this approach only approximate the full-graph inference result, since no messages can reach the target node from beyond its 4-hop neighborhood?
Or should I instead train the model on 4-hop neighborhood subgraphs rather than on the full graph, and then run inference the same way?
Any suggestions would be really helpful.
Sincerely
Jinwoo
import torch.nn as nn
import torch.nn.functional as F
from dgl.nn import GATConv


class GAT(nn.Module):
    # Reference: https://github.com/dmlc/dgl/blob/master/examples/pytorch/gat/train.py
    def __init__(self,
                 num_layers=3,
                 in_dim=48,
                 num_hidden=16,
                 num_classes=2,
                 num_heads=3,
                 num_out_heads=1,
                 activation=F.relu,
                 feat_drop=0,
                 attn_drop=0,
                 negative_slope=0.2,
                 residual=False):
        super(GAT, self).__init__()
        # one head count per layer: num_heads for message passing, num_out_heads for the output
        self.heads = ([num_heads] * num_layers) + [num_out_heads]
        self.num_layers = num_layers
        self.gat_layers = nn.ModuleList()
        self.activation = activation
        # input projection (no residual)
        self.gat_layers.append(GATConv(
            in_dim, num_hidden, self.heads[0],
            feat_drop, attn_drop, negative_slope, False, self.activation))
        # hidden layers
        for layer in range(1, num_layers):
            # due to multi-head, the in_dim = num_hidden * num_heads
            self.gat_layers.append(GATConv(
                num_hidden * self.heads[layer - 1], num_hidden, self.heads[layer],
                feat_drop, attn_drop, negative_slope, residual, self.activation))
        # output projection
        self.gat_layers.append(GATConv(
            num_hidden * self.heads[-2], num_classes, self.heads[-1],
            feat_drop, attn_drop, negative_slope, residual, None))

    def forward(self, g, features):
        h = features
        for layer in range(self.num_layers):
            # concatenate the per-head outputs: (N, heads, num_hidden) -> (N, heads * num_hidden)
            h = self.gat_layers[layer](g, h).flatten(1)
        # output projection: average over heads instead of concatenating
        logits = self.gat_layers[-1](g, h).mean(1)
        return logits
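And this is roughly how I would wire it up at serving time, reusing the predict helper sketched above (the checkpoint path and node ID are just placeholders). On a small graph that still fits in memory, I would also compare the subgraph prediction against full-graph inference as a sanity check:

model = GAT()  # 4 GATConv layers in total: input projection, 2 hidden, output projection
model.load_state_dict(torch.load("gat_checkpoint.pt"))  # placeholder path

# subgraph-based prediction for one node
pred = predict(model, g, g.ndata["feat"], target_node=42)

# sanity check on a small in-memory graph: full-graph logits for the same node
g_sl = dgl.add_self_loop(dgl.remove_self_loop(g))
full_pred = model(g_sl, g.ndata["feat"])[42].argmax(dim=-1)
print(pred, full_pred)  # I would expect these to agree for a 4-layer model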