Hi there! I am trying to perform a simple training on a GCN but it has been really challenging for me. I tried to use two code snippets from the tutorial on DGL, here is what I am trying.
You can find my input CSVs in this link: DGL doubts - Google Sheets
import dgl
from dgl.data import DGLDataset
import torch
import os
import pandas as pd
import torch.nn as nn
import torch.nn.functional as F
#root="aes_cipher_top/"
# NOTE(review): `root` appears unused below — the CSVs are read via bare
# filenames in process(); confirm whether it should prefix those paths.
root="myDataSet/"
class fromYosysDS(DGLDataset):
    """Single-graph DGL node dataset built from two CSV files.

    The nodes CSV has one row per gate with columns 'type', 'conCount'
    and the four heat columns ('placementHeat', 'powerHeat',
    'routingHeat', 'irDropHeat'). The edges CSV has integer node-id
    columns 'Src' and 'Dst'. All columns are stored as node features;
    boolean 60/20/20 train/val/test masks are added over node indices.
    """

    # Heat columns copied verbatim into g.ndata, any of which can later
    # be chosen as the prediction target.
    HEAT_COLUMNS = ('placementHeat', 'powerHeat', 'routingHeat', 'irDropHeat')

    def __init__(self, nodes_csv='gatesToHeat.csv', edges_csv='DGLedges.csv'):
        # Store the paths first: DGLDataset.__init__ invokes process(),
        # which needs them. Defaults preserve the original behavior.
        self._nodes_csv = nodes_csv
        self._edges_csv = edges_csv
        super().__init__(name='mydata_from_yosys')

    def process(self):
        """Read both CSVs and assemble self.graph with features and masks."""
        nodes_data = pd.read_csv(self._nodes_csv)
        edges_data = pd.read_csv(self._edges_csv)
        edges_src = torch.from_numpy(edges_data['Src'].to_numpy())
        edges_dst = torch.from_numpy(edges_data['Dst'].to_numpy())

        n_nodes = nodes_data.shape[0]
        print("nodes_data.shape[0]", n_nodes)
        self.graph = dgl.graph((edges_src, edges_dst), num_nodes=n_nodes)

        # Encode the gate-type strings as integer category codes.
        self.graph.ndata['type'] = torch.from_numpy(
            nodes_data['type'].astype('category').cat.codes.to_numpy())
        print("classes:", self.graph.ndata['type'])
        self.graph.ndata['conCount'] = torch.from_numpy(
            nodes_data['conCount'].to_numpy())
        for col in self.HEAT_COLUMNS:
            self.graph.ndata[col] = torch.from_numpy(nodes_data[col].to_numpy())

        # Node-classification split: first 60% train, next 20% val,
        # remainder test, by node index order.
        n_train = int(n_nodes * 0.6)
        n_val = int(n_nodes * 0.2)
        train_mask = torch.zeros(n_nodes, dtype=torch.bool)
        val_mask = torch.zeros(n_nodes, dtype=torch.bool)
        test_mask = torch.zeros(n_nodes, dtype=torch.bool)
        train_mask[:n_train] = True
        val_mask[n_train:n_train + n_val] = True
        test_mask[n_train + n_val:] = True
        self.graph.ndata['train_mask'] = train_mask
        self.graph.ndata['val_mask'] = val_mask
        self.graph.ndata['test_mask'] = test_mask

    def __getitem__(self, i):
        # The dataset wraps exactly one graph; every index returns it.
        return self.graph

    def __len__(self):
        return 1
from dgl.nn import GraphConv
class GCN(nn.Module):
    """Two-layer graph convolutional network for node-level prediction.

    Maps `in_feats` input features per node to `num_classes` raw scores
    (logits) per node, through one hidden GraphConv layer of width
    `h_feats` with a ReLU in between.
    """

    def __init__(self, in_feats, h_feats, num_classes):
        super(GCN, self).__init__()
        self.conv1 = GraphConv(in_feats, h_feats)
        self.conv2 = GraphConv(h_feats, num_classes)

    def forward(self, g, in_feat):
        # conv1 + ReLU produces the hidden representation; conv2 emits
        # one unnormalized score per class for every node.
        hidden = F.relu(self.conv1(g, in_feat))
        return self.conv2(g, hidden)
def train(g, model):
    """Run 100 epochs of full-batch node classification on graph `g`.

    Uses ndata['type'] (gate-type category code) as the single input
    feature and ndata['placementHeat'] as the integer class label.
    Prints loss/accuracy every 5 epochs, tracking the test accuracy at
    the best-validation epoch.
    """
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
    # Self-loops guarantee every node receives at least its own message
    # (GraphConv errors on 0-in-degree nodes by default).
    g = dgl.add_self_loop(g)
    best_val_acc = 0
    best_test_acc = 0
    # FIX: GraphConv expects a float feature matrix of shape
    # (num_nodes, in_feats). The stored category codes are a 1-D integer
    # vector, so cast to float and add a trailing feature dimension —
    # this matches the model built as GCN(1, ...).
    features = g.ndata['type'].float().unsqueeze(1)
    # FIX: F.cross_entropy requires int64 class indices as targets.
    labels = g.ndata['placementHeat'].long()
    train_mask = g.ndata['train_mask']
    val_mask = g.ndata['val_mask']
    test_mask = g.ndata['test_mask']
    for e in range(100):
        # Forward pass over the whole graph.
        logits = model(g, features)
        # Predicted class = highest-scoring logit per node.
        pred = logits.argmax(1)
        # Loss is computed on training nodes only.
        loss = F.cross_entropy(logits[train_mask], labels[train_mask])
        # Accuracy on each split.
        train_acc = (pred[train_mask] == labels[train_mask]).float().mean()
        val_acc = (pred[val_mask] == labels[val_mask]).float().mean()
        test_acc = (pred[test_mask] == labels[test_mask]).float().mean()
        # Remember the test accuracy at the best validation epoch.
        if best_val_acc < val_acc:
            best_val_acc = val_acc
            best_test_acc = test_acc
        # Backward pass and parameter update.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if e % 5 == 0:
            print('In epoch {}, loss: {:.3f}, val acc: {:.3f} (best {:.3f}), test acc: {:.3f} (best {:.3f})'.format(
                e, loss, val_acc, best_val_acc, test_acc, best_test_acc))
# Build the dataset; DGLDataset.__init__ triggers process(), which reads
# both CSV files and assembles the graph.
dataset = fromYosysDS()
print("dataset:",dataset)
# The dataset holds a single graph; __getitem__ returns it for any index.
graph = dataset[0]
print("graph:",graph,type(graph))
#print("graph.ndata:",graph.ndata)
print("len graph.ndata:",len(graph.ndata))
print("type:",type(graph.ndata))
print("\n\n\nmodel:")
# Create the model with given dimensions
#model = GCN( graph.ndata['type'].shape[0], 365, 31 )
# in_feats=1 (one scalar feature per node), h_feats=1 hidden unit,
# 31 output classes. NOTE(review): 31 presumably matches the number of
# distinct label values in 'placementHeat' — confirm against the CSV.
model = GCN( 1, 1, 31 )
print("model type:",type(model))
print("\n\n\ntrain:")
train(graph, model)
I have a lot of questions I can’t figure out by myself:
I can’t make sense of the feature size and the other parameters on the line “model = GCN( 1, 1, 31 )”. It seems the original code has a different datatype for “graph.ndata”. In my case I would like my features to come from the columns “type” and “conCount”. I tried taking a slice with “features = g.ndata[:1]”, but it seems this type can’t be sliced. As a first step I wanted to use only one feature, from column “type”, but even that is not working.
Also I would like to be able to choose between the four possible labels from columns with “Heat” in their names. To do so I would have to set a different value on the last parameter on "GCN ( …, …, … ), correct? On the original code it was “dataset.num_classes”, although with my generated dataset there is no such method “num_classes”.
The tutorial is confusing: I don’t understand why there is no training code for the user-generated data in Make Your Own Dataset — DGL 0.9.1post1 documentation.
Another confusing thing about the documentation is that when I call “type(graph)” I get “&lt;class ‘dgl.view.HeteroNodeDataView’&gt;”, yet that type does not appear anywhere in the DGL documentation. This makes it really hard to learn the library on my own!