Custom dataset with multiple CSV

Hey guys it has been a struggle for me to use DGL to do what I want to. It seems simple: I have multiple pairs of CSVs (nodes and edges) and I would like to do a regression node prediction while using some of the graphs as train and other graphs as test.

I am having trouble creating the dataset with multiple CSVs, the DGL documentation seems limited to me and I couldn’t find other examples doing what I want to. Here is what I have so far:

import dgl
from dgl.data import DGLDataset
import torch
import os
import pandas as pd
import torch.nn as nn
import torch.nn.functional as F
import matplotlib.pyplot as plt
from pathlib import Path
import dgl.nn as dglnn

print("dgl.__version_",dgl.__version__)
#designPath = ""

class DataSetFromYosys(DGLDataset):
	def __init__( self, designPath ):
		self.path = designPath
		super().__init__(name='mydata_from_yosys')
		

	def process( self ):
		nodes_data = pd.read_csv( self.path / 'gatesToHeat.csv' )
		edges_data = pd.read_csv( self.path / 'DGLedges.csv')
		edges_src = torch.from_numpy(edges_data['Src'].to_numpy())
		edges_dst = torch.from_numpy(edges_data['Dst'].to_numpy())

		self.graph = dgl.graph((edges_src, edges_dst), num_nodes=nodes_data.shape[0])
		self.graph.ndata['type'] = torch.from_numpy(nodes_data['type'].astype('category').cat.codes.to_numpy())
		self.graph.ndata['conCount'] = torch.from_numpy(nodes_data['conCount'].to_numpy())
		
		self.graph.ndata['placementHeat'] = torch.from_numpy (nodes_data['placementHeat'].to_numpy())
		self.graph.ndata['powerHeat'] = torch.from_numpy (nodes_data['powerHeat'].to_numpy())
		self.graph.ndata['routingHeat'] = torch.from_numpy (nodes_data['routingHeat'].to_numpy())
		self.graph.ndata['irDropHeat'] = torch.from_numpy (nodes_data['irDropHeat'].to_numpy())
		
		############
		self.graph.ndata['label'] = self.graph.ndata['placementHeat']
		############

		# If your dataset is a node classification dataset, you will need to assign
		# masks indicating whether a node belongs to training, validation, and test set.
#		n_nodes = nodes_data.shape[0]
#		n_train = int(n_nodes * 0.6)
#		n_val = int(n_nodes * 0.2)
#		train_mask = torch.zeros(n_nodes, dtype=torch.bool)
#		val_mask = torch.zeros(n_nodes, dtype=torch.bool)
#		test_mask = torch.zeros(n_nodes, dtype=torch.bool)
#		train_mask[:n_train] = True
#		val_mask[n_train:n_train + n_val] = True
#		test_mask[n_train + n_val:] = True
#		self.graph.ndata['train_mask'] = train_mask
#		self.graph.ndata['val_mask'] = val_mask
#		self.graph.ndata['test_mask'] = test_mask

	def __getitem__(self, i):
		return self.graph

	def __len__(self):
		return 1




class SAGE(nn.Module):
    def __init__(self, in_feats, hid_feats, out_feats):
        super().__init__()
        self.conv1 = dglnn.SAGEConv(
            in_feats=in_feats, out_feats=hid_feats, aggregator_type='lstm')
        self.conv2 = dglnn.SAGEConv(
            in_feats=hid_feats, out_feats=out_feats, aggregator_type='lstm')

    def forward(self, graph, inputs):
        # inputs are features of nodes
        h = self.conv1(graph, inputs)
        h = F.relu(h)
        h = self.conv2(graph, h)
        return h


def evaluate( model, graph, features, labels, valid_mask, train_mask ):
	model.eval()
	with torch.no_grad():
		logits = model(graph, features)
		logits = logits[valid_mask]
		labelsAux = labels[valid_mask]
		_, indices = torch.max(logits, dim=1)
		correct = torch.sum(indices == labelsAux)
		validAcc = correct.item() * 1.0 / len(labelsAux)
		
		logits = model(graph, features)
		logits = logits[train_mask]
		labels = labels[train_mask]
		_, indices = torch.max(logits, dim=1)
		correct = torch.sum(indices == labels)
		trainAcc = correct.item() * 1.0 / len(labels)
		
		return trainAcc, validAcc
		
def evaluateNoMask( model, graph, features, labels ):
	model.eval()
	with torch.no_grad():
#		logits = model(graph, features)
#		logits = logits[valid_mask]
#		labelsAux = labels[valid_mask]
#		_, indices = torch.max(logits, dim=1)
#		correct = torch.sum(indices == labelsAux)
#		validAcc = correct.item() * 1.0 / len(labelsAux)
		
		logits = model(graph, features)
#		logits = logits[train_mask]
#		labels = labels[train_mask]
		_, indices = torch.max(logits, dim=1)
		correct = torch.sum(indices == labels)
		trainAcc = correct.item() * 1.0 / len(labels)
		
		return trainAcc#, validAcc


def regressionTrain(graph):
#	node_features = graph.ndata['type'][None:1]
	print("\n#################\n### TRAINING ####\n#################\n")
	print("graph.ndata['type']",type(graph.ndata['type']),graph.ndata['type'].shape,graph.ndata['type'].type())
	print("graph.ndata['conCount']",type(graph.ndata['conCount']),graph.ndata['conCount'].shape,graph.ndata['conCount'].type())
	node_features = torch.cat([graph.ndata['type'].float()[:,None], graph.ndata['conCount'].float()[:,None]], dim=1)
	print("node_features",type(node_features),node_features.shape)
	node_labels = graph.ndata['placementHeat'].long()
	node_labels[ node_labels == -1 ] = 0
	train_mask = graph.ndata['train_mask']
	valid_mask = graph.ndata['val_mask']
	test_mask = graph.ndata['test_mask']
	n_features = node_features.shape[1]
	n_labels = int(node_labels.max().item() + 1)

	model = SAGE(in_feats=n_features, hid_feats=100, out_feats=n_labels)
	opt = torch.optim.Adam(model.parameters(), lr=0.03)
	loss_hist = []
	trainAccHist = []
	validAccHist = []
	epochs = 10
	
	for epoch in range( epochs ):
		model.train()
		# forward propagation by using all nodes
		logits = model(graph, node_features)
		# compute loss
		loss = F.cross_entropy( logits[train_mask], node_labels[train_mask] )
		loss_hist.append( loss.item() )
		# compute validation accuracy
		#trainAcc, validAcc  = evaluate( model, graph, node_features, node_labels, valid_mask, train_mask )
		trainAcc  = evaluateNoMask( model, graph, node_features, node_labels )#, valid_mask, train_mask )
		validAcc = 0
		trainAccHist.append( trainAcc )
		validAccHist.append( validAcc )
		# backward propagation
		opt.zero_grad()
		loss.backward()
		opt.step()
	
	print("\n#################\n### END TRAIN ###\n#################\n")
	print( "loss", loss.item(), "trainAcc", trainAcc, "validAcc", validAcc )
	
	fig, ax1 = plt.subplots()
	ax2 = ax1.twinx()
	epochs_list = [i for i in range(epochs)]
	ax1.plot(epochs_list, trainAccHist, label='Training accuracy')
	ax1.plot(epochs_list, validAccHist, label='Validation accuracy')
	ax1.set_ylabel('Accuracy')
	ax1.set_xlabel('epochs')
	ax1.legend()

	ax2.plot(epochs_list, loss_hist, label='Training loss', color = 'g')
#	ax2.plot(epochs_list, val_loss, label='Validation loss')
	ax2.set_ylabel('Loss')
	ax2.set_xlabel('epochs')
	ax2.legend()
	plt.draw()
	plt.show()
	#ax2.savefig(V5_Full_Loss.png)


def printGraph(graph):
#	graph = all_graphs[0][0]
	print( "graph len:\n", graph )
	#graph.ndata['type'] = torch.nn.functional.one_hot(graph.ndata['type'].to(torch.int64))
	#graph.ndata['type'] = torch.from_numpy(graph.ndata['type'].astype('category').cat.codes.to_numpy())

	print("graph:",type(graph))
	print('We have %d nodes.' % graph.number_of_nodes())
	print('We have %d edges.' % graph.number_of_edges())

#	import networkx as nx
#	import matplotlib.pyplot as plt
#	nx_G = graph.to_networkx()
#	pos = nx.kamada_kawai_layout(nx_G)
#	nx.draw(nx_G, pos, with_labels=True, node_color=[[.7, .7, .7]])
#	plt.show()

	print("len graph.ndata:",len(graph.ndata))
	print("type graph.ndata:",type(graph.ndata))

	regressionTrain(graph)
	
	
#all_graphs = []
print("Path.cwd():",Path.cwd())
for designPath in Path( Path.cwd() ).iterdir():
	if designPath.is_dir():
		print("designPath:",designPath)
		dataset = DataSetFromYosys( designPath )
		#all_graphs.append( dataset )
	
#regressionTrain( all_graphs[0][0] )

#print("type(all_graphs[0]):", type(all_graphs[0]))
#print("type(all_graphs[0][0]):", type(all_graphs[0][0]))
#print("\n\n\n\n")
#masterGraph = dgl.batch( [all_graphs[0][0], all_graphs[1][0]])#, all_graphs[2][0]] )
#print( "type(masterGraph):",type(masterGraph) )
#print( "masterGraph.batch_size:", masterGraph.batch_size )
#print( "masterGraph.batch_num_nodes:", masterGraph.batch_num_nodes() )
#print( "masterGraph.batch_num_edges:", masterGraph.batch_num_edges() )


print("len(dataset)", len(dataset))
print("dataset:", dataset)
print("dataset[0]:", dataset[0])
masterGraph = dgl.data.AsNodePredDataset( dataset, [0.8, 0.1, 0.1])#, target_ntype = "type" )
print( "len(masterGraph):",len(masterGraph) )
print( "type(masterGraph):",type(masterGraph) )
print( "type(masterGraph[0]):",type(masterGraph[0]) )
print( "type(masterGraph[1]):",type(masterGraph[1]) )
print( "type(masterGraph[2]):",type(masterGraph[2]) )
print( "type(masterGraph[3]):",type(masterGraph[3]) )
print( "type(masterGraph[4]):",type(masterGraph[4]) )
print( "type(masterGraph[5]):",type(masterGraph[5]) )
print( "type(masterGraph[6]):",type(masterGraph[6]) )
print( "type(masterGraph[7]):",type(masterGraph[7]) )

#regressionTrain( masterGraph[0] )

I have a bunch of questions I couldn’t find the answers myself with the documentation and previous questions in this forum:

  1. How can I properly insert multiple graphs to a DGLdataset? Should I read all CSVs at once and have only one big graph?

  2. Shouldn’t there be a method like “myDataset.insert(myNewGraph)”?

  3. How do I modify the __len__ method to return the proper number of graphs? How do I modify the __getitem__ method to return the proper graph from its index?

  4. Should I use DataLoader instead of what I have? Should I use graph batching? Am I on the right path with my code?

  5. In another discussion a member told me to use AsNodePredDataset — DGL 1.0 documentation, although there are no examples on it and the documentation is superficial. It should help me split the data right? How do I use it, how do I extract the train or test graphs after using AsNodePredDataset?

I am assuming that after properly creating and splitting my dataset it should be easy to mimic everything on the PPI example dgl/train_ppi.py at master · dmlc/dgl · GitHub

According to your description, the scenario is inductive training, similar to the PPI example as you mentioned. In this case, your dataset should maintain a list of graphs.

Here is an example to modify your current dataset:

class DataSetFromYosys(DGLDataset):
    def __init__(self, designPath, mode='train'):
        self.path = designPath
        self.mode = mode
        # graph indices for training, valid, or test split
        if mode == 'train':
            self.graph_indices = list(range(20))
        elif mode == 'valid':
            ...
        else:
            ...
        super().__init__(name='mydata_from_yosys_'+mode)

    def process(self):
        self.graphs = []
        for idx in self.graph_indices:
            graph = self._process_single(idx)
            self.graphs.append(graph)

    def _process_single(idx):
        # your original processing code for a single graph
        ...

    def __getitem__(self, i):
        return self.graphs[i]

    def __len__(self):
        return len(self.graphs)

For the other parts, you can just follow the PPI example to modify the code accordingly. AsNodePredDataset is used in transductive setting instead of the inductive training setting in your case.

Thanks a lot @dyru!! I believe I got unstuck.

Now I am having one trouble trying to modify the PPI example to my situation. Here is my code this far:

import dgl
from dgl.data import DGLDataset
import torch
import os
import pandas as pd
import torch.nn as nn
import torch.nn.functional as F
import matplotlib.pyplot as plt
from pathlib import Path
import dgl.nn as dglnn
from dgl.dataloading import GraphDataLoader
from dgl.data.ppi import PPIDataset

print("dgl.__version_",dgl.__version__)
#designPath = ""

class DataSetFromYosys( DGLDataset ):
	def __init__( self, mode='train' ):
		self.graphPaths = []
		print("Path.cwd():",Path.cwd())
		for designPath in Path( Path.cwd() ).iterdir():
			if designPath.is_dir():
				print("designPath:",designPath)
				self.graphPaths.append( designPath )
		self.mode = mode
		super().__init__(name='mydata_from_yosys_'+mode)
		

	def _process_single( self, designPath ):
#				nodes_data = pd.read_csv('/home/gudeh/Desktop/OpenROAD-flow-scripts/flow/myStuff/c17/gatesToHeat.csv')
#				edges_data = pd.read_csv('/home/gudeh/Desktop/OpenROAD-flow-scripts/flow/myStuff/c17/DGLedges.csv')
		nodes_data = pd.read_csv( designPath / 'gatesToHeat.csv' )
		edges_data = pd.read_csv( designPath / 'DGLedges.csv')
		edges_src = torch.from_numpy(edges_data['Src'].to_numpy())
		edges_dst = torch.from_numpy(edges_data['Dst'].to_numpy())
		#edge_features = torch.from_numpy(edges_data['Weight'].to_numpy())

		self.graph = dgl.graph((edges_src, edges_dst), num_nodes=nodes_data.shape[0])
		self.graph.ndata['type'] = torch.from_numpy(nodes_data['type'].astype('category').cat.codes.to_numpy())
		#print("self.graph.ndata['type']!",type(self.graph.ndata['type']), "!!!!", self.graph.ndata['type'].shape, self.graph.ndata['type'].type())
		self.graph.ndata['conCount'] = torch.from_numpy(nodes_data['conCount'].to_numpy())
		
		self.graph.ndata['placementHeat'] = torch.from_numpy (nodes_data['placementHeat'].to_numpy())
		self.graph.ndata['powerHeat'] = torch.from_numpy (nodes_data['powerHeat'].to_numpy())
		self.graph.ndata['routingHeat'] = torch.from_numpy (nodes_data['routingHeat'].to_numpy())
		self.graph.ndata['irDropHeat'] = torch.from_numpy (nodes_data['irDropHeat'].to_numpy())
		
		############
		#self.graph.ndata['label'] = self.graph.ndata['placementHeat']
		############
		#self.graph.edata['weight'] = edge_features

		# If your dataset is a node classification dataset, you will need to assign
		# masks indicating whether a node belongs to training, validation, and test set.
#		n_nodes = nodes_data.shape[0]
#		n_train = int(n_nodes * 0.6)
#		n_val = int(n_nodes * 0.2)
#		train_mask = torch.zeros(n_nodes, dtype=torch.bool)
#		val_mask = torch.zeros(n_nodes, dtype=torch.bool)
#		test_mask = torch.zeros(n_nodes, dtype=torch.bool)
#		train_mask[:n_train] = True
#		val_mask[n_train:n_train + n_val] = True
#		test_mask[n_train + n_val:] = True
#		self.graph.ndata['train_mask'] = train_mask
#		self.graph.ndata['val_mask'] = val_mask
#		self.graph.ndata['test_mask'] = test_mask


	
	def process( self ):
		self.graphs = []
		for path in self.graphPaths:
		    graph = self._process_single( path )
		    self.graphs.append( graph )
            
	def __getitem__( self, i ):
		return self.graph

	def __len__( self ):
		#return 1
		return len( self.graphs )
		

class SAGE( nn.Module ):
    def __init__(self, in_feats, hid_feats, out_feats):
        super().__init__()
        self.conv1 = dglnn.SAGEConv(
            in_feats=in_feats, out_feats=hid_feats, aggregator_type='lstm')
        self.conv2 = dglnn.SAGEConv(
            in_feats=hid_feats, out_feats=out_feats, aggregator_type='lstm')

    def forward(self, graph, inputs):
        # inputs are features of nodes
        h = self.conv1(graph, inputs)
        h = F.relu(h)
        h = self.conv2(graph, h)
        return h


def evaluate( model, graph, features, labels, valid_mask, train_mask ):
	model.eval()
	with torch.no_grad():
		logits = model(graph, features)
		logits = logits[valid_mask]
		labelsAux = labels[valid_mask]
		_, indices = torch.max(logits, dim=1)
		correct = torch.sum(indices == labelsAux)
		validAcc = correct.item() * 1.0 / len(labelsAux)
		
		logits = model(graph, features)
		logits = logits[train_mask]
		labels = labels[train_mask]
		_, indices = torch.max(logits, dim=1)
		correct = torch.sum(indices == labels)
		trainAcc = correct.item() * 1.0 / len(labels)
		
		return trainAcc, validAcc
		
def evaluateNoMask( model, graph, features, labels ):
	model.eval()
	with torch.no_grad():
#		logits = model(graph, features)
#		logits = logits[valid_mask]
#		labelsAux = labels[valid_mask]
#		_, indices = torch.max(logits, dim=1)
#		correct = torch.sum(indices == labelsAux)
#		validAcc = correct.item() * 1.0 / len(labelsAux)
		
		logits = model(graph, features)
#		logits = logits[train_mask]
#		labels = labels[train_mask]
		_, indices = torch.max(logits, dim=1)
		correct = torch.sum(indices == labels)
		trainAcc = correct.item() * 1.0 / len(labels)
		
		return trainAcc#, validAcc


def regressionTrain(graph):
#	node_features = graph.ndata['type'][None:1]
	print("\n#################\n### TRAINING ####\n#################\n")
	#print("graph.ndata['type']",type(graph.ndata['type']),graph.ndata['type'].shape,graph.ndata['type'].type())
	#print("graph.ndata['conCount']",type(graph.ndata['conCount']),graph.ndata['conCount'].shape,graph.ndata['conCount'].type())
	node_features = torch.cat([graph.ndata['type'].float()[:,None], graph.ndata['conCount'].float()[:,None]], dim=1)
	#print("node_features",type(node_features),node_features.shape)
	node_labels = graph.ndata['placementHeat'].long()
	node_labels[ node_labels == -1 ] = 0
	train_mask = graph.ndata['train_mask']
	valid_mask = graph.ndata['val_mask']
	test_mask = graph.ndata['test_mask']
	n_features = node_features.shape[1]
	n_labels = int(node_labels.max().item() + 1)

	model = SAGE(in_feats=n_features, hid_feats=100, out_feats=n_labels)
	opt = torch.optim.Adam(model.parameters(), lr=0.03)
	loss_hist = []
	trainAccHist = []
	validAccHist = []
	epochs = 10
	
	for epoch in range( epochs ):
		model.train()
		# forward propagation by using all nodes
		logits = model(graph, node_features)
		# compute loss
		loss = F.cross_entropy( logits[train_mask], node_labels[train_mask] )
		loss_hist.append( loss.item() )
		# compute validation accuracy
		#trainAcc, validAcc  = evaluate( model, graph, node_features, node_labels, valid_mask, train_mask )
		trainAcc  = evaluateNoMask( model, graph, node_features, node_labels )#, valid_mask, train_mask )
		validAcc = 0
		trainAccHist.append( trainAcc )
		validAccHist.append( validAcc )
		# backward propagation
		opt.zero_grad()
		loss.backward()
		opt.step()
	
	print("\n#################\n### END TRAIN ###\n#################\n")
	print( "loss", loss.item(), "trainAcc", trainAcc, "validAcc", validAcc )
	
	fig, ax1 = plt.subplots()
	ax2 = ax1.twinx()
	epochs_list = [i for i in range(epochs)]
	ax1.plot(epochs_list, trainAccHist, label='Training accuracy')
	ax1.plot(epochs_list, validAccHist, label='Validation accuracy')
	ax1.set_ylabel('Accuracy')
	ax1.set_xlabel('epochs')
	ax1.legend()

	ax2.plot(epochs_list, loss_hist, label='Training loss', color = 'g')
#	ax2.plot(epochs_list, val_loss, label='Validation loss')
	ax2.set_ylabel('Loss')
	ax2.set_xlabel('epochs')
	ax2.legend()
	plt.draw()
	plt.show()
	#ax2.savefig(V5_Full_Loss.png)


def printGraph(graph):
#	graph = all_graphs[0][0]
	print( "graph len:\n", graph )
	#graph.ndata['type'] = torch.nn.functional.one_hot(graph.ndata['type'].to(torch.int64))
	#graph.ndata['type'] = torch.from_numpy(graph.ndata['type'].astype('category').cat.codes.to_numpy())

	print("graph:",type(graph))
	print('We have %d nodes.' % graph.number_of_nodes())
	print('We have %d edges.' % graph.number_of_edges())

#	import networkx as nx
#	import matplotlib.pyplot as plt
#	nx_G = graph.to_networkx()
#	pos = nx.kamada_kawai_layout(nx_G)
#	nx.draw(nx_G, pos, with_labels=True, node_color=[[.7, .7, .7]])
#	plt.show()

	print("len graph.ndata:",len(graph.ndata))
	print("type graph.ndata:",type(graph.ndata))

	regressionTrain(graph)
	

class GAT(nn.Module):
	def __init__(self, in_size, hid_size, out_size, heads):
		super().__init__()
		self.gat_layers = nn.ModuleList()
		# three-layer GAT
		#        self.gat_layers.append(dglnn.GATConv(in_size, hid_size, heads[0], activation=F.elu))
		#        self.gat_layers.append(dglnn.GATConv(hid_size*heads[0], hid_size, heads[1], residual=True, activation=F.elu))
		#        self.gat_layers.append(dglnn.GATConv(hid_size*heads[1], out_size, heads[2], residual=True, activation=None))
		self.gat_layers.append(dglnn.GATConv(in_size, hid_size, heads[0], activation=F.elu, allow_zero_in_degree=True) )
		self.gat_layers.append(dglnn.GATConv(hid_size*heads[0], hid_size, heads[1], residual=True, activation=F.elu) )
		self.gat_layers.append(dglnn.GATConv(hid_size*heads[1], out_size, heads[2], residual=True, activation=None ) )

	def forward(self, g, inputs):
		h = inputs
		for i, layer in enumerate(self.gat_layers):
			print("layer:",i)	
			h = layer(g, h)
			if i == 2:  # last layer 
				h = h.mean(1)
			else:       # other layer(s)
				h = h.flatten(1)
			return h
	

def train(train_dataloader, val_dataloader, device, model):
    # define loss function and optimizer
    loss_fcn = nn.BCEWithLogitsLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=5e-3, weight_decay=0)

    # training loop
    for epoch in range(400):
        model.train()
        logits = []
        total_loss = 0
        # mini-batch loop
        for batch_id, batched_graph in enumerate(train_dataloader):
            batched_graph = batched_graph.to(device)
#            features = batched_graph.ndata['feat'].float()
#            labels = batched_graph.ndata['label'].float()
            features = batched_graph.ndata['type'].float()
            labels = batched_graph.ndata['placementHeat'].float()
            logits = model(batched_graph, features)
            loss = loss_fcn(logits, labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        print("Epoch {:05d} | Loss {:.4f} |". format(epoch, total_loss / (batch_id + 1) ))

        if (epoch + 1) % 5 == 0:
            avg_score = evaluate_in_batches(val_dataloader, device, model) # evaluate F1-score instead of loss
            print("                            Acc. (F1-score) {:.4f} ". format(avg_score))


#dataset = DataSetFromYosys(  )
#print( "DS size:", len(dataset))
#train_dataloader = GraphDataLoader(dataset, batch_size=2)
#print( "data_loader", train_dataloader )

if __name__ == '__main__':
	print(f'Training PPI Dataset with DGL built-in GATConv module.')
	#device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
	device = torch.device('cpu')

	# load and preprocess datasets
	ppi_dataset = PPIDataset(mode='train')
	ppi_dataloader = GraphDataLoader(ppi_dataset, batch_size=2)
	#    val_dataset = PPIDataset(mode='valid')
	#    test_dataset = PPIDataset(mode='test')
	train_dataset = DataSetFromYosys(  )
	val_dataset   = DataSetFromYosys( mode='valid' )
	test_dataset  = DataSetFromYosys( mode='test' )
#	features = train_dataset[0].ndata['feat']


	print( "\n\n\nppi_dataset",   type(ppi_dataset),  "\n", ppi_dataset)
	print( "ppi_dataloader",   type(ppi_dataloader),  "\n", ppi_dataloader)
	print( "train_dataset", type(train_dataset),"\n", train_dataset)
	

	# create GAT model
#	features = torch.cat( [graph.ndata['type'].float()[:,None], graph.ndata['conCount'].float()[:,None]], dim=1 )
	features = torch.cat( [train_dataset[0].ndata['type'].float()[:,None]], dim=1 )
	in_size = features.shape[1]
#	out_size = train_dataset.num_labels
	node_labels = train_dataset[0].ndata['placementHeat'].long()
	node_labels[ node_labels == -1 ] = 0
	out_size = int(node_labels.max().item() + 1)
	print("in_size",in_size,",  out_size",out_size)
	#model = GAT(in_size, 256, out_size, heads=[4,4,6]).to(device)
	model = SAGE( in_feats = in_size, hid_feats=100, out_feats = out_size )

	# model training
	print('Training...')
	train_dataloader = GraphDataLoader(train_dataset, batch_size=2)
	val_dataloader = GraphDataLoader(val_dataset, batch_size=2)
	print( "\n\ntrain_dataloader", type(train_dataloader),"\n", train_dataloader)
	print( "val_dataloader", type(val_dataloader),"\n", val_dataloader)
	train(train_dataloader, val_dataloader, device, model)

	# test the model
	print('Testing...')
	test_dataloader = GraphDataLoader(test_dataset, batch_size=2)
	avg_score = evaluate_in_batches(test_dataloader, device, model)
	print("Test Accuracy (F1-score) {:.4f}".format(avg_score))




I had to modify the GAT implementation for my regression problem.
I am getting two different errors when trying to use SAGE (my previous version) and GAT (from PPI).

With GAT:

Traceback (most recent call last):
File “/home/gudeh/Desktop/OpenROAD-flow-scripts/flow/myStuff/newRegression.py”, line 322, in
train(train_dataloader, val_dataloader, device, model)
File “/home/gudeh/Desktop/OpenROAD-flow-scripts/flow/myStuff/newRegression.py”, line 265, in train
logits = model(batched_graph, features)
File “/home/gudeh/.local/lib/python3.10/site-packages/torch/nn/modules/module.py”, line 1190, in _call_impl
return forward_call(*input, **kwargs)
File “/home/gudeh/Desktop/OpenROAD-flow-scripts/flow/myStuff/newRegression.py”, line 240, in forward
h = layer(g, h)
File “/home/gudeh/.local/lib/python3.10/site-packages/torch/nn/modules/module.py”, line 1190, in _call_impl
return forward_call(*input, **kwargs)
File “/home/gudeh/.local/lib/python3.10/site-packages/dgl/nn/pytorch/conv/gatconv.py”, line 282, in forward
feat_src = feat_dst = self.fc(h_src).view(
File “/home/gudeh/.local/lib/python3.10/site-packages/torch/nn/modules/module.py”, line 1190, in _call_impl
return forward_call(*input, **kwargs)
File “/home/gudeh/.local/lib/python3.10/site-packages/torch/nn/modules/linear.py”, line 114, in forward
return F.linear(input, self.weight, self.bias)
RuntimeError: mat1 and mat2 shapes cannot be multiplied (1x21320 and 1x1024)

And with SAGE:>

Traceback (most recent call last):
File “/home/gudeh/Desktop/OpenROAD-flow-scripts/flow/myStuff/newRegression.py”, line 322, in
train(train_dataloader, val_dataloader, device, model)
File “/home/gudeh/Desktop/OpenROAD-flow-scripts/flow/myStuff/newRegression.py”, line 265, in train
logits = model(batched_graph, features)
File “/home/gudeh/.local/lib/python3.10/site-packages/torch/nn/modules/module.py”, line 1190, in _call_impl
return forward_call(*input, **kwargs)
File “/home/gudeh/Desktop/OpenROAD-flow-scripts/flow/myStuff/newRegression.py”, line 94, in forward
h = self.conv1(graph, inputs)
File “/home/gudeh/.local/lib/python3.10/site-packages/torch/nn/modules/module.py”, line 1190, in _call_impl
return forward_call(*input, **kwargs)
File “/home/gudeh/.local/lib/python3.10/site-packages/dgl/nn/pytorch/conv/sageconv.py”, line 261, in forward
graph.update_all(msg_fn, self._lstm_reducer)
File “/home/gudeh/.local/lib/python3.10/site-packages/dgl/heterograph.py”, line 4895, in update_all
ndata = core.message_passing(g, message_func, reduce_func, apply_node_func)
File “/home/gudeh/.local/lib/python3.10/site-packages/dgl/core.py”, line 372, in message_passing
ndata = invoke_udf_reduce(g, rfunc, msgdata, orig_nid=orig_nid)
File “/home/gudeh/.local/lib/python3.10/site-packages/dgl/core.py”, line 143, in invoke_udf_reduce
bkt_rsts.append(func(nbatch))
File “/home/gudeh/.local/lib/python3.10/site-packages/dgl/nn/pytorch/conv/sageconv.py”, line 176, in _lstm_reducer
_, (rst, _) = self.lstm(m, h)
File “/home/gudeh/.local/lib/python3.10/site-packages/torch/nn/modules/module.py”, line 1190, in _call_impl
return forward_call(*input, **kwargs)
File “/home/gudeh/.local/lib/python3.10/site-packages/torch/nn/modules/rnn.py”, line 765, in forward
raise RuntimeError(msg)
RuntimeError: For unbatched 2-D input, hx and cx should also be 2-D but got (3-D, 3-D) tensors

It seems to be a problem with input shape. I don’t have access to your data. Could you check the input data and make sure the feature tensor has a shape like (N, in_size), where N is the number of nodes.

I was able to fix the problems! I had some indentation errors, some errors with transforming the PPI example to a regression problem, and also I was misinterpreting the variables in_size, I thought it was related to the number of nodes, but it is actually the number of features, which in my situation is only 1.

This topic was automatically closed 30 days after the last reply. New replies are no longer allowed.