While watching how the gradients of my model change during training, I noticed that if I set the number of edge types to 8, the grad of the weight and bias of ggnn.ggcn.linears.5 and ggnn.ggcn.linears.3 is None.
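For context, if I understand DGL's implementation correctly, GatedGraphConv keeps one nn.Linear per edge type in a ModuleList called linears, which is where the names linears.3 and linears.5 above come from. A minimal sketch to list them (the feature sizes are just the ones from my configuration below):

from dgl.nn.pytorch import GatedGraphConv

conv = GatedGraphConv(in_feats=300, out_feats=300, n_steps=5, n_etypes=8)
for name, param in conv.named_parameters():
    # expected: linears.0.weight ... linears.7.bias, plus the internal GRU parameters
    print(name, tuple(param.shape))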
This is the test code:
import torch
import torch.nn.functional as F
from dgl.nn.pytorch import GatedGraphConv
import dgl
import numpy as np
class MyGGCN(torch.nn.Module):
    def __init__(self, in_feats, out_feats):
        super(MyGGCN, self).__init__()
        self.ggcn = GatedGraphConv(in_feats=in_feats, out_feats=out_feats, n_etypes=8, n_steps=5)

    def forward(self, g, feat, edge_types):
        X = self.ggcn(g, feat, edge_types)
        return X

class ConvBlock(torch.nn.Module):
    def __init__(self, kernel_h, emb_size, max_line):
        super(ConvBlock, self).__init__()
        self.cnn = torch.nn.Conv1d(in_channels=emb_size, out_channels=10, kernel_size=kernel_h)
        self.max_pool = torch.nn.MaxPool1d(kernel_size=(max_line - kernel_h + 1))

    def forward(self, X):
        X = self.cnn(X.squeeze(1).permute(0, 2, 1))
        X = F.relu(X)
        X = self.max_pool(X)
        X = X.squeeze(-1)
        return X

class MyTextCNN(torch.nn.Module):
    def __init__(self, emb_size, max_line):
        super(MyTextCNN, self).__init__()
        self.block2 = ConvBlock(3, emb_size, max_line)
        self.block3 = ConvBlock(4, emb_size, max_line)
        self.block4 = ConvBlock(5, emb_size, max_line)

    def forward(self, X):
        X = X.unsqueeze(1)
        X_2 = self.block2(X)
        X_3 = self.block3(X)
        X_4 = self.block4(X)
        X = torch.cat([X_2, X_3, X_4], dim=1)
        return X
class AssembleModel(torch.nn.Module):
    def __init__(self):
        super(AssembleModel, self).__init__()
        self.cnn_1 = MyTextCNN(300, 200)
        self.cnn_2 = MyTextCNN(300, 500)
        self.ggnn = MyGGCN(300, 300)
        self.dropout = torch.nn.Dropout(0.3)
        self.fc = torch.nn.Linear(30 * 2 + 300, 2)

    def forward(self, X_1, X_2, X_3):
        X_1 = torch.tensor(np.array(X_1), dtype=torch.float)
        X_2 = torch.tensor(np.array(X_2), dtype=torch.float)
        X_1 = self.cnn_1(X_1)
        X_1 = self.dropout(X_1)
        X_2 = self.cnn_2(X_2)
        X_3 = self.ggnn(X_3, X_3.ndata["h"], X_3.edata["e"])
        X_3 = X_3[0:10]
        X_3 = self.dropout(X_3)
        # keep only the first 10 samples so all three branches line up
        X_1 = X_1[:10]
        X_2 = X_2[:10]
        X = torch.cat([X_1, X_2, X_3], dim=1)
        out = self.fc(X)
        return out
device = torch.device("cuda")  # note: currently unused, the repro runs on the CPU

model = AssembleModel()
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-3)

# random inputs for the two CNN branches: 20 samples each
X_1 = []
X_2 = []
for idx in range(20):
    X_1.append(np.random.random((200, 300)))
    X_2.append(np.random.random((500, 300)))

# a random 99-node graph; edata["e"] stores each edge's type id (0..7)
X_3 = dgl.graph(([34, 96, 79, 13, 46, 11, 98, 68, 23, 50, 57, 4, 42, 85, 38, 19, 52, 96, 97, 42, 3, 79, 21, 4, 72, 97, 38, 34, 4, 33, 2, 67, 4, 4, 4, 24, 19, 57, 75, 4, 2, 41, 39, 26, 34, 35, 4, 54, 26, 2, 75, 49, 34, 77, 30, 24, 31, 60, 50, 40, 41, 4, 40, 79, 2, 14, 43, 34, 68, 52, 60, 4, 26, 31, 79, 19, 34, 24, 42, 34, 2, 2, 54, 17, 77, 34, 21, 51, 23, 41, 43, 16, 41, 51, 89, 34, 34, 34, 4, 23, 23, 23, 34, 49, 4, 95, 9, 4, 23, 73, 43, 76, 49, 86, 72, 67, 51, 23, 0, 9, 17, 79, 9, 92, 26, 7, 38, 96, 17, 98, 71, 64, 31, 74, 21, 20, 44, 92, 96, 11, 76, 40, 76, 35, 26, 7, 3, 97, 4, 26, 26, 23, 38, 92, 39, 81, 34, 77, 43, 39, 30, 4, 26, 10, 4, 67, 91, 75, 44, 7, 3, 4, 50, 23, 38, 17, 45, 0, 57, 60, 68, 42, 49, 0, 85, 19, 74, 51, 7, 52, 41, 87, 3, 19, 30, 35, 70, 72, 2, 79, 4, 4, 2, 41, 79, 85, 2, 38, 4, 34, 79, 4, 34, 49], [0, 78, 68, 60, 68, 42, 34, 52, 28, 73, 27, 25, 28, 28, 3, 74, 28, 28, 2, 80, 28, 26, 32, 38, 77, 81, 98, 70, 79, 75, 91, 40, 41, 89, 19, 28, 43, 95, 66, 34, 87, 28, 25, 94, 13, 79, 28, 85, 57, 28, 33, 28, 64, 24, 91, 3, 86, 19, 22, 67, 21, 77, 30, 45, 56, 76, 31, 23, 46, 68, 18, 51, 33, 28, 63, 31, 60, 77, 2, 19, 62, 79, 26, 47, 8, 51, 41, 64, 31, 49, 5, 97, 53, 70, 41, 74, 7, 28, 7, 43, 60, 74, 31, 34, 96, 57, 46, 44, 93, 50, 92, 10, 71, 72, 83, 87, 43, 48, 6, 69, 28, 88, 45, 23, 75, 13, 11, 17, 42, 31, 0, 7, 39, 23, 10, 54, 28, 37, 50, 16, 29, 28, 14, 52, 95, 23, 73, 40, 2, 55, 28, 58, 17, 7, 51, 35, 43, 96, 28, 43, 59, 42, 85, 21, 43, 15, 30, 85, 34, 64, 24, 23, 96, 61, 49, 96, 9, 34, 33, 13, 84, 65, 76, 71, 75, 77, 19, 82, 12, 9, 77, 67, 72, 90, 87, 20, 51, 86, 40, 52, 14, 17, 67, 19, 46, 57, 30, 1, 26, 36, 9, 37, 86, 41]))
X_3.ndata['h'] = torch.randn(99, 30)
X_3.edata["e"]=torch.tensor([0, 0, 7, 1, 1, 0, 0, 1, 2, 0, 0, 2, 6, 1, 0, 6, 1, 2, 0, 0, 2, 7, 0, 0, 1, 0, 0, 7, 2, 1, 7, 1, 2, 4, 2, 2, 2, 0, 0, 2, 7, 2, 0, 0, 7, 0, 0, 0, 7, 2, 0, 2, 7, 6, 0, 6, 1, 1, 0, 0, 0, 2, 0, 7, 0, 1, 6, 7, 0, 0, 0, 2, 7, 2, 0, 2, 7, 0, 7, 7, 0, 7, 0, 0, 0, 7, 1, 1, 2, 6, 0, 0, 0, 0, 2, 7, 7, 2, 2, 6, 0, 0, 7, 2, 2, 1, 1, 4, 0, 1, 0, 1, 1, 1, 0, 0, 2, 0, 0, 0, 2, 0, 0, 0, 7, 1, 0, 6, 6, 0, 1, 1, 0, 6, 0, 0, 2, 0, 0, 0, 0, 6, 0, 0, 7, 2, 1, 0, 2, 0, 1, 0, 0, 0, 0, 0, 7, 2, 2, 0, 0, 2, 7, 1, 2, 0, 1, 1, 2, 0, 0, 2, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 2, 1, 0, 0, 1, 0, 1, 0, 7, 7, 1, 2, 7, 2, 7, 0, 7, 0, 2, 0, 7, 2, 1, 0])
label = torch.randint(0, 2, (10,))
model.train()
optimizer.zero_grad()
preds = model(X_1, X_2, X_3)
loss = criterion(preds, label)
loss.backward()
optimizer.step()

# print the gradient magnitude of every parameter and report the ones that have no grad
for name, param in model.named_parameters():
    if param.grad is None:
        print("error: {} no grad".format(name))
    else:
        print(name, param.grad.abs().sum())
Can you help me? Thanks!