Hi everyone!
I’m working on a node classification problem using graphs. I’m new to DGL and struggling with a reproducibility problem: repeated runs with a fixed seed still produce different results. This is my code — I am wondering whether there is some source of randomness in the computation?
I’m using dgl==0.6.1 and pytorch==1.9.0. The experiments are run on an RTX 3090 (24 GB).
My seed is fixed as follows:
# Fix every RNG the stack uses. Seeding random/numpy/torch alone is NOT
# sufficient here: DGL keeps its own random number generator, and cuDNN's
# kernel autotuner is free to pick non-deterministic kernels — both are
# likely causes of run-to-run differences on GPU.
random.seed(seed)
np.random.seed(seed)
if is_torch_available():
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    # ^^ safe to call this function even if cuda is not available
    # Disable cuDNN benchmark autotuning and force deterministic kernels;
    # otherwise convolution/attention kernels can vary between runs.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
if is_tf_available():
    tf.random.set_seed(seed)
# DGL has its own generator (graph sampling, dropout in some ops) — seed it too.
import dgl
dgl.seed(seed)
import dgl
import dgl.function as fn
import torch.nn as nn
import torch.nn.functional as F
from typing import List, Union
import torch
from dgl import DGLGraph
# Built-in message/reduce pair: copy each source node's 'h' feature into
# mailbox field 'm', then sum mailboxes back into 'h'.
# NOTE(review): these two globals are not referenced by any class below
# (RGCNLayer builds its own message function) — possibly dead code; verify.
# `copy_src` was renamed `copy_u` in later DGL releases; valid for dgl==0.6.1.
gcn_msg=fn.copy_src(src="h",out="m")
gcn_reduce=fn.sum(msg="m",out="h")
class RGCNLayer(nn.Module):
    """One relational GCN layer with an optional per-edge scalar gate.

    Each relation type gets its own (feat_size x feat_size) transform;
    messages are normalized by a precomputed per-edge 'norm' and summed
    into the destination node's 'h'.

    Parameters
    ----------
    feat_size : int
        Node feature dimension (input and output are the same size).
    num_rels : int
        Number of distinct edge relation types.
    activation : callable or None
        Optional activation applied to aggregated node features.
    gated : bool
        If True, each message is scaled by a learned sigmoid gate.
    """

    def __init__(self, feat_size, num_rels, activation=None, gated=True):
        super(RGCNLayer, self).__init__()
        self.feat_size = feat_size
        self.num_rels = num_rels
        self.activation = activation
        self.gated = gated
        # One linear transform per relation type.
        self.weight = nn.Parameter(torch.Tensor(self.num_rels, self.feat_size, self.feat_size))
        # init trainable parameters
        nn.init.xavier_uniform_(self.weight, gain=nn.init.calculate_gain('relu'))
        if self.gated:
            # Per-relation projection to a scalar gate logit.
            self.gate_weight = nn.Parameter(torch.Tensor(self.num_rels, self.feat_size, 1))
            nn.init.xavier_uniform_(self.gate_weight, gain=nn.init.calculate_gain('sigmoid'))

    def forward(self, g):
        """Run one round of message passing on ``g`` in place.

        Expects ``g.ndata['h']`` (node features), ``g.edata['rel_type']``
        (int relation ids indexing ``self.weight``) and ``g.edata['norm']``.
        Updates ``g.ndata['h']``; returns nothing.
        """
        weight = self.weight
        # BUG FIX: the original read self.gate_weight unconditionally, which
        # raised AttributeError whenever the layer was built with gated=False.
        gate_weight = self.gate_weight if self.gated else None

        def message_func(edges):
            w = weight[edges.data['rel_type']]
            # squeeze(1) instead of squeeze(): a bare squeeze() would also
            # drop the edge dimension when the graph has exactly one edge.
            msg = torch.bmm(edges.src['h'].unsqueeze(1), w).squeeze(1)
            msg = msg * edges.data['norm']
            if self.gated:
                gate_w = gate_weight[edges.data['rel_type']]
                gate = torch.bmm(edges.src['h'].unsqueeze(1), gate_w).squeeze(1).reshape(-1, 1)
                gate = torch.sigmoid(gate)
                msg = msg * gate
            return {'msg': msg}

        def apply_func(nodes):
            h = nodes.data['h']
            # BUG FIX: activation defaults to None; guard before calling it.
            if self.activation is not None:
                h = self.activation(h)
            return {'h': h}

        g.update_all(message_func, fn.sum(msg='msg', out='h'), apply_func)
class RGCNModel(nn.Module):
    """A stack of identically-sized RGCN layers applied to a (batched) graph.

    Parameters
    ----------
    h_dim : int
        Node feature dimension shared by every layer.
    num_rels : int
        Number of edge relation types.
    num_hidden_layers : int
        How many RGCNLayer blocks to stack.
    gated : bool
        Passed through to each RGCNLayer.
    """

    def __init__(self, h_dim, num_rels, num_hidden_layers=1, gated=True):
        super(RGCNModel, self).__init__()
        self.h_dim = h_dim
        self.num_rels = num_rels
        self.num_hidden_layers = num_hidden_layers
        self.gated = gated
        # create rgcn layers
        self.build_model()

    def build_model(self):
        # One ReLU-activated relational layer per hidden level.
        self.layers = nn.ModuleList(
            RGCNLayer(self.h_dim, self.num_rels, activation=F.relu, gated=self.gated)
            for _ in range(self.num_hidden_layers)
        )

    def forward(self, g):
        """Run every layer in place, then return the per-component 'h' tensors."""
        for rgcn in self.layers:
            rgcn(g)
        # Split the batched graph back into its components and collect features.
        return [component.ndata['h'] for component in dgl.unbatch(g)]
class transformerEntailmentWhole(nn.Module):
    """Entailment head combining two views of the rule/user representations:

    1. a relational GCN over a per-example discourse graph (6 edge types), and
    2. local/global multi-head attention over the rule tokens, fused by FuseLayer.

    Both views are encoded by a shared transformer encoder and summed.

    NOTE(review): relies on names defined elsewhere (ElectraConfig, FuseLayer,
    MHA, Variable, xavier_uniform_) — confirm their imports at the file top.
    """

    def __init__(self, config, transformer_encoder):
        super().__init__()
        self.config = config
        self.dropout = nn.Dropout(0.1)
        # Discourse relation name -> embedding index (16 relation types).
        self.relation_key_pair = {'Comment': 0, 'Clarification_question': 1, 'Elaboration': 2, 'Acknowledgement': 3,
                                  'Continuation': 4, 'Explanation': 5, 'Conditional': 6, 'Question-answer_pair': 7,
                                  'Alternation': 8, 'Q-Elab': 9, 'Result': 10, 'Background': 11, 'Narration': 12,
                                  'Correction': 13, 'Parallel': 14, 'Contrast': 15}
        # if config.d_model == 1024:
        #     encoder_layer = TransformerEncoderLayer(config.d_model, 16, 4 * self.config.d_model)
        # else:
        #     encoder_layer = TransformerEncoderLayer(config.d_model, 12, 4 * self.config.d_model)
        #
        # encoder_norm = nn.LayerNorm(config.d_model)
        self.num_decoupling = 1  # number of local/global attention rounds
        # d_model == 1024 implies the Electra-large backbone, otherwise base.
        if config.d_model == 1024:
            electra_config = ElectraConfig.from_pretrained("google/electra-large-discriminator", cache_dir=None)
        else:
            electra_config = ElectraConfig.from_pretrained("google/electra-base-discriminator", cache_dir=None)
        self.fuse = FuseLayer(config)
        self.localMHA = nn.ModuleList([MHA(electra_config) for _ in range(1)])
        self.globalMHA = nn.ModuleList([MHA(electra_config) for _ in range(1)])
        self.transformer_encoder = transformer_encoder
        # 18 slots: 16 relation types above + index 16 used in forward() as a
        # "no scenario" placeholder; slot 17 appears unused — verify.
        self.relation_embeds = nn.Embedding(18, config.d_model)
        self.edge_embeds = nn.Embedding(6, config.d_model)
        # GCN over 6 edge types (in/out, reverse in/out, self-loop, global).
        self.GCN = RGCNModel(config.d_model, 6, 1, True)
        # self._reset_transformer_parameters()

    def _reset_transformer_parameters(self):
        r"""Initiate parameters in the transformer model."""
        # Xavier-init every multi-dim parameter whose name mentions 'transformer'.
        for name, param in self.named_parameters():
            if 'transformer' in name and param.dim() > 1:
                xavier_uniform_(param)

    def forward(self, input_ids, hidden_states, rule_idx, relationInput, user_idx, scenario, entailment_len):
        """Build one discourse graph per example, run both branches, and
        return the element-wise sum of the two transformer-encoded views.

        NOTE(review): `entailment_len` is never read, and `input_ids` is only
        copied into an unused local (`inp`) — candidates for removal; confirm
        no external caller relies on the signature's arity only.
        """
        tenc_input, tenc_mask, tenc_input_gcn, tenc_input_rule = [], [], [], []
        rule_mask = []
        userinfo_input, rule_input = [], []
        for idx in range(len(rule_idx)):
            G = dgl.DGLGraph().to(hidden_states.device)
            relation = []
            edge_type = []  # in total six type of edges
            edges = []
            relationTemp = relationInput[idx]
            rule_idxTemp = rule_idx[idx]
            user_idxTemp = user_idx[idx]
            # Collect the distinct relation types present in this example;
            # each becomes an extra "relation node" in the graph.
            for item in relationTemp:
                if item['type'] not in relation:
                    relation.append(item['type'])
            G.add_nodes(rule_idxTemp.shape[0] + 1 + len(relation))  # total utterance nodes in the graph
            # Graph Construction
            for item in relationTemp:
                # add default_in and default_out edges
                G.add_edges(item['y'], relation.index(item['type']) + rule_idxTemp.shape[0] + 1)
                edge_type.append(0)
                edges.append([item['y'], relation.index(item['type']) + rule_idxTemp.shape[0] + 1])
                # G.edges[item['y'], relation.index(item['type'])+e['entail']['rule_idx'].shape[0]+1].data['rel_type'] = self.edge_embeds(Variable(torch.LongTensor([0,]).to(self.device)))
                G.add_edges(relation.index(item['type']) + rule_idxTemp.shape[0] + 1, item['x'])
                edge_type.append(1)
                edges.append([relation.index(item['type']) + rule_idxTemp.shape[0] + 1, item['x']])
                # G.edges[relation.index(item['type'])+e['entail']['rule_idx'].shape[0]+1, item['x']].data['rel_type'] = self.edge_embeds(Variable(torch.LongTensor([1,]).to(self.device)))
                # add reverse_out and reverse_in edges
                G.add_edges(relation.index(item['type']) + rule_idxTemp.shape[0] + 1, item['y'])
                edge_type.append(2)
                edges.append([relation.index(item['type']) + rule_idxTemp.shape[0] + 1, item['y']])
                # G.edges[relation.index(item['type'])+e['entail']['rule_idx'].shape[0]+1, item['y']].data['rel_type'] = self.edge_embeds(Variable(torch.LongTensor([2,]).to(self.device)))
                G.add_edges(item['x'], relation.index(item['type']) + rule_idxTemp.shape[0] + 1)
                edge_type.append(3)
                edges.append([item['x'], relation.index(item['type']) + rule_idxTemp.shape[0] + 1])
                # G.edges[item['x'], relation.index(item['type'])+e['entail']['rule_idx'].shape[0]+1].data['rel_type'] = self.edge_embeds(Variable(torch.LongTensor([3,]).to(self.device)))
            # add self edges
            for x in range(rule_idxTemp.shape[0] + 1 + len(relation)):
                G.add_edges(x, x)
                edge_type.append(4)
                edges.append([x, x])
                # G.edges[x,x].data['rel_type'] = self.edge_embeds(Variable(torch.LongTensor([4,]).to(self.device)))
            # add global edges
            for x in range(rule_idxTemp.shape[0] + 1 + len(relation)):
                if x != rule_idxTemp.shape[0]:
                    G.add_edges(rule_idxTemp.shape[0], x)
                    edge_type.append(5)
                    edges.append([rule_idxTemp.shape[0], x])
                    # G.edges[x,x].data['rel_type'] = self.edge_embeds(Variable(torch.LongTensor([5,]).to(self.device)))
            # add node feature
            # Nodes [0, n_rules) = rule tokens, node n_rules = user/scenario,
            # remaining nodes = relation-type embeddings.
            for i in range(rule_idxTemp.shape[0] + 1 + len(relation)):
                if i < rule_idxTemp.shape[0]:
                    G.nodes[[i]].data['h'] = torch.index_select(hidden_states[idx], 0,
                                                                torch.LongTensor([rule_idxTemp[i], ]).to(
                                                                    hidden_states.device))
                elif i == rule_idxTemp.shape[0]:
                    if scenario[idx] != -1:
                        G.nodes[[i]].data['h'] = torch.index_select(hidden_states[idx], 0, torch.LongTensor(
                            [user_idxTemp[1], ]).to(hidden_states.device))
                    else:
                        # No scenario: use the reserved embedding at index 16.
                        G.nodes[[i]].data['h'] = self.relation_embeds(
                            Variable(torch.LongTensor([16, ]).to(hidden_states.device)))
                else:
                    index_relation = self.relation_key_pair[relation[i - rule_idxTemp.shape[0] - 1]]
                    G.nodes[[i]].data['h'] = self.relation_embeds(
                        Variable(torch.LongTensor([index_relation, ]).to(hidden_states.device)))
            # Per-edge normalization: 1/(in_degree - 1) excludes the self-loop
            # from the count; self-loops get norm 1.
            edge_norm = []
            for e1, e2 in edges:
                if e1 == e2:
                    edge_norm.append(1)
                else:
                    edge_norm.append(1 / (G.in_degrees(e2) - 1))
            edge_type = torch.tensor(edge_type).to(hidden_states.device)
            edge_norm = torch.tensor(edge_norm).unsqueeze(1).float().to(hidden_states.device)
            # 'rel_type' holds int type ids (indexing RGCNLayer.weight), not embeddings.
            G.edata.update({'rel_type': edge_type, })
            G.edata.update({'norm': edge_norm})
            # NOTE(review): on GPU, the sum aggregation inside the GCN may use
            # atomic scatter-adds — a common source of run-to-run nondeterminism
            # even with seeds fixed; confirm for dgl 0.6.1.
            X = self.GCN(G)[0]  # [bz, hdim]  (actually [num_nodes, d_model] for this graph)
            tenc_idx = torch.cat([rule_idxTemp, user_idxTemp], dim=-1).to(hidden_states.device)
            gcn_user = torch.index_select(hidden_states[idx], 0, user_idxTemp.to(hidden_states.device))
            gcn_rule_idx = torch.LongTensor([i for i in range(rule_idxTemp.shape[0])]).to(hidden_states.device)
            gcn_rule = torch.index_select(X, 0, gcn_rule_idx)
            tenc_input_gcn.append(torch.cat([gcn_rule, gcn_user], dim=0))
            tenc_input_ = []
            tenc_input_global = []  # NOTE(review): never used below — dead local?
            inp_ = user_idxTemp[0]
            inp = input_ids  # NOTE(review): never used below — dead local?
            ruleidx = rule_idxTemp
            # construct mask matrix for multihead attention
            # M1/M2 initially alias the same zeros tensor; harmless because M2
            # is rebound (M2 = 1.0 - M1) before being read.
            M1 = M2 = torch.zeros(inp_, inp_)
            # Block-diagonal 1s over each rule span -> local attention mask.
            for id_ in range(len(ruleidx) - 1):
                M1[ruleidx[id_]:ruleidx[id_ + 1], ruleidx[id_]:ruleidx[id_ + 1]] = 1.0
            M1[ruleidx[-1]:inp_, ruleidx[-1]:inp_] = 1.0
            M2 = 1.0 - M1
            # Convert {0,1} masks to additive attention masks (0 keep, -1e4 block).
            M1 = (1.0 - M1) * -10000.0
            M2 = (1.0 - M2) * -10000.0
            M1 = M1.unsqueeze(0).unsqueeze(1)
            M2 = M2.unsqueeze(0).unsqueeze(1)
            s = [i for i in range(user_idxTemp[0])]
            s = torch.LongTensor(s)
            rule_selected = torch.index_select(hidden_states[idx], 0, s.to(hidden_states.device))
            rule_selected = rule_selected.unsqueeze(0)
            local_word_level = self.localMHA[0](rule_selected, rule_selected, attention_mask=M1.to(hidden_states.device))[0]
            global_word_level = self.globalMHA[0](rule_selected, rule_selected, attention_mask=M2.to(hidden_states.device))[0]
            # With num_decoupling == 1 this loop body never executes.
            for t in range(1, self.num_decoupling):
                local_word_level = \
                    self.localMHA[t](local_word_level, local_word_level, attention_mask=M1.to(hidden_states.device))[0]
                global_word_level = \
                    self.globalMHA[t](global_word_level, global_word_level, attention_mask=M2.to(hidden_states.device))[0]
            context_word_level = self.fuse(rule_selected, local_word_level, global_word_level)
            rule_input.append(
                torch.index_select(context_word_level.squeeze(0), 0, rule_idxTemp.to(hidden_states.device)))
            userinfo_input.append(torch.index_select(hidden_states[idx], 0, user_idxTemp.to(hidden_states.device)))
            for i in rule_input[-1]:
                tenc_input_.append(i)
            for j in userinfo_input[-1]:
                tenc_input_.append(j)
            # NOTE(review): the .cpu().detach().numpy() round-trip detaches this
            # branch from autograd — no gradients flow into the attention/fuse
            # path through tenc_input_rule; confirm this is intentional.
            tenc_input_rule.append(torch.Tensor([t.cpu().detach().numpy() for t in tenc_input_]))
            tenc_input.append(torch.index_select(hidden_states[idx], 0, tenc_idx))
            tenc_mask.append(torch.tensor([False] * tenc_idx.shape[0], dtype=torch.bool))
            rule_mask.append(torch.tensor([1] * rule_idxTemp.shape[0], dtype=torch.bool))
        # Pad the per-example sequences; padding positions are masked True.
        tenc_input_gcn_padded = torch.nn.utils.rnn.pad_sequence(tenc_input_gcn).to(hidden_states.device)
        tenc_input_padded = torch.nn.utils.rnn.pad_sequence(tenc_input).to(hidden_states.device)  # [seqlen, N, dim]
        tenc_input_rule_padded = torch.nn.utils.rnn.pad_sequence(tenc_input_rule).to(hidden_states.device)
        tenc_mask_padded = torch.nn.utils.rnn.pad_sequence(tenc_mask, batch_first=True, padding_value=True).to(
            hidden_states.device)
        # tenc_out = self.transformer_encoder(tenc_input_padded, src_key_padding_mask=tenc_mask_padded)
        # Encode both views with the shared encoder and sum them, batch-first.
        tenc_out_gcn = self.transformer_encoder(tenc_input_gcn_padded, src_key_padding_mask=tenc_mask_padded).transpose(0, 1)
        tenc_out_rule = self.transformer_encoder(tenc_input_rule_padded, src_key_padding_mask=tenc_mask_padded).transpose(0, 1)
        return tenc_out_gcn + tenc_out_rule
Thanks a lot!