Hello!
I am trying to implement a LADIES layer based sampler using the DGL library.
Essentially, this is done by calculating the probability of two nodes being in the same neighbourhood via the laplacian and then sampling based on this probability.
I implement (a very simplified one layer version) using:
class LADIESSampler(dgl.dataloading.Sampler):
def __init__(self, nodes_per_layer, laplacian):
super().__init__()
self.fanouts = nodes_per_layer
self.laplacian = laplacian
def get_layer_nodes(self, g, seed_nodes, nodes_per_layer):
U = self.laplacian[seed_nodes, :]
pi = np.array(np.sum(U.multiply(U), axis=0))[0]
p = pi / np.sum(pi)
s_num = np.min([np.sum(p > 0), nodes_per_layer])
selected_nodes = np.random.choice(g.num_nodes(), s_num, p = p, replace = False)
all_nodes = torch.concat((torch.tensor(selected_nodes), seed_nodes))
print("selected", selected_nodes, "entire layer", all_nodes, p)
return all_nodes
def sample_blocks(self, g, seed_nodes):
print("seed nodes", seed_nodes)
blocks = []
all_nodes = self.get_layer_nodes(g, torch.tensor(seed_nodes), 2)
frontier2 = dgl.node_subgraph(g, all_nodes)
frontier=frontier2
eid = frontier.edata[dgl.EID]
block = dgl.to_block(frontier, seed_nodes)
block.edata[dgl.EID] = eid
seed_nodes = block.srcdata[dgl.NID]
blocks.insert(0, block)
return seed_nodes, output_nodes, blocks
But I get the following error:
DGLError Traceback (most recent call last)
Input In [228], in <cell line: 1>()
----> 1 s.sample_blocks(g, [2])
Input In [226], in LADIESSampler.sample_blocks(self, g, seed_nodes)
20 frontier=frontier2
21 eid = frontier.edata[dgl.EID]
---> 22 block = dgl.to_block(frontier, seed_nodes)
23 block.edata[dgl.EID] = eid
24 seed_nodes = block.srcdata[dgl.NID]
File ~/.conda/envs/nsv4/lib/python3.9/site-packages/dgl/transforms/functional.py:2286, in to_block(g, dst_nodes, include_dst_in_src, src_nodes)
2282 else:
2283 # use an empty list to signal we need to generate it
2284 src_node_ids_nd = []
-> 2286 new_graph_index, src_nodes_ids_nd, induced_edges_nd = _CAPI_DGLToBlock(
2287 g._graph, dst_node_ids_nd, include_dst_in_src, src_node_ids_nd)
2289 # The new graph duplicates the original node types to SRC and DST sets.
2290 new_ntypes = (g.ntypes, g.ntypes)
File dgl/_ffi/_cython/./function.pxi:293, in dgl._ffi._cy3.core.FunctionBase.__call__()
File dgl/_ffi/_cython/./function.pxi:239, in dgl._ffi._cy3.core.FuncCall()
DGLError: [11:26:58] /opt/dgl/src/graph/transform/to_bipartite.cc:119: Check failed: new_dst.Ptr<IdType>()[i] != -1 (-1 vs. -1) : Node 0 does not exist in `rhs_nodes`. Argument `rhs_nodes` must contain all the edge destination nodes.
To recreate this error, run:
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
import torch
import dgl
#create baby graph
edges = [(1, 2), (1,3), (1,7), (2,5), (2,8), (3,11), (3,12), (3,4),
(4,1), (4,5), (4,12), (5,4), (5,6), (5,2), (7,13), (10, 9),
(10, 11), (11, 12)]
src = [e[0]-1 for e in edges]
dst = [e[1]-1 for e in edges]
#visualize the graph
G = dgl.to_networkx(g)
nx.draw(G, arrows = None, with_labels = True) # networkx draw()
#make sure its undirected
g = dgl.to_bidirected(dgl.graph((src, dst)))
#initialize sampler
s = LADIESSampler([2], lap_matrix)
#try to sample a block
s.sample_blocks(g, [2])
Any advice on what I’m doing wrong? I have a feeling that it’s either the graph creation or the process for making it a block.
Thank you in advance!