Hey, I’m trying to use ConceptNet (English only) for a link prediction task, and I am struggling to implement a DGLDataset.
This is what I have so far, adapted from the Dataset Tutorial, but it doesn’t seem to work.
class ConceptNetDataset(DGLDataset):
    """Single-graph DGL dataset built from a ConceptNet edge list.

    The file at ``path`` is read with ``pandas.read_csv`` and must contain the
    columns ``e1`` (source concept), ``e2`` (destination concept) and
    ``score`` (edge weight). Every distinct concept becomes one node; every
    row becomes one directed edge ``e1 -> e2``.

    The resulting graph carries:

    * ``ndata['label']``  -- the integer ID of each node (``0 .. N-1``).
    * ``edata['weight']`` -- the ``score`` column, one value per edge.

    Args:
        path: Location of the delimited edge-list file.
        sep: Column separator passed to ``pandas.read_csv``.
    """

    def __init__(self, path, sep):
        self.path = path
        self.sep = sep
        # Relations that ConceptNet treats as symmetric. Not used by
        # process() yet, but kept for callers that want to add reverse edges.
        self.bidirections = ["RelatedTo","Synonym", "Antonym", "DistinctFrom", "LocatedNear", "SimilarTo", "EtymologicallyRelatedTo"]
        # DGLDataset.__init__ runs the load pipeline (and therefore
        # process()), so every attribute must be assigned before this call.
        super().__init__(name='concept_net')

    def process(self):
        """Read the edge list and build ``self.graph``.

        Raises:
            KeyError: If one of the ``e1``/``e2``/``score`` columns is missing.
            ValueError: If ``e1`` or ``e2`` contains missing values, which
                cannot be converted to integer node IDs.
        """
        data = pd.read_csv(self.path, sep=self.sep)

        # Give every distinct concept a contiguous integer ID, in order of
        # first appearance across both endpoint columns.
        nodes = pd.concat([data["e1"], data["e2"]], axis=0).unique()
        entities = {name: idx for idx, name in enumerate(nodes)}
        num_nodes = len(nodes)

        # Translate both endpoint columns from concept names to node IDs.
        edges_src = torch.from_numpy(
            data["e1"].map(entities).to_numpy(dtype=np.int64)
        )
        edges_dst = torch.from_numpy(
            data["e2"].map(entities).to_numpy(dtype=np.int64)
        )

        # Node i is labelled with its own ID; edges are weighted by score.
        node_labels = torch.arange(num_nodes, dtype=torch.int64)
        edge_features = torch.from_numpy(data['score'].to_numpy())

        # dgl.heterograph expects a dict keyed by (src_type, etype, dst_type);
        # for a single node/edge type the right constructor is dgl.graph.
        # num_nodes is passed explicitly so the node count never depends on
        # the largest ID that happens to occur in an edge.
        self.graph = dgl.graph((edges_src, edges_dst), num_nodes=num_nodes)
        self.graph.ndata['label'] = node_labels
        self.graph.edata['weight'] = edge_features

    def __getitem__(self, i):
        """Return the only graph of the dataset.

        Raises:
            IndexError: If ``i`` is not ``0``. Raising here also lets plain
                ``for g in dataset`` iteration terminate.
        """
        if i != 0:
            raise IndexError("This dataset contains a single graph; use index 0")
        return self.graph

    def __len__(self):
        """Return the number of graphs in the dataset (always 1)."""
        return 1
I have also taken a look at the Hetero-graph Tutorial, but I don’t understand how I would go about creating the DGLDataset.
I thought about creating a dictionary similar to:
# Example edge dictionary of the kind the question associates with the
# Hetero-graph Tutorial. Each key is a 3-tuple of strings and each value is a
# pair of equal-length tensors; presumably these are
# (source node type, edge type, destination node type) ->
# (source node IDs, destination node IDs) -- confirm against the DGL docs.
# NOTE(review): `th` is not defined in this snippet; presumably
# `import torch as th` -- confirm.
graph_data = {
('drug', 'interacts', 'drug'): (th.tensor([0, 1]), th.tensor([1, 2])),
('drug', 'interacts', 'gene'): (th.tensor([0, 1]), th.tensor([2, 3])),
('drug', 'treats', 'disease'): (th.tensor([1]), th.tensor([2]))
}
# NOTE(review): this dictionary holds connectivity only, no edge features.
# Presumably features are attached after the graph is built, per edge type
# (e.g. g.edges[('drug', 'treats', 'disease')].data['weight'] = ...) --
# verify against the DGL heterograph documentation.
But I don’t know how to add the edge features.
Any hint/help/suggestion is welcome.
Thank you!