I tried to replace this function with a metapath-based random walk, but the result does not look right: the random walk produces far more edges than `metapath_reachable_graph`, and I still cannot figure out why. The coding logic is:
The random walk definitely contains duplicated edges, so I deduplicate them with a `set`. Assuming a pattern like author-paper-author, i.e. a trace APAPAPAP, I take the author nodes at positions 1, 3, 5, 7 and then treat 3 as a neighbor of 1, 5 as a neighbor of 3, and so on.
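To make the indexing concrete, here is a tiny hypothetical trace (made-up node IDs) showing which pairs the loop below extracts; in the trace tensor the author-type nodes sit at 0-based positions 0, 2, 4, which are the 1st, 3rd, 5th nodes mentioned above:

import torch

trace = torch.tensor([1, 10, 3, 12, 5])  # hypothetical A-P-A-P-A walk: authors 1, 3, 5
pairs = [(trace[i - 2].item(), trace[i].item()) for i in range(2, len(trace), 2)]
print(pairs)  # [(1, 3), (3, 5)] -> 3 is a neighbor of 1, 5 is a neighbor of 3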
#%%
from dgl.data.rdf import AIFBDataset
from collections import defaultdict
import dgl
from tqdm import tqdm
import pickle
import torch
dataset = AIFBDataset()
g = dataset[0]
prefix = '../data/'
metapaths = [
    [('Personen', 'ontology#name', '_Literal'), ('_Literal', 'rev-ontology#name', 'Personen')],
    [('Personen', 'rev-ontology#author', 'Publikationen'), ('Publikationen', 'ontology#author', 'Personen')],
    [('Personen', 'rev-ontology#member', 'Projekte'), ('Projekte', 'ontology#member', 'Personen')],
]
def build_and_update_neighbors_dict(traces, neighbors_dict):
    for path in traces:
        # with a two-hop metapath, source-type nodes sit at 0-based positions 0, 2, 4, ...
        # of each trace (i.e. the 1st, 3rd, 5th, ... nodes of the walk)
        for i in range(2, len(path), 2):
            src, dst = path[i - 2].item(), path[i].item()
            # ensure self-loops; a padded value of -1 means the walk stopped early
            if src != -1:
                neighbors_dict[src].add(src)
            if dst != -1:
                neighbors_dict[dst].add(dst)
            # add both directions to the dictionary
            if src != -1 and dst != -1:
                neighbors_dict[src].add(dst)
                neighbors_dict[dst].add(src)
final_neighbors_dict = defaultdict(set)
for i, metapath in tqdm(enumerate(metapaths)):
    metapath_neighbors_dict = defaultdict(set)
    # create subgraph
    # etypes_of_interest = metapath
    # sg = g.edge_type_subgraph(etypes_of_interest)
    # start a walk from every node of the metapath's source type
    srctype = g.to_canonical_etype(metapath[0])[0]
    nodes = g.nodes(srctype)
    traces, _ = dgl.sampling.random_walk(
        g,
        nodes=nodes,
        metapath=metapath * 100
    )
    build_and_update_neighbors_dict(traces, neighbors_dict=metapath_neighbors_dict)
    build_and_update_neighbors_dict(traces, neighbors_dict=final_neighbors_dict)
    sorted_dict_metapath = {k: metapath_neighbors_dict[k] for k in sorted(metapath_neighbors_dict)}
    with open(prefix + '{}_adjlists.pickle'.format(i + 1), 'wb') as f:
        pickle.dump(sorted_dict_metapath, f)

sorted_dict = {k: final_neighbors_dict[k] for k in sorted(final_neighbors_dict)}
with open(prefix + 'homo_adjlists.pickle', 'wb') as f:
    pickle.dump(sorted_dict, f)
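One way to quantify the gap would be to compare the pickled adjacency lists against `dgl.metapath_reachable_graph` directly. A minimal sketch (it reuses g, metapaths and prefix from the script above; since the reachable graph is directed and has no self-loops, both sides are reduced to undirected pairs without self-loops before counting):

import pickle
import dgl

for i, metapath in enumerate(metapaths):
    with open(prefix + '{}_adjlists.pickle'.format(i + 1), 'rb') as f:
        adj = pickle.load(f)
    # undirected pairs produced by the random walk, self-loops dropped
    walk_pairs = {frozenset((u, v)) for u, nbrs in adj.items() for v in nbrs if u != v}

    # reference: all node pairs connected by one instance of the metapath
    rg = dgl.metapath_reachable_graph(g, metapath)
    src, dst = rg.edges()
    reach_pairs = {frozenset((u, v)) for u, v in zip(src.tolist(), dst.tolist()) if u != v}

    print('metapath {}: random walk {} pairs, reachable graph {} pairs'.format(
        i + 1, len(walk_pairs), len(reach_pairs)))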