Great, thank you. I apologise if I have confused my project by asking questions about the individual modules. Below, I have included my model in its entirety. The features on each edge/node were normalised in the csv that I used to construct the graphs in the first place.

```
import torch
from dgl.dataloading import GraphDataLoader
import dgl
import torch
import torch.nn as nn
import dgl.function as fn
import torch.nn.functional as F
class InitialEmbedModule(nn.Module):
    """Project raw node and edge features into their initial embeddings.

    Every node type (``race``, ``horse``, ``runner``) and each canonical
    ``run`` edge type gets its own ``Linear`` + ``BatchNorm1d`` projection.
    Runner features are concatenated with the global context row of the graph
    they belong to before being projected.

    Args:
        node_feat_dims: Raw feature width per node type; the keys read are
            ``"race"``, ``"horse"`` and ``"runner"``.
        edge_feat_dims: Raw feature width per edge type; the key read is
            ``"run"`` (shared by both canonical ``run`` edge types).
        node_embed_dims: Embedding width per node type (same keys as
            ``node_feat_dims``).
        edge_embed_dims: Embedding width per edge type (key ``"run"``).
        global_dim: Width of one row of the global context tensor.
    """

    def __init__(
        self,
        node_feat_dims: dict[str, int],
        edge_feat_dims: dict[str, int],
        node_embed_dims: dict[str, int],
        edge_embed_dims: dict[str, int],
        global_dim: int,
    ):
        super().__init__()

        # Node embeddings
        self.race_embed = nn.Sequential(
            nn.Linear(node_feat_dims["race"], node_embed_dims["race"]),
            nn.BatchNorm1d(node_embed_dims["race"]),
        )
        self.horse_embed = nn.Sequential(
            nn.Linear(node_feat_dims["horse"], node_embed_dims["horse"]),
            nn.BatchNorm1d(node_embed_dims["horse"]),
        )
        # Runner inputs are widened by the global context before projection.
        self.runner_embed = nn.Sequential(
            nn.Linear(node_feat_dims["runner"] + global_dim, node_embed_dims["runner"]),
            nn.BatchNorm1d(node_embed_dims["runner"]),
        )

        # Edge embeddings: one projection per canonical edge type, so the two
        # "run" relations do not share weights.
        self.run_horse_embed = nn.Sequential(
            nn.Linear(edge_feat_dims["run"], edge_embed_dims["run"]),
            nn.BatchNorm1d(edge_embed_dims["run"]),
        )
        self.run_runner_embed = nn.Sequential(
            nn.Linear(edge_feat_dims["run"], edge_embed_dims["run"]),
            nn.BatchNorm1d(edge_embed_dims["run"]),
        )

    @staticmethod
    def _expand_global_context(global_context: torch.Tensor, node_counts) -> torch.Tensor:
        """Repeat row ``i`` of ``global_context`` ``node_counts[i]`` times.

        Args:
            global_context: One context row per graph in the batch.
            node_counts: Number of runner nodes in each graph, in batch order.

        Returns:
            A tensor with ``sum(node_counts)`` rows, aligned with the batched
            runner nodes.

        Raises:
            ValueError: If the number of context rows differs from the number
                of graphs described by ``node_counts``.
        """
        counts = torch.as_tensor(node_counts, dtype=torch.long, device=global_context.device)
        if counts.numel() != global_context.size(0):
            raise ValueError(
                f"global_context has {global_context.size(0)} rows but the batch "
                f"contains {counts.numel()} graphs"
            )
        return torch.repeat_interleave(global_context, counts, dim=0)

    def forward(self, g: "dgl.DGLHeteroGraph", global_context: torch.Tensor):
        """Write initial ``"h"`` embeddings onto ``g`` and return ``g``.

        Reads the ``"features"`` field of every node and edge type and stores
        the projected result under ``"h"`` on the same graph object.

        Args:
            g: Batched heterograph carrying ``"features"`` on ``race``,
                ``horse`` and ``runner`` nodes and on both ``run`` edge types.
            global_context: One row of global features per graph in the batch.

        Returns:
            The same graph ``g`` with ``"h"`` populated.
        """
        # Create node embeddings
        g.nodes["race"].data["h"] = self.race_embed(g.nodes["race"].data["features"])
        g.nodes["horse"].data["h"] = self.horse_embed(g.nodes["horse"].data["features"])

        # Give every runner node the context row of the graph it belongs to.
        runner_context = self._expand_global_context(
            global_context, g.batch_num_nodes("runner")
        )
        runner_inputs = torch.cat(
            [g.nodes["runner"].data["features"], runner_context], dim=-1
        )
        g.nodes["runner"].data["h"] = self.runner_embed(runner_inputs)

        # Create canonical edge embeddings
        horse_to_race = ("horse", "run", "race")
        race_to_runner = ("race", "run", "runner")
        g.edges[horse_to_race].data["h"] = self.run_horse_embed(
            g.edges[horse_to_race].data["features"]
        )
        g.edges[race_to_runner].data["h"] = self.run_runner_embed(
            g.edges[race_to_runner].data["features"]
        )
        return g
class RaceUpdateModule(nn.Module):
    """Update historical ``race`` nodes from the horses that ran in them.

    Each ``("horse", "run", "race")`` edge sends the concatenation of the
    source horse embedding and the edge embedding. A race averages its
    incoming messages, passes the mean through a small MLP, adds the result
    to its current embedding and batch-normalises the sum.

    Args:
        horse_embed_dim: Width of the horse node embedding ``"h"``.
        run_edge_dim: Width of the ``run`` edge embedding ``"h"``.
        race_embed_dim: Width of the race node embedding ``"h"``.
    """

    def __init__(
        self,
        horse_embed_dim: int,
        run_edge_dim: int,
        race_embed_dim: int,
    ):
        super().__init__()
        self.get_node_updates = nn.Sequential(
            nn.Linear(horse_embed_dim + run_edge_dim, race_embed_dim),
            nn.Dropout(0.4),
            # nn.ReLU's only argument is the boolean ``inplace`` flag, so it
            # must not be given a feature width.
            nn.ReLU(),
            nn.Linear(race_embed_dim, race_embed_dim),
            nn.Dropout(0.4),
        )
        self.normalise_nodes = nn.BatchNorm1d(race_embed_dim)

    def forward(self, g: "dgl.DGLHeteroGraph"):
        """Refresh ``g.nodes["race"].data["h"]`` in place and return ``g``.

        Also leaves the aggregated update on the race nodes under ``"h_agg"``.
        """

        def concat_message_func(edges):
            # Message = [source horse embedding | run edge embedding].
            return {"concat_features": torch.cat([edges.src["h"], edges.data["h"]], dim=-1)}

        def learnable_reduce_func(nodes):
            # Dim 1 of the mailbox runs over a race's incoming messages.
            h_mean = torch.mean(nodes.mailbox["concat_features"], dim=1)
            return {"h_agg": self.get_node_updates(h_mean)}

        # NOTE(review): races without any incoming horse edge never reach the
        # reduce function; presumably DGL zero-fills their "h_agg" - confirm.
        g.multi_update_all(
            {("horse", "run", "race"): (concat_message_func, learnable_reduce_func)},
            "sum",
        )

        # Residual connection followed by batch normalisation.
        race_data = g.nodes["race"].data
        race_data["h"] = self.normalise_nodes(race_data["h"] + race_data["h_agg"])
        return g
class RunnerUpdateModule(nn.Module):
    """Update ``runner`` nodes with an attention-weighted sum over past runs.

    Every ``("race", "run", "runner")`` edge is scored from the source race
    embedding, the destination runner embedding and the edge embedding. Each
    runner softmax-normalises the scores of its incoming edges, sums the
    weighted ``[race | edge]`` messages, passes the sum through a small MLP,
    adds the result to its current embedding and batch-normalises the sum.

    Args:
        runner_embed_dim: Width of the runner node embedding ``"h"``.
        race_embed_dim: Width of the race node embedding ``"h"``.
        run_edge_dim: Width of the ``run`` edge embedding ``"h"``.
    """

    def __init__(self, runner_embed_dim, race_embed_dim, run_edge_dim):
        super().__init__()
        self.get_node_updates = nn.Sequential(
            nn.Linear(race_embed_dim + run_edge_dim, runner_embed_dim),
            nn.Dropout(0.4),
            # nn.ReLU's only argument is the boolean ``inplace`` flag, so it
            # must not be given a feature width.
            nn.ReLU(),
            nn.Linear(runner_embed_dim, runner_embed_dim),
            nn.Dropout(0.4),
        )
        self.normalise_nodes = nn.BatchNorm1d(runner_embed_dim)

        attn_input_size = run_edge_dim + race_embed_dim + runner_embed_dim
        # NOTE(review): the trailing Dropout zeroes some raw attention scores
        # during training before the softmax - confirm this is intended.
        self.attention_fc = nn.Sequential(
            nn.Linear(attn_input_size, attn_input_size // 2),
            nn.Dropout(0.4),
            nn.ReLU(),
            nn.Linear(attn_input_size // 2, 1),
            nn.Dropout(0.4),
            nn.LeakyReLU(),
        )

    def forward(self, g: "dgl.DGLHeteroGraph", graph_metadata):
        """Refresh ``g.nodes["runner"].data["h"]`` in place and return ``g``.

        Stores the raw attention score of every ``("race", "run", "runner")``
        edge under ``"a_raw"`` and the aggregated update of every runner under
        ``"h_agg"``.

        Args:
            g: Heterograph with ``"h"`` set on race nodes, runner nodes and
                ``("race", "run", "runner")`` edges.
            graph_metadata: Accepted for interface compatibility; not read.
        """
        race_to_runner = ("race", "run", "runner")

        def compute_attention_score(edges):
            concat_features = torch.cat(
                [edges.src["h"], edges.dst["h"], edges.data["h"]], dim=-1
            )
            return {"a_raw": self.attention_fc(concat_features)}

        g.apply_edges(compute_attention_score, etype=race_to_runner)

        def message_func(edges):
            # Only the fields the reducer consumes are sent.
            concat_features = torch.cat([edges.src["h"], edges.data["h"]], dim=-1)
            return {"concat_features": concat_features, "a_raw": edges.data["a_raw"]}

        def learnable_reduce_func(nodes):
            # Dim 1 of the mailbox runs over a runner's incoming edges, so the
            # softmax normalises the scores per runner.
            attn_norm = torch.softmax(nodes.mailbox["a_raw"], dim=1)
            h_sum = torch.sum(attn_norm * nodes.mailbox["concat_features"], dim=1)
            return {"h_agg": self.get_node_updates(h_sum)}

        g.multi_update_all(
            {race_to_runner: (message_func, learnable_reduce_func)},
            "sum",
        )

        # Residual connection followed by batch normalisation.
        runner_data = g.nodes["runner"].data
        runner_data["h"] = self.normalise_nodes(runner_data["h"] + runner_data["h_agg"])
        return g
class HorseRacePredictionGNN(nn.Module):
    """Predict one score per starting box for the current race.

    Pipeline: initial embeddings, then the race update layers, then the runner
    update layers. Runner embeddings are laid out by box number into a fixed
    ``max_runners`` slots per graph, concatenated with the global context and
    classified into ``max_runners`` outputs.

    Args:
        node_feat_dims: Raw feature width per node type.
        edge_feat_dims: Raw feature width per edge type.
        node_embed_dims: Embedding width per node type.
        edge_embed_dims: Embedding width per edge type.
        global_dim: Width of one row of the global context tensor.
        max_runners: Number of box slots per race, and the number of outputs.
        num_race_update_layers: How many ``RaceUpdateModule`` layers to stack.
        num_runner_update_layers: How many ``RunnerUpdateModule`` layers to
            stack.
    """

    def __init__(
        self,
        node_feat_dims: dict[str, int],
        edge_feat_dims: dict[str, int],
        node_embed_dims: dict[str, int],
        edge_embed_dims: dict[str, int],
        global_dim: int,
        max_runners: int,
        num_race_update_layers: int,
        num_runner_update_layers: int,
    ):
        super().__init__()
        self.max_runners = max_runners
        self.initial_embed = InitialEmbedModule(
            node_feat_dims, edge_feat_dims, node_embed_dims, edge_embed_dims, global_dim
        )
        self.update_races_layers = nn.ModuleList([
            RaceUpdateModule(
                node_embed_dims["horse"],
                edge_embed_dims["run"],
                node_embed_dims["race"],
            )
            for _ in range(num_race_update_layers)
        ])
        self.update_runner_layers = nn.ModuleList([
            RunnerUpdateModule(
                node_embed_dims["runner"],
                node_embed_dims["race"],
                edge_embed_dims["run"],
            )
            for _ in range(num_runner_update_layers)
        ])

        # One slot per box, flattened, plus the global context.
        class_input_dim = max_runners * node_embed_dims["runner"] + global_dim
        self.classifier = nn.Sequential(
            nn.Linear(class_input_dim, class_input_dim // 2),
            nn.Dropout(0.5),
            nn.BatchNorm1d(class_input_dim // 2),
            nn.ReLU(),
            nn.Linear(class_input_dim // 2, max_runners),
        )

    @staticmethod
    def _scatter_by_box(
        runner_embeddings: torch.Tensor,
        box_numbers: torch.Tensor,
        runners_per_graph,
        max_runners: int,
    ) -> torch.Tensor:
        """Place each runner embedding in the slot given by its box number.

        Args:
            runner_embeddings: Embeddings of all runners in the batch, in
                batch order, one row per runner.
            box_numbers: Box number of each runner, in the same order. Slot
                ``box - 1`` is used, so box numbers are expected to be 1-based.
            runners_per_graph: Number of runners in each graph, in batch order.
            max_runners: Number of slots per graph.

        Returns:
            A float32 tensor with one row per graph and
            ``max_runners * embedding_width`` columns; unused slots are zero.

        Raises:
            IndexError: If a box number maps outside the ``max_runners`` slots.
        """
        device = runner_embeddings.device
        embed_dim = runner_embeddings.size(-1)
        counts = torch.as_tensor(runners_per_graph, dtype=torch.long, device=device)
        num_graphs = counts.numel()

        slots = torch.zeros(
            (num_graphs, max_runners, embed_dim), dtype=torch.float32, device=device
        )
        if runner_embeddings.size(0) > 0:
            # Flatten so an (N, 1) box-number column indexes like an (N,) one.
            slot_index = box_numbers.reshape(-1).to(device=device, dtype=torch.long) - 1
            if slot_index.min() < -max_runners or slot_index.max() >= max_runners:
                raise IndexError(
                    f"run_box_number outside the {max_runners} available slots"
                )
            # Graph id of every runner, derived from the per-graph counts.
            graph_index = torch.repeat_interleave(
                torch.arange(num_graphs, device=device), counts
            )
            slots[graph_index, slot_index] = runner_embeddings.to(slots.dtype)
        return slots.view(num_graphs, max_runners * embed_dim)

    def get_ordered_runners_embedding(self, g: "dgl.DGLHeteroGraph", max_runners: int) -> torch.Tensor:
        """Return the runner embeddings of each graph ordered by box number.

        Reads ``"h"`` and ``"run_box_number"`` from the runner nodes of ``g``
        and returns one flattened row of ``max_runners`` slots per graph.
        """
        runner_data = g.nodes["runner"].data
        return self._scatter_by_box(
            runner_data["h"],
            runner_data["run_box_number"],
            g.batch_num_nodes("runner"),
            max_runners,
        )

    def forward(self, g: "dgl.DGLHeteroGraph", global_context: torch.Tensor, graph_metadata):
        """Run the full pipeline and return the classifier output.

        Args:
            g: Batched heterograph with raw ``"features"`` on nodes and edges.
            global_context: One row of global features per graph in the batch.
            graph_metadata: Forwarded unchanged to every runner update layer.

        Returns:
            A tensor with one row per graph and ``max_runners`` columns.
        """
        # Feature writes made below are scoped to this forward pass.
        with g.local_scope():
            g = self.initial_embed(g, global_context)
            for update_race_layer in self.update_races_layers:
                g = update_race_layer(g)
            for update_runner_layer in self.update_runner_layers:
                g = update_runner_layer(g, graph_metadata)
            runners_representation = self.get_ordered_runners_embedding(g, self.max_runners)
            return self.classifier(
                torch.cat([runners_representation, global_context], dim=-1)
            )
```

Here is what an example graph looks like.

```
Graph(num_nodes={'horse': 884, 'race': 236, 'runner': 8},
num_edges={('horse', 'run', 'race'): 1541, ('race', 'run', 'runner'): 254},
metagraph=[('horse', 'race', 'run'), ('race', 'runner', 'run')])
```

The “current race” refers to the race for which the outcome is being predicted using a graph.

A `horse` is a historical competitor with static features (`breed`, `colour`, etc.).

A `runner` is a competitor in the current race with static features (`breed`, `colour`, etc.) and features about its run in the current race, using only information that would be available before the race starts (including `starting_gate`, `weight` and `age`).

A `race` is a historical race with static features about that race that are not related to the horses that ran in it (`track_name`, `distance`).

A `('horse', 'run', 'race')` edge is a historical horse’s run in a historical race, with features about its performance in that race (`starting_gate`, `weight`, `age`, `finishing_time`, etc.).

A `('race', 'run', 'runner')` edge is a past run by one of the current race’s runners: it links the historical race in which that run took place to the runner, with features about the runner’s performance in that historical race (`starting_gate`, `weight`, `age`, `finishing_time`, etc.).

My pipeline is this:

- Initialise embeddings for each node/edge. `runner` embeddings are initialised with global context (static features about the current race, such as distance).
- Aggregate information from `horse` nodes and their corresponding `run` edges to update the pertinent historical `race` node. This new `race` embedding should now be informed of the competitors in that race and their performances.
- For each `runner` node, compute attention scores for each incoming historical `run` edge. These scores are stored on the respective edges.
- Update the embedding of each `runner` node by taking a weighted sum of its historical performances (where each weight is an attention score). Effectively, the updated `runner` node should now be imbued with information about its performances in historical races and the competition in those races, weighted by which historical races the model has learned are more important for that runner.

I understand that there is a lot here and it may appear convoluted. If assisting to this degree is outside the scope of this support forum, I apologise and will refrain from asking such questions. I appreciate your helpfulness and don’t wish to bombard you with complex/unclear questions. Thanks.

P.S. I have debugged the pipeline: with regularisation switched off, the model is able to overfit a small dataset.