from torch import nn


class Attention(nn.Module):
    def __init__(self, hidden_dim, num_heads):
        super().__init__()
        # TODO: implement Attention
        pass

    def forward(self, x):
        # TODO: implement Attention; return both the result of the attention
        # mechanism and the attention weights (for visualization).
        # x shape: (seq_len, batch, hidden_dim)
        result, att_weights = x, None  # placeholder
        return result, att_weights
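
# Below is a minimal sketch of one way to fill in the TODOs above: standard
# multi-head scaled dot-product self-attention. It assumes hidden_dim is
# divisible by num_heads and that head-averaged weights are acceptable for
# visualization; the class name MultiHeadSelfAttention and the four Linear
# projections are illustrative choices, not part of the original skeleton.
import math

import torch


class MultiHeadSelfAttention(nn.Module):
    def __init__(self, hidden_dim, num_heads):
        super().__init__()
        assert hidden_dim % num_heads == 0, "hidden_dim must be divisible by num_heads"
        self.num_heads = num_heads
        self.head_dim = hidden_dim // num_heads
        # One projection each for queries, keys, and values, covering all heads at once.
        self.q_proj = nn.Linear(hidden_dim, hidden_dim)
        self.k_proj = nn.Linear(hidden_dim, hidden_dim)
        self.v_proj = nn.Linear(hidden_dim, hidden_dim)
        self.out_proj = nn.Linear(hidden_dim, hidden_dim)

    def forward(self, x):
        # x shape: (seq_len, batch, hidden_dim)
        seq_len, batch, hidden_dim = x.shape

        def split_heads(t):
            # (seq_len, batch, hidden_dim) -> (batch, num_heads, seq_len, head_dim)
            return t.view(seq_len, batch, self.num_heads, self.head_dim).permute(1, 2, 0, 3)

        q = split_heads(self.q_proj(x))
        k = split_heads(self.k_proj(x))
        v = split_heads(self.v_proj(x))

        # Scaled dot-product attention: softmax(Q K^T / sqrt(head_dim)) V.
        scores = torch.matmul(q, k.transpose(-2, -1)) / math.sqrt(self.head_dim)
        att_weights = torch.softmax(scores, dim=-1)  # (batch, num_heads, seq_len, seq_len)
        context = torch.matmul(att_weights, v)       # (batch, num_heads, seq_len, head_dim)

        # Merge heads back into (seq_len, batch, hidden_dim) and apply the output projection.
        context = context.permute(2, 0, 1, 3).reshape(seq_len, batch, hidden_dim)
        result = self.out_proj(context)

        # Average over heads so the returned weights form a single
        # (batch, seq_len, seq_len) map for visualization.
        return result, att_weights.mean(dim=1)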