from torch import nn


class Attention(nn.Module):
    def __init__(self, hidden_dim, num_heads):
        super().__init__()
        # TODO: implement Attention
        pass

    def forward(self, x):
        # TODO: implement Attention; return both the result of the attention
        # mechanism and the attention weights (for visualization).
        # x shape: (seq_len, batch, hidden_dim)
        result, att_weights = x, None  # placeholder
        return result, att_weights
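
# Below is a minimal sketch of one way to fill in the TODOs above: standard
# multi-head scaled dot-product self-attention. It assumes hidden_dim is
# divisible by num_heads and that head-averaged weights are acceptable for
# visualization; the class name MultiHeadSelfAttention and the four Linear
# projections are illustrative choices, not part of the original skeleton.
import math

import torch


class MultiHeadSelfAttention(nn.Module):
    def __init__(self, hidden_dim, num_heads):
        super().__init__()
        assert hidden_dim % num_heads == 0, "hidden_dim must be divisible by num_heads"
        self.num_heads = num_heads
        self.head_dim = hidden_dim // num_heads
        # One projection each for queries, keys, and values, covering all heads at once.
        self.q_proj = nn.Linear(hidden_dim, hidden_dim)
        self.k_proj = nn.Linear(hidden_dim, hidden_dim)
        self.v_proj = nn.Linear(hidden_dim, hidden_dim)
        self.out_proj = nn.Linear(hidden_dim, hidden_dim)

    def forward(self, x):
        # x shape: (seq_len, batch, hidden_dim)
        seq_len, batch, hidden_dim = x.shape

        def split_heads(t):
            # (seq_len, batch, hidden_dim) -> (batch, num_heads, seq_len, head_dim)
            return t.view(seq_len, batch, self.num_heads, self.head_dim).permute(1, 2, 0, 3)

        q = split_heads(self.q_proj(x))
        k = split_heads(self.k_proj(x))
        v = split_heads(self.v_proj(x))

        # Scaled dot-product attention: softmax(Q K^T / sqrt(head_dim)) V.
        scores = torch.matmul(q, k.transpose(-2, -1)) / math.sqrt(self.head_dim)
        att_weights = torch.softmax(scores, dim=-1)  # (batch, num_heads, seq_len, seq_len)
        context = torch.matmul(att_weights, v)       # (batch, num_heads, seq_len, head_dim)

        # Merge heads back into (seq_len, batch, hidden_dim) and apply the output projection.
        context = context.permute(2, 0, 1, 3).reshape(seq_len, batch, hidden_dim)
        result = self.out_proj(context)

        # Average over heads so the returned weights form a single
        # (batch, seq_len, seq_len) map for visualization.
        return result, att_weights.mean(dim=1)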