diff options
author | Marcin Chrzanowski <mc370754@students.mimuw.edu.pl> | 2021-05-27 21:05:36 +0200 |
---|---|---|
committer | Marcin Chrzanowski <mc370754@students.mimuw.edu.pl> | 2021-05-27 21:05:36 +0200 |
commit | 0fead7ba8062c5704b4a27c9a1c57427b6e8ecea (patch) | |
tree | 495814c1070aba7fdead59b8473b89c92aa92feb /model | |
parent | 0226b13c96e048282cc1d1868eaeb59fd89877b3 (diff) |
Allow GPU use
Diffstat (limited to 'model')
-rw-r--r-- | model/attention.py | 7 | ||||
-rw-r--r-- | model/encoder.py | 11 | ||||
-rw-r--r-- | model/encoder_layer.py | 4 |
3 files changed, 15 insertions, 7 deletions
```diff
diff --git a/model/attention.py b/model/attention.py
index ffc07d3..75ff5a0 100644
--- a/model/attention.py
+++ b/model/attention.py
@@ -31,8 +31,9 @@ class Head(nn.Module):
         return value, weights
 
 class Attention(nn.Module):
-    def __init__(self, hidden_dim, num_heads):
+    def __init__(self, hidden_dim, num_heads, device):
         super(Attention, self).__init__()
+        self._device = device
         self._num_heads = num_heads
         self._head_output_dim = hidden_dim // num_heads
         # ensure hidden_dim is divisible by num_heads
@@ -45,9 +46,9 @@ class Attention(nn.Module):
 
     def forward(self, x):
         # x shape: (seqlen, batch, hiddendim)
-        result = torch.zeros(x.shape)
+        result = torch.zeros(x.shape).to(self._device)
         # attentions are (heads, seqlen, batch, seqlen)
-        attentions = torch.zeros(self._num_heads, x.shape[0], x.shape[1], x.shape[0])
+        attentions = torch.zeros(self._num_heads, x.shape[0], x.shape[1], x.shape[0]).to(self._device)
         for i in range(self._num_heads):
             from_index = i * self._head_output_dim
             to_index = from_index + self._head_output_dim
diff --git a/model/encoder.py b/model/encoder.py
index 85b3141..d6527dd 100644
--- a/model/encoder.py
+++ b/model/encoder.py
@@ -17,14 +17,21 @@ class EncoderModel(nn.Module):
         self._use_positional = use_positional
         self.embedding_layer = nn.Embedding(input_dim, hidden_dim)
         self.layers = nn.ModuleList([
-            EncoderLayer(hidden_dim, d_ff, num_heads, use_attention,
-                         use_feedforward) for i in range(n_layers)
+            EncoderLayer(
+                hidden_dim,
+                d_ff,
+                num_heads,
+                use_attention,
+                use_feedforward,
+                device=device
+            ) for i in range(n_layers)
         ])
         self.output_layer = nn.Linear(hidden_dim, output_dim)
 
     def forward(self, x, return_att_weights=False, verbose=False):
         log(f'Handling {x}', verbose)
         # x shape: (seqlen, batch)
+        x = x.to(self._device)
         hidden = self.embedding_layer(x)
         # hidden shape: (seqlen, batch, hiddendim)
 
diff --git a/model/encoder_layer.py b/model/encoder_layer.py
index 71a7d8f..311c39c 100644
--- a/model/encoder_layer.py
+++ b/model/encoder_layer.py
@@ -5,12 +5,12 @@ from model.forward import FeedForward
 
 class EncoderLayer(nn.Module):
     def __init__(self, hidden_dim, d_ff, num_heads, use_attention=True,
-                 use_feedforward=True):
+                 use_feedforward=True, device='cpu'):
         super(EncoderLayer, self).__init__()
         self._use_attention = use_attention
         self._use_feedforward = use_feedforward
         if use_attention:
-            self.attention = Attention(hidden_dim, num_heads)
+            self.attention = Attention(hidden_dim, num_heads, device)
         if use_feedforward:
             self.feedforward = FeedForward(hidden_dim, d_ff)
 
```