m-chrzan.xyz
author    Marcin Chrzanowski <mc370754@students.mimuw.edu.pl>  2021-05-27 21:05:36 +0200
committer Marcin Chrzanowski <mc370754@students.mimuw.edu.pl>  2021-05-27 21:05:36 +0200
commit    0fead7ba8062c5704b4a27c9a1c57427b6e8ecea (patch)
tree      495814c1070aba7fdead59b8473b89c92aa92feb /model
parent    0226b13c96e048282cc1d1868eaeb59fd89877b3 (diff)
Allow GPU use
Diffstat (limited to 'model')
-rw-r--r--   model/attention.py       7
-rw-r--r--   model/encoder.py        11
-rw-r--r--   model/encoder_layer.py   4
3 files changed, 15 insertions, 7 deletions
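The commit threads a `device` argument from EncoderModel through each EncoderLayer down to Attention, and moves the encoder's input onto that device in forward(). A minimal usage sketch follows; the keyword names come from the diff below, but the full EncoderModel signature is not shown here, so the argument values and the trailing .to(device) call are illustrative assumptions rather than part of this commit:

    import torch

    from model.encoder import EncoderModel

    # Prefer the GPU when one is available, otherwise stay on the CPU.
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    # Hyperparameter values are placeholders; passing device=device is the point.
    model = EncoderModel(
        input_dim=128,
        hidden_dim=64,
        d_ff=256,
        num_heads=4,
        output_dim=10,
        n_layers=2,
        device=device,
    ).to(device)  # also move the model's parameters (assumed, not done by this diff)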
diff --git a/model/attention.py b/model/attention.py
index ffc07d3..75ff5a0 100644
--- a/model/attention.py
+++ b/model/attention.py
@@ -31,8 +31,9 @@ class Head(nn.Module):
         return value, weights

 class Attention(nn.Module):
-    def __init__(self, hidden_dim, num_heads):
+    def __init__(self, hidden_dim, num_heads, device):
         super(Attention, self).__init__()
+        self._device = device
         self._num_heads = num_heads
         self._head_output_dim = hidden_dim // num_heads
         # ensure hidden_dim is divisible by num_heads
@@ -45,9 +46,9 @@ class Attention(nn.Module):
     def forward(self, x):
         # x shape: (seqlen, batch, hiddendim)
-        result = torch.zeros(x.shape)
+        result = torch.zeros(x.shape).to(self._device)
         # attentions are (heads, seqlen, batch, seqlen)
-        attentions = torch.zeros(self._num_heads, x.shape[0], x.shape[1], x.shape[0])
+        attentions = torch.zeros(self._num_heads, x.shape[0], x.shape[1], x.shape[0]).to(self._device)
         for i in range(self._num_heads):
             from_index = i * self._head_output_dim
             to_index = from_index + self._head_output_dim
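The two .to(self._device) calls above allocate the buffers on the CPU and then copy them to the target device. Not part of this commit, but an equivalent alternative in PyTorch is to allocate directly on the device via torch.zeros's device keyword, which skips the intermediate CPU tensor:

    # Allocate directly on the target device instead of copying after the fact.
    result = torch.zeros(x.shape, device=self._device)
    attentions = torch.zeros(
        self._num_heads, x.shape[0], x.shape[1], x.shape[0],
        device=self._device,
    )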
diff --git a/model/encoder.py b/model/encoder.py
index 85b3141..d6527dd 100644
--- a/model/encoder.py
+++ b/model/encoder.py
@@ -17,14 +17,21 @@ class EncoderModel(nn.Module):
         self._use_positional = use_positional
         self.embedding_layer = nn.Embedding(input_dim, hidden_dim)
         self.layers = nn.ModuleList([
-            EncoderLayer(hidden_dim, d_ff, num_heads, use_attention,
-                use_feedforward) for i in range(n_layers)
+            EncoderLayer(
+                hidden_dim,
+                d_ff,
+                num_heads,
+                use_attention,
+                use_feedforward,
+                device=device
+            ) for i in range(n_layers)
         ])
         self.output_layer = nn.Linear(hidden_dim, output_dim)

     def forward(self, x, return_att_weights=False, verbose=False):
         log(f'Handling {x}', verbose)
         # x shape: (seqlen, batch)
+        x = x.to(self._device)
         hidden = self.embedding_layer(x)

         # hidden shape: (seqlen, batch, hiddendim)
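With x = x.to(self._device) inside forward(), callers can keep passing CPU tensors of token indices and the encoder relocates them itself. A small illustrative call, reusing the hypothetical `model` from the sketch above (shapes and vocabulary size are arbitrary):

    # (seqlen=7, batch=2) integer token indices, created on the CPU.
    x = torch.randint(0, 128, (7, 2))
    output = model(x)  # moved onto the model's device inside forward()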
diff --git a/model/encoder_layer.py b/model/encoder_layer.py
index 71a7d8f..311c39c 100644
--- a/model/encoder_layer.py
+++ b/model/encoder_layer.py
@@ -5,12 +5,12 @@ from model.forward import FeedForward
 class EncoderLayer(nn.Module):
     def __init__(self, hidden_dim, d_ff, num_heads, use_attention=True,
-            use_feedforward=True):
+            use_feedforward=True, device='cpu'):
         super(EncoderLayer, self).__init__()
         self._use_attention = use_attention
         self._use_feedforward = use_feedforward

         if use_attention:
-            self.attention = Attention(hidden_dim, num_heads)
+            self.attention = Attention(hidden_dim, num_heads, device)
         if use_feedforward:
             self.feedforward = FeedForward(hidden_dim, d_ff)
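Since EncoderLayer now defaults to device='cpu', call sites that predate this commit keep working unchanged; only callers that want the GPU pass a device explicitly. For example (argument values are placeholders, keyword names are those in the signature above):

    # Old-style call: still valid, runs on the CPU.
    layer = EncoderLayer(hidden_dim=64, d_ff=256, num_heads=4)

    # Opting into the GPU:
    layer_gpu = EncoderLayer(hidden_dim=64, d_ff=256, num_heads=4, device='cuda')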