From 0fead7ba8062c5704b4a27c9a1c57427b6e8ecea Mon Sep 17 00:00:00 2001
From: Marcin Chrzanowski <mc370754@students.mimuw.edu.pl>
Date: Thu, 27 May 2021 21:05:36 +0200
Subject: Allow GPU use

---
 model/attention.py     |  7 ++++---
 model/encoder.py       | 11 +++++++++--
 model/encoder_layer.py |  4 ++--
 3 files changed, 15 insertions(+), 7 deletions(-)

(limited to 'model')

diff --git a/model/attention.py b/model/attention.py
index ffc07d3..75ff5a0 100644
--- a/model/attention.py
+++ b/model/attention.py
@@ -31,8 +31,9 @@ class Head(nn.Module):
         return value, weights
 
 class Attention(nn.Module):
-    def __init__(self, hidden_dim, num_heads):
+    def __init__(self, hidden_dim, num_heads, device):
         super(Attention, self).__init__()
+        self._device = device
         self._num_heads = num_heads
         self._head_output_dim = hidden_dim // num_heads
         # ensure hidden_dim is divisible by num_heads
@@ -45,9 +46,9 @@ class Attention(nn.Module):
 
     def forward(self, x):
         # x shape: (seqlen, batch, hiddendim)
-        result = torch.zeros(x.shape)
+        result = torch.zeros(x.shape).to(self._device)
         # attentions are (heads, seqlen, batch, seqlen)
-        attentions = torch.zeros(self._num_heads, x.shape[0], x.shape[1], x.shape[0])
+        attentions = torch.zeros(self._num_heads, x.shape[0], x.shape[1], x.shape[0]).to(self._device)
         for i in range(self._num_heads):
             from_index = i * self._head_output_dim
             to_index = from_index + self._head_output_dim
diff --git a/model/encoder.py b/model/encoder.py
index 85b3141..d6527dd 100644
--- a/model/encoder.py
+++ b/model/encoder.py
@@ -17,14 +17,21 @@ class EncoderModel(nn.Module):
         self._use_positional = use_positional
         self.embedding_layer = nn.Embedding(input_dim, hidden_dim)
         self.layers = nn.ModuleList([
-            EncoderLayer(hidden_dim, d_ff, num_heads, use_attention,
-                                     use_feedforward) for i in range(n_layers)
+            EncoderLayer(
+                hidden_dim,
+                d_ff,
+                num_heads,
+                use_attention,
+                use_feedforward,
+                device=device
+            ) for i in range(n_layers)
         ])
         self.output_layer = nn.Linear(hidden_dim, output_dim)
 
     def forward(self, x, return_att_weights=False, verbose=False):
         log(f'Handling {x}', verbose)
         # x shape: (seqlen, batch)
+        x = x.to(self._device)
         hidden = self.embedding_layer(x)
         # hidden shape: (seqlen, batch, hiddendim)
 
diff --git a/model/encoder_layer.py b/model/encoder_layer.py
index 71a7d8f..311c39c 100644
--- a/model/encoder_layer.py
+++ b/model/encoder_layer.py
@@ -5,12 +5,12 @@ from model.forward import FeedForward
 
 class EncoderLayer(nn.Module):
     def __init__(self, hidden_dim, d_ff, num_heads, use_attention=True,
-                   use_feedforward=True):
+                   use_feedforward=True, device='cpu'):
         super(EncoderLayer, self).__init__()
         self._use_attention = use_attention
         self._use_feedforward = use_feedforward
         if use_attention:
-            self.attention = Attention(hidden_dim, num_heads)
+            self.attention = Attention(hidden_dim, num_heads, device)
         if use_feedforward:
             self.feedforward = FeedForward(hidden_dim, d_ff)
 
-- 
cgit v1.2.3