m-chrzan.xyz
aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author Marcin Chrzanowski <m@m-chrzan.xyz> 2021-05-01 15:20:36 +0200
committer Marcin Chrzanowski <m@m-chrzan.xyz> 2021-05-01 15:20:36 +0200
commit 290d55c4353a7374da14d67bc9ab3d33c236fa95 (patch)
tree f7531f12e1f78e32b59cabf2cf3570b6c5869a5f /src
parent e6ea98728380a222459049987ddbb858464741d3 (diff)
Implement configurable experiment runner
Diffstat (limited to 'src')
-rw-r--r-- src/experiment.py 68
-rw-r--r-- src/net_types.py 5
-rw-r--r-- src/parse_config.py 104
-rw-r--r-- src/run.py 14
-rw-r--r-- src/runner.py 77
5 files changed, 268 insertions, 0 deletions
diff --git a/src/experiment.py b/src/experiment.py
new file mode 100644
index 0000000..dd2033c
--- /dev/null
+++ b/src/experiment.py
@@ -0,0 +1,68 @@
+import subprocess
+import os
+import time
+
+import pandas as pd
+import matplotlib.pyplot as plt
+
+from runner import Runner
+
+class Experiment:
+    """
+    One run of the experiment runner with its own output directory.
+
+    Constructing an Experiment creates `outputs/<timestamp>/`, copies the
+    configuration file into it as `config.yaml`, and builds a Runner that
+    reports into a fresh ExperimentMetrics.
+    """
+
+    def __init__(self, file):
+        """
+        file: path to the YAML configuration file for this run.
+        """
+        self.make_dir()
+        self.copy_config(file)
+        self.metrics = ExperimentMetrics()
+        self.runner = Runner(file, self.metrics)
+
+    def run(self):
+        """
+        Runs the experiment by delegating to the Runner.
+        """
+        self.runner.run()
+
+    def save_results(self):
+        """
+        Writes the collected metrics to the output directory as metrics.csv
+        and plots them into losses.png and accuracies.png.
+        """
+        data = self.metrics.get_dataframe()
+        data.to_csv(self.dir_path('metrics.csv'))
+
+        plt.plot(data['train_losses'], label='train loss')
+        plt.plot(data['test_losses'], label='test loss')
+        plt.xlabel('Epoch')
+        plt.ylabel('Loss')
+        plt.legend()
+        plt.savefig(self.dir_path('losses.png'))
+        # Clear the shared pyplot figure so the next plot starts empty.
+        plt.clf()
+
+        plt.plot(data['test_accuracies'], label='test accuracy')
+        plt.xlabel('Epoch')
+        plt.ylabel('% correct')
+        plt.legend()
+        plt.savefig(self.dir_path('accuracies.png'))
+        # Also clear after the last plot, so a later caller of pyplot does
+        # not inherit the accuracy curve.
+        plt.clf()
+
+    def dir_path(self, file):
+        """
+        Returns the path of `file` inside this experiment's output directory.
+        """
+        return '{}/{}'.format(self.dirname, file)
+
+    def make_dir(self):
+        """
+        Creates the output directory `outputs/<YYYYmmddHHMMSS>` and stores
+        its path in self.dirname.
+
+        Raises FileExistsError if that directory already exists.
+        """
+        time_string = time.strftime('%Y%m%d%H%M%S')
+        dirname = 'outputs/{}'.format(time_string)
+        self.dirname = dirname
+        # makedirs rather than mkdir: on a fresh checkout `outputs/` itself
+        # may not exist yet. An existing leaf directory still raises.
+        os.makedirs(dirname)
+
+    def copy_config(self, file):
+        """
+        Copies the configuration file into the output directory as
+        config.yaml.
+
+        Raises subprocess.CalledProcessError if `cp` fails.
+        """
+        # check=True: without it a failed copy went unnoticed.
+        subprocess.run(['cp', file, self.dir_path('config.yaml')], check=True)
+
+class ExperimentMetrics:
+    """
+    Accumulates train loss, test loss and test accuracy, one entry per call.
+
+    Values are stored as plain Python floats, so 0-d tensors passed in by the
+    caller end up as numbers (not `tensor(...)` objects) in the dataframe.
+    """
+
+    def __init__(self):
+        self.train_losses = []
+        self.test_losses = []
+        self.test_accuracies = []
+
+    def add_train_loss(self, loss):
+        """
+        Records one training loss (a number or a single-element tensor).
+        """
+        self.train_losses.append(float(loss))
+
+    def add_test_metrics(self, loss, accuracy):
+        """
+        Records one test loss and the matching test accuracy.
+        """
+        self.test_losses.append(float(loss))
+        self.test_accuracies.append(float(accuracy))
+
+    def get_dataframe(self):
+        """
+        Returns the recorded metrics as a DataFrame with the columns
+        train_losses, test_losses and test_accuracies.
+
+        The three lists must have equal length, i.e. add_train_loss and
+        add_test_metrics must have been called the same number of times.
+        """
+        return pd.DataFrame({
+            'train_losses': self.train_losses,
+            'test_losses': self.test_losses,
+            'test_accuracies': self.test_accuracies,
+        })
diff --git a/src/net_types.py b/src/net_types.py
new file mode 100644
index 0000000..bb52d50
--- /dev/null
+++ b/src/net_types.py
@@ -0,0 +1,5 @@
+import classification
+
+# Registry of network type modules, keyed by the name used in the `type`
+# field of a configuration file.
+types = dict(classification=classification)
diff --git a/src/parse_config.py b/src/parse_config.py
new file mode 100644
index 0000000..91b4a41
--- /dev/null
+++ b/src/parse_config.py
@@ -0,0 +1,104 @@
+import yaml
+import torch
+
+from config import ConvConfig, LinearConfig
+import net_types
+
+def get(dictionary, key):
+    """
+    Returns dictionary[key], or None when the key is absent.
+    """
+    value = dictionary.get(key, None)
+    return value
+
+def parse_file(file):
+    """
+    Loads the YAML document at path `file` and returns the result of
+    parse() applied to it.
+    """
+    # The handle gets its own name so the `file` path argument is not
+    # rebound, and the encoding is explicit so reading does not depend on
+    # the platform's locale.
+    with open(file, encoding='utf-8') as stream:
+        return parse(yaml.safe_load(stream))
+
+def _value_or(config, key, default):
+    """
+    Returns config[key], or `default` when the key is absent or null.
+    """
+    value = config.get(key)
+    return default if value is None else value
+
+def parse_convolution(config):
+    """
+    Builds a ConvConfig from one entry of the `convolutions` list.
+
+    `in_channels` and `out_channels` are required (KeyError if missing).
+    Optional keys and their defaults: max_pool=False, size=3, stride=1,
+    padding=1.
+    """
+    return ConvConfig(
+        config['in_channels'],
+        config['out_channels'],
+        _value_or(config, 'max_pool', False),
+        _value_or(config, 'size', 3),
+        _value_or(config, 'stride', 1),
+        # Only a missing/null value falls back to the default. The previous
+        # `get(...) or 1` also replaced an explicit `padding: 0` with 1.
+        _value_or(config, 'padding', 1),
+    )
+
+def parse_convolutions(config):
+    """
+    Parses every entry of the `convolutions` list, keeping the order.
+    """
+    return [parse_convolution(entry) for entry in config]
+
+def parse_linear(config):
+    """
+    Builds a LinearConfig from one entry of the `linears` list; both
+    `in_features` and `out_features` are required.
+    """
+    in_features = config['in_features']
+    out_features = config['out_features']
+    return LinearConfig(in_features, out_features)
+
+def parse_linears(config):
+    """
+    Parses every entry of the `linears` list, keeping the order.
+    """
+    return [parse_linear(entry) for entry in config]
+
+# TODO: temporary placeholder
+def foo(x):
+    """
+    Identity function: returns its argument unchanged.
+    """
+    result = x
+    return result
+
+# NOTE(review): a second `parse_type` is defined further down in this module
+# and replaces this one when the module is imported, so this version is
+# never called via the module-level name.
+def parse_type(typ):
+    """
+    Maps a network type name to a pair (number of outputs, finalizer).
+
+    Raises Exception for an unknown type name.
+    """
+    if typ == 'classification':
+        return 6, torch.sigmoid
+    if typ == 'counting-small':
+        return 60, foo
+    raise Exception('unknown network type: {}'.format(typ))
+
+def parse_net(config):
+    """
+    Extracts the network description from the configuration.
+
+    `convolutions` and `linears` are required lists; `batch_norm` and
+    `dropout` are optional and default to False.
+    """
+    return {
+        'convolutions': parse_convolutions(config['convolutions']),
+        'linears': parse_linears(config['linears']),
+        'batch_norm': get(config, 'batch_norm') or False,
+        'dropout': get(config, 'dropout') or False
+    }
+
+def parse_type(typ):
+    """
+    Looks up `typ` in net_types.types and returns the tuple
+    (outputs, finalizer, target_transform, loss_function, count_correct)
+    read from the registered entry.
+
+    Raises KeyError when `typ` is not registered.
+    """
+    net_type = net_types.types[typ]
+    attribute_names = (
+        'outputs',
+        'finalizer',
+        'target_transform',
+        'loss_function',
+        'count_correct',
+    )
+    return tuple(getattr(net_type, name) for name in attribute_names)
+
+def parse_augmentations(config):
+    """
+    Turns the list of augmentation entries into (rotation, vflip, hflip)
+    tuples; missing keys default to 0, False and False respectively.
+    """
+    def as_tuple(entry):
+        return (
+            get(entry, 'rotation') or 0,
+            get(entry, 'vflip') or False,
+            get(entry, 'hflip') or False,
+        )
+
+    return [as_tuple(entry) for entry in config]
+
+def parse(config):
+    """
+    Turns a loaded configuration dictionary into the tuple
+    (net, lr, epochs, batch_size, augmentations, target_transform,
+    loss_function, count_correct).
+
+    `net` is the dictionary from parse_net() extended with the `outputs`
+    and `finalizer` of the configured network type. `type`, `lr`, `epochs`
+    and `batch_size` are required keys; `augmentations` defaults to an
+    empty list.
+    """
+    net = parse_net(config)
+    (
+        outputs,
+        finalizer,
+        target_transform,
+        loss_function,
+        count_correct
+    ) = parse_type(config['type'])
+    net.update(outputs=outputs, finalizer=finalizer)
+    return (
+        net,
+        config['lr'],
+        config['epochs'],
+        config['batch_size'],
+        parse_augmentations(get(config, 'augmentations') or []),
+        target_transform,
+        loss_function,
+        count_correct
+    )
diff --git a/src/run.py b/src/run.py
new file mode 100644
index 0000000..db25e60
--- /dev/null
+++ b/src/run.py
@@ -0,0 +1,14 @@
+import sys
+
+from experiment import Experiment
+
+# The path of the YAML configuration file is the only, mandatory, argument.
+if len(sys.argv) < 2:
+    print('Provide YAML configuration file as argument')
+    # sys.exit rather than the bare `exit`, which is a helper injected by
+    # the `site` module and is not guaranteed to be available.
+    sys.exit(1)
+
+file = sys.argv[1]
+
+# Run the experiment described by the configuration, then write its results.
+experiment = Experiment(file)
+experiment.run()
+experiment.save_results()
diff --git a/src/runner.py b/src/runner.py
new file mode 100644
index 0000000..511f9a5
--- /dev/null
+++ b/src/runner.py
@@ -0,0 +1,77 @@
+import torch
+
+from parse_config import parse_file
+import data
+from net import Net
+
+class Runner:
+    """
+    Builds the data loaders, network and optimizer described by a
+    configuration file and runs the train/test loop, reporting one set of
+    numbers per epoch to `metrics`.
+    """
+
+    def __init__(self, file, metrics):
+        """
+        file: path to the YAML configuration file.
+        metrics: object whose add_train_loss(loss) and
+        add_test_metrics(loss, accuracy) are called once per epoch.
+        """
+        self.metrics = metrics
+        self.setup(file)
+
+    def setup(self, file):
+        """
+        Parses the configuration file and creates the loaders, the network
+        and an Adam optimizer with the configured learning rate.
+        """
+        (
+            net_config,
+            lr,
+            self.epochs,
+            batch_size,
+            augmentations,
+            target_transform,
+            self.loss_function,
+            self.count_correct
+        ) = parse_file(file)
+
+        self.train_loader, self.test_loader = data.get_loaders(
+            augmentations,
+            target_transform,
+            batch_size)
+
+        self.net = Net(**net_config)
+        self.optimizer = torch.optim.Adam(self.net.parameters(), lr=lr)
+
+    def run(self):
+        """
+        Runs the configured number of epochs, each one a training pass
+        followed by a test pass.
+        """
+        for _ in range(self.epochs):
+            self.train_step()
+            self.test_step()
+
+    def train_step(self):
+        """
+        Performs one epoch of training.
+
+        Reports the mean per-batch loss of the epoch to the metrics object
+        as a plain float.
+        """
+        self.net.train()
+        total_loss = 0.0
+        number_batches = 0
+        # `inputs` rather than `data`, which is the name of the imported
+        # data module.
+        for batch_idx, (inputs, target) in enumerate(self.train_loader):
+            number_batches += 1
+            self.optimizer.zero_grad()
+            output = self.net(inputs)
+            loss = self.loss_function(output, target)
+            loss.backward()
+            self.optimizer.step()
+            # Accumulate Python floats, not tensors, so the metrics hold
+            # numbers that serialize cleanly.
+            batch_loss = loss.item()
+            total_loss += batch_loss
+            if batch_idx % 10 == 0:
+                print('Training: [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
+                    batch_idx * len(inputs), len(self.train_loader.dataset),
+                    100. * batch_idx / len(self.train_loader), batch_loss))
+        self.metrics.add_train_loss(total_loss / number_batches)
+
+    def test_step(self):
+        """
+        Evaluates the network on the test loader without gradients.
+
+        Reports the mean per-batch loss (a float) and the fraction of
+        correct predictions over the whole test dataset.
+        """
+        self.net.eval()
+        test_loss = 0.0
+        correct = 0
+        number_batches = 0
+        with torch.no_grad():
+            for inputs, target in self.test_loader:
+                number_batches += 1
+                output = self.net(inputs)
+                test_loss += self.loss_function(output, target).item()
+                # NOTE(review): the type returned by count_correct is set by
+                # the network type module - confirm it is a plain number.
+                correct += self.count_correct(output, target)
+
+        test_loss /= number_batches
+        accuracy = correct / len(self.test_loader.dataset)
+
+        print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
+            test_loss, correct, len(self.test_loader.dataset), 100. * accuracy
+        ))
+
+        self.metrics.add_test_metrics(test_loss, accuracy)