From 290d55c4353a7374da14d67bc9ab3d33c236fa95 Mon Sep 17 00:00:00 2001
From: Marcin Chrzanowski
Date: Sat, 1 May 2021 15:20:36 +0200
Subject: Implement configurable experiment runner

---
 src/experiment.py   |  95 ++++++++++++++++++++++++++++++++++
 src/net_types.py    |   5 ++
 src/parse_config.py | 144 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/run.py          |  16 ++++++
 src/runner.py       |  92 +++++++++++++++++++++++++++++++++
 5 files changed, 352 insertions(+)
 create mode 100644 src/experiment.py
 create mode 100644 src/net_types.py
 create mode 100644 src/parse_config.py
 create mode 100644 src/run.py
 create mode 100644 src/runner.py

diff --git a/src/experiment.py b/src/experiment.py
new file mode 100644
index 0000000..dd2033c
--- /dev/null
+++ b/src/experiment.py
@@ -0,0 +1,95 @@
+"""Run one configured experiment and store its metrics and plots."""
+import subprocess
+import os
+import time
+
+import pandas as pd
+import matplotlib.pyplot as plt
+
+from runner import Runner
+
+
+class Experiment:
+    """A single run with its own timestamped output directory."""
+
+    def __init__(self, file):
+        """Create the output directory and a Runner for config ``file``."""
+        self.make_dir()
+        self.copy_config(file)
+        self.metrics = ExperimentMetrics()
+        self.runner = Runner(file, self.metrics)
+
+    def run(self):
+        """Run the experiment; metrics accumulate in ``self.metrics``."""
+        self.runner.run()
+
+    def save_results(self):
+        """Write metrics.csv, losses.png and accuracies.png."""
+        data = self.metrics.get_dataframe()
+        data.to_csv(self.dir_path('metrics.csv'))
+
+        plt.plot(data['train_losses'], label='train loss')
+        plt.plot(data['test_losses'], label='test loss')
+        plt.xlabel('Epoch')
+        plt.ylabel('Loss')
+        plt.legend()
+        plt.savefig(self.dir_path('losses.png'))
+        plt.clf()
+
+        # Accuracies are stored as fractions; scale them to match the
+        # '% correct' axis label.
+        plt.plot(100 * data['test_accuracies'], label='test accuracy')
+        plt.xlabel('Epoch')
+        plt.ylabel('% correct')
+        plt.legend()
+        plt.savefig(self.dir_path('accuracies.png'))
+        # Do not leak this plot into pyplot's implicit current figure.
+        plt.clf()
+
+    def dir_path(self, file):
+        """Return the path of ``file`` inside the output directory."""
+        return '{}/{}'.format(self.dirname, file)
+
+    def make_dir(self):
+        """Create ``outputs/YYYYmmddHHMMSS`` and store it in ``dirname``."""
+        time_string = time.strftime('%Y%m%d%H%M%S')
+        dirname = 'outputs/{}'.format(time_string)
+        self.dirname = dirname
+        # makedirs also creates a missing ``outputs/`` parent; it still
+        # raises if the timestamped directory itself already exists.
+        os.makedirs(dirname)
+
+    def copy_config(self, file):
+        """Copy ``file`` into the output directory as ``config.yaml``."""
+        # check=True so that a failed copy raises instead of being ignored.
+        subprocess.run(
+            ['cp', file, '{}/config.yaml'.format(self.dirname)],
+            check=True)
+
+
+class ExperimentMetrics:
+    """Per-epoch training loss, test loss and test accuracy."""
+
+    def __init__(self):
+        self.train_losses = []
+        self.test_losses = []
+        self.test_accuracies = []
+
+    def add_train_loss(self, loss):
+        """Append one epoch's training loss."""
+        # float() unwraps 0-d tensors so the CSV holds plain numbers
+        # rather than tensor reprs.
+        self.train_losses.append(float(loss))
+
+    def add_test_metrics(self, loss, accuracy):
+        """Append one epoch's test loss and accuracy."""
+        self.test_losses.append(float(loss))
+        self.test_accuracies.append(float(accuracy))
+
+    def get_dataframe(self):
+        """Return the collected metrics as a DataFrame, one row per epoch."""
+        return pd.DataFrame({
+            'train_losses': self.train_losses,
+            'test_losses': self.test_losses,
+            'test_accuracies': self.test_accuracies,
+        })
diff --git a/src/net_types.py b/src/net_types.py
new file mode 100644
index 0000000..bb52d50
--- /dev/null
+++ b/src/net_types.py
@@ -0,0 +1,5 @@
+import classification
+
+types = {
+    'classification': classification
+}
diff --git a/src/parse_config.py b/src/parse_config.py
new file mode 100644
index 0000000..91b4a41
--- /dev/null
+++ b/src/parse_config.py
@@ -0,0 +1,144 @@
+"""Parse a YAML experiment configuration."""
+import yaml
+import torch
+
+from config import ConvConfig, LinearConfig
+import net_types
+
+
+def get(dictionary, key):
+    """Return ``dictionary[key]``, or None if the key is absent."""
+    return dictionary.get(key)
+
+
+def parse_file(file):
+    """Load the YAML file at path ``file`` and return ``parse`` of it."""
+    # Named ``stream`` so the handle does not shadow the path argument.
+    with open(file) as stream:
+        return parse(yaml.safe_load(stream))
+
+
+def parse_convolution(config):
+    """Build a ConvConfig from one entry of the ``convolutions`` list.
+
+    ``in_channels`` and ``out_channels`` are required; ``max_pool``,
+    ``size``, ``stride`` and ``padding`` default to False, 3, 1 and 1.
+    """
+    max_pool = get(config, 'max_pool') or False
+    size = get(config, 'size') or 3
+    stride = get(config, 'stride') or 1
+    # ``or 1`` would silently turn an explicit ``padding: 0`` into 1, so
+    # fall back to the default only when the key is missing or null.
+    padding = get(config, 'padding')
+    if padding is None:
+        padding = 1
+    return ConvConfig(
+        config['in_channels'],
+        config['out_channels'],
+        max_pool,
+        size,
+        stride,
+        padding
+    )
+
+
+def parse_convolutions(config):
+    """Parse every entry of the ``convolutions`` list, in order."""
+    return [parse_convolution(entry) for entry in config]
+
+
+def parse_linear(config):
+    """Build a LinearConfig from one entry of the ``linears`` list."""
+    return LinearConfig(config['in_features'], config['out_features'])
+
+
+def parse_linears(config):
+    """Parse every entry of the ``linears`` list, in order."""
+    return [parse_linear(entry) for entry in config]
+
+
+def parse_net(config):
+    """Collect the network settings of ``config`` into a dict.
+
+    ``batch_norm`` and ``dropout`` default to False.
+    """
+    convolutions = parse_convolutions(config['convolutions'])
+    linears = parse_linears(config['linears'])
+    batch_norm = get(config, 'batch_norm') or False
+    dropout = get(config, 'dropout') or False
+    return {
+        'convolutions': convolutions,
+        'linears': linears,
+        'batch_norm': batch_norm,
+        'dropout': dropout
+    }
+
+
+def parse_type(typ):
+    """Return the settings of the network type named ``typ``.
+
+    The result is the tuple ``(outputs, finalizer, target_transform,
+    loss_function, count_correct)`` read from ``net_types.types[typ]``.
+
+    Raises:
+        KeyError: if ``typ`` is not registered in ``net_types.types``.
+    """
+    try:
+        net_type = net_types.types[typ]
+    except KeyError:
+        raise KeyError('unknown network type: {}'.format(typ)) from None
+    return (
+        net_type.outputs,
+        net_type.finalizer,
+        net_type.target_transform,
+        net_type.loss_function,
+        net_type.count_correct
+    )
+
+
+def parse_augmentations(config):
+    """Return a ``(rotation, vflip, hflip)`` tuple per augmentation entry.
+
+    Missing values default to 0, False and False respectively.
+    """
+    augmentations = []
+    for augmentation_config in config:
+        rotation = get(augmentation_config, 'rotation') or 0
+        vflip = get(augmentation_config, 'vflip') or False
+        hflip = get(augmentation_config, 'hflip') or False
+        augmentations.append((rotation, vflip, hflip))
+    return augmentations
+
+
+def parse(config):
+    """Turn the loaded configuration dict into the runner's settings.
+
+    Returns the tuple ``(net, lr, epochs, batch_size, augmentations,
+    target_transform, loss_function, count_correct)``, where ``net`` is
+    the dict from ``parse_net`` extended with ``outputs`` and
+    ``finalizer``.
+    """
+    net = parse_net(config)
+    (
+        outputs,
+        finalizer,
+        target_transform,
+        loss_function,
+        count_correct
+    ) = parse_type(config['type'])
+    net['outputs'] = outputs
+    net['finalizer'] = finalizer
+    lr = config['lr']
+    epochs = config['epochs']
+    batch_size = config['batch_size']
+    augmentations = parse_augmentations(get(config, 'augmentations') or [])
+    return (
+        net,
+        lr,
+        epochs,
+        batch_size,
+        augmentations,
+        target_transform,
+        loss_function,
+        count_correct
+    )
diff --git a/src/run.py b/src/run.py
new file mode 100644
index 0000000..db25e60
--- /dev/null
+++ b/src/run.py
@@ -0,0 +1,16 @@
+"""Command-line entry point: run the experiment in a YAML config file."""
+import sys
+
+from experiment import Experiment
+
+if len(sys.argv) < 2:
+    print('Provide YAML configuration file as argument')
+    # sys.exit, because the bare ``exit`` helper is only installed by the
+    # ``site`` module and is meant for interactive use.
+    sys.exit(1)
+
+file = sys.argv[1]
+
+experiment = Experiment(file)
+experiment.run()
+experiment.save_results()
diff --git a/src/runner.py b/src/runner.py
new file mode 100644
index 0000000..511f9a5
--- /dev/null
+++ b/src/runner.py
@@ -0,0 +1,92 @@
+"""Training and evaluation loop for a configured network."""
+import torch
+
+from parse_config import parse_file
+import data
+from net import Net
+
+
+class Runner:
+    """Trains and evaluates a Net built from a configuration file."""
+
+    def __init__(self, file, metrics):
+        """Set up the run described by ``file``.
+
+        ``metrics`` must provide ``add_train_loss(loss)`` and
+        ``add_test_metrics(loss, accuracy)``; both are called once per
+        epoch.
+        """
+        self.metrics = metrics
+        self.setup(file)
+
+    def setup(self, file):
+        """Parse ``file`` and create the loaders, network and optimizer."""
+        (
+            net_config,
+            lr,
+            self.epochs,
+            batch_size,
+            augmentations,
+            target_transform,
+            self.loss_function,
+            self.count_correct
+        ) = parse_file(file)
+
+        self.train_loader, self.test_loader = data.get_loaders(
+            augmentations,
+            target_transform,
+            batch_size)
+
+        self.net = Net(**net_config)
+        self.optimizer = torch.optim.Adam(self.net.parameters(), lr=lr)
+
+    def run(self):
+        """Run one training pass and one test pass per epoch."""
+        for epoch in range(self.epochs):
+            self.train_step()
+            self.test_step()
+
+    def train_step(self):
+        """
+        Performs one epoch of training.
+        """
+        self.net.train()
+        total_loss = 0
+        number_batches = 0
+        # The batch is called ``inputs`` because ``data`` would shadow the
+        # imported ``data`` module.
+        for batch_idx, (inputs, target) in enumerate(self.train_loader):
+            number_batches += 1
+            self.optimizer.zero_grad()
+            output = self.net(inputs)
+            loss = self.loss_function(output, target)
+            loss.backward()
+            self.optimizer.step()
+            total_loss += loss.detach()
+            if batch_idx % 10 == 0:
+                print('Training: [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
+                    batch_idx * len(inputs), len(self.train_loader.dataset),
+                    100. * batch_idx / len(self.train_loader), loss.item()))
+        self.metrics.add_train_loss(total_loss/number_batches)
+
+    def test_step(self):
+        """Evaluate on the test set and record mean loss and accuracy."""
+        self.net.eval()
+        test_loss = 0
+        correct = 0
+        number_batches = 0
+        with torch.no_grad():
+            for inputs, target in self.test_loader:
+                number_batches += 1
+                output = self.net(inputs)
+                test_loss += self.loss_function(output, target)
+                correct += self.count_correct(output, target)
+
+        test_loss /= number_batches
+        accuracy = correct / len(self.test_loader.dataset)
+
+        print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
+            test_loss, correct, len(self.test_loader.dataset), 100. * accuracy
+        ))
+
+        self.metrics.add_test_metrics(test_loss, accuracy)
-- 
cgit v1.2.3