From 9e22e1e8a221ce55789438956055bb02b3069162 Mon Sep 17 00:00:00 2001
From: Marcin Chrzanowski
Date: Thu, 29 Apr 2021 11:33:08 +0200
Subject: Add augmentations

---
 src/data.py | 113 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 108 insertions(+), 5 deletions(-)

diff --git a/src/data.py b/src/data.py
index 6785511..ff7aca5 100644
--- a/src/data.py
+++ b/src/data.py
@@ -4,10 +4,13 @@ import numpy as np
 import pandas as pd
 import torch
 import torchvision
+import torchvision.transforms.functional as F
 
 class ShapesData(torch.utils.data.Dataset):
-    def __init__(self, train):
+    def __init__(self, train, transform=None, target_transform=None):
         self.train = train
+        self.transform = transform
+        self.target_transform = target_transform
 
         labels_file = './data/labels.csv'
         self.labels = pd.read_csv(labels_file)
@@ -29,12 +32,112 @@ class ShapesData(torch.utils.data.Dataset):
     def __getitem__(self, i):
         i = self.get_index(i)
         filename = os.path.join('./data', self.labels.loc[i, 'name'])
+        image = torchvision.io.read_image(filename)
+        # Images as we read them have four channels, each with values in
+        # (0, 255). Take the first channel and rescale to (-1, 1).
+        image = (image[0] / 255 - 0.5) * 2
+        if self.transform:
+            image = self.transform(image)
+        labels = self.labels.loc[i, 'squares':'left']
+        labels = torch.tensor(labels.values.astype(np.float32))
+        if self.target_transform:
+            labels = self.target_transform(labels)
+
         sample = [
-            # images as we read them have values in (0, 255)
-            # rescale to (-1, 1)
-            (image / 255 - 0.5) * 2,
-            torch.tensor(labels.values.astype(np.float32))
+            image,
+            labels
         ]
         return sample
+
+def rotation_transform(image, rotation):
+    return F.rotate(image.unsqueeze(0), angle=(rotation * 90))
+
+def rotation_target_transform(labels, rotation):
+    labels[2:6] = labels[2:6][[*range(rotation, 4), *range(0, rotation)]]
+    return labels
+
+def make_rotation(rotation):
+    return torchvision.transforms.Lambda(
+        lambda image: rotation_transform(image, rotation)
+    ), torchvision.transforms.Lambda(
+        lambda labels: rotation_target_transform(labels, rotation)
+    )
+
+def vertical_flip_target_transform(labels):
+    labels[[2, 4]] = labels[[4, 2]]
+    return labels
+
+def make_vertical_flip():
+    return torchvision.transforms.Lambda(
+        F.vflip
+    ), torchvision.transforms.Lambda(
+        vertical_flip_target_transform
+    )
+
+def horizontal_flip_target_transform(labels):
+    labels[[3, 5]] = labels[[5, 3]]
+    return labels
+
+def make_horizontal_flip():
+    return torchvision.transforms.Lambda(
+        F.hflip
+    ), torchvision.transforms.Lambda(
+        horizontal_flip_target_transform
+    )
+
+def make_transforms(rotation, flip_vertical, flip_horizontal):
+    """
+    Returns an image and label transform for rotating and/or flipping images.
+    - rotation: 0-3, indicating number of counterclockwise 90° turns
+    - flip_vertical, flip_horizontal: booleans
+    """
+    transforms = []
+    target_transforms = []
+    if rotation > 0:
+        rotation, target_rotation = make_rotation(rotation)
+        transforms.append(rotation)
+        target_transforms.append(target_rotation)
+    if flip_vertical:
+        flip, target_flip = make_vertical_flip()
+        transforms.append(flip)
+        target_transforms.append(target_flip)
+    if flip_horizontal:
+        flip, target_flip = make_horizontal_flip()
+        transforms.append(flip)
+        target_transforms.append(target_flip)
+
+    if transforms:
+        # The rotation transform adds an extra dimension, which screws up the net
+        transforms.append(torchvision.transforms.Lambda(lambda image:
+            image.squeeze()
+        ))
+
+    return (torchvision.transforms.Compose(transforms),
+            torchvision.transforms.Compose(target_transforms))

+def make_augmented_data(augmentations, after_target_transform=None):
+    """
+    Returns concatenated ShapesData train datasets, each modified by an
+    augmentation.
+    - augmentations: list of triples (rotation, vertical flip, horizontal flip).
+    - after_target_transform: an additional transform to apply after the
+      augmentation ones.
+    """
+    datasets = []
+    for rotation, vertical_flip, horizontal_flip in augmentations:
+        transform, target_transform = make_transforms(
+            rotation, vertical_flip, horizontal_flip
+        )
+        if after_target_transform:
+            target_transform = torchvision.transforms.Compose([
+                target_transform,
+                after_target_transform
+            ])
+        dataset = ShapesData(
+            True, transform=transform, target_transform=target_transform
+        )
+        datasets.append(dataset)
+
+    return torch.utils.data.ConcatDataset(datasets)
-- 
cgit v1.2.3
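
A minimal usage sketch of make_augmented_data (not part of the commit), assuming src/data.py above is importable as `data` and ./data/labels.csv is in place; the augmentation list and batch size below are only examples:

import torch
from data import make_augmented_data

# (rotation, flip_vertical, flip_horizontal) triples; rotation counts
# counterclockwise 90° turns, per the make_transforms docstring above.
augmentations = [
    (0, False, False),  # original images
    (1, False, False),  # 90°
    (2, False, False),  # 180°
    (3, False, False),  # 270°
    (0, True, False),   # vertical flip
    (0, False, True),   # horizontal flip
]

train_data = make_augmented_data(augmentations)
loader = torch.utils.data.DataLoader(train_data, batch_size=32, shuffle=True)

Each triple yields its own ShapesData instance, so the resulting ConcatDataset is six times the size of the base training set.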