Почему RandomCrop с размером 84 и отступом 8 возвращает размер изображения 84, а не 100 в pytorch?

Я использовал набор данных mini-imagenet и заметил эту строку кода:

          elif data_augmentation == 'lee2019:
        normalize = Normalize(
            mean=[120.39586422 / 255.0, 115.59361427 / 255.0, 104.54012653 / 255.0],
            std=[70.68188272 / 255.0, 68.27635443 / 255.0, 72.54505529 / 255.0],
        )
        train_data_transforms = Compose([
            ToPILImage(),
            RandomCrop(84, padding=8),
            ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4),
            RandomHorizontalFlip(),
            ToTensor(),
            normalize,
        ])
        test_data_transforms = Compose([
            normalize,
        ])

но когда я проверил размер изображения, он был 84 вместо 100 (после добавления заполнения):

      X.size()=torch.Size([50, 3, 84, 84])

что с этим происходит? Разве не должно быть 100?


воспроизведение:

      import random
from typing import Callable

import learn2learn as l2l
import numpy as np
import torch
from learn2learn.data import TaskDataset, MetaDataset, DataDescription
from learn2learn.data.transforms import TaskTransform
from torch.utils.data import Dataset


class IndexableDataSet(Dataset):

    def __init__(self, datasets):
        self.datasets = datasets

    def __len__(self) -> int:
        return len(self.datasets)

    def __getitem__(self, idx: int):
        return self.datasets[idx]


class SingleDatasetPerTaskTransform(Callable):
    """
    Transform that samples a data set first, then creates a task (e.g. n-way, k-shot) and finally
    applies the remaining task transforms.
    """

    def __init__(self, indexable_dataset: IndexableDataSet, cons_remaining_task_transforms: Callable):
        """

        :param: cons_remaining_task_transforms; constructor that builds the remaining task transforms. Cannot be a list
        of transforms because we don't know apriori which is the data set we will use. So this function should be of
        type MetaDataset -> list[TaskTransforms] i.e. given the dataset it returns the transforms for it.
        """
        self.indexable_dataset = MetaDataset(indexable_dataset)
        self.cons_remaining_task_transforms = cons_remaining_task_transforms

    def __call__(self, task_description: list):
        """
        idea:
        - receives the index of the dataset to use
        - then use the normal NWays l2l function
        """
        # - this is what I wish could have gone in a seperate callable transform, but idk how since the transforms take apriori (not dynamically) which data set to use.
        i = random.randint(0, len(self.indexable_dataset) - 1)
        task_description = [DataDescription(index=i)]  # using this to follow the l2l convention

        # - get the sampled data set
        dataset_index = task_description[0].index
        dataset = self.indexable_dataset[dataset_index]
        dataset = MetaDataset(dataset)

        # - use the sampled data set to create task
        remaining_task_transforms: list[TaskTransform] = self.cons_remaining_task_transforms(dataset)
        description = None
        for transform in remaining_task_transforms:
            description = transform(description)
        return description


def sample_dataset(dataset):
    def sample_random_dataset(x):
        print(f'{x=}')
        i = random.randint(0, len(dataset) - 1)
        return [DataDescription(index=i)]
        # return dataset[i]

    return sample_random_dataset


def get_task_transforms(dataset: IndexableDataSet) -> list[TaskTransform]:
    """
    :param dataset:
    :return:
    """
    transforms = [
        sample_dataset(dataset),
        l2l.data.transforms.NWays(dataset, n=5),
        l2l.data.transforms.KShots(dataset, k=5),
        l2l.data.transforms.LoadData(dataset),
        l2l.data.transforms.RemapLabels(dataset),
        l2l.data.transforms.ConsecutiveLabels(dataset),
    ]
    return transforms


def print_datasets(dataset_lst: list):
    for dataset in dataset_lst:
        print(f'\n{dataset=}\n')


def get_indexable_list_of_datasets_mi_and_cifarfs(root: str = '~/data/l2l_data/') -> IndexableDataSet:
    from learn2learn.vision.benchmarks import mini_imagenet_tasksets
    datasets, transforms = mini_imagenet_tasksets(root=root)
    mi = datasets[0].dataset

    from learn2learn.vision.benchmarks import cifarfs_tasksets
    datasets, transforms = cifarfs_tasksets(root=root)
    cifarfs = datasets[0].dataset

    dataset_list = [mi, cifarfs]

    dataset_list = [l2l.data.MetaDataset(dataset) for dataset in dataset_list]
    dataset = IndexableDataSet(dataset_list)
    return dataset


# -- tests

def loop_through_l2l_indexable_datasets_test():
    """

    :return:
    """
    # - for determinism
    random.seed(0)
    torch.manual_seed(0)
    np.random.seed(0)

    # - options for number of tasks/meta-batch size
    batch_size: int = 10

    # - create indexable data set
    indexable_dataset: IndexableDataSet = get_indexable_list_of_datasets_mi_and_cifarfs()

    # - get task transforms
    def get_remaining_transforms(dataset: MetaDataset) -> list[TaskTransform]:
        remaining_task_transforms = [
            l2l.data.transforms.NWays(dataset, n=5),
            l2l.data.transforms.KShots(dataset, k=5),
            l2l.data.transforms.LoadData(dataset),
            l2l.data.transforms.RemapLabels(dataset),
            l2l.data.transforms.ConsecutiveLabels(dataset),
        ]
        return remaining_task_transforms
    task_transforms: TaskTransform = SingleDatasetPerTaskTransform(indexable_dataset, get_remaining_transforms)

    # -
    taskset: TaskDataset = TaskDataset(dataset=indexable_dataset, task_transforms=task_transforms)

    # - loop through tasks
    for task_num in range(batch_size):
        print(f'{task_num=}')
        X, y = taskset.sample()
        print(f'{X.size()=}')
        print(f'{y.size()=}')
        print(f'{y=}')
        print()

    print('-- end of test --')


# -- Run experiment

if __name__ == "__main__":
    import time
    from uutils import report_times

    start = time.time()
    # - run experiment
    loop_through_l2l_indexable_datasets_test()
    # - Done
    print(f"\nSuccess Done!: {report_times(start)}\a")

контекст: https://github.com/learnables/learn2learn/issues/333

кросспост:

1 ответ

Заполнение применяется к входному изображению или тензору перед применением случайной обрезки. В конечном итоге выходное изображение имеет пространственный размер, равный предоставленному размеру (размерам), заданному дляT.RandomCropфункция, так как операция выполняется после.

В конце концов, имеет смысл дополнять входное изображение, а не обрезанное, не так ли?

Другие вопросы по тегам