Как использовать MapDataset в качестве входных данных для ImageDataGenerator?
Я написал код, который получает изображения на входе и оценивает значения для 29 классов на выходе. Код отлично работает без аугментации данных. Однако я не могу заставить его выполнять аугментацию изображений для обучения модели TensorFlow. Вот ошибка:
TypeError: float() argument must be a string or a number, not 'MapDataset'
Ниже приведена функция, которая получает мои обучающие изображения и соответствующие им метки (массив из 29 столбцов).
Буду признателен за любые мысли / предложения, которые могут у вас возникнуть.
def get_training_dataset(image_paths, label_map_paths):
    '''
    Prepares shuffled, augmented batches of the training set.

    Args:
      image_paths (list of strings) -- paths to each image file in the train set
      label_map_paths (list of strings) -- paths to each label map in the train set

    Returns:
      a Keras NumpyArrayIterator yielding augmented (image, label) batches
    '''
    training_dataset = tf.data.Dataset.from_tensor_slices((image_paths, label_map_paths))
    training_dataset = training_dataset.map(map_filename_to_image_and_mask)

    # ImageDataGenerator.flow expects in-memory numpy arrays, not a tf.data
    # Dataset — passing a MapDataset is what raises
    # "TypeError: float() argument must be a string or a number, not 'MapDataset'".
    # Materialize the mapped dataset into arrays first.
    images = np.stack([image.numpy() for image, _ in training_dataset])
    labels = np.stack([annotation.numpy() for _, annotation in training_dataset])

    datagen = ImageDataGenerator(
        featurewise_center=True,
        featurewise_std_normalization=True,
        rotation_range=10,
        width_shift_range=0.1,
        height_shift_range=0.1,
        shear_range=0.1,
        horizontal_flip=True,
        vertical_flip=True)
    # featurewise_center / featurewise_std_normalization require the dataset
    # statistics to be computed before flow() can apply them.
    datagen.fit(images)
    return datagen.flow(images, labels)
Вот две другие функции, которые я использовал в приведенной выше функции:
def get_dataset_slice_paths(image_dir, image_list):
    '''
    Generates the list of image paths and the matching label matrix.

    Args:
      image_dir (string) -- path to the input images directory
      image_list -- list of input image file names (used to filter the rows
        of the ESI_data table)

    Returns:
      image_paths (list of strings) -- paths to each image file
      label_map (np.ndarray) -- float32 array of shape (n_rows, 29)
    '''
    image_paths = [os.path.join(image_dir, fname) for fname in image_list]
    # Collect the matching rows first and concatenate once at the end:
    # calling np.append inside the loop copies the entire accumulated array
    # on every iteration (quadratic time).
    # NOTE(review): assumes columns 6:35 of ESI_data hold the 29 class
    # values keyed by 'FileName' — confirm against the csv schema.
    row_blocks = [ESI_data[ESI_data['FileName'] == fname].iloc[:, 6:35]
                  for fname in image_list]
    label_map = np.concatenate([np.empty([0, 29])] + row_blocks, axis=0)
    return image_paths, label_map.astype('float32')
def map_filename_to_image_and_mask(t_filename, label_map):
    '''
    Preprocesses one sample by:
      * decoding and resizing the input image
      * normalizing the input image pixels to [-1, 1]

    Args:
      t_filename (string) -- path to the raw input image
      label_map (array) -- a 29-column array with values for each class

    Returns:
      image (tensor) -- preprocessed image of shape (height, width, 3)
      annotation (tensor) -- fraction cover of each species as a float32 tensor
    '''
    # convert image file and label row to tensors
    img_raw = tf.io.read_file(t_filename)
    # channels=3 forces RGB output so grayscale/RGBA jpegs don't break the
    # (height, width, 3) reshape below.
    image = tf.image.decode_jpeg(img_raw, channels=3)
    annotation = tf.convert_to_tensor(label_map, dtype=tf.float32)
    # resize the image and pin its static shape for downstream layers
    image = tf.image.resize(image, (height, width))
    image = tf.reshape(image, (height, width, 3))
    # scale pixels from [0, 255] to [-1, 1]
    image = image / 127.5
    image -= 1
    return image, annotation
1 ответ
На сегодняшний день решение моей проблемы — полностью отказаться от использования
MapDataset
. Вот модифицированная функция
get_training_dataset
:
def get_training_dataset(image_paths, label_map_paths):
    '''
    Prepares shuffled, augmented batches of the training set.

    Args:
      image_paths (list of strings) -- paths to each image file in the train set
      label_map_paths (list of strings) -- paths to each label map in the train set

    Returns:
      a Keras iterator yielding augmented (image, label) batches
    '''
    # Load every image/label pair into in-memory numpy structures up front,
    # since ImageDataGenerator.flow works on arrays rather than tf Datasets.
    images, annotations = map_filename_to_training_dataset(
        image_paths, label_map_paths)

    augmenter = ImageDataGenerator(
        rotation_range=10,
        width_shift_range=0.2,
        height_shift_range=0.2,
        zoom_range=[0.95, 1.05],
        shear_range=0.5,
        fill_mode="reflect",
        horizontal_flip=True,
        vertical_flip=True)

    return augmenter.flow(images, annotations,
                          batch_size=BATCH_SIZE, shuffle=True)
def map_filename_to_training_dataset(t_filename, label_map):
    '''
    Preprocesses the dataset by:
      * resizing each input image
      * normalizing the input image pixels to [-1, 1]

    Args:
      t_filename (list of strings) -- paths to the raw input images
      label_map (array) -- a 29-column array with values for each class,
        one row per image

    Returns:
      image_set (np.ndarray) -- preprocessed images, shape (n, height, width, 3)
      annotation_set (list of tensors) -- fraction cover of each species
    '''
    # Preallocate with the same (height, width) used for resizing below.
    # The original hard-coded (224, 224) here, which silently breaks
    # whenever the module-level height/width globals differ from 224.
    image_set = np.empty((len(label_map), height, width, 3), dtype=np.float32)
    annotation_set = []
    for i, fname in enumerate(t_filename):
        # convert the label row and image file to tensors
        annotation_set.append(tf.convert_to_tensor(label_map[i], dtype=tf.float32))
        img_raw = tf.io.read_file(fname)
        # channels=3 guards against grayscale jpegs breaking the reshape
        image = tf.image.decode_jpeg(img_raw, channels=3)
        # resize the image and pin its static shape
        image = tf.image.resize(image, (height, width))
        image = tf.reshape(image, (height, width, 3))
        # scale pixels from [0, 255] to [-1, 1]
        image = image / 127.5 - 1
        image_set[i] = image
    return image_set, annotation_set