Постоянная ошибка в нейронной сети, MatConvNet
Решено: Ранее в моем наборе данных было около 1000 изображений. Я увеличил его до 50 000, и теперь нейронная сеть учится и работает.
Я создал сверточную нейронную сеть для распознавания трех эмоций по выражению лица (позитивные, нейтральные, негативные эмоции). Почему-то моя функция ошибок не становится лучше ( изображение ошибки). Ошибка обучения и валидации постоянна на протяжении 100 эпох. Что может быть причиной?
Почему ошибка постоянна?
Вот мой код:
function training(varargin)
setup ;
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
rngNum = 1; % rng number for random weight initialization, e.g., 1,2,3
num_fcHiddenNeuron =1024; % # neurons in the fully-connected hidden layer
prob_fcDropout = 0.5; % dropout probability in the fully-connected hidden layer,
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% input data for training deep CNNs
imdb1 = load(['trainingdata']) ;
imdb2 = load(['testdata']) ;
imdb.images.data = cat(4, imdb1.images.data, imdb2.images.data);
imdb.images.labels = cat(2, imdb1.images.labels, imdb2.images.labels);
imdb.images.set = cat(2, imdb1.images.set, imdb2.images.set);
imdb.meta = imdb1.meta;
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
trainOpts.batchSize = 200 ;
trainOpts.numEpochs = 100 ;
trainOpts.gpus = [] ;
trainOpts.continue = true ;
trainOpts.learningRate = [0.004*ones(1,25), 0.002*ones(1,25), 0.001*ones(1,25), 0.0005*ones(1,25)];
trainOpts = vl_argparse(trainOpts, varargin);
%% Training Deep CNNs
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% CNN configuration
net.layers = {} ;
% %
% % %% Conv1 - MaxPool1
rng(rngNum) %control random number generation
net.layers{end+1} = struct('type', 'conv', ...
'weights', {{0.01*randn(3,3,1,32, 'single'), 0.1*ones(1, 32, 'single')}}, ...
'stride', 1, ...
'pad', 1, ...
'filtersLearningRate', 1, ...
'biasesLearningRate', 1, ...
'filtersWeightDecay', 1/5, ...
'biasesWeightDecay', 0) ;
net.layers{end+1} = struct('type', 'relu') ;
net.layers{end+1} = struct('type', 'pool', ...
'method', 'max', ...
'pool', [2 2], ...
'stride', 2, ...
'pad', 0) ;
% %%% Conv2 - MaxPool2
rng(rngNum)
net.layers{end+1} = struct('type', 'conv', ...
'weights', {{0.01*randn(3,3,32,32, 'single'), 0.1*ones(1, 32, 'single')}}, ...
'stride', 1, ...
'pad', 0, ...
'filtersLearningRate', 1, ...
'biasesLearningRate', 1, ...
'filtersWeightDecay', 1/5, ...
'biasesWeightDecay', 0) ;
net.layers{end+1} = struct('type', 'relu') ;
net.layers{end+1} = struct('type', 'pool', ...
'method', 'max', ...
'pool', [2 2], ...
'stride', 2, ...
'pad', [1, 0, 1, 0]) ;
% %%% Conv3 - MaxPool3
rng(rngNum)
net.layers{end+1} = struct('type', 'conv', ...
'weights', {{0.01*randn(3,3,32,64, 'single'), 0.1*ones(1, 64, 'single')}}, ...
'stride', 1, ...
'pad', 1, ...
'filtersLearningRate', 1, ...
'biasesLearningRate', 1, ...
'filtersWeightDecay', 1/5, ...
'biasesWeightDecay', 0) ;
net.layers{end+1} = struct('type', 'relu') ;
net.layers{end+1} = struct('type', 'pool', ...
'method', 'max', ...
'pool', [2 2], ...
'stride', 2, ...
'pad', 0) ;
% %%% Fc Hidden
rng(rngNum)
net.layers{end+1} = struct('type', 'conv', ...
'weights', {{0.001*randn(5,5,64,num_fcHiddenNeuron, 'single'), 0.01*ones(1, num_fcHiddenNeuron, 'single')}}, ...
'stride', 1, ...
'pad', 0, ...
'filtersLearningRate', 1, ...
'biasesLearningRate', 1, ...
'filtersWeightDecay', 1/5, ...
'biasesWeightDecay', 0) ;
net.layers{end+1} = struct('type', 'relu') ;
net.layers{end+1} = struct('type', 'dropout', ...
'rate', prob_fcDropout) ;
%
% %%% Fc Output
rng(rngNum)
net.layers{end+1} = struct('type', 'conv', ...
'weights', {{zeros(1,1,num_fcHiddenNeuron, 3, 'single'), zeros(1, 3, 'single')}}, ...
'stride', 1, ...
'pad', 0, ...
'filtersLearningRate', 1, ...
'biasesLearningRate', 1, ...
'filtersWeightDecay', 4, ...
'biasesWeightDecay', 0) ;
net.layers{end+1} = struct('type', 'softmaxloss') ;
% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% starting to train deep CNN
[net,info] = cnn_train(net, imdb, getBatch(opts), trainOpts, 'val', find(imdb.images.set == 2)) ;
net.layers(end) = [] ;
function fn = getBatch(opts)
% -------------------------------------------------------------------------
fn = @(x,y) getSimpleNNBatch(x,y) ;
end
% -------------------------------------------------------------------------
function [images, labels] = getSimpleNNBatch(imdb, batch)
% -------------------------------------------------------------------------
images = imdb.images.data(:,:,:,batch) ;
labels = imdb.images.labels(1,batch) ;
end