Батчнормализация пирога во время испытаний
Это новый вопрос, который я задаю здесь, но по какой-то причине, когда я изменяю размер партии во время тестирования, точность моей модели изменяется. Уменьшение размера партии снижает точность до тех пор, пока размер партии 1 не приведет к точности 11%, хотя та же самая модель дает мне точность 97% при размере тестовой партии 512 (я обучил ее с размером партии 512). Я использую предварительно обученную модель resnet 50 и настраиваю ее на своих собственных изображениях, а также правильно использую.train() и.eval() во время обучения и тестирования. Лучшая причина, по которой я могу придумать, заключается в том, что по какой-то причине слои нормализации партии в модели все еще отслеживают статистику партии во время тестирования (что они не должны делать, вместо этого они должны использовать те, которые были сохранены во время обучение), поскольку размер пакета 1 должен приводить к среднему значению (x) = x, а выходной сигнал слоя bn станет равным 0, что приведет к нулевому прогнозированию на выходе и, следовательно, к точности 11%, поскольку данные 11% относятся к классу 0. Также когда я отправляю загрузчики проверки и тестирования в произвольном порядке, я получаю неверные числа, но когда я их перемешиваю, те же наборы дают мне точность +96%. Может кто-нибудь помочь мне, пожалуйста? Спасибо вам большое!
Вот мой код (определение модели)
class ResNet(nn.Module):
"""
Get a pretrained VGG network (on ImageNet) and try to finetune it on EuroSat images
Reported acc is > 98% on Resnet-50, let's see what can we get from a VGG network
"""
def __init__(self, in_channels):
super(ResNet, self).__init__()
graph = models.resnet50(pretrained=True)
removed = list(graph.children())[:-2]
with_dropout = []
with_dropout.append(removed[0])
with_dropout.append(removed[1])
with_dropout.append(removed[2])
with_dropout.append(removed[3])
for part in removed[4:]:
with_dropout.append(part)
with_dropout.append(nn.Dropout2d(p=0.8))
# print(with_dropout)
self.feature_extracter = torch.nn.Sequential(*with_dropout)
self.kill = nn.Dropout(p=0.8)
self.classifier = nn.Sequential(
nn.Linear(in_features=2048*4, out_features=1024),
nn.ReLU(),
nn.Linear(in_features=1024, out_features=512),
nn.ReLU(),
nn.Dropout(p=0.8),
nn.Linear(in_features=512, out_features=256),
nn.ReLU(),
nn.Linear(in_features=256, out_features=128),
nn.ReLU(),
nn.Dropout(p=0.8),
nn.Linear(in_features=128, out_features=10),
nn.LogSoftmax(dim=0)
)
def forward(self, x):
x = self.feature_extracter(x)
x = self.kill(x)
x = self.classifier(x.view(x.size(0), -1))
return x, torch.argmax(input=x, dim=1)
и для обучения я делаю это.
def train_net(model, base_folder, pre_model, save_dir, batch_size, lr, log_after, cuda, device):
if not pre_model:
print(model)
writer = SummaryWriter()
if cuda:
print('GPU')
model.cuda(device=device)
print('log: training started on device: {}'.format(device))
# define loss and optimizer
optimizer = Adam(model.parameters(), lr=lr)
criterion = nn.CrossEntropyLoss()
train_loader, val_dataloader, test_loader = get_dataloaders(base_folder=base_folder,
batch_size=batch_size)
if not os.path.exists(save_dir):
os.mkdir(save_dir)
if True:
i = 1
m_loss, m_accuracy = [], []
if pre_model:
# self.load_state_dict(torch.load(pre_model)['model'])
model.load_state_dict(torch.load(pre_model))
print('log: resumed model {} successfully!'.format(pre_model))
print(model)
# starting point
# model_number = int(pre_model.split('/')[1].split('-')[1].split('.')[0])
model_number = int(re.findall('\d+', str(pre_model))[0])
i = i + model_number - 1
else:
print('log: starting anew using ImageNet weights...')
while True:
i += 1
net_loss = []
# new model path
save_path = os.path.join(save_dir, 'model-{}.pt'.format(i))
# remember to save only five previous models, so
del_this = os.path.join(save_dir, 'model-{}.pt'.format(i - 6))
if os.path.exists(del_this):
os.remove(del_this)
print('log: removed {}'.format(del_this))
if i > 1 and not os.path.exists(save_path):
torch.save(model.state_dict(), save_path)
print('log: saved {}'.format(save_path))
correct_count, total_count = 0, 0
for idx, data in enumerate(train_loader):
##########################
model.train() # train mode at each epoch, just in case...
##########################
test_x, label = data['input'], data['label']
if cuda:
test_x = test_x.cuda(device=device)
label = label.cuda(device=device)
# forward
out_x, pred = model.forward(test_x)
# out_x, pred = out_x.cpu(), pred.cpu()
loss = criterion(out_x, label)
net_loss.append(loss.item())
# get accuracy metric
batch_correct = (label.eq(pred.long())).double().sum().item()
correct_count += batch_correct
# print(batch_correct)
total_count += np.float(pred.size(0))
if idx % log_after == 0 and idx > 0:
print('{}. ({}/{}) image size = {}, loss = {}: accuracy = {}/{}'.format(i,
idx,
len(train_loader),
out_x.size(),
loss.item(),
batch_correct,
pred.size(0)))
#################################
# three steps for backprop
model.zero_grad()
loss.backward()
# perform gradient clipping between loss backward and optimizer step
clip_grad_norm_(model.parameters(), 0.05)
optimizer.step()
#################################
mean_accuracy = correct_count / total_count * 100
mean_loss = np.asarray(net_loss).mean()
m_loss.append((i, mean_loss))
m_accuracy.append((i, mean_accuracy))
writer.add_scalar(tag='train loss', scalar_value=mean_loss, global_step=i)
writer.add_scalar(tag='train over_all accuracy', scalar_value=mean_accuracy, global_step=i)
print('####################################')
print('epoch {} -> total loss = {:.5f}, total accuracy = {:.5f}%'.format(i, mean_loss, mean_accuracy))
print('####################################')
# validate model after each epoch
eval_net(model=model, writer=writer, criterion=criterion,
val_loader=val_dataloader, denominator=batch_size,
cuda=cuda, device=device, global_step=i)
pass
и, наконец, оценка / тестирование, как это
def eval_net(**kwargs):
model = kwargs['model']
cuda = kwargs['cuda']
device = kwargs['device']
if cuda:
model.cuda(device=device)
if 'criterion' in kwargs.keys():
writer = kwargs['writer']
val_loader = kwargs['val_loader']
criterion = kwargs['criterion']
global_step = kwargs['global_step']
correct_count, total_count = 0, 0
net_loss = []
model.eval() # put in eval mode first ############################
for idx, data in enumerate(val_loader):
test_x, label = data['input'], data['label']
if cuda:
test_x = test_x.cuda(device=device)
label = label.cuda(device=device)
# forward
out_x, pred = model.forward(test_x)
loss = criterion(out_x, label)
net_loss.append(loss.item())
# get accuracy metric
batch_correct = (label.eq(pred.long())).double().sum().item()
correct_count += batch_correct
total_count += np.float(pred.size(0))
#################################
mean_accuracy = correct_count / total_count * 100
mean_loss = np.asarray(net_loss).mean()
# summarize mean accuracy
writer.add_scalar(tag='val. loss', scalar_value=mean_loss, global_step=global_step)
writer.add_scalar(tag='val. over_all accuracy', scalar_value=mean_accuracy, global_step=global_step)
print('$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$')
print('log: validation:: total loss = {:.5f}, total accuracy = {:.5f}%'.format(mean_loss, mean_accuracy))
print('$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$')
else:
# model, images, labels, pre_model, save_dir, sum_dir, batch_size, lr, log_after, cuda
pre_model = kwargs['pre_model']
base_folder = kwargs['base_folder']
batch_size = kwargs['batch_size']
log_after = kwargs['log_after']
criterion = nn.CrossEntropyLoss()
un_confusion_meter = tnt.meter.ConfusionMeter(10, normalized=False)
confusion_meter = tnt.meter.ConfusionMeter(10, normalized=True)
model.load_state_dict(torch.load(pre_model))
print('log: resumed model {} successfully!'.format(pre_model))
_, _, test_loader = get_dataloaders(base_folder=base_folder, batch_size=batch_size)
net_accuracy, net_loss = [], []
correct_count = 0
total_count = 0
for idx, data in enumerate(test_loader):
model.eval() # put in eval mode first
test_x, label = data['input'], data['label']
# print(test_x)
# print(test_x.shape)
# this = test_x.numpy().squeeze(0).transpose(1,2,0)
# print(this.shape, np.min(this), np.max(this))
if cuda:
test_x = test_x.cuda(device=device)
label = label.cuda(device=device)
# forward
out_x, pred = model.forward(test_x)
loss = criterion(out_x, label)
un_confusion_meter.add(predicted=pred, target=label)
confusion_meter.add(predicted=pred, target=label)
###############################
# pred = pred.view(-1)
# pred = pred.cpu().numpy()
# label = label.cpu().numpy()
# print(pred.shape, label.shape)
###############################
# get accuracy metric
# correct_count += np.sum((pred == label))
# print(pred, label)
batch_correct = (label.eq(pred.long())).double().sum().item()
correct_count += batch_correct
# print(batch_correct)
total_count += np.float(batch_size)
net_loss.append(loss.item())
if idx % log_after == 0:
print('log: on {}'.format(idx))
#################################
mean_loss = np.asarray(net_loss).mean()
mean_accuracy = correct_count * 100 / total_count
print(correct_count, total_count)
print('$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$')
print('log: test:: total loss = {:.5f}, total accuracy = {:.5f}%'.format(mean_loss, mean_accuracy))
print('$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$')
with open('normalized.pkl', 'wb') as this:
pkl.dump(confusion_meter.value(), this, protocol=pkl.HIGHEST_PROTOCOL)
with open('un_normalized.pkl', 'wb') as this:
pkl.dump(un_confusion_meter.value(), this, protocol=pkl.HIGHEST_PROTOCOL)
pass
pass