Как использовать Conv1d с MXNet.jl или Flux в Julia?
Я попытался перевести пример Conv1d Python Keras из: https://machinelearningmastery.com/cnn-models-for-human-activity-recognition-time-series-classification/ на Julia, используя пакеты mxnet.jl и Flux.
Я смог обучить модель, но с плохими результатами по сравнению с версией Keras. Я не уверен, что сделал не так. Я предполагаю, что что-то не так с входной формой или моделью. Я немного запутался, как использовать mx.ArrayDataProvider. Как видите, точность после 20 эпох составляет всего ~20% вместо ~90% при использовании Keras. Я пробовал разные формы ввода, но безуспешно:(
Набор данных можно загрузить по адресу: https://archive.ics.uci.edu/ml/machine-learning-databases/00240/UCI%20HAR%20Dataset.zip.
Функции для загрузки наборов данных:
using DelimitedFiles
path_dataset = "/Users/bjoern/code/tests/HARDataset"
# load a single file as a numpy array
function load_file(file_path)
dat = readdlm(file_path)
return dat
end
# load a list of files and return as a 3d numpy array
function load_group(filenames, path2csv)
loaded = []
for name in filenames
data = load_file(path2csv * name)
push!(loaded, data)
end
# stack group so that features are the 3rd dimension
loaded = cat(loaded...; dims=3)
return loaded
end
# load a dataset group, such as train or test
function load_dataset_group(group, path_dataset)
filepath = joinpath(path_dataset, group, "Inertial_Signals/")
# load all 9 files as a single array
filenames = []
# total acceleration
push!(filenames, "total_acc_x_" * group * ".txt", "total_acc_y_" * group * ".txt", "total_acc_z_" * group * ".txt")
# body acceleration
push!(filenames, "body_acc_x_" * group * ".txt", "body_acc_y_" * group * ".txt", "body_acc_z_" * group * ".txt")
# body gyroscope
push!(filenames, "body_gyro_x_" * group * ".txt", "body_gyro_y_" * group * ".txt", "body_gyro_z_" * group * ".txt")
# load input data
X = load_group(filenames, filepath)
# load class output
y = load_file(path_dataset * "/" * group * "/y_" * group * ".txt")
return X, y
end
# load the dataset, returns train and test X and y elements
function load_dataset(path_dataset)
# load all train
trainX, trainy = load_dataset_group("train", path_dataset)
println(size(trainX), size(trainy))
# load all test
testX, testy = load_dataset_group("test", path_dataset)
println(size(testX), size(testy))
println(size(trainX), size(trainy), size(testX), size(testy))
return trainX, trainy, testX, testy
end
Версия MXNet:
using MXNet
# load data
trainX, trainy, testX, testy = load_dataset(path_dataset)
trainX_ = permutedims(trainX, (2,3,1))
testX_ = permutedims(testX, (2,3,1))
trainy_ = reshape(trainy,(7352,))
testy_ = reshape(testy, (2947,))
train_data_provider = mx.ArrayDataProvider(:data => trainX_, :label => trainy_, batch_size=32, shuffle=true)
validation_data_provider = mx.ArrayDataProvider(:data => testX_, :label => testy_,batch_size=32)
arch = @mx.chain mx.Variable(:data) =>
#mx.Embedding(2,1) =>
mx.Convolution(kernel=(3,), num_filter=64) =>
mx.Activation(act_type=:relu) =>
mx.Convolution(kernel=(3,), num_filter=64) =>
mx.Activation(act_type=:relu) =>
mx.Dropout(p=0.5) =>
mx.Pooling(pool_type=:max, kernel=(2,)) =>
mx.Flatten() =>
mx.FullyConnected(num_hidden=100) =>
mx.Activation(act_type=:relu) =>
mx.FullyConnected(num_hidden=6) =>
mx.SoftmaxOutput(mx.Variable(:label))
nnet = mx.FeedForward(arch, context = mx.cpu())
mx.fit(nnet, mx.ADAM(), train_data_provider, eval_data = validation_data_provider, n_epoch = 20, callbacks = [mx.speedometer()]);
Info: Start training on Context[CPU0]
[ Info: Initializing parameters...
[ Info: Creating KVStore...
[ Info: TempSpace: Total 0 MB allocated on CPU0
[ Info: Start training...
[ Info: Speed: 1303.57 samples/sec
[ Info: Speed: 1364.40 samples/sec
[ Info: Speed: 1282.10 samples/sec
[ Info: Speed: 1306.47 samples/sec
[ Info: == Epoch 001/050 ==========
[ Info: ## Training summary
[ Info: accuracy = 0.2117
[ Info: time = 5.6121 seconds
[ Info: ## Validation summary
[ Info: accuracy = 0.0413
[ Info: Speed: 1378.38 samples/sec
[ Info: Speed: 1350.01 samples/sec
[ Info: Speed: 1353.40 samples/sec
[ Info: Speed: 1368.58 samples/sec
[ Info: == Epoch 002/050 ==========
[ Info: ## Training summary
[ Info: accuracy = 0.1776
[ Info: time = 5.3959 seconds
[ Info: ## Validation summary
[ Info: accuracy = 0.2530
[ Info: Speed: 1341.65 samples/sec
[ Info: Speed: 1366.51 samples/sec
[ Info: Speed: 1254.77 samples/sec
[ Info: Speed: 1205.79 samples/sec
.
.
.
[ Info: == Epoch 019/050 ==========
[ Info: ## Training summary
[ Info: accuracy = 0.1822
[ Info: time = 6.5816 seconds
[ Info: ## Validation summary
[ Info: accuracy = 0.1411
[ Info: Speed: 1195.42 samples/sec
[ Info: Speed: 1235.22 samples/sec
[ Info: Speed: 1226.30 samples/sec
[ Info: Speed: 1240.68 samples/sec
[ Info: == Epoch 020/050 ==========
[ Info: ## Training summary
[ Info: accuracy = 0.1792
[ Info: time = 6.0004 seconds
[ Info: ## Validation summary
[ Info: accuracy = 0.1788
[ Info: Speed: 1338.49 samples/sec
ОБНОВЛЕНИЕ: пробовал также в Flux.jl с почти такой же точностью, поэтому я предполагаю, что что-то не так с формой ввода. Как мне организовать входные данные временных рядов для слоя 1d Conv в Flux или MXNet?
добавлена версия Flux:
using Flux, Flux.Data.MNIST, Statistics
using Flux: onehotbatch, onecold, crossentropy, throttle, relu, softmax, cpu, ADAM
using Base.Iterators: repeated, partition
using Printf
# load data
trainX, trainy, testX, testy = load_dataset(path_dataset)
# Not sure how to arrange
permutedims!(trainX, (3,2,1))
trainX = permutedims(reshape(trainX,(size(trainX,1)*size(trainX_,2),size(trainX,3))),(2,1))
permutedims!(testX, (3,2,1))
testX = permutedims(reshape(testX, (size(testX,1)*size(testX,2),size(testX,3))),(2,1))
y_train = trainy .- 1
y_test = testy .- 1
function make_minibatch(X, Y, idxs)
X_batch = Array{Float32}(undef, 1152..., 1, length(idxs))
for i in 1:length(idxs)
X_batch[:, :, i] = Float32.(X[i,:])
end
Y_batch = onehotbatch(Y[idxs], 0:5)
return (X_batch, Y_batch)
end
batch_size = 32
mb_idxs = partition(1:7352, batch_size)
train_set = [make_minibatch(trainX, y_train, i) for i in mb_idxs]
test_set = make_minibatch(testX, y_test, 1:2947)
model = Chain(
Conv((3,), 1=>64, relu),
Conv((3,), 64=>64, relu),
Dropout(0.5),
x -> maxpool(x, (2,)),
x -> reshape(x, :, size(x, 3)),
Dense(36736, 6),
softmax,
)
train_set = Flux.cpu.(train_set)
test_set = Flux.cpu.(test_set)
model = Flux.cpu(model)
function loss(x, y)
x_aug = x .+ 0.1f0*Flux.cpu(randn(eltype(x), size(x)))
y_hat = model(x_aug)
return crossentropy(y_hat, y)
end
accuracy(x, y) = mean(onecold(model(x)) .== onecold(y))
opt = Flux.ADAM()
@info("Beginning training loop...")
best_acc = 0.0
last_improvement = 0
for epoch_idx in 1:10
global best_acc, last_improvement
Flux.train!(loss, params(model), train_set, opt)
acc = accuracy(test_set...)
@info(@sprintf("[%d]: Test accuracy: %.4f", epoch_idx, acc))
end
OUTPUT:
[ Info: Beginning training loop...
[ Info: [1]: Test accuracy: 0.1775
[ Info: [2]: Test accuracy: 0.1802
[ Info: [3]: Test accuracy: 0.1843
[ Info: [4]: Test accuracy: 0.1856
Любой совет? Заранее спасибо!