Ошибка во время переноса: "данные" должны быть data.frame, environment или list
Я пытаюсь предсказать качество вина. Я использовал 4 модели, и теперь я хотел объединить их и сделать перекрестную проверку и ансамбль. Когда я пытаюсь запустить цикл в предсказаниях '''<- sapply (validation_models, function (model) {' '') выдает мне ошибку Error во время обертывания: "данные" должны быть data.frame, environment или list Ошибка: обработчики ошибок больше нет (рекурсивные ошибки?); Вызов "прервать" перезапуск. У кого-нибудь есть ключ к отладке?
#Cross validation---------------------------------------------------------------
install.packages("caret")
library(caret)
library(lessR)
library(Hmisc)
library(dplyr)
library(fastAdaboost)
library(gam)
library(gbm)
library("import")
library(kernlab)
library(kknn)
library(klaR)
install.packages("MASS")
library(MASS)
library(mboost)
library(mgcv)
library(monmlp)
library(naivebayes)
library(nnet)
library(plyr)
library(ranger)
library(randomForest)
library(Rborist)
library(RSNNS)
library(wsrf)
library(reprex)
#Define the models to validate
validation_models <- c("Linear Regression", "Knn", "Random Forest","LDA")
#Number of files to run validation: 4
#Train 90% of the dataset
#Function twoClassSumary:predict the metrics -sensitivity, specificity & ROC
train_control <- trainControl(method = "cv", number = 10,p = 0.9,
summaryFunction = twoClassSummary,
classProbs = TRUE,
savePredictions = TRUE)
train_control <- trainControl(method = "cv", number =10,p = 0.9,
classProbs = TRUE,
savePredictions = TRUE)
#The next stage creates missing values(NA) to train the models
#Input the missing values in a table to check the models output
library(tibble)
metrics_output <- tibble(Model = NA,
Accuracy = NA,
Sensitivity = NA,
Specificity = NA,
F1_Score = NA,
AUC = NA)
#After models set up , do parallel processing
#Use of 4 cores instead of 7 since the machine has no capacity.
cor <-4
library("doParallel")
cl <- makePSOCKcluster(cor)
registerDoParallel(cl)
#START TRAINING THE MODELS-----------------------------------------
#To make sure that the process starts equal , the seed is setted.
set.seed(1234, sample.kind = "Rejection")
# Refer to the formula used previously
fml <- as.formula(paste("type", "~",
paste(xquality , collapse=' + ')))
#Let's start predicting
#Use of tuning parameters
predictions<- sapply(validation_models, function(model){
if (model == "knn") { #tuning parameters
grid <- data.frame(k = seq(3, 50, 2))
fit <- caret::train(form = fml,
method = model,
data = train,
trControl = train_control,
tuneGrid = grid)
} else if (model == "Random Forest"){ #tuning arameters
grid <- data.frame(mtry = c(1, 2, 3, 4, 5, 10, 25, 50, 100))
fit <- caret::train(form = fml,
method = "Random Forest",
data = train,
trControl = train_control,
ntree = 150,
tuneGrid = grid,
nSamp = 5000)
} else {
# Other models use standard parameters (no tuning)
fit <- caret::train(form = fml,
method = model,
data = train,
trControl = train_control)
}
# Predictions
pred <- predict(object = fit, newdata = test)
})