Ошибка во время переноса: "данные" должны быть data.frame, environment или list

Я пытаюсь предсказать качество вина. Я использовал 4 модели, и теперь я хотел объединить их и сделать перекрестную проверку и ансамбль. Когда я пытаюсь запустить цикл в предсказаниях '''<- sapply (validation_models, function (model) {' '') выдает мне ошибку Error во время обертывания: "данные" должны быть data.frame, environment или list Ошибка: обработчики ошибок больше нет (рекурсивные ошибки?); Вызов "прервать" перезапуск. У кого-нибудь есть ключ к отладке?

#Cross validation---------------------------------------------------------------
install.packages("caret")
library(caret)
library(lessR)
library(Hmisc)
library(dplyr)
library(fastAdaboost)
library(gam)
library(gbm)
library("import")
library(kernlab)
library(kknn)
library(klaR)
install.packages("MASS")
library(MASS)
library(mboost)
library(mgcv)
library(monmlp)
library(naivebayes)
library(nnet)
library(plyr)
library(ranger)
library(randomForest)
library(Rborist)
library(RSNNS)
library(wsrf)
library(reprex)
#Define the models to validate

validation_models <- c("Linear Regression", "Knn", "Random Forest","LDA")

#Number of files to run validation: 4
#Train 90% of the dataset
#Function twoClassSumary:predict the metrics -sensitivity, specificity & ROC

train_control <- trainControl(method = "cv", number = 10,p = 0.9,
                        summaryFunction = twoClassSummary, 
                        classProbs = TRUE,
                        savePredictions = TRUE)

train_control <- trainControl(method = "cv", number =10,p = 0.9,
                        classProbs = TRUE,
                        savePredictions = TRUE)

#The next stage creates missing values(NA) to train the models
#Input the missing values in a table to check the models output
library(tibble)
metrics_output <- tibble(Model = NA,
                  Accuracy = NA,
                  Sensitivity = NA,
                  Specificity = NA,
                  F1_Score = NA,
                  AUC = NA)
#After models set up , do parallel processing

#Use of 4 cores instead of 7 since the machine has no capacity.
cor <-4

library("doParallel")
cl <- makePSOCKcluster(cor)
registerDoParallel(cl)

#START TRAINING THE MODELS-----------------------------------------
#To make sure that the process starts equal , the seed is setted.

set.seed(1234, sample.kind = "Rejection")

# Refer to the formula used previously 
fml <- as.formula(paste("type", "~", 
                        paste(xquality , collapse=' + ')))

#Let's start predicting 
#Use of tuning parameters



predictions<- sapply(validation_models, function(model){ 
  
  if (model == "knn") { #tuning parameters
    grid <- data.frame(k = seq(3, 50, 2))
    fit <- caret::train(form = fml, 
                        method = model, 
                        data = train,
                        trControl = train_control,
                        tuneGrid = grid)
  } else if (model == "Random Forest"){ #tuning arameters
    grid <- data.frame(mtry = c(1, 2, 3, 4, 5, 10, 25, 50, 100))
    
    fit <- caret::train(form = fml,
                        method = "Random Forest",
                        data = train,
                        trControl = train_control,
                        ntree = 150,
                        tuneGrid = grid,
                        nSamp = 5000)
  } else {
    # Other models use standard parameters (no tuning)
    fit <- caret::train(form = fml, 
                        method = model,
                        data = train,
                        trControl = train_control)
  }
  
  # Predictions
  pred <- predict(object = fit, newdata = test)
})

0 ответов

Другие вопросы по тегам