Вывод моды в таблице в пакете expss

Я использую пакет expss для анализа в R. С помощью cro_mean_sd_n() я получаю среднее значение и стандартное отклонение. Могу ли я вызвать другую функцию, чтобы найти моду?

И есть ли возможность вывести среднее, медиану, моду, сумму и N в одной таблице?

1 ответ

Решение

Вы можете использовать произвольную функцию с cro_fun:

# Load expss; the labelling and table functions used below are expected to
# come from this package.
library(expss)

# Load the built-in mtcars dataset into the workspace.
data(mtcars)
# Attach labels to the columns. The variable labels (e.g. "Miles/(US) gallon")
# and the value labels for 'am' (Automatic / Manual) are what appear as row
# and column headers in the tables produced further down.
mtcars = apply_labels(mtcars,
    mpg = "Miles/(US) gallon",
    cyl = "Number of cylinders",
    vs = "Engine",
    # Value labels written as "<code> <label>" lines, one per code.
    # NOTE(review): 'vs' is labelled here but not used in the tables below.
    vs = num_lab("
                 0 V-engine 
                 1 Straight engine
                 "),
    am = "Transmission",
    # Same "<code> <label>" format: 0 -> Automatic, 1 -> Manual.
    am = num_lab("
                 0 Automatic 
                 1 Manual
                 ")
)

# There is no built-in (weighted) 'mode' function, so we define our own.
#
# w_mode(x, weight = NULL) returns the most frequent value of 'x'.
#   x      - vector of observations.
#   weight - optional numeric vector of case weights, either the same length
#            as 'x' or a single value that is recycled. NULL means every
#            observation counts once. Missing weights are treated as zero.
# Missing values in 'x' are ignored. If several values share the highest
# (weighted) frequency, the one that occurs first in 'x' is returned. When
# there are no non-missing observations, a single NA of the same type as 'x'
# is returned so that the result always has length one.
w_mode = function(x, weight = NULL){
    if(is.null(weight)){
        weight = rep(1, length(x))
    } else if(length(weight) == 1L){
        weight = rep(weight, length(x))
    }
    stopifnot(length(weight) == length(x))
    # a missing weight contributes nothing to the total of its value
    weight[is.na(weight)] = 0
    # NA is not a meaningful mode, so drop missing observations
    not_missing = !is.na(x)
    x = x[not_missing]
    weight = weight[not_missing]
    if(length(x) == 0L){
        return(x[NA_integer_])
    }
    # unique() keeps values in order of first appearance; together with
    # which.max() (first maximum) this makes ties resolve to the earliest value
    values = unique(x)
    totals = as.vector(rowsum(weight, match(x, values), reorder = TRUE))
    values[which.max(totals)]
}


# Calculate the statistics: for each of 'mpg' and 'cyl' the table gets one
# row per statistic, with columns for the total and each transmission type.
calc_cro_fun(
    mtcars,
    cell_vars = list(mpg, cyl),
    col_vars = list(total(), am),
    fun = combine_functions(
        Mean = w_mean,
        Median = w_median,
        Mode = w_mode,
        Sum = w_sum,
        N = valid_n
    )
)

# |                     |         | #Total | Transmission |        |
# |                     |         |        |    Automatic | Manual |
# | ------------------- | ------- | ------ | ------------ | ------ |
# |   Miles/(US) gallon |    Mean |   20.1 |         17.1 |   24.4 |
# |                     |  Median |   19.2 |         17.3 |   22.8 |
# |                     |    Mode |   21.0 |         19.2 |   21.0 |
# |                     |     Sum |  642.9 |        325.8 |  317.1 |
# |                     |       N |   32.0 |         19.0 |   13.0 |
# | Number of cylinders |    Mean |    6.2 |          6.9 |    5.1 |
# |                     |  Median |    6.0 |          8.0 |    4.0 |
# |                     |    Mode |    8.0 |          8.0 |    4.0 |
# |                     |     Sum |  198.0 |        132.0 |   66.0 |
# |                     |       N |   32.0 |         19.0 |   13.0 |

# The statistics can be repositioned with the 'method' argument of
# 'combine_functions': with 'method = list' they are laid out as columns
# instead of rows.
calc_cro_fun(
    mtcars,
    cell_vars = list(mpg, cyl),
    col_vars = list(total(), am),
    fun = combine_functions(
        Mean = w_mean,
        Median = w_median,
        Mode = w_mode,
        Sum = w_sum,
        N = valid_n,
        method = list
    )
)

# |                     | #Total |        |      |       |    | Transmission |        |      |       |    |        |        |      |       |    |
# |                     |   Mean | Median | Mode |   Sum |  N |    Automatic |        |      |       |    | Manual |        |      |       |    |
# |                     |        |        |      |       |    |         Mean | Median | Mode |   Sum |  N |   Mean | Median | Mode |   Sum |  N |
# | ------------------- | ------ | ------ | ---- | ----- | -- | ------------ | ------ | ---- | ----- | -- | ------ | ------ | ---- | ----- | -- |
# |   Miles/(US) gallon |   20.1 |   19.2 |   21 | 642.9 | 32 |         17.1 |   17.3 | 19.2 | 325.8 | 19 |   24.4 |   22.8 |   21 | 317.1 | 13 |
# | Number of cylinders |    6.2 |    6.0 |    8 | 198.0 | 32 |          6.9 |    8.0 |  8.0 | 132.0 | 19 |    5.1 |    4.0 |    4 |  66.0 | 13 |

Вы также можете использовать sum, mean и т. д. из базового R вместо функций w_*, если вам не нужна поддержка весов.

Другие вопросы по тегам