Почему NA создаются из pivot_wider?
Я пытаюсь перевести свой фрейм данных из аккуратного формата в широкий формат, используя столбец с двумя значениями, используя следующее:
bai_wide = bai_trim %>% pivot_wider(names_from = Species, values_from = BAI)
Но когда я это делаю, в итоговом фрейме данных создаются NA. Значения должны совпадать, и когда я проверяю исходный фрейм данных, я не могу найти ни одного экземпляра, где они не совпадают.
Я знаю, этот что здесь задаютвопрос , но, похоже, он не решает мою проблему
Выход
dput(head(bai_trim, 100))
:
structure(list(Site = c("TN_C1", "TN_C1", "TN_C1", "TN_C1", "TN_C1",
"TN_C1", "TN_C1", "TN_C1", "TN_C1", "TN_C1", "TN_C1", "TN_C1",
"TN_C1", "TN_C1", "TN_C1", "TN_C1", "TN_C1", "TN_C1", "TN_C1",
"TN_C1", "TN_C1", "TN_C1", "TN_C1", "TN_C1", "TN_C1", "TN_C1",
"TN_C1", "TN_C1", "TN_C1", "TN_C1", "TN_C1", "TN_C1", "TN_C1",
"TN_C1", "TN_C1", "TN_C1", "TN_C1", "TN_C1", "TN_C1", "TN_C1",
"TN_C1", "TN_C1", "TN_C1", "TN_C1", "TN_C1", "TN_C1", "TN_C1",
"TN_C1", "TN_C1", "TN_C1", "TN_C1", "TN_C1", "TN_C1", "TN_C2",
"TN_C2", "TN_C2", "TN_C2", "TN_C2", "TN_C2", "TN_C2", "TN_C2",
"TN_C2", "TN_C2", "TN_C2", "TN_C2", "TN_C2", "TN_C2", "TN_C2",
"TN_C2", "TN_C2", "TN_C2", "TN_C2", "TN_C2", "TN_C2", "TN_C2",
"TN_C2", "TN_C2", "TN_C2", "TN_C2", "TN_C2", "TN_C2", "TN_C2",
"TN_C2", "TN_C2", "TN_C2", "TN_C2", "TN_C2", "TN_C2", "TN_C2",
"TN_C2", "TN_C2", "TN_C2", "TN_C2", "TN_C2", "TN_C2", "TN_C2",
"TN_C2", "TN_C2", "TN_C2", "TN_C2"), Year = c(1930L, 1931L, 1932L,
1933L, 1934L, 1935L, 1936L, 1937L, 1938L, 1939L, 1940L, 1941L,
1942L, 1943L, 1944L, 1945L, 1946L, 1947L, 1948L, 1949L, 1950L,
1951L, 1952L, 1953L, 1954L, 1955L, 1956L, 1957L, 1958L, 1959L,
1960L, 1961L, 1962L, 1963L, 1964L, 1965L, 1966L, 1967L, 1968L,
1969L, 1970L, 1971L, 1972L, 1973L, 1974L, 1975L, 1976L, 1977L,
1978L, 1979L, 1980L, 1981L, 1982L, 1930L, 1931L, 1932L, 1933L,
1934L, 1935L, 1936L, 1937L, 1938L, 1939L, 1940L, 1941L, 1942L,
1943L, 1944L, 1945L, 1946L, 1947L, 1948L, 1949L, 1950L, 1951L,
1952L, 1953L, 1954L, 1955L, 1956L, 1957L, 1958L, 1959L, 1960L,
1961L, 1962L, 1963L, 1964L, 1965L, 1966L, 1967L, 1968L, 1969L,
1970L, 1971L, 1972L, 1973L, 1974L, 1975L, 1976L), Species = c("QR",
"QR", "QR", "QR", "QR", "QR", "QR", "QR", "QR", "QR", "QR", "QR",
"QR", "QR", "QR", "QR", "QR", "QR", "QR", "QR", "QR", "QR", "QR",
"QR", "QR", "QR", "QR", "QR", "QR", "QR", "QR", "QR", "QR", "QR",
"QR", "QR", "QR", "QR", "QR", "QR", "QR", "QR", "QR", "QR", "QR",
"QR", "QR", "QR", "QR", "QR", "QR", "QR", "QR", "QR", "QR", "QR",
"QR", "QR", "QR", "QR", "QR", "QR", "QR", "QR", "QR", "QR", "QR",
"QR", "QR", "QR", "QR", "QR", "QR", "QR", "QR", "QR", "QR", "QR",
"QR", "QR", "QR", "QR", "QR", "QR", "QR", "QR", "QR", "QR", "QR",
"QR", "QR", "QR", "QR", "QR", "QR", "QR", "QR", "QR", "QR", "QR"
), Sample.Depth = c(30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L,
30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L,
30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L,
30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L,
30L, 30L, 30L, 30L, 19L, 29L, 29L, 29L, 30L, 31L, 31L, 31L, 31L,
31L, 31L, 31L, 32L, 32L, 32L, 32L, 32L, 32L, 32L, 32L, 32L, 32L,
32L, 32L, 32L, 32L, 32L, 32L, 32L, 32L, 32L, 32L, 32L, 32L, 32L,
32L, 32L, 32L, 32L, 32L, 32L, 32L, 32L, 32L, 32L, 32L, 32L, 32L
), Method = c("DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH",
"DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH",
"DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH",
"DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH",
"DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH",
"DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH",
"DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH",
"DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH",
"DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH",
"DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH",
"DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH",
"DBH", "DBH", "DBH"), BAI = c(1329.82725527258, 1583.55443950606,
1562.33088797649, 1781.17227674256, 2239.26579940025, 2283.51207558404,
1494.47266835451, 2079.21430793831, 2431.61659002079, 2063.6712031744,
2279.01645480338, 2240.79817505811, 2537.08695503732, 2357.25996541304,
2143.34894709899, 2963.42239899576, 3266.11822944487, 3188.08984551795,
2053.72520969305, 1976.4907044215, 1974.33378516752, 2314.19980193622,
1986.85089493789, 1717.7066077125, 1712.32576613411, 2270.12697244457,
2265.44617869404, 2086.27614664055, 2290.16557632423, 2067.56268776649,
2330.32100341616, 2594.45495623365, 1916.37409435704, 2615.32977322989,
2994.09297309259, 3105.71799117356, 2608.13289994918, 2781.32817927508,
2788.89468459625, 2814.41629406914, 2218.40404749475, 2375.62820321149,
2454.40055519329, 2536.22462576871, 2673.39980127834, 2883.60697407212,
2901.26428554182, 2759.19544971662, 3271.437201359, 3023.01356721046,
2586.11651777101, 2683.77375275508, 2560.55282710926, 1028.27393956856,
1254.97727247239, 1180.00666939284, 1162.75652641982, 1468.21393690705,
1420.29545487908, 870.636254692378, 1558.97134681397, 1680.04973736316,
1807.98548193521, 1887.32063639148, 1916.04119222857, 1949.52683704445,
1921.80868471893, 1600.62264826328, 1859.9149833578, 2184.22704501268,
2364.39029270987, 1853.12296621112, 1533.22199599478, 1797.1627135163,
1738.07965789397, 1687.15007187521, 1592.13731685411, 1656.32266290939,
2337.09276793395, 2353.86414716497, 2290.38356871338, 2562.25811266612,
2576.09112815194, 2595.90714922909, 2892.38644610441, 1926.95398513788,
2040.79373628591, 2636.83713546072, 3216.10408623204, 2399.34264253439,
2411.58302876301, 2150.87125164971, 2456.28295814168, 2401.15926385922,
2525.4045600946, 2619.28151832898, 2869.37020856327, 2457.47946097768,
2505.49431848312, 2343.63069935373)), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -100L), groups = structure(list(
Site = c("TN_C1", "TN_C2"), .rows = structure(list(1:53,
54:100), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -2L), .drop = TRUE))
Вот образец полученного фрейма данных:
structure(list(Site = c("TN_C1", "TN_C1", "TN_C1", "TN_C1", "TN_C1",
"TN_C1", "TN_C1", "TN_C1", "TN_C1", "TN_C1", "TN_C1", "TN_C1",
"TN_C1", "TN_C1", "TN_C1", "TN_C1", "TN_C1", "TN_C1", "TN_C1",
"TN_C1", "TN_C1", "TN_C1", "TN_C1", "TN_C1", "TN_C1", "TN_C1",
"TN_C1", "TN_C1", "TN_C1", "TN_C1", "TN_C1", "TN_C1", "TN_C1",
"TN_C1", "TN_C1", "TN_C1", "TN_C1", "TN_C1", "TN_C1", "TN_C1",
"TN_C1", "TN_C1", "TN_C1", "TN_C1", "TN_C1", "TN_C1", "TN_C1",
"TN_C1", "TN_C1", "TN_C1", "TN_C1", "TN_C1", "TN_C1", "TN_C2",
"TN_C2", "TN_C2", "TN_C2", "TN_C2", "TN_C2", "TN_C2", "TN_C2",
"TN_C2", "TN_C2", "TN_C2", "TN_C2", "TN_C2", "TN_C2", "TN_C2",
"TN_C2", "TN_C2", "TN_C2", "TN_C2", "TN_C2", "TN_C2", "TN_C2",
"TN_C2", "TN_C2", "TN_C2", "TN_C2", "TN_C2", "TN_C2", "TN_C2",
"TN_C2", "TN_C2", "TN_C2", "TN_C2", "TN_C2", "TN_C2", "TN_C2",
"TN_C2", "TN_C2", "TN_C2", "TN_C2", "TN_C2", "TN_C2", "TN_C2",
"TN_C2", "TN_C2", "TN_C2", "TN_C2"), Year = c(1930L, 1931L, 1932L,
1933L, 1934L, 1935L, 1936L, 1937L, 1938L, 1939L, 1940L, 1941L,
1942L, 1943L, 1944L, 1945L, 1946L, 1947L, 1948L, 1949L, 1950L,
1951L, 1952L, 1953L, 1954L, 1955L, 1956L, 1957L, 1958L, 1959L,
1960L, 1961L, 1962L, 1963L, 1964L, 1965L, 1966L, 1967L, 1968L,
1969L, 1970L, 1971L, 1972L, 1973L, 1974L, 1975L, 1976L, 1977L,
1978L, 1979L, 1980L, 1981L, 1982L, 1930L, 1931L, 1932L, 1933L,
1934L, 1935L, 1936L, 1937L, 1938L, 1939L, 1940L, 1941L, 1942L,
1943L, 1944L, 1945L, 1946L, 1947L, 1948L, 1949L, 1950L, 1951L,
1952L, 1953L, 1954L, 1955L, 1956L, 1957L, 1958L, 1959L, 1960L,
1961L, 1962L, 1963L, 1964L, 1965L, 1966L, 1967L, 1968L, 1969L,
1970L, 1971L, 1972L, 1973L, 1974L, 1975L, 1976L), Sample.Depth = c(30L,
30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L,
30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L,
30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L,
30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 29L, 26L,
29L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L,
30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L,
30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L,
30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L), Method = c("DBH", "DBH",
"DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH",
"DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH",
"DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH",
"DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH",
"DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH",
"DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH",
"DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH",
"DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH",
"DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH",
"DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH",
"DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH"), QA = c(1112.01006767124,
949.583411961448, 982.998663254952, 1087.36586010667, 1183.95219489817,
1437.32692024663, 859.941821023156, 1378.82104138941, 1715.11803906048,
1205.20873806085, 1314.40911284639, 1311.60513756934, 1334.12431932916,
1250.4241475598, 1206.45532955227, 1669.46160189739, 1748.00363523953,
1522.13188782807, 1068.48352520193, 1383.60591823409, 1629.94356878758,
1716.81958142787, 1371.09743912618, 1177.56768513191, 1268.53730445105,
1435.3852922059, 1431.72686167387, 1209.19417164828, 1475.2233795444,
1353.14184703705, 1405.88977051333, 1502.28919572968, 1231.9358745554,
1493.22404186533, 1608.26405912164, 1758.40007776153, 1358.26743655462,
1604.19889400061, 1582.77287404955, 1460.38775673841, 1718.89866003169,
1926.87492109503, 2035.25743659833, 2154.8572833228, 2155.72079265846,
1938.13092846124, 2236.80568615272, 1805.23678218424, 1856.36065999217,
1679.99679942377, 1441.26238614602, 1936.44942937414, 2133.45057631534,
1085.3150108096, 974.860473716478, 986.924868102327, 1037.83237831603,
1312.30667301435, 1405.30585427792, 773.207242839713, 1277.61195650029,
1772.52157942987, 1388.91468492248, 1391.06708726821, 1268.28478285902,
1295.29415128352, 1143.11153240523, 1058.28457720443, 1476.33487734882,
1673.26309468627, 1455.5231756649, 1015.5006665268, 1242.52404078483,
1348.94246837961, 1301.55518283897, 1075.92047580797, 977.592546236365,
1046.42643732053, 1426.81431935015, 1475.07415572278, 1455.23907789844,
1649.60781234728, 1563.4820765323, 1642.9919422491, 1865.42560165599,
1329.73932888637, 1795.37507081007, 2413.71424418505, 2499.48425942841,
2007.68534251994, 2279.94325095388, 2250.84540282916, 1988.31215010309,
2384.77641721496, 2719.39349513496, 2888.75729672066, 2955.42338126383,
2908.70715866689, 2724.37859079958, 2901.46999203769), QR = c(1329.82725527258,
1583.55443950606, 1562.33088797649, 1781.17227674256, 2239.26579940025,
2283.51207558404, 1494.47266835451, 2079.21430793831, 2431.61659002079,
2063.6712031744, 2279.01645480338, 2240.79817505811, 2537.08695503732,
2357.25996541304, 2143.34894709899, 2963.42239899576, 3266.11822944487,
3188.08984551795, 2053.72520969305, 1976.4907044215, 1974.33378516752,
2314.19980193622, 1986.85089493789, 1717.7066077125, 1712.32576613411,
2270.12697244457, 2265.44617869404, 2086.27614664055, 2290.16557632423,
2067.56268776649, 2330.32100341616, 2594.45495623365, 1916.37409435704,
2615.32977322989, 2994.09297309259, 3105.71799117356, 2608.13289994918,
2781.32817927508, 2788.89468459625, 2814.41629406914, 2218.40404749475,
2375.62820321149, 2454.40055519329, 2536.22462576871, 2673.39980127834,
2883.60697407212, 2901.26428554182, 2759.19544971662, 3271.437201359,
3023.01356721046, 2586.11651777101, NA, NA, 1028.27393956856,
NA, NA, 1162.75652641982, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA)), class = c("grouped_df", "tbl_df", "tbl", "data.frame"
), row.names = c(NA, -100L), groups = structure(list(Site = c("TN_C1",
"TN_C2"), .rows = structure(list(1:53, 54:100), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -2L), .drop = TRUE))
1 ответ
Я использовал ваш фрейм данных, отфильтрованный между 1930-1976 годами для Сайта: «TN_C2», чтобы развернуться шире:
bai_wide = df %>%
pivot_wider(names_from = Species, values_from = BAI)
И это вывод:
> bai_wide
# A tibble: 92 x 6
# Groups: Site [1]
Site Year Sample.Depth Method QA QR
<chr> <int> <int> <chr> <dbl> <dbl>
1 TN_C2 1930 29 DBH 1085. 1028.
2 TN_C2 1931 30 DBH 975. NA
3 TN_C2 1931 29 DBH NA 1255.
4 TN_C2 1932 30 DBH 987. NA
5 TN_C2 1932 29 DBH NA 1180.
6 TN_C2 1933 30 DBH 1038. 1163.
7 TN_C2 1934 30 DBH 1312. NA
8 TN_C2 1934 31 DBH NA 1468.
9 TN_C2 1935 30 DBH 1405. NA
10 TN_C2 1935 31 DBH NA 1420.
# ... with 82 more rows
Мы видим, что Нас много. Почему? Поскольку у вас есть несколько значений в
Возможные решения зависят от того, насколько точно вы хотите использовать параметр « Глубина ». Это зависит от анализа, который вы хотите сделать.
Если вас не волнует глубина образца, вы можете просто создать новый df без этого столбца, а затем выполнить поворот.
Если вы просто заботитесь, но не очень точны, вы можете объединить значения глубины для каждого года, сгруппированные по сайту и году, и изменить новый столбец, чтобы использовать среднее значение глубины. Нравиться
>%> group_by(Site,Year) >%> mutate(meanDepth = mean())
Просто сохраните значения NA, потому что таков ваш фрейм данных.
Есть более сложные способы попытаться создать отношения, чтобы иметь только 1 строку в год для каждого сайта, изменяющую Sample.Depth, но у меня нет достаточно времени для подробностей.
Это зависит только от вас. Главное, что вы должны знать, это то, что Sample.Depth создает вам пустые значения, потому что pivot_wider() создает новые столбцы, связанные с другими значениями столбцов, которые были в той же строке.