График плотности, выглядящий странно, используя stat_density_2d
У меня есть фрейм данных, как показано ниже
> dput(test_data)
structure(list(X206.204 = c(18.28, 18.32, 18.453, 18.55, 18.24,
18.39, 18.36, 18.26, 18.23, 18.42, 18.35, 18.64, 18.59, 18.28,
18.56, 18.72, 18.6, 18.59, 18.63, 18.19, 18.73, 18.36, 18.71,
18.66, 18.66, 18.61, 18.68, 18.741, 17.1758, 17.18, 17.1709,
17.1748, 17.1774, 17.1756, 17.156, 17.074, 17.1837, 17.1806,
17.1904, 17.1849, 17.2025, 17.1802, 17.92, 17.9, 17.94, 17.93,
17.91, 17.92, 17.906, 17.92, 17.887, 17.935, 17.905, 17.867,
17.957, 17.957, 18.3794, 18.3667, 18.3777, 18.3672, 18.387, 18.3765,
18.4, 17.905, 17.886, 17.906, 18.144, 17.998, 18.394, 18.339,
18.437, 18.349, 18.41, 18.407, 18.442, 18.309, 18.348, 18.193,
18.419, 18.363), X207.204 = c(15.45, 15.574, 15.464, 15.64, 15.47,
15.55, 15.61, 15.48, 15.47, 15.64, 15.56, 15.9, 15.74, 15.44,
15.65, 15.77, 15.81, 15.77, 15.85, 15.63, 15.97, 15.59, 15.59,
15.89, 15.88, 15.89, 15.91, 15.706, 15.5249, 15.5277, 15.5214,
15.5245, 15.5261, 15.5252, 15.522, 15.422, 15.5276, 15.5274,
15.5322, 15.5259, 15.5274, 15.5238, 15.44, 15.42, 15.36, 15.47,
15.47, 15.51, 15.471, 15.465, 15.47, 15.463, 15.473, 15.462,
15.449, 15.445, 15.6314, 15.6307, 15.6332, 15.6332, 15.6323,
15.6336, 15.6324, 15.465, 15.465, 15.506, 15.489, 15.484, 15.612,
15.618, 15.735, 15.619, 15.665, 15.649, 15.698, 15.554, 15.606,
15.491, 15.693, 15.6), X208.204 = c(38.42, 38.099, 38.076, 39.27,
38.19, 38.37, 38.57, 38.22, 38.19, 38.56, 38.01, 38.93, 38.77,
37.97, 38.63, 39.01, 39.03, 38.86, 39.11, 38.25, 39.51, 38.22,
39.45, 39.24, 39.2, 39.17, 39.33, 37.969, 37.002, 37.013, 36.9921,
37, 37.008, 37.003, 37.005, 36.685, 37.0207, 37.0144, 37.0351,
37.0187, 37.0713, 37.013, 37.58, 37.55, 37.62, 37.69, 37.58,
37.72, 37.431, 37.418, 37.426, 37.46, 37.467, 37.396, 37.443,
37.385, 38.3435, 38.3215, 38.3393, 38.3166, 38.3508, 38.3333,
38.3692, 37.426, 37.398, 37.522, 37.684, 37.325, 38.478, 38.342,
38.585, 38.294, 38.482, 38.457, 38.592, 38.3, 38.372, 38.945,
38.527, 38.396)), .Names = c("X206.204", "X207.204", "X208.204"
), row.names = c(NA, -80L), class = "data.frame")
Я попытался реализовать следующий код для создания двухмерного графика плотности, используя stat_denisty_2d
ggplot(data=test_data,aes(x = X206.204 , y = X207.204))+ stat_density_2d(geom="polygon",n=800,bins=20,
aes(fill = ..level..,
alpha = ..level..)) +
geom_point(color="red")+
labs( x = expression({}^206*"Pb/"*{}^204*"Pb"),
y = expression({}^207*"Pb/"*{}^204*"Pb")
)+
theme_bw() +
theme(panel.grid = element_blank(),
axis.text.x = element_text(margin = margin(t = 5, unit = "pt")))+
theme(axis.text=element_text(size=18))+ # adjust x y axis tick mark
theme(axis.title = element_text(size=22))+ # adjust x y axis title
theme(legend.position="none")+
scale_fill_gradient(low = "cyan1",high = "cyan4")+
scale_x_continuous(breaks = seq(17, 19.6, by = 0.2))+
scale_y_continuous(breaks = seq(15.4, 15.9, by = 0.1))+
coord_cartesian(xlim = c(17,19.6),ylim=c(15.4, 15.9))
Это дает мне диаграмму, края которой обрезаны, и в середине есть большая область, не содержащая данных вообще. Как я могу исправить эти проблемы?
1 ответ
Если я правильно понимаю, вам нужно передать дополнительный аргумент, определяющий bandwidth - аргумент h
в stat_density_2d
,
час
Полоса пропускания (вектор длины два). Если NULL, оценивается с помощью bandwidth.nrd.
ggplot(data = df,
aes(x = X206.204,
y = X207.204))+
stat_density_2d(geom = "polygon",
n = 800,
bins = 20,
aes(fill = ..level..,
alpha = ..level..),
h = c(0.2, 0.2)) + #change this to your liking
geom_point(color="red")+
labs( x = expression({}^206*"Pb/"*{}^204*"Pb"),
y = expression({}^207*"Pb/"*{}^204*"Pb")) +
theme_bw() +
theme(panel.grid = element_blank(),
axis.text.x = element_text(margin = margin(t = 5, unit = "pt")))+
theme(axis.text = element_text(size = 18))+
theme(axis.title = element_text(size = 22))+
theme(legend.position = "none")+
scale_fill_gradient(low = "cyan1",high = "cyan4")+
scale_x_continuous(breaks = seq(17, 19.6, by = 0.2))+
scale_y_continuous(breaks = seq(15.4, 15.9, by = 0.1))+
coord_cartesian(xlim = c(17, 19.6),ylim = c(15.4, 15.9))
по умолчанию оценивается с помощью функции bandwidth.nrd
от MASS
library(MASS)
bandwidth.nrd(c(df$X206.204))
#output
0.6671485
bandwidth.nrd(c(df$X207.204))
#output
0.2143042
а также:
ggplot(data = df,
aes(x = X206.204,
y = X207.204))+
stat_density_2d(geom = "polygon",
n = 800,
bins = 15,
aes(fill = ..level..,
alpha = ..level..),
h = c(bandwidth.nrd(c(df$X206.204)),
bandwidth.nrd(c(df$X207.204)))) +
geom_point(color="red")+
labs( x = expression({}^206*"Pb/"*{}^204*"Pb"),
y = expression({}^207*"Pb/"*{}^204*"Pb")) +
theme_bw() +
theme(panel.grid = element_blank(),
axis.text.x = element_text(margin = margin(t = 5, unit = "pt")))+
theme(axis.text = element_text(size = 18))+
theme(axis.title = element_text(size = 22))+
theme(legend.position = "none")+
scale_fill_gradient(low = "cyan1",high = "cyan4")+
scale_x_continuous(breaks = seq(17, 19.6, by = 0.2))+
scale_y_continuous(breaks = seq(15.4, 15.9, by = 0.1))+
coord_cartesian(xlim = c(17, 19.6),ylim = c(15.4, 15.9))
выглядит просто сюжет без h
определены: