Упорядочить строки тепловой карты в фасетном графике ggplot2

У меня проблема с рендерингом многогранной тепловой карты в ggplot2. Идея состоит в том, что у меня есть несколько элементов (это гены в реальной жизни) и несколько экспериментов (F1 и F2 в приведенном ниже примере). Используя эксперимент F1, я могу создать класс элементов / генов на основе их средней экспрессии (высокая,..., средняя, ​​..., низкая). В тепловой карте, созданной с помощью приведенного ниже примера, я хотел бы упорядочить каждый элемент в каждом классе (01, 02, 03, 04) на основе его среднего значения выражения в F1. К сожалению, элементы отображаются в алфавитном порядке. Я был бы очень рад получить несколько советов... Лучший

library(ggplot2)
library(reshape2)

set.seed(123)

# let's create a fake dataset
nb.experiment <- 4
n.row <- 200
n.col <- 5
d <- matrix(round(runif(n.row * n.col),2), nc=n.col) 
colnames(d) <- sprintf("%02d", 1:5)

# These strings will be the row names of each heatmap
# in the subsequent facet plot
elements <- sample(replicate(n.row/2, rawToChar(as.raw(sample(65:90, 6, replace=T)))))


# let's create a data.frame d
d <- data.frame(d, 
                experiment = sort(rep(c("F1","F2"), n.row/2)),
                elements= elements)

# For elements related to experiment F1 
# we artificially produce a gradient of values that will
# create elements with increasing row means
d[d$experiment =="F1",1:5] <- round(sweep(d[d$experiment =="F1",1:5], 
                                          1, 
                                          seq(from=1, 10, length.out = 100), 
                                          "+"), 2)
# For elements related to experiment F2
# we artificially produce a gradient of values that will
# create elements with decreasing row means
d[d$experiment =="F2",1:5] <- round(sweep(d[d$experiment =="F2",1:5], 
                                          1, 
                                          seq(from=10, 1, length.out = 100), 
                                          "+"), 2)

#print(d[d$experiment =="F1",1:5])

# Now we split the dataset by experiments
d.split <- split(d, d$experiment)

# For all experiments, we order elements based on the mean expression signal in 
# F1.
row.means.F1 <- rowMeans(d.split$F1[,1:5])
pos <- order(row.means.F1)

for(s in names(d.split)){
  d.split[[s]] <- d.split[[s]][pos,]
}


# We create several classes of elements based on their 
# mean expression signal in F1.
cuts <- cut(1:nrow(d.split$F1), nb.experiment)
levels(cuts) <- sprintf("%02d", 1:nb.experiment)

for(s in names(d.split)){
  d.split[[s]] <- split(d.split[[s]], cuts)
}



# Data are melt (their is perhaps a better solution...)
# in order to use the ggplot function
dm <- melt(do.call('rbind',lapply(d.split, melt)), id.var=c( "experiment", "elements", "variable", "L1"))
dm <- dm[, -5]
colnames(dm) <- c("experiment","elements", "pos", "rowMeanClass", "exprs")


# Now we plot the data
p <- ggplot(dm, aes(x = pos, y = elements, fill = exprs))
p <- p + geom_raster()                                        
p <- p + facet_wrap(~rowMeanClass +experiment , scales = "free", ncol = 2)
p <- p + theme_bw()
p <- p + theme(text = element_text(size=4))
p <- p + theme(text = element_text(family = "mono", face = "bold"))

ggsave("RPlot_test.jpeg", p)

http://oi57.tinypic.com/2cqfiwm.jpg

1 ответ

Решение

Используя ваши советы, я смог найти решение (которое подразумевает четкое определение порядка уровней для фактора "элементов"). Спасибо hrbrmstr (и всем остальным).

Примечание: я добавил только несколько строк по сравнению с исходным кодом, которые обозначены ниже флагами "Добавлено: начало" и "Добавлено: конец".

library(ggplot2)
library(reshape2)

set.seed(123)

# let's create a fake dataset
nb.experiment <- 4
n.row <- 200
n.col <- 5
d <- matrix(round(runif(n.row * n.col),2), nc=n.col) 
colnames(d) <- sprintf("%02d", 1:5)

# These strings will be the row names of each heatmap
# in the subsequent facet plot
elements <- sample(replicate(n.row/2, rawToChar(as.raw(sample(65:90, 6, replace=T)))))


# let's create a data.frame d
d <- data.frame(d, 
                experiment = sort(rep(c("F1","F2"), n.row/2)),
                elements= elements)

# For elements related to experiment F1 
# we artificially produce a gradient of values that will
# create elements with increasing row means
d[d$experiment =="F1",1:5] <- round(sweep(d[d$experiment =="F1",1:5], 
                                          1, 
                                          seq(from=1, 10, length.out = 100), 
                                          "+"), 2)
# For elements related to experiment F2
# we artificially produce a gradient of values that will
# create elements with decreasing row means
d[d$experiment =="F2",1:5] <- round(sweep(d[d$experiment =="F2",1:5], 
                                          1, 
                                          seq(from=10, 1, length.out = 100), 
                                          "+"), 2)

#print(d[d$experiment =="F1",1:5])

# Now we split the dataset by experiments
d.split <- split(d, d$experiment)

# For all experiments, we order elements based on the mean expression signal in 
# F1.
row.means.F1 <- rowMeans(d.split$F1[,1:5])
pos <- order(row.means.F1)

for(s in names(d.split)){
  d.split[[s]] <- d.split[[s]][pos,]
}

## Added: begin ###
#Get the list of elements in proper order (based on row mean)
mean.order <- as.character(d.split$F1$elements)
## Added: end###

# We create several classes of elements based on their 
# mean expression signal in F1.
cuts <- cut(1:nrow(d.split$F1), nb.experiment)
levels(cuts) <- sprintf("%02d", 1:nb.experiment)

for(s in names(d.split)){
  d.split[[s]] <- split(d.split[[s]], cuts)
}



# Data are melt (their is perhaps a better solution...)
# in order to use the ggplot function
dm <- melt(do.call('rbind',lapply(d.split, melt)), id.var=c( "experiment", "elements", "variable", "L1"))
dm <- dm[, -5]
colnames(dm) <- c("experiment","elements", "pos", "rowMeanClass", "exprs")

## Added: begin###
#Ensure that dm$elements is an ordered factor with levels
# ordered as expected
dm$elements <- factor(dm$elements, levels = mean.order, ordered = TRUE)
## Added: end###

# Now we plot the data
p <- ggplot(dm, aes(x = pos, y = elements, fill = exprs))
p <- p + geom_raster()                                        
p <- p + facet_wrap(~rowMeanClass +experiment , scales = "free", ncol = 2)
p <- p + theme_bw()
p <- p + theme(text = element_text(size=4))
p <- p + theme(text = element_text(family = "mono", face = "bold"))

ggsave("RPlot_test.jpeg", p)
Другие вопросы по тегам