写在前面
随着研究的逐渐深入,我们对绘图的要求越来越高,各种之前使用的较少的图形如今追求热度和新颖程度,都开始逐渐在大文章中显现。如下图。
这是最近刚发表于Nature Ecology & Evolution中的图1b。如何绘制呢?
Thorsten Thiergart, Paloma Durán, Thomas Ellis, Nathan Vannier, Ruben Garrido-Oter, Eric Kemen, Fabrice Roux, Carlos Alonso-Blanco, Jon Ågren, Paul Schulze-Lefert & Stéphane Hacquard. Root microbiota assembly and adaptive differentiation among European Arabidopsis populations. Nature Ecology & Evolution4, 122-131, doi:10.1038/s41559-019-1063-3 (2020).
这次的聚类加物种丰度展示让我们学习一波。之前推出了用R语言的plot绘制的教程。
R语言绘制带聚类树的堆叠柱形图
但修改细节仍比较麻烦。今天更新基于ggplot2系统的教程。
加载依赖关系
这里的ggtree需要使用19年7月以后的版本,因为这以后的版本才支持将聚类结果转化为树结构。
如果你的Bioconductor版本较旧,可能一直会安装旧版ggtree。升新方法如下:
<code>## 先卸载先前的安装控制程序
remove.packages(c(
"BiocInstaller"
,"BiocManager"
,"BiocVersion"
))## 再安装新版程序
install.packages(
"BiocManager"
)BiocManager::install(update=
TRUE
, ask=FALSE
)/<code><code>library(
"ggplot2"
)library(
"ggdendro"
)# library(remotes)
library(phyloseq)
library(tidyverse)
library(ggtree)
library( ggstance)
# library(amplicon)
vegan_otu =
function
(physeq)
{OTU = otu_table(physeq)
if
(taxa_are_rows(OTU)){OTU = t(OTU)
}
return
(as(OTU,"matrix"
))}
vegan_tax
function
(physeq)
tax
return
(as(tax,"matrix"
))}/<code>
导入数据
<code># 从R数据文件中读入
# ps = readRDS(
"data/ps_liu.rds"
)# 从文件读取
metadata =
read
.table
("http://210.75.224.110/github/EasyAmplicon/data/metadata.tsv"
, header=T, row.names=1
, sep="\t"
, comment.char
=""
, stringsAsFactors = F)otutab =
read
.table
("http://210.75.224.110/github/EasyAmplicon/data/otutab.txt"
, header=T, row.names=1
, sep="\t"
, comment.char
=""
, stringsAsFactors = F)taxonomy =
read
.table
("http://210.75.224.110/github/EasyAmplicon/data/taxonomy.txt"
, header=T, row.names=1
, sep="\t"
, comment.char
=""
, stringsAsFactors = F)# 提取两个表中共有的ID
# Extract only those ID
in
common between the two tablesidx = rownames(otutab) %
in
% rownames(taxonomy)otutab = otutab[idx,]
taxonomy = taxonomy[rownames(otutab),]
# 使用amplicon包内置数据
# data(
"metadata"
)# data(otutab)
# 导入phyloseq(ps)对象
ps = phyloseq(sample_data(metadata),otu_table(as.matrix(otutab), taxa_are_rows=TRUE), tax_table(as.matrix(taxonomy)))/<code>
ggtree绘制聚类树
<code># 样本间距离类型:Bray-Curtis
dist =
"bray"
# phyloseq(ps)对象标准化
ps1_rela = transform_sample_counts(ps, function(x) x / sum(x) )
# 导出OTU表
otu =
as
data
.frame(t(vegan_otu(ps1_rela)))# 预览
otu[
1
:3
,1
:3
]#计算距离矩阵
unif = phyloseq::distance(ps1_rela , method=dist)
# 聚类树,method默认为complete
hc "complete")
# 对树分组
clus 3)
# 提取树中分组的标签和分组编号
d =
data
.frame(label = names(clus),member = factor(clus))
# 提取样本元数据
map =
as
.data
.frame(sample_data(ps))# 合并树信息到样本元数据
dd = merge(d,map,
by
="row.names"
,all = F)row.names(dd) = dd$Row.names
dd$Row.names =
dd[
1
:3
,1
:3
]# ggtree绘图 #----
p = ggtree(hc) %geom_tippoint(size=
5
, shape=21
, aes(fill=factor(Group), x=x)) +# geom_tiplab(aes(label=Group), size=
3
, hjust=.5
) +geom_tiplab(aes(color = Group,x=x*
1.2
), hjust=1
)# theme_dendrogram(plot.margin=margin(
6
,6
,80
,6
))# 这是聚类图形的layoutp/<code>
物种组成数据
<code># 指定物种组成的选项
i = ps # 指定输入数据
j =
"Phylum"
# 使用门水平绘制丰度图表rep
=6
# 重复数量是6
个Top =
10
# 提取丰度前十的物种注释tran = TRUE # 转化为相对丰度值/<code>
<code># 按照分类学门(j)合并
psdata = i %>% tax_glom(taxrank = j)
# 转化丰度值
if
(tran ==TRUE
) {psdata = psdata%>% transform_sample_counts(function(x) {x/sum(x)} )
}
#--提取otu和物种注释表格
otu = otu_table(psdata)
tax = tax_table(psdata)
tax[
1
:3
,1
:7
]#--按照指定的Top数量进行筛选与合并
for
(iin
1
:dim(tax)[1
]) {if
(row.names(tax)[i] %in
% names(sort(rowSums(otu), decreasing =TRUE
)[1
:Top])) {tax[i,j] =tax[i,j]
}
else
{tax[i,j]=
"Other"
}
}
tax_table(psdata)= tax
##转化为表格
Taxonomies % psmelt
/<code>
整理成facet需要的格式
这里的格式也很简单,就是需要一列“id”,这里我们将样本名修改为id,即可
<code>
colnames(Taxonomies)[1] =
"id"
Taxonomies
$OTU
=colnames(Taxonomies)[1] =
"id"
/<code>保证颜色填充独立性
因为我们颜色填充有好几种方式,所以需要对每种颜色填充保重独立性,使用ggnewscale。
<code>library(ggnewscale)
p p/<code>
分面组合树和柱图
<code>p3
<
-
facet_plot
(p
,panel
='Stacked Barplot'
,data
=Taxonomies,
geom
=geom_barh,mapping
=aes(x
=Abundance,
fill
=as.factor(Phylum)),color
="black"
,stat
='identity'
)p3
/<code>修改配色
<code>
colbar
select(Taxonomies, one_of(j))))[1
]colors = colorRampPalette(c(
"#CBD588"
,"#599861"
,"orange"
,"#DA5724"
,"#508578"
,"#CD9BCD"
,"#AD6F3B"
,"#673770"
,"#D14285"
,"#652926"
,"#C84248"
,"#8569D5"
,"#5E738F"
,"#D1A33D"
,"#8A7C64"
,"black"
))(colbar)p3 + scale_fill_manual(values = colors)/<code>
ggtree调整布局
修改layout,设置中空等。
<code>p = ggtree(hc,layout=
"fan"
, branch.length ="none"
, ladderize = FALSE) %<
+%
dd
+geom_tippoint
(size
=5,
shape
=21,
aes
(fill
=factor(Group),
x
=x))
+geom_tiplab
(aes
(color
=Group,x
=x*1.2),
hjust
=1)
p
=p
+xlim
(-4
,NA
)p
/<code>添加样本其他信息
如添加样品测序量柱状图、数值标签
<code>
p
p/<code><code>head(dd)
dd$sequencenum = sample_sums(ps)
dd
data = data.frame(id = row.names(dd),sequencenum = dd$sequencenum )
head(data)
# p3
#---------添加序列
p2
-
facet_plot
(p
,panel
='Number Barplot'
,data
=dd
,geom
=geom_barh,mapping
=aes(x
=sequencenum
,fill
=Group),stat
='identity'
)p2
/<code><code>facet_plot(p2, panel=
'Stacked Barplot'
,data
=dd, geom=geom_text, mapping=aes(x=sequencenum+20
, label=sequencenum))/<code>树+柱+堆叠图组合
<code>p3
<
-
facet_plot
(p2
,panel
='Abundance Barplot'
,data
=Taxonomies,
geom
=geom_barh,mapping
=aes(x
=Abundance,
fill
=as.factor(Phylum)),color
="black"
,stat
='identity'
)p3
/<code>撰文:五谷杂粮
责编:刘永鑫 中科院遗传发育所
10000+:菌群分析 宝宝与猫狗 梅毒狂想曲 提DNA发Nature Cell专刊 肠道指挥大脑
系列教程:微生物组入门 Biostar 微生物组 宏基因组
专业技能:学术图表 高分文章 生信宝典 不可或缺的人