写在前面
随着研究的逐渐深入,我们对绘图的要求越来越高,各种之前使用的较少的图形如今追求热度和新颖程度,都开始逐渐在大文章中显现。如下图。
这是最近刚发表于Nature Ecology & Evolution中的图1b。如何绘制呢?
Thorsten Thiergart, Paloma Durán, Thomas Ellis, Nathan Vannier, Ruben Garrido-Oter, Eric Kemen, Fabrice Roux, Carlos Alonso-Blanco, Jon Ågren, Paul Schulze-Lefert & Stéphane Hacquard. Root microbiota assembly and adaptive differentiation among European Arabidopsis populations. Nature Ecology & Evolution4, 122-131, doi:10.1038/s41559-019-1063-3 (2020).
这次的聚类加物种丰度展示让我们学习一波。之前推出了用R语言的plot绘制的教程。
R语言绘制带聚类树的堆叠柱形图
但修改细节仍比较麻烦。今天更新基于ggplot2系统的教程。
加载依赖关系
这里的ggtree需要使用19年7月以后的版本,因为这以后的版本才支持将聚类结果转化为树结构。
如果你的Bioconductor版本较旧,可能一直会安装旧版ggtree。升新方法如下:
<code>## 先卸载先前的安装控制程序
remove.packages(c("BiocInstaller"
,"BiocManager"
,"BiocVersion"
))
## 再安装新版程序
install.packages("BiocManager"
)
BiocManager::install(update=TRUE
, ask=FALSE
)/<code>
<code>library("ggplot2"
)
library("ggdendro"
)
# library(remotes)
library(phyloseq)
library(tidyverse)
library(ggtree)
library( ggstance)
# library(amplicon)
vegan_otu =function
(physeq)
{
OTU = otu_table(physeq)if
(taxa_are_rows(OTU)){
OTU = t(OTU)
}return
(as(OTU,"matrix"
))
}
vegan_taxfunction
(physeq)
{
taxreturn
(as(tax,"matrix"
))
}/<code>
导入数据
<code># 从R数据文件中读入
# ps = readRDS("data/ps_liu.rds"
)
# 从文件读取
metadata =read
.table
("http://210.75.224.110/github/EasyAmplicon/data/metadata.tsv"
, header=T, row.names=1
, sep="\t"
, comment.char
=""
, stringsAsFactors = F)
otutab =read
.table
("http://210.75.224.110/github/EasyAmplicon/data/otutab.txt"
, header=T, row.names=1
, sep="\t"
, comment.char
=""
, stringsAsFactors = F)
taxonomy =read
.table
("http://210.75.224.110/github/EasyAmplicon/data/taxonomy.txt"
, header=T, row.names=1
, sep="\t"
, comment.char
=""
, stringsAsFactors = F)
# 提取两个表中共有的ID
# Extract only those IDin
common between the two tables
idx = rownames(otutab) %in
% rownames(taxonomy)
otutab = otutab[idx,]
taxonomy = taxonomy[rownames(otutab),]
# 使用amplicon包内置数据
# data("metadata"
)
# data(otutab)
# 导入phyloseq(ps)对象
ps = phyloseq(sample_data(metadata),otu_table(as.matrix(otutab), taxa_are_rows=TRUE), tax_table(as.matrix(taxonomy)))/<code>
ggtree绘制聚类树
<code># 样本间距离类型:Bray-Curtis
dist ="bray"
# phyloseq(ps)对象标准化
ps1_rela = transform_sample_counts(ps, function(x) x / sum(x) )
# 导出OTU表
otu =as
.data
.frame(t(vegan_otu(ps1_rela)))
# 预览
otu[1
:3
,1
:3
]
#计算距离矩阵
unif = phyloseq::distance(ps1_rela , method=dist)
# 聚类树,method默认为complete
hc "complete")
# 对树分组
clus 3)
# 提取树中分组的标签和分组编号
d =data
.frame(label = names(clus),
member = factor(clus))
# 提取样本元数据
map =as
.data
.frame(sample_data(ps))
# 合并树信息到样本元数据
dd = merge(d,map,by
="row.names"
,all = F)
row.names(dd) = dd$Row.names
dd$Row.names =
dd[1
:3
,1
:3
]
# ggtree绘图 #----
p = ggtree(hc) %geom_tippoint(size=5
, shape=21
, aes(fill=factor(Group), x=x)) +
# geom_tiplab(aes(label=Group), size=3
, hjust=.5
) +
geom_tiplab(aes(color = Group,x=x*1.2
), hjust=1
)
# theme_dendrogram(plot.margin=margin(6
,6
,80
,6
))# 这是聚类图形的layout
p/<code>
物种组成数据
<code># 指定物种组成的选项
i = ps # 指定输入数据
j ="Phylum"
# 使用门水平绘制丰度图表rep
=6
# 重复数量是6
个
Top =10
# 提取丰度前十的物种注释
tran = TRUE # 转化为相对丰度值/<code>
<code># 按照分类学门(j)合并
psdata = i %>% tax_glom(taxrank = j)
# 转化丰度值if
(tran ==TRUE
) {
psdata = psdata%>% transform_sample_counts(function(x) {x/sum(x)} )
}
#--提取otu和物种注释表格
otu = otu_table(psdata)
tax = tax_table(psdata)
tax[1
:3
,1
:7
]
#--按照指定的Top数量进行筛选与合并for
(iin
1
:dim(tax)[1
]) {if
(row.names(tax)[i] %in
% names(sort(rowSums(otu), decreasing =TRUE
)[1
:Top])) {
tax[i,j] =tax[i,j]
}else
{
tax[i,j]="Other"
}
}
tax_table(psdata)= tax
##转化为表格
Taxonomies % psmelt
/<code>
整理成facet需要的格式
这里的格式也很简单,就是需要一列“id”,这里我们将样本名修改为id,即可
<code>colnames(Taxonomies)[1] =
"id"
Taxonomies
$OTU
=colnames(Taxonomies)[1] =
"id"
/<code>
保证颜色填充独立性
因为我们颜色填充有好几种方式,所以需要对每种颜色填充保重独立性,使用ggnewscale。
<code>library(ggnewscale)
p p/<code>
分面组合树和柱图
<code>p3<
-
facet_plot
(p
,panel
='Stacked Barplot'
,data
=Taxonomies,
geom
=geom_barh,mapping
=aes(x
=Abundance,
fill
=as.factor(Phylum)),color
="black"
,stat
='identity'
)p3
/<code>
修改配色
<code>colbar
select(Taxonomies, one_of(j))))[1
]
colors = colorRampPalette(c("#CBD588"
,"#599861"
,"orange"
,"#DA5724"
,"#508578"
,"#CD9BCD"
,"#AD6F3B"
,"#673770"
,"#D14285"
,"#652926"
,"#C84248"
,"#8569D5"
,"#5E738F"
,"#D1A33D"
,"#8A7C64"
,"black"
))(colbar)
p3 + scale_fill_manual(values = colors)/<code>
ggtree调整布局
修改layout,设置中空等。
<code>p = ggtree(hc,layout="fan"
, branch.length ="none"
, ladderize = FALSE) %<
+%
dd
+geom_tippoint
(size
=5,
shape
=21,
aes
(fill
=factor(Group),
x
=x))
+geom_tiplab
(aes
(color
=Group,x
=x*1.2),
hjust
=1)
p
=p
+xlim
(-4
,NA
)p
/<code>
添加样本其他信息
如添加样品测序量柱状图、数值标签
<code>p
p/<code>
<code>head(dd)
dd$sequencenum = sample_sums(ps)
dd
data = data.frame(id = row.names(dd),sequencenum = dd$sequencenum )
head(data)
# p3
#---------添加序列
p2<
-
facet_plot
(p
,panel
='Number Barplot'
,data
=dd
,geom
=geom_barh,mapping
=aes(x
=sequencenum
,fill
=Group),stat
='identity'
)p2
/<code>
<code>facet_plot(p2, panel='Stacked Barplot'
,data
=dd, geom=geom_text, mapping=aes(x=sequencenum+20
, label=sequencenum))/<code>
树+柱+堆叠图组合
<code>p3<
-
facet_plot
(p2
,panel
='Abundance Barplot'
,data
=Taxonomies,
geom
=geom_barh,mapping
=aes(x
=Abundance,
fill
=as.factor(Phylum)),color
="black"
,stat
='identity'
)p3
/<code>
撰文:五谷杂粮
责编:刘永鑫 中科院遗传发育所
10000+:菌群分析 宝宝与猫狗 梅毒狂想曲 提DNA发Nature Cell专刊 肠道指挥大脑
系列教程:微生物组入门 Biostar 微生物组 宏基因组
专业技能:学术图表 高分文章 生信宝典 不可或缺的人