微生物多样性分类柱状图按照分组进行排序脚本升级;tax_bar_plot.r

微生物多样性分类柱状图按照分组进行排序脚本升级;tax_bar_plot.r





#!/usr/bin/Rscript
############################################################################
#北京组学生物科技有限公司
#author huangls
#date 2020.07.29
#version 1.1  增加样本按照fastmap的分组排序功能
#学习R课程:
#R 语言入门与基础绘图:
# https://bdtcd.xetslk.com/s/2G8tHr
#R 语言绘图ggplot2等等:
# https://bdtcd.xetslk.com/s/2G8tHr
###############################################################################################

library("argparse")
parser <- ArgumentParser(description='plot otu tax bar plot')
parser$add_argument( "-i", "--input", type="character",required=T,
help="input the abuance of tax in each sample,[required]",
metavar="filepath")
parser$add_argument( "-a", "--xlab", type="character",required=F, default="sample",
help="input xlab [default sample]",
metavar="xlab")
parser$add_argument( "-t", "--title", type="character",required=F, default="demo",
help="input tittle name [default demo]",
metavar="xlab")
parser$add_argument( "-b", "--ylab", type="character",required=F, default="Relative abundance",
help="input ylab [default Relative abundance]",
metavar="ylab")
parser$add_argument( "-f", "--fastmap", type="character",required=F, default=NULL,
help="input fastmap file [default NULL]",
metavar="path")
parser$add_argument( "-g", "--group", type="character",required=F, nargs='+',default=NULL,
help="name of group order [default NULL]",
metavar="group")
parser$add_argument( "-o", "--outdir", type="character", default=getwd(),
help="output file directory [default cwd]",
metavar="path")
parser$add_argument("-n", "--name", type="character", default="demo",
help="out file name prefix [default demo]",
metavar="prefix")
parser$add_argument( "-H", "--height", type="double", default=7,
help="the height of pic   inches  [default 7]",
metavar="number")
parser$add_argument("-W", "--width", type="double", default=12,
help="the width of pic   inches [default 12]",
metavar="number")
opt <- parser$parse_args()
#parser$print_help()
#set some reasonable defaults for the options that are needed,
#but were not specified.
if( !file.exists(opt$outdir) ){
if( !dir.create(opt$outdir, showWarnings = FALSE, recursive = TRUE) ){
stop(paste("dir.create failed: outdir=",opt$outdir,sep=""))
}
}

require(ggplot2)
library(reshape2)
require(RColorBrewer)
library("stringr")



#-----------------------------------------------------------------
# reading data
#-----------------------------------------------------------------
# reading data
#注意跳过第一行:skip=1
df<-read.table(opt$input,sep="\t",skip=1,header = TRUE,check.names=FALSE,comment.char="@")
#处理第一列物种分类名字,没有物种名字的全部归为Other
spe.names<-str_split(df$`#OTU ID`,";",simplify=T)
spe.name<-str_replace(spe.names[,ncol(spe.names)],"^\\w__","")
spe.name[spe.name=="" | spe.name=="Other" ]<-"Unknown"
df$`#OTU ID`<-spe.name

mydf<-df[df$`#OTU ID`!="Unknown",]
if(nrow(mydf)==nrow(df)){}else{

unknown<-data.frame(c("Unknown",colSums(data.frame(df[df$`#OTU ID`=="Unknown",2:ncol(df)],check.names=F))),check.names=F)

rownames(unknown)[1]=colnames(df)[1]
df=rbind(mydf,t(unknown))
}
#排序
if(ncol(df) >2){
odf=df[order(rowSums(sapply(df[,2:round(ncol(df)),],as.double)),decreasing=T),]
}else{
odf=df[order(df[,2],decreasing=T),]
}
#合并计算Other,最多绘制15个物种的分类柱状图
if(nrow(df)>15){
xdf=odf[1:15,]
print(class(odf[16:nrow(odf),2:ncol(odf)]))
other=data.frame(c("Other",colSums(data.frame(sapply(odf[16:nrow(odf),2:ncol(odf)],as.double),check.names=F))),check.names=F)
#print(other)
rownames(other)[1]=colnames(df)[1]
#cat(other)
#print(xdf)
xdf=rbind(xdf,t(other))
}else{
xdf=df
}
myorder<-xdf[,1]
myorder<-setdiff(myorder,c("Other"))
#myorder
#绘图数据准备
mdf<- melt(xdf,id.vars=colnames(xdf)[1])
colnames(mdf)<-c("OTU","sample","value")
#指定柱状图中分类顺序
if ("Other" %in%mdf$OTU ){
mdf$OTU<-factor(mdf$OTU,levels=c("Other",rev(myorder)),ordered=T)
}else{
mdf$OTU<-factor(mdf$OTU,levels=c(rev(myorder)),ordered=T)
}
if(nlevels(as.factor(mdf$OTU))<=9){
mycol=c("grey",rev(brewer.pal(8,"Set1")[1:(nlevels(as.factor(mdf$OTU))-1)]))
}else{
mycol=c("grey",brewer.pal(7,"Set2"),rev(brewer.pal(8,"Set1")),brewer.pal(8,"Set3"),brewer.pal(12,"Paired"),brewer.pal(8,"Dark2"),brewer.pal(7,"Accent"))
}

#按照分组进行绘图,方便输出
if(!is.null(opt$fastmap) & !is.null(opt$group)){
fastmap<-read.table(opt$fastmap,check.names =F,comment.char ="",sep = "\t",header = T,row.names = 1)
fastmap<-fastmap[unique(mdf$sample),]
for (g in opt$group){
order_index=order(fastmap[,g],rownames(fastmap))
sam_order=rownames(fastmap)[order_index]

mdf$sample=factor(mdf$sample,levels=sam_order,order=T)

p <- ggplot(mdf, aes(sample,as.double(value)))+
geom_bar(aes(fill =OTU) ,stat = "identity")+
scale_fill_manual(values=mycol,name=opt$title)+
labs(y=opt$ylab,x=opt$xlab,fill = opt$title)+
coord_cartesian(ylim = c(0, 1),expand = FALSE)
p<- p +theme_classic()+ theme(  
#panel.grid=element_blank(), 
axis.text.x=element_text(colour="black",angle=45, hjust=1),axis.text.y=element_text(colour="black"))+
theme(legend.key = element_blank())
pdf(file=paste(opt$outdir,"/",opt$name,"_",g,"_ordered.pdf",sep=""), height=opt$height, width=opt$width)
print(p)
dev.off()
png(filename=paste(opt$outdir,"/",opt$name,"_",g,"_ordered.png",sep=""), height=opt$height*300, width=opt$width*300, res=300, units="px")
print(p)
dev.off()
}

}else{
p <- ggplot(mdf, aes(sample,as.double(value)))+
geom_bar(aes(fill =OTU) ,stat = "identity")+
scale_fill_manual(values=mycol,name=opt$title)+
labs(y=opt$ylab,x=opt$xlab,fill = opt$title)+
coord_cartesian(ylim = c(0, 1),expand = FALSE)
p<- p +theme_classic()+ theme(  
#panel.grid=element_blank(), 
axis.text.x=element_text(colour="black",angle=45, hjust=1),axis.text.y=element_text(colour="black"))+
theme(legend.key = element_blank())
pdf(file=paste(opt$outdir,"/",opt$name,".pdf",sep=""), height=opt$height, width=opt$width)
print(p)
dev.off()
png(filename=paste(opt$outdir,"/",opt$name,".png",sep=""), height=opt$height*300, width=opt$width*300, res=300, units="px")
print(p)
dev.off()
}





##绘图数据准备开始绘图
#p <- ggplot(mdf, aes(sample,as.double(value)))+
#geom_bar(aes(fill =OTU) ,stat = "identity")+
#scale_fill_manual(values=mycol,name=opt$title)+
#labs(y=opt$ylab,x=opt$xlab,fill = opt$title)+
#coord_cartesian(ylim = c(0, 1),expand = FALSE)
#p<- p +theme_classic()+ theme(  
##panel.grid=element_blank(), 
#axis.text.x=element_text(colour="black",angle=45, hjust=1),axis.text.y=element_text(colour="black"))+
#theme(legend.key = element_blank())
#
#
#pdf(file=paste(opt$outdir,"/",opt$name,".pdf",sep=""), height=opt$height, width=opt$width)
#print(p)
#dev.off()
#png(filename=paste(opt$outdir,"/",opt$name,".png",sep=""), height=opt$height*300, width=opt$width*300, res=300, units="px")
#print(p)
#dev.off()






  • 发表于 2022-07-27 11:11
  • 阅读 ( 1722 )
  • 分类:宏基因组

0 条评论

请先 登录 后评论
omicsgene
omicsgene

生物信息

700 篇文章

作家榜 »

  1. omicsgene 700 文章
  2. 安生水 348 文章
  3. Daitoue 167 文章
  4. 生物女学霸 120 文章
  5. xun 82 文章
  6. 红橙子 78 文章
  7. rzx 75 文章
  8. CORNERSTONE 72 文章