# Report the R version used for this walkthrough.
R.version.string
# Console output shown in the original:
# [1] "R version 4.0.2 (2020-06-22)"

# Install adegenet (with its dependencies) only when it is not already
# available. The package name is "adegenet"; the misspelling "adegent"
# makes install.packages() fail.
if (!requireNamespace("adegenet", quietly = TRUE)) {
  install.packages("adegenet", dependencies = TRUE)
}

# Report the installed adegenet version.
packageDescription("adegenet", fields = "Version")
# Console output shown in the original:
# [1] "2.1.3"
# adegenet supplies fasta2genlight(), position() and the snpposi.* helpers;
# ggplot2 supplies theme_bw().
library(adegenet)
library(ggplot2)

# Locate the example FASTA file shipped inside the adegenet package.
dfpath <- system.file("files/usflu.fasta", package = "adegenet")
# system.file() returns "" when the file cannot be found; stop early then.
stopifnot(nzchar(dfpath))
dfpath

# Read the FASTA file into `flu` (chunked reading, no parallel processing).
flu <- fasta2genlight(dfpath, chunkSize = 10, parallel = FALSE)

# Plot the SNP positions, without and with codon information.
# The `+` has to END the line: a line that starts with `+ theme_bw()` is
# parsed as a separate expression, so the theme is never applied and R
# raises "invalid argument to unary operator".
snpposi.plot(position(flu), genome.size = 1700, codon = FALSE) +
  theme_bw()
snpposi.plot(position(flu), genome.size = 1700, codon = TRUE) +
  theme_bw()

# Run adegenet's SNP position test on the same positions.
snpposi.test(position(flu), genome.size = 1700)
# 1、计算 PCA 主成分得分(scores)
# Run glPca() on the `flu` object, retaining three axes (nf = 3).
df.pca <- glPca(flu, nf = 3)

# Turn the `scores` element of the result into a data frame and display it.
df.pca.scores <- as.data.frame(df.pca[["scores"]])
print(df.pca.scores)
# 2、计算方差解释率
# Standard deviation of each retained principal component.
# Computed from the score matrix itself rather than from df.pca.scores, so
# the result is unaffected if a non-numeric column (e.g. `population`) has
# already been added to the data frame when this section is re-run.
sdev <- apply(df.pca$scores, 2, sd)
sdev

# Variance share of each component RELATIVE TO THE RETAINED COMPONENTS ONLY:
# these percentages always sum to 100 across the nf axes that were kept, so
# they overstate how much of the total variance each axis explains.
jsl <- sdev^2 / sum(sdev^2) * 100
jsl

# Variance share relative to the TOTAL variance, using all eigenvalues
# returned by glPca() in `df.pca$eig`. Use these values for axis labels or
# when reporting "percentage of variance explained".
jsl.total <- df.pca$eig[seq_along(sdev)] / sum(df.pca$eig) * 100
names(jsl.total) <- names(sdev)
jsl.total
#       PC1       PC2       PC3
# 67.789393 23.930861  8.279746
# 3. Assign a demo population label to every sample from its PC scores:
#    PC1 > 0 -> "pop1"; otherwise PC2 > 1 -> "pop2"; everything else "pop3".
df.pca.scores$population <- with(
  df.pca.scores,
  ifelse(PC1 > 0, "pop1", ifelse(PC2 > 1, "pop2", "pop3"))
)
# 若使用自己的数据,请改为读入每条序列对应的真实分组信息:
# Optional: replace the demo labels with real group labels read from
# "pop.txt" (a single column, one label per sequence).
# The read is guarded so that the script still runs on the bundled demo data,
# where no such file exists.
if (file.exists("pop.txt")) {
  a <- read.table(file = "pop.txt")
  # `$<-` on a data frame silently recycles a shorter vector whose length
  # divides the row count, so require exactly one label per sample.
  stopifnot(nrow(a) == nrow(df.pca.scores))
  # Labels that are not among group1..group9 become NA in the factor.
  df.pca.scores$population <- factor(
    a$V1,
    levels = paste0("group", 1:9),
    ordered = FALSE
  )
} else {
  message("pop.txt not found; keeping the existing population labels.")
}
# 4、主成分分析绘图
library(ggplot2)
# Scatter plot of PC1 against PC2, points coloured by population, with a
# semi-transparent filled ellipse (dashed black outline) per population.
ggplot(data = df.pca.scores, mapping = aes(x = PC1, y = PC2)) +
  geom_point(mapping = aes(color = population), size = 2) +
  stat_ellipse(
    mapping = aes(fill = population),
    geom = "polygon",
    alpha = 0.2,
    linetype = "dashed",
    color = "black"
  ) +
  theme_bw()
# 如果觉得我的文章对您有用,请随意打赏。您的支持将鼓励我继续创作!