Advertisement

GSEA基因基富集分析

阅读量:

###GSEA富集分析中,不需要提取差异基因,只需要将所有基因的表达情况按照一定顺序排列(一般按log2FD)之后根据对照组和实验组中所有基因在红色(蓝色)富集,从而得出对照组或者实验组所富集到的通路。因此,GSEA分析可以很有效的避免基因过滤从而导致一些基因被筛除掉。

复制代码
    library(DO.db)
    require(DOSE)
    library(clusterProfiler)
    library(AnnotationHub)
    library(readr)
    library(pheatmap)
    library(tidyverse)
    library(DESeq2)
    library(ggplot2)
    library(export)
    library(enrichplot)
    library(Rgraphviz)
    library(org.Hs.eg.db)
复制代码
    #all_entrez.csv的第一列是entrezid,第二列是FoldChange的值。
    
    DEG <- read.csv(file.choose(),sep = "\t",header = TRUE)
    fix(DEG)
    DEG1 <- DEG$X  #第一列向量化
    DEG.gene_symbol = as.character(DEG1)#选择基于列表并且将其向量化

#进行基因名称转换匹配

复制代码
    DEG.entrize_id <- mapIds(x= org.Hs.eg.db,
                         keys = DEG.gene_symbol,
                         keytype = "SYMBOL",
                         column = "ENTREZID")
    luffy <- data.frame(DEG.entrize_id)
    fix(luffy)
    luffy$lgfc = " "
    luffy$lgfc = DEG$log2FoldChange
    write.csv(DEG.entrize_id,file = "GESA-GO_CC.csv")

#去除未匹配到的NA值

复制代码
    DEG.enter_id <- na.omit(DEG.entrize_id)

#gse需要单独做数据格式

复制代码
    geneList <- luffy[,2]
    names(geneList)=as.factor(luffy[,1])
    geneList <- sort(geneList,decreasing = TRUE)
    fix(geneList)

#gseGO进行GSEA分析

复制代码
    ###gseBP <- gseGO(geneList=geneList,ont="BP",OrgDb=maize,keyType = 'ENTREZID',nPerm = 50000,minGSSize = 100,maxGSSize = 6000,pvalueCutoff = 0.05,verbose = FALSE)

############# GSEA CC 模式 start

复制代码
    ego3 <- gseGO(geneList = geneList,OrgDb = org.Hs.eg.db,ont = "BP",nPerm = 1000,minGSSize = 100,maxGSSize = 1000,pvalueCutoff = 0.05,verbose = FALSE)
    write.csv(ego3,file = "GESA-GO_CC.csv")

#显示前4组信息

复制代码
    gseaplot2(ego3,geneSetID = 1:4, ES_geom = "dot",pvalue_table = TRUE)

############# GSEA BP 模式 start

复制代码
    ego2 <- gseGO(geneList = geneList,OrgDb = org.Hs.eg.db,ont = "CC",pvalueCutoff = 0.05,verbose = FALSE)
    write.csv(ego2,file = "GESA-GO_BP.csv")
    #ridgeline plot for expression distribution of GSEA result
    ridgeplot(ego2)
    out_img(filename = "ridgeplot_BP",pic_width = 12,pic_height = 12)

#只显示值最高的一组的信息

复制代码
    gseaplot2(ego2,geneSetID = 1:4, ES_geom = "dot",pvalue_table = TRUE)

############# GSEA MF 模式 start

复制代码
    ego4 <- gseGO(geneList = geneList,OrgDb = org.Hs.eg.db,ont = "MF",pvalueCutoff = 0.05,verbose = FALSE)
    write.csv(ego4,file = "GESA-GO_MF.csv")
    #ridgeline plot for expression distribution of GSEA result
    ridgeplot(ego4)
    out_img(filename = "ridgeplot_MF",pic_width = 12,pic_height = 12)

#显示前4组信息

复制代码
    gseaplot2(ego4,geneSetID = 1:4, ES_geom = "dot",pvalue_table = TRUE)

全部评论 (0)

还没有任何评论哟~