Advertisement

【DoubletFinder】predicts doublets in single-cell RNA sequencing data

阅读量:
复制代码
    remotes::install_github('chris-mcginnis-ucsf/DoubletFinder')
    

DF共分成四步:

需要输入的参数:

(1)seu ~ fully processed seurat object (i.e., after NormalizeData, FindVariableGenes, ScaleData, RunPCA, and RunTSNE have all been run)

(2)PCs ~ The number of statistically-significant principal components, specified as a range (e.g., PCs = 1:10)

(3)pN ~ This defines the number of generated artificial doublets, expressed as a proportion of the merged real-artificial data. Default is set to 25%

(4)pK ~ This defines the PC neighborhood size used to compute pANN, expressed as a proportion of the merged real-artificial data. No default is set, as pK should be adjusted for each scRNA-seq dataset. Optimal pK values should be estimated using the strategy described below.

(5)nExp ~ This defines the pANN threshold used to make final doublet/singlet predictions. This value can best be estimated from cell loading densities into the 10X/Drop-Seq device, and adjusted according to the estimated proportion of homotypic doublets.

  • INPUT

注意不要用于已经进行多样本整合的scRNA data上,单独做。一个sample拆出来的多个data同样可以用。

二是应当除去低质量cell cluster,认真走seurat质控流程就可以。

复制代码
 library(DoubletFinder)

    
 library(tidyverse)
    
 library(Seurat)
    
 library(patchwork)
    
 rm(list=ls())
    
  
    
 dir.create("DoulbletFinder")
    
 setwd("DoulbletFinder")
    
  
    
 ## 下载数据并预处理
    
 pbmc <- Read10X_h5("pbmc.h5")
    
 pbmc <- CreateSeuratObject(pbmc, min.cells = 3, min.features = 200)
    
 pbmc <- SCTransform(pbmc)
    
 pbmc <- RunPCA(pbmc, verbose = F)
    
 ElbowPlot(pbmc)
    
 pc.num=1:15
    
 pbmc <- RunUMAP(pbmc, dims=pc.num)
    
 pbmc <- FindNeighbors(pbmc, dims = pc.num) %>% FindClusters(resolution = 0.3)
    
  
    
 ## 寻找最优pK值
    
 sweep.res.list <- paramSweep_v3(pbmc, PCs = pc.num, sct = T)
    
 sweep.stats <- summarizeSweep(sweep.res.list, GT = FALSE)  
    
 bcmvn <- find.pK(sweep.stats)
    
 pK_bcmvn <- bcmvn$pK[which.max(bcmvn$BCmetric)] %>% as.character() %>% as.numeric()
    
  
    
 ## 排除不能检出的同源doublets,优化期望的doublets数量
    
 #这里需要查表或者根据泊松分布计算
    
 DoubletRate = ncol(pbmc)*8*1e-6 #更通用
    
 DoubletRate = 0.039                     
    
 homotypic.prop <- modelHomotypic(pbmc$seurat_clusters)   # 最好提供celltype
    
 nExp_poi <- round(DoubletRate*ncol(pbmc)) 
    
 nExp_poi.adj <- round(nExp_poi*(1-homotypic.prop))
    
  
    
 ## 使用确定好的参数鉴定doublets
    
 pbmc <- doubletFinder_v3(pbmc, PCs = pc.num, pN = 0.25, pK = pK_bcmvn, 
    
                      nExp = nExp_poi.adj, reuse.pANN = F, sct = T)
    
                      
    
 ## 结果展示,分类结果在pbmc@meta.data中
    
 DimPlot(pbmc2, reduction = "umap", group.by = "DF.classifications_0.25_0.3_171")
    
    
    
    
复制代码
 keloid@meta.data[,"DF_hi.lo"] <- keloid@meta.data$DF.classifications_0.25_0.01_222

    
  
    
 keloid@meta.data$DF_hi.lo[which(keloid@meta.data$DF_hi.lo == "Doublet" & keloid@meta.data$DF.classifications_0.25_0.01_198 == "Singlet")] <- "Doublet-Low Confidience"
    
  
    
 keloid@meta.data$DF_hi.lo[which(keloid@meta.data$DF_hi.lo == "Doublet")] <- "Doublet-High Confidience"
    
  
    
 table(keloid@meta.data$DF_hi.lo)
    
 # Doublet-High Confidience  Doublet-Low Confidience                  Singlet 
    
 # 198                       24                     5041 
    
  
    
 DimPlot(keloid, reduction = "umap", group.by ="DF_hi.lo",cols=c("black","red","gold"))
    
    
    
    


全部评论 (0)

还没有任何评论哟~