【DoubletFinder】predicts doublets in single-cell RNA sequencing data
remotes::install_github('chris-mcginnis-ucsf/DoubletFinder')
DF共分成四步:


需要输入的参数:
(1)seu ~ fully processed seurat object (i.e., after NormalizeData, FindVariableGenes, ScaleData, RunPCA, and RunTSNE have all been run)
(2)PCs ~ The number of statistically-significant principal components, specified as a range (e.g., PCs = 1:10)
(3)pN ~ This defines the number of generated artificial doublets, expressed as a proportion of the merged real-artificial data. Default is set to 25%
(4)pK ~ This defines the PC neighborhood size used to compute pANN, expressed as a proportion of the merged real-artificial data. No default is set, as pK should be adjusted for each scRNA-seq dataset. Optimal pK values should be estimated using the strategy described below.
(5)nExp ~ This defines the pANN threshold used to make final doublet/singlet predictions. This value can best be estimated from cell loading densities into the 10X/Drop-Seq device, and adjusted according to the estimated proportion of homotypic doublets.
- INPUT
注意不要用于已经进行多样本整合的scRNA data上,单独做。一个sample拆出来的多个data同样可以用。
二是应当除去低质量cell cluster,认真走seurat质控流程就可以。
library(DoubletFinder)
library(tidyverse)
library(Seurat)
library(patchwork)
rm(list=ls())
dir.create("DoulbletFinder")
setwd("DoulbletFinder")
## 下载数据并预处理
pbmc <- Read10X_h5("pbmc.h5")
pbmc <- CreateSeuratObject(pbmc, min.cells = 3, min.features = 200)
pbmc <- SCTransform(pbmc)
pbmc <- RunPCA(pbmc, verbose = F)
ElbowPlot(pbmc)
pc.num=1:15
pbmc <- RunUMAP(pbmc, dims=pc.num)
pbmc <- FindNeighbors(pbmc, dims = pc.num) %>% FindClusters(resolution = 0.3)
## 寻找最优pK值
sweep.res.list <- paramSweep_v3(pbmc, PCs = pc.num, sct = T)
sweep.stats <- summarizeSweep(sweep.res.list, GT = FALSE)
bcmvn <- find.pK(sweep.stats)
pK_bcmvn <- bcmvn$pK[which.max(bcmvn$BCmetric)] %>% as.character() %>% as.numeric()
## 排除不能检出的同源doublets,优化期望的doublets数量
#这里需要查表或者根据泊松分布计算
DoubletRate = ncol(pbmc)*8*1e-6 #更通用
DoubletRate = 0.039
homotypic.prop <- modelHomotypic(pbmc$seurat_clusters) # 最好提供celltype
nExp_poi <- round(DoubletRate*ncol(pbmc))
nExp_poi.adj <- round(nExp_poi*(1-homotypic.prop))
## 使用确定好的参数鉴定doublets
pbmc <- doubletFinder_v3(pbmc, PCs = pc.num, pN = 0.25, pK = pK_bcmvn,
nExp = nExp_poi.adj, reuse.pANN = F, sct = T)
## 结果展示,分类结果在pbmc@meta.data中
DimPlot(pbmc2, reduction = "umap", group.by = "DF.classifications_0.25_0.3_171")
keloid@meta.data[,"DF_hi.lo"] <- keloid@meta.data$DF.classifications_0.25_0.01_222
keloid@meta.data$DF_hi.lo[which(keloid@meta.data$DF_hi.lo == "Doublet" & keloid@meta.data$DF.classifications_0.25_0.01_198 == "Singlet")] <- "Doublet-Low Confidience"
keloid@meta.data$DF_hi.lo[which(keloid@meta.data$DF_hi.lo == "Doublet")] <- "Doublet-High Confidience"
table(keloid@meta.data$DF_hi.lo)
# Doublet-High Confidience Doublet-Low Confidience Singlet
# 198 24 5041
DimPlot(keloid, reduction = "umap", group.by ="DF_hi.lo",cols=c("black","red","gold"))


