Advertisement

ChIP-Seq分析流程

阅读量:

1.FastqC质控

复制代码
 fastqc *fq.gz -o raw -t 4

    
 multiqc /. 20230204_fastqc.zip

2.Trim_galore去接头(×)

复制代码
 paste <(ls *1.fq.gz) <(ls *2.fq.gz) >config

    
 cat config |while read id;do arr=$id;fq1=${arr[0]};fq2=${arr[1]};nohup trim_galore --gzip --paired $fq1 $fq2 -o ./raw/trim.log & done

2.Fastp(√)

复制代码
 paste <(ls *1.clean.fq.gz) <(ls *2.clean.fq.gz) >config

    
  
    
 cat config |while read id;
    
 do arr=($id);fq1=${arr[0]};fq2=${arr[1]};
    
 fastp -i $fq1 -I $fq2 -o ./fastp/$fq1 -O ./fastp/$fq2 \
    
 -f 15 \ # 双端测序修剪1端上游15bp
    
 -F 15 \ # 修剪2端上游15bp
    
 -q 20 \ # reads最低质量,一般在20以上即可
    
 -l 75 \ # 修剪后reads长度在75bp以上
    
 -u 20 \ # 限制低质量碱基的百分比,默认40,表示40%
    
 -w 20 # 线程数
    
 done
    
  
    
 # 再次fastqc质控
    
 fastqc -t 20 -o fastp/fastqc fastp/*.gz
    
 multiqc -o fastqc ./fastp

3.Bowtie2比对

复制代码
 bowtie2_mm10_idx=/public/ojsys/management/wbhou/reference/mouse/genome/mm10/mm10-bowtie2

    
  
    
 paste <(ls *.clean.fq.gz | cut -d "_" -f 1 | sort | uniq) <(ls *1.clean.fq.gz) <(ls *2.clean.fq.gz) >config
    
  
    
 cat config | while read id;do arr=($id);sample=${arr[0]};fq1=${arr[1]};fq2=${arr[2]};
    
 bowtie2 -p 100 -1 $fq1 -2 $fq2 -x bowtie2_mm10_idx -S ../bowtie2/sam/$sample.sam

4. sam转bam,排序 ---Samtools

复制代码
 samtools view -@ 100 -bhS ../bowtie2/sam/$sample.sam -o ../bowtie2/bam/$sample.bam

    
 samtools sort -@ 100 -o ../bowtie2/bam/$sample_sorted.bam ../bowtie2/bam/$sample.bam
    
 done

5.Picard去重

复制代码
 picard MarkDuplicates \

    
 VALIDATION_STRINGENCY=SILENT \
    
 REMOVE_DUPLICATES=true \ # 不将重复写入输出文件,而设置适当的标志来写入它们,默认值为false
    
 READ_NAME_REGEX=null \ # 正则表达式,可用于解析传入的SAM文件中的读名称。读取的名称被解析为提取三个变量:tile/region、x坐标和y坐标。这些值用于估计光学复制率,以便给出更准确的库大小估计。将此选项设置为null可禁用光学重复检测
    
 ASSUME_SORTED=true \ # 如果为true,假设输入文件按坐标排序
    
 I=../bowtie2/bam/$sample_sorted.bam \ # input
    
 O=../bowtie2/bam/$sample_sorted_rmdup.bam \ # output
    
 M=../bowtie2/bam/$sample_dup_metrics.txt # 相当于log文件
Tips:如需过滤二次比对得到unique map reads可用下列代码
复制代码
    samtools view -bhS -@ 8 -F 1796 -q 20 ../bowtie2/bam/$sample_sorted_rmdup.bam | samtools sort -@ 12 > ../bowtie2/bam/$sample_sort.bam

6.为bam文件建立索引

复制代码
 samtools index -@ 100 ../bowtie2/bam/$sample_sorted_rmdup.bam

    
 samtools index -@ 100 ../bowtie2/bam/$sample_sort.bam

7.相关性分析(bam文件),根据相关性分析结果可选择是否merge

复制代码
 multiBamSummary bins --bamfiles ./*sort.bam --binSize 5000 --numberOfProcessors 100 --outRawCounts ./correlation/results.txt -o ./correlation/results.npz

    
 plotCorrelation -in ./correlation/results.npz --corMethod spearman --skipZeros --plotTitle "Sperman Correlation of Read Counts" --whatToPlot heatmap --colorMap RdYlBu --plotNumbers -o ./correlation/heatmap_SpearmanCorr.pdf --outFileCorMatrix ./correlation/SpearmanCorr_readCounts.tab
Tips:merge相同处理组不同重复的bam文件
复制代码
    samtools merge ./bw-merge/CUT-MI-1000-1-2.bam CUT-MI-1000-1_sorted_rmdup.bam CUT-MI-1000-1_sorted_rmdup.bam

7. bam转bw---deeptols

复制代码
 paste <(ls *rmdup.bam | cut -d "_" -f1 | sort | uniq) >config
    
 cat config | while read id;
    
 do
    
 bamCoverage --bam ${id}_sorted_rmdup.bam -o ./bw/${id}.bw --binSize 50 --normalizeUsing RPKM  --effectiveGenomeSize 2652783500
    
 done

8. 根据mm10.bed文件绘制基因上H4K16ac信号的富集图

复制代码
 computeMatrix reference-point -S CUT-GV-1000-100.bw CUT-GV-1000-10.bw CUT-GV-500-100.bw CUT-GV-500-10.bw CUT-GV-Ctrl-1.bw -R mm10.bed --referencePoint TSS -a 10000 -b 10000 --numberOfProcessors 100 -out ./signal2/GV-H4K16ac.gz

    
 # plot:
    
 plotProfile -m ./signal2/GV-H4K16ac.gz --perGroup -out ./signal2/GV-H4K16ac.pdf --numPlotsPerRow 8 --plotTitle GV-H4K16ac
    
 # heatmap+plot
    
 plotHeatmap -m upMI-H4K16ac-merge.gz -out heatmap-MI-UP-K16-merge.svg --heatmapHeight 12 --refPointLabel TSS  --zMin 1 --zMax 4 --colorMap OrRd --dpi 750 --regionsLabel genes --plotTitle MI-UP-Genes-H4K16ac-merge
Tips:mm10.bed是所有基因list,DESeq得到差异基因后可根据基因名获得感兴趣基因的bed文件
复制代码
 cat up_MIgene.txt | while read line;
    
 do
    
 grep $line mm10.bed >> up-GV.bed
    
 done

9. Callpeaks---Macs2

复制代码
    macs2 callpeak -t H3K4me3-rep1-sorted_rmp.bam  -c H3K4me3-input-sorted_rmp.bam -f BAM -g mm --outdir ./callpeaks/ -n H3K4me3-1.bam -B -q 0.01
Tips:合并在一起操作
复制代码
 samtools merge chip.merge.bam morula-H3K4me3-rep1-sorted_rmp.bam morula-H3K4me3-rep2-sorted_rmp.bam

    
 macs2 callpeak -t chip.merge.bam -c morula-input-sorted_rmp.bam -f BAM -g mm --outdir ./ -n morula-H3K4me3 -B -q 0.01

9.Chipseeker注释

复制代码
 BiocManager::install("ChIPseeker")

    
 library(CHIPseeker)
    
 BiocManager::install("ChIPseeker")
    
 BiocManager::install('TxDb.Hsapiens.UCSC.hg19.knownGene', ask=F,suppressUpdates=T)
    
 BiocManager::install('TxDb.Mmusculus.UCSC.mm10.knownGene', ask=F,suppressUpdates=T)
    
 library(TxDb.Mmusculus.UCSC.mm10.knownGene)
    
 H3K4me3_1_peak <- readPeakFile("D:/Rstudio/fenxishuju/ChIP-seq/2023_7_15/macs2_callpeak/morula-H3K4me3-1_summits.bed")
    
 require(TxDb.Mmusculus.UCSC.mm10.knownGene)
    
 txdb <- TxDb.Mmusculus.UCSC.mm10.knownGene
    
 H3K4me3_1_peakAnno <- annotatePeak(H3K4me3_1_peak, tssRegion = c(-1000, 1000), TxDb = txdb)
    
 setwd("D:/Rstudio/fenxishuju/ChIP-seq/2023_7_15/peakanno/")
    
 write.table(H3K4me3_1_peakAnno, file = "H3K4me3_1_peak.txt",sep = '\t', quote = FALSE, row.names = FALSE)
    
 plotAnnoPie(H3K4me3_1_peakAnno)

全部评论 (0)

还没有任何评论哟~