Advertisement

linux批量下载基因组、基因序列、基因组注释信息

阅读量:

NCBI 虽然提供了专门的下载方式,但使用起来比较复杂;如果想了解具体做法,可以私聊我。

我这里所提供的是我改过的,可以更加方便,快捷;话不多说,批量下载开始!

需要先用 conda 安装 NCBI 的 datasets 命令行工具(例如:conda install -c conda-forge ncbi-datasets-cli),然后就可以使用了。

复制代码
 import argparse

    
 import zipfile
    
 import glob
    
 from pathlib import Path
    
 import subprocess
    
 import shutil
    
  
    
 # author = Zhou.wangyi
    
 #email = 772967843@qq.com
    
  
    
 def download(inputfile, outputfile, include='gff3'):
     """Download NCBI genome packages and unpack their GFF annotations.

     Each non-empty line of ``inputfile`` is treated as one assembly
     accession.  For every distinct accession the ``datasets`` command-line
     tool is invoked to fetch ``<accession>.zip`` into the output directory.
     Afterwards every ``*.zip`` found in the output directory is scanned and,
     when it contains ``ncbi_dataset/data/<name>/genomic.gff``, that member
     is written to ``<output directory>/<name>.gff``.

     Args:
         inputfile: Path to a text file with one accession per line.
         outputfile: Directory that receives the zip archives and the
             extracted ``.gff`` files (created if it does not exist).
         include: Value passed to ``datasets --include``.  Only the GFF
             annotation is unpacked; other data types stay inside the zip.

     Raises:
         FileNotFoundError: If ``inputfile`` does not exist, or if the
             ``datasets`` executable cannot be found when a download is
             attempted.
     """
     input_path = Path(inputfile).resolve()
     output_dir = Path(outputfile).resolve()
     output_dir.mkdir(parents=True, exist_ok=True)

     seen_accessions = set()
     with open(input_path, 'r') as handle:
         for raw_line in handle:
             accession = raw_line.strip()
             # Skip blank lines and accessions that were already requested.
             if not accession or accession in seen_accessions:
                 continue
             seen_accessions.add(accession)

             # Argument list instead of a shell string: the accession text
             # is never interpreted by a shell.
             completed = subprocess.run([
                 'datasets', 'download', 'genome', 'accession', accession,
                 '--include', include,
                 '--filename', str(output_dir / f'{accession}.zip'),
             ])
             if completed.returncode != 0:
                 # Best effort: report the failure and keep going.
                 print(f'warning: download failed for {accession} '
                       f'(exit code {completed.returncode})')

     for archive in sorted(output_dir.glob('*.zip')):
         prefix = archive.stem
         member = f'ncbi_dataset/data/{prefix}/genomic.gff'
         try:
             with zipfile.ZipFile(archive, 'r') as zip_file:
                 # Archives without a GFF (other --include types, unrelated
                 # zip files) are left untouched instead of raising KeyError.
                 if member not in zip_file.namelist():
                     continue
                 # Stream the member straight to its final location; no
                 # temporary directory that would later need rmtree().
                 target = output_dir / f'{prefix}.gff'
                 with zip_file.open(member) as src, open(target, 'wb') as dst:
                     shutil.copyfileobj(src, dst)
         except zipfile.BadZipFile:
             # A truncated or failed download must not abort the whole batch.
             print(f'warning: {archive} is not a valid zip archive, skipped')
    
  
    
 def main():
     """Parse the command-line options and hand them to ``download``.

     Options: ``-i/--inputfile`` (required), ``-o/--outputfile`` (required)
     and ``-include/--include`` (defaults to ``'gff3'``).
     """
     cli = argparse.ArgumentParser(description='Download genomic data from NCBI')

     # (flags, extra keyword arguments) for every string-valued option.
     option_specs = (
         (('-i', '--inputfile'),
          {'required': True,
           'help': 'Input file containing NCBI accession numbers'}),
         (('-o', '--outputfile'),
          {'required': True,
           'help': 'Output directory for downloaded data'}),
         (('-include', '--include'),
          {'default': 'gff3',
           'help': 'Type of data to download (gff3, fasta, etc.)'}),
     )
     for flags, settings in option_specs:
         cli.add_argument(*flags, type=str, **settings)

     opts = cli.parse_args()
     download(opts.inputfile, opts.outputfile, opts.include)
    
  
    
 # Run the command-line entry point only when this file is executed as a
 # script, not when it is imported as a module.
 if __name__ == '__main__':
    
     main()
    
    
    
    

运行方式:python3 down.py -i list -o . 其中,-i 参数为记录待下载 accession 号的列表文件(每行一个),-o 参数为输出目录。

全部评论 (0)

还没有任何评论哟~