Advertisement

Python生物信息学数据管理——第7、8章课后习题

阅读量:

#新手参考

复制代码
    # Exercise 7.1: add a column holding the mean extinction value of each row.
    table = [
        ['protein', 'ext1', 'ext2', 'ext3'],
        [0.16, 0.038, 0.044, 0.040],
        [0.33, 0.089, 0.095, 0.091],
        [0.66, 0.184, 0.191, 0.191],
        [1.00, 0.280, 0.292, 0.283],
        [1.32, 0.365, 0.367, 0.365],
        [1.66, 0.441, 0.443, 0.444],
    ]
    # Take the header row and extend it with the name of the new column.
    title = table[0]
    title.append('average_ext')
    # Rebuild the table: header first, then every data row followed by the
    # mean of its extinction readings (all columns after the first), rounded
    # to two decimals.
    table = [title] + [
        list(row) + [round(sum(row[1:]) / len(row[1:]), 2)]
        for row in table[1:]
    ]
    # Print the table tab-separated, one row per output line.
    for line in table:
        for item in line:
            print(item, end='\t')
        print('')

7.2 将 7.2.2 节示例代码中出现的表转化为一个嵌套字典的列表

复制代码
    # Exercise 7.2: convert the table into a list of dictionaries, one per
    # data row, keyed by the column names of the header row.
    table = [
        ['protein', 'ext1', 'ext2', 'ext3'],
        [0.16, 0.038, 0.044, 0.040],
        [0.33, 0.089, 0.095, 0.091],
        [0.66, 0.184, 0.191, 0.191],
        [1.00, 0.280, 0.292, 0.283],
        [1.32, 0.365, 0.367, 0.365],
        [1.66, 0.441, 0.443, 0.444],
    ]
    key = table[0]
    # zip() pairs each column name with the value in the same position, so
    # this works for any number of columns instead of exactly four.
    ls = [dict(zip(key, row)) for row in table[1:]]
    print(ls)

7.3 从文本文件读取矩阵

复制代码
    # Exercise 7.3: read a tab-separated matrix from a text file and print it.
    # The context manager closes the file even if reading fails.
    with open('similarity.txt') as matrix_file:
        table = [line.strip().split('\t') for line in matrix_file]
    # strip() removes leading whitespace, so a header row that starts with an
    # empty corner cell loses that cell; put it back so the header lines up
    # with the data rows.
    # NOTE(review): assumes the first line of similarity.txt starts with a
    # tab -- confirm against the actual file.
    if table:  # an empty file has no header row to patch
        table[0].insert(0, '')
    # Print the matrix tab-separated, one row per output line.
    for line in table:
        for item in line:
            print(item, end='\t')
        print('')

7.4 RNA 序列的相似性

复制代码
    # Exercise 7.4: score the similarity of two RNA sequences by summing, for
    # each aligned pair of bases, the matching cell of a similarity matrix
    # read from a tab-separated text file.
    from ast import literal_eval

    # The context manager closes the file even if reading fails.
    with open('similarity.txt') as matrix_file:
        table = [line.strip().split('\t') for line in matrix_file]
    # Restore the empty corner cell that strip() removed from the header row.
    table[0].insert(0, '')
    seq1 = 'AGCAUCUA'
    seq2 = 'ACCGUUCU'
    # NOTE(review): rows and columns are looked up by position in this fixed
    # order -- confirm that similarity.txt lists the bases as A, G, C, U.
    bases = 'AGCU'
    similarity = 0
    for base1, base2 in zip(seq1, seq2):
        # index() raises ValueError on an unknown base; find() would return
        # -1 and silently read from the label row/column instead.
        num1 = bases.index(base1)
        num2 = bases.index(base2)
        # +1 skips the label row and the label column of the matrix.
        # literal_eval parses the numeric text without executing arbitrary
        # code from the file, unlike eval().
        similarity += literal_eval(table[num1 + 1][num2 + 1])
    print(similarity)

7.5 选择性打印表的列和行

复制代码
    # Exercise 7.5: print a selected row and a selected column of a table,
    # once from the nested-list form and once from the list-of-dicts form.
    table = [
        ['protein', 'ext1', 'ext2', 'ext3'],
        [0.16, 0.038, 0.044, 0.040],
        [0.33, 0.089, 0.095, 0.091],
        [0.66, 0.184, 0.191, 0.191],
        [1.00, 0.280, 0.292, 0.283],
        [1.32, 0.365, 0.367, 0.365],
        [1.66, 0.441, 0.443, 0.444],
    ]
    key = table[0]
    # One dict per data row, keyed by the header names (any column count).
    ls = [dict(zip(key, row)) for row in table[1:]]

    # Nested-list form: print one row, then the first column (the header
    # cell 'protein' is part of that column here).
    print(table[1])
    for line in table:
        print(line[0], end=',')
    print('\n')
    # List-of-dicts form: print one row, then the 'protein' column.
    # NOTE: ls has no header entry, so ls[1] is the second data row while
    # table[1] above is the first data row.
    print(ls[1])
    for line in ls:
        print(line['protein'], end=',')

8.1 按第二列对表排序

复制代码
    # Exercise 8.1: sort the data rows of a table by their second column and
    # write the header plus the leading sorted rows to a comma-separated file.
    table = [
        ['protein', 'ext1', 'ext2', 'ext3'],
        [0.16, 0.038, 0.044, 0.040],
        [0.66, 0.184, 0.191, 0.191],
        [0.33, 0.089, 0.095, 0.091],
        [1.32, 0.365, 0.367, 0.365],
        [1.00, 0.280, 0.292, 0.283],
        [1.66, 0.441, 0.443, 0.444],
    ]
    # Keep the header out of the sort; index 1 is the second column.
    table1 = table[1:]
    table_sorted = sorted(table1, key=lambda x: x[1])
    # The context manager flushes and closes the file even if a write fails.
    with open('table_sorted', 'w') as fo:
        fo.write(','.join(table[0]) + '\n')
        # NOTE(review): only the first two sorted rows are written --
        # confirm that truncating the output is intended.
        for line in table_sorted[0:2]:
            # join() needs strings, so convert the numeric cells first.
            fo.write(','.join(str(x) for x in line) + '\n')

8.2 按序列长度排序

复制代码
    # Exercise 8.2: read FASTA records, sort them by sequence length
    # (shortest first) and write them back with 60 residues per line.
    records = []
    # The context manager closes the input even if parsing fails.
    with open('SwissProt.fasta', 'r') as fo:
        for line in fo:
            if line.startswith('>'):
                # A header line starts a new record: [header, sequence].
                records.append([line, ''])
            elif records:
                # Sequence lines are concatenated without their line breaks;
                # lines before the first header are ignored.
                records[-1][1] += line.strip()
    # sorted() is stable, so records of equal length keep their file order.
    ls = sorted(records, key=lambda rec: len(rec[1]))
    with open('result.fasta', 'w') as fi:
        for header, seq in ls:
            fi.write(header)
            # Slice from explicit start offsets so that sequences shorter
            # than 60 residues are written correctly and exact multiples of
            # 60 do not produce a trailing empty line.
            for start in range(0, len(seq), 60):
                fi.write(seq[start:start + 60] + '\n')

8.3 Excel 文件中的排序

复制代码
    # Exercise 8.3: sort a comma-separated table exported from a spreadsheet
    # and write the sorted table to a new file.
    from ast import literal_eval
    from operator import itemgetter

    # Drop the double quotes around fields and split on commas; blank lines
    # are skipped. The context manager closes the input even on errors.
    # NOTE(review): this simple split breaks on fields that themselves
    # contain commas -- confirm PDB.txt has none.
    with open('PDB.txt', 'r') as fo:
        ls = [
            line.replace('"', '').strip().split(',')
            for line in fo
            if line.strip()
        ]
    ls_1 = ls[0]   # header row
    ls_2 = ls[1:]  # data rows
    for line in ls_2:
        # Turn the last two columns into numbers so that they sort
        # numerically; literal_eval parses literals only and, unlike eval(),
        # cannot execute code found in the file.
        line[-1] = literal_eval(line[-1])
        line[-2] = literal_eval(line[-2])
    # Sort by column index 4 first, then 3, 2, 1 and 0 as tie-breakers.
    # NOTE(review): presumably the table has five columns so that indices 4
    # and 3 are the two numeric ones converted above -- confirm.
    ls_sorted = sorted(ls_2, key=itemgetter(4, 3, 2, 1, 0))
    with open('PDB_sorted.txt', 'w') as fi:
        fi.write(','.join(ls_1) + '\n')
        for line in ls_sorted:
            # join() needs strings, so convert the numeric cells back.
            fi.write(','.join(str(x) for x in line) + '\n')

8.4 按字母顺序对 FASTA 序列记录排序

复制代码
    # Exercise 8.4: sort FASTA records alphabetically by the second
    # '|'-separated field of their header line and write them to a new file.
    # NOTE(review): presumably headers look like '>db|ACCESSION|name ...';
    # a header without '|' raises IndexError -- confirm the input format.
    records = []
    # The context manager closes the input even if parsing fails.
    with open('SwissProt.fasta', 'r') as fo:
        for line in fo:
            # A final line without a line break would otherwise be glued to
            # the next header once the records are reordered.
            if not line.endswith('\n'):
                line += '\n'
            if line.startswith('>'):
                # New record: [sort key, header line, sequence text].
                records.append([line.split('|')[1], line, ''])
            elif records:
                # Sequence lines are kept verbatim, line breaks included;
                # lines before the first header are ignored.
                records[-1][2] += line
    ls = sorted(records, key=lambda x: x[0])
    with open('result.fasta', 'w') as fi:
        for item in ls:
            fi.write(item[1])
            fi.write(item[2])

8.5 按 e-value 升序对 BLAST 输出排序

复制代码
    # Exercise 8.5: sort comma-separated BLAST output in ascending order of
    # the value in its second-to-last column and write it tab-separated.
    # NOTE(review): assumes the e-value is the second-to-last column and that
    # the file has no header row -- confirm against BlastOut.csv.
    from operator import itemgetter

    table = []
    # The context manager closes the input even if parsing fails.
    with open("BlastOut.csv") as input_file:
        for line in input_file:
            # Remove the line break before splitting; otherwise the last
            # field keeps its '\n' and every output row is followed by an
            # empty line.
            line = line.rstrip('\n')
            if not line:
                continue  # skip blank lines
            col = line.split(',')
            # Convert to float so the sort is numeric rather than textual.
            col[-2] = float(col[-2])
            table.append(col)
    table_sorted = sorted(table, key=itemgetter(-2))
    with open("BlastOutSorted.csv", "w") as output_file:
        for row in table_sorted:
            # join() needs strings, so convert the float back.
            output_file.write("\t".join(str(x) for x in row) + '\n')

全部评论 (0)

还没有任何评论哟~