Python生物信息学数据管理——第7、8章课后习题
发布时间
阅读量:
阅读量
# Beginner reference solution.
# Exercise 7.1: append a column holding the mean extinction value of each row.


def add_average_column(rows, label='average_ext', ndigits=2):
    """Return a copy of ``rows`` with one extra column of per-row means.

    Args:
        rows: Table as a list of lists. The first row is the header; in every
            other row the first cell is the protein concentration and the
            remaining cells are the readings to average.
        label: Header text for the new column.
        ndigits: Number of decimals the mean is rounded to.

    Returns:
        A new list of lists; the input table is left unmodified. An empty
        table yields an empty list.

    Raises:
        ZeroDivisionError: If a data row holds no readings after its
            first cell.
    """
    if not rows:
        return []
    header, *records = rows
    extended = [list(header) + [label]]
    for record in records:
        readings = record[1:]
        # Divide by the actual number of readings instead of a fixed 3 so
        # the function keeps working when columns are added or removed.
        mean = round(sum(readings) / len(readings), ndigits)
        extended.append(list(record) + [mean])
    return extended


table = [
    ['protein', 'ext1', 'ext2', 'ext3'],
    [0.16, 0.038, 0.044, 0.040],
    [0.33, 0.089, 0.095, 0.091],
    [0.66, 0.184, 0.191, 0.191],
    [1.00, 0.280, 0.292, 0.283],
    [1.32, 0.365, 0.367, 0.365],
    [1.66, 0.441, 0.443, 0.444],
]
table = add_average_column(table)

for row in table:
    # Every cell, including the last one, is followed by a tab.
    print(*row, sep='\t', end='\t\n')
7.2 将 7.2.2 节示例代码中的表转换为嵌套字典的列表
def rows_to_dicts(rows):
    """Convert a header-first table into a list of dictionaries.

    Args:
        rows: Table as a list of lists whose first row holds the column names.

    Returns:
        One dict per data row, mapping each column name to that row's value.
        An empty table yields an empty list.
    """
    if not rows:
        return []
    header, *records = rows
    # zip pairs names with values for any number of columns, so nothing
    # has to be spelled out index by index.
    return [dict(zip(header, record)) for record in records]


table = [
    ['protein', 'ext1', 'ext2', 'ext3'],
    [0.16, 0.038, 0.044, 0.040],
    [0.33, 0.089, 0.095, 0.091],
    [0.66, 0.184, 0.191, 0.191],
    [1.00, 0.280, 0.292, 0.283],
    [1.32, 0.365, 0.367, 0.365],
    [1.66, 0.441, 0.443, 0.444],
]
ls = rows_to_dicts(table)
print(ls)
7.3从文本文件读取矩阵
def read_matrix(path='similarity.txt'):
    """Read a tab-separated matrix file into a list of string lists.

    Each line is stripped of surrounding whitespace and split on tabs. An
    empty cell is then inserted at the front of the first row.
    NOTE(review): this presumably re-creates the blank corner cell of the
    header that ``strip()`` removes -- confirm against the actual file.

    Args:
        path: Location of the matrix file.

    Returns:
        The rows of the file; an empty file yields an empty list.
    """
    # The context manager closes the file even if reading fails.
    with open(path) as handle:
        rows = [line.strip().split('\t') for line in handle]
    if rows:
        rows[0].insert(0, '')
    return rows


if __name__ == '__main__':
    table = read_matrix()
    for row in table:
        # Every cell, including the last one, is followed by a tab.
        print(*row, sep='\t', end='\t\n')
7.4RNA序列的相似性
def read_matrix(path='similarity.txt'):
    """Read a tab-separated matrix file into a list of string lists.

    Each line is stripped and split on tabs; an empty cell is inserted at the
    front of the first row so that it lines up with the labelled data rows.

    Args:
        path: Location of the matrix file.

    Returns:
        The rows of the file; an empty file yields an empty list.
    """
    with open(path) as handle:
        rows = [line.strip().split('\t') for line in handle]
    if rows:
        rows[0].insert(0, '')
    return rows


def _to_number(text):
    """Parse a numeric cell as int when possible, otherwise as float."""
    # Plain int()/float() parsing replaces eval(), which would execute
    # arbitrary code found in the data file.
    try:
        return int(text)
    except ValueError:
        return float(text)


def sequence_similarity(seq1, seq2, matrix, alphabet='AGCU'):
    """Sum the pairwise similarity scores of two aligned RNA sequences.

    Args:
        seq1: First sequence; its bases select the matrix row.
        seq2: Second sequence; its bases select the matrix column.
        matrix: Rows as returned by ``read_matrix`` (label row and label
            column first, scores as strings).
        alphabet: Order of the bases along both matrix axes.
            NOTE(review): assumed to match the order used in the file.

    Returns:
        The total score. Positions beyond the shorter sequence are ignored.

    Raises:
        ValueError: If a base is not part of ``alphabet`` or a score cell is
            not numeric.
    """
    total = 0
    for base1, base2 in zip(seq1, seq2):
        row = alphabet.find(base1)
        column = alphabet.find(base2)
        if row < 0 or column < 0:
            # find() returns -1 for unknown bases, which would otherwise
            # silently address the label row/column.
            raise ValueError(f'unknown base in pair {base1!r}/{base2!r}')
        # +1 skips the label row and the label column.
        total += _to_number(matrix[row + 1][column + 1])
    return total


seq1 = 'AGCAUCUA'
seq2 = 'ACCGUUCU'

if __name__ == '__main__':
    table = read_matrix()
    similarity = sequence_similarity(seq1, seq2, table)
    print(similarity)
7.5选择性打印表的列和行
def rows_to_dicts(rows):
    """Convert a header-first table into a list of dictionaries.

    Args:
        rows: Table as a list of lists whose first row holds the column names.

    Returns:
        One dict per data row, mapping each column name to that row's value.
        An empty table yields an empty list.
    """
    if not rows:
        return []
    header, *records = rows
    return [dict(zip(header, record)) for record in records]


table = [
    ['protein', 'ext1', 'ext2', 'ext3'],
    [0.16, 0.038, 0.044, 0.040],
    [0.33, 0.089, 0.095, 0.091],
    [0.66, 0.184, 0.191, 0.191],
    [1.00, 0.280, 0.292, 0.283],
    [1.32, 0.365, 0.367, 0.365],
    [1.66, 0.441, 0.443, 0.444],
]
ls = rows_to_dicts(table)

# Working on the nested list: one whole row, then the first column.
print(table[1])
for row in table:
    print(row[0], end=',')
print('\n')

# Working on the list of dicts: one whole record, then the 'protein' field.
print(ls[1])
for record in ls:
    print(record['protein'], end=',')
8.1按第二列对表排序
def sort_rows(rows, column=1):
    """Return the data rows of a header-first table sorted by one column.

    Args:
        rows: Table as a list of lists; the first row is the header and is
            excluded from the result.
        column: Zero-based index of the column to sort by (default: the
            second column).

    Returns:
        A new, ascending list of the data rows.
    """
    return sorted(rows[1:], key=lambda row: row[column])


def write_table(path, header, rows, limit=None):
    """Write a header and data rows to ``path`` as comma-separated text.

    Args:
        path: Output file; it is created or overwritten.
        header: List of column names (strings).
        rows: Data rows; every cell is converted with ``str``.
        limit: If given, only the first ``limit`` rows are written.
    """
    selected = rows if limit is None else rows[:limit]
    # The context manager guarantees the file is flushed and closed.
    with open(path, 'w') as handle:
        handle.write(','.join(header) + '\n')
        for row in selected:
            handle.write(','.join(str(cell) for cell in row) + '\n')


table = [
    ['protein', 'ext1', 'ext2', 'ext3'],
    [0.16, 0.038, 0.044, 0.040],
    [0.66, 0.184, 0.191, 0.191],
    [0.33, 0.089, 0.095, 0.091],
    [1.32, 0.365, 0.367, 0.365],
    [1.00, 0.280, 0.292, 0.283],
    [1.66, 0.441, 0.443, 0.444],
]
table_sorted = sort_rows(table, column=1)

if __name__ == '__main__':
    # NOTE(review): only the first two sorted rows are written, as in the
    # original solution -- confirm this is what the exercise asks for.
    write_table('table_sorted', table[0], table_sorted, limit=2)
8.2按序列长度排序
def read_fasta_sequences(path):
    """Parse a FASTA file into ``(header, sequence)`` pairs.

    Args:
        path: FASTA file to read.

    Returns:
        A list of tuples. ``header`` is the ``>`` line without its line
        ending; ``sequence`` is the concatenation of the stripped sequence
        lines that follow it. Lines before the first header are ignored and
        an empty file yields an empty list.
    """
    records = []
    header = None
    chunks = []
    with open(path) as handle:
        for line in handle:
            if line.startswith('>'):
                if header is not None:
                    records.append((header, ''.join(chunks)))
                header = line.rstrip('\n')
                chunks = []
            elif header is not None:
                chunks.append(line.strip())
    # Flush the last record, which has no following header to trigger it.
    if header is not None:
        records.append((header, ''.join(chunks)))
    return records


def write_fasta(path, records, width=60):
    """Write ``(header, sequence)`` pairs as FASTA with wrapped sequences.

    Args:
        path: Output file; it is created or overwritten.
        records: Iterable of ``(header, sequence)`` pairs.
        width: Maximum number of residues per sequence line.
    """
    with open(path, 'w') as handle:
        for header, sequence in records:
            handle.write(header + '\n')
            # Stepping by ``width`` covers short sequences and exact
            # multiples alike, with no leftover chunk handled separately.
            for start in range(0, len(sequence), width):
                handle.write(sequence[start:start + width] + '\n')


def sort_fasta_by_length(in_path='SwissProt.fasta', out_path='result.fasta'):
    """Copy a FASTA file with its records ordered by sequence length.

    Records are written shortest first; records of equal length keep their
    input order.

    Args:
        in_path: FASTA file to read.
        out_path: FASTA file to write.
    """
    records = read_fasta_sequences(in_path)
    records.sort(key=lambda record: len(record[1]))
    write_fasta(out_path, records)


if __name__ == '__main__':
    sort_fasta_by_length()
8.3Excel文件中的排序
from operator import itemgetter


def _to_number(text):
    """Parse a numeric cell as int when possible, otherwise as float."""
    # Plain int()/float() parsing replaces eval(), which would execute
    # arbitrary code found in the data file.
    try:
        return int(text)
    except ValueError:
        return float(text)


def read_csv_rows(path):
    """Read a comma-separated text file into lists of strings.

    All double quotes are removed before splitting, and blank lines are
    skipped.
    NOTE(review): a quoted field that itself contains a comma is still split
    in two; switch to the ``csv`` module if the export can contain such
    fields.

    Args:
        path: File to read.

    Returns:
        A list of rows, each a list of field strings.
    """
    rows = []
    with open(path) as handle:
        for line in handle:
            cleaned = line.replace('"', '').strip()
            if cleaned:
                rows.append(cleaned.split(','))
    return rows


def sort_pdb_table(in_path='PDB.txt', out_path='PDB_sorted.txt'):
    """Sort the rows of a five-column CSV export and write the result.

    The first row is treated as the header and copied unchanged. The last two
    fields of every other row are converted to numbers; the rows are then
    ordered by column 5, then 4, 3, 2 and 1. An input without any rows
    produces an empty output file.

    Args:
        in_path: CSV file to read.
        out_path: CSV file to write.

    Raises:
        ValueError: If one of the last two fields of a data row is not
            numeric.
        IndexError: If a data row has fewer than five fields.
    """
    rows = read_csv_rows(in_path)
    with open(out_path, 'w') as out:
        if not rows:
            return
        header, records = rows[0], rows[1:]
        for record in records:
            record[-1] = _to_number(record[-1])
            record[-2] = _to_number(record[-2])
        records.sort(key=itemgetter(4, 3, 2, 1, 0))
        out.write(','.join(header) + '\n')
        for record in records:
            out.write(','.join(str(field) for field in record) + '\n')


if __name__ == '__main__':
    sort_pdb_table()
8.4按字母顺序对FASTA序列记录排序
def read_fasta_records(path):
    """Parse a FASTA file into ``[accession, header, body]`` records.

    ``accession`` is the second ``|``-separated field of the header line.
    ``header`` and ``body`` keep the text of the file, except that every line
    is guaranteed to end with a newline so that records can be reordered
    without running into each other. Lines before the first header are
    ignored and an empty file yields an empty list.

    Args:
        path: FASTA file to read.

    Returns:
        A list of three-item lists, in file order.

    Raises:
        IndexError: If a header line contains no ``|`` separator.
    """
    records = []
    current = None
    with open(path) as handle:
        for line in handle:
            # The last line of a file may lack its newline; add it so the
            # record stays intact when it is no longer written last.
            if not line.endswith('\n'):
                line += '\n'
            if line.startswith('>'):
                current = [line.split('|')[1], line, '']
                records.append(current)
            elif current is not None:
                current[2] += line
    return records


def write_records(path, records):
    """Write the header and body of each record to ``path`` verbatim.

    Args:
        path: Output file; it is created or overwritten.
        records: Iterable of ``[accession, header, body]`` records.
    """
    with open(path, 'w') as handle:
        for _accession, header, body in records:
            handle.write(header)
            handle.write(body)


def sort_fasta_by_accession(in_path='SwissProt.fasta',
                            out_path='result.fasta'):
    """Copy a FASTA file with its records sorted by accession code.

    Args:
        in_path: FASTA file to read.
        out_path: FASTA file to write.
    """
    records = read_fasta_records(in_path)
    records.sort(key=lambda record: record[0])
    write_records(out_path, records)


if __name__ == '__main__':
    sort_fasta_by_accession()
8.5 根据 e-value 按升序对 BLAST 输出排序
from operator import itemgetter


def sort_blast_by_evalue(in_path="BlastOut.csv",
                         out_path="BlastOutSorted.csv"):
    """Sort comma-separated BLAST hits by ascending e-value.

    The second-to-last field of every line is read as the e-value. Rows are
    written tab-separated, one per line, with the e-value rendered through
    ``str(float)``. Blank input lines are skipped.

    Args:
        in_path: Comma-separated BLAST output to read.
        out_path: File that receives the sorted, tab-separated rows.

    Raises:
        ValueError: If the e-value field of a line is not a number (for
            example a header line).
        IndexError: If a non-blank line has fewer than two fields.
    """
    rows = []
    with open(in_path) as source:
        for line in source:
            # Drop the line ending first; otherwise it stays attached to the
            # last field and every output row is followed by a blank line.
            line = line.rstrip('\n')
            if not line.strip():
                continue
            fields = line.split(',')
            fields[-2] = float(fields[-2])
            rows.append(fields)
    rows.sort(key=itemgetter(-2))
    with open(out_path, "w") as target:
        for fields in rows:
            target.write("\t".join(str(field) for field in fields) + '\n')


if __name__ == '__main__':
    sort_blast_by_evalue()
全部评论 (0)
还没有任何评论哟~
