9月28号Python生物信息学数据管理
发布时间
阅读量:
阅读量
第五章笔记
1,range(start, stop[, step]),不包括stop。
2,字典的形式为{键:值},键必须是不可变对象(数字,字符串,元组);值可以是任意对象,并且可以修改。
定义字典的方法:a,直接定义: d = {'b':'n','c':10}。 b,逐个分配:>>>d = {} >>>d['b'] = 'n'。(不要用 dict 作变量名,否则会覆盖内置的 dict 类型。)
3,if 条件语句(不是循环):第二个及之后的分支用elif,而不是再写一个if。
4,列表转化为字符串用''.join(list)(列表元素必须都是字符串)。
字符串转化为列表:list(s)(逐个字符拆分),或 s.split()(按分隔符拆分)。


5,使用input()函数时,括号里的提示字符串只用于显示,不包含在返回值中;把返回值赋给变量后,计算其长度时不会把提示文字算进去。
自测题
1,
# Exercise 1: a small codon -> meaning dictionary, then look up one codon.
# The labels are stored exactly as written in the notes (mixed case included).
codon_aa = dict(
    UAA='stop',
    UAG='stop',
    UGA='STOP',
    AUG='Start',
    GGG='Glycin',
)
print(codon_aa['UAA'])
2,
# Exercise 2: count the start and stop codons in each of the three forward
# reading frames of an RNA sequence stored in a FASTA file.

# Codon -> one-letter amino acid code.  Stop codons map to 'STOP'; AUG, GUG
# and UUG map to 'START' (rather than to an amino acid letter) so that
# potential start codons can be counted.
codon_table = {
    'GCU': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'CGU': 'R', 'CGC': 'R',
    'CGA': 'R', 'CGG': 'R', 'AGA': 'R', 'AGG': 'R', 'UCU': 'S', 'UCC': 'S',
    'UCA': 'S', 'UCG': 'S', 'AGU': 'S', 'AGC': 'S', 'AUU': 'I', 'AUC': 'I',
    'AUA': 'I', 'UUA': 'L', 'CUU': 'L', 'CUC': 'L', 'CUA': 'L',
    'CUG': 'L', 'GGU': 'G', 'GGC': 'G', 'GGA': 'G', 'GGG': 'G', 'GUU': 'V',
    'GUC': 'V', 'GUA': 'V', 'ACU': 'T', 'ACC': 'T', 'ACA': 'T',
    'ACG': 'T', 'CCU': 'P', 'CCC': 'P', 'CCA': 'P', 'CCG': 'P', 'AAU': 'N',
    'AAC': 'N', 'GAU': 'D', 'GAC': 'D', 'UGU': 'C', 'UGC': 'C', 'CAA': 'Q',
    'CAG': 'Q', 'GAA': 'E', 'GAG': 'E', 'CAU': 'H', 'CAC': 'H', 'AAA': 'K',
    'AAG': 'K', 'UUU': 'F', 'UUC': 'F', 'UAU': 'Y', 'UAC': 'Y',
    'UGG': 'W',
    'UAG': 'STOP', 'UGA': 'STOP', 'UAA': 'STOP',
    'AUG': 'START', 'GUG': 'START', 'UUG': 'START',
}


def read_fasta_sequence(path):
    """Return the sequence stored in the FASTA file *path* as one string.

    Lines starting with '>' (headers) are skipped; every other line is
    stripped of surrounding whitespace and concatenated.  The file is closed
    as soon as it has been read.
    """
    with open(path) as handle:
        return ''.join(
            line.strip() for line in handle if not line.startswith('>')
        )


def translate_frame(rna, frame):
    """Translate *rna* codon by codon, starting at offset *frame*.

    Returns a tuple ``(prot, count_start, count_end)`` where *prot* is the
    translated string ('*' for a stop codon, '#' for a start codon, '-' for
    anything not in ``codon_table``, including a trailing fragment shorter
    than three bases) and the two counts are the numbers of start and stop
    codons seen in this frame.
    """
    symbols = []
    count_start = 0
    count_end = 0
    for i in range(frame, len(rna), 3):
        codon = rna[i:i + 3]
        if codon not in codon_table:
            symbols.append('-')
            continue
        residue = codon_table[codon]
        if residue == 'STOP':
            symbols.append('*')
            count_end += 1
        elif residue == 'START':
            symbols.append('#')
            count_start += 1
        else:
            symbols.append(residue)
    return ''.join(symbols), count_start, count_end


if __name__ == '__main__':
    # read the RNA sequence into a single string
    rna = read_fasta_sequence('A06662-RNA.fasta')
    # translate one frame at a time
    for frame in range(3):
        print('Reading frame' + str(frame + 1))
        prot, count_start, count_end = translate_frame(rna, frame)
        print('起始密码子:', count_start)
        print('终止密码子:', count_end)
3,
# Exercise 3: report whether the two keywords 'MA' and 'OSS' occur in a text
# file.


def search_keywords(content, first='MA', second='OSS'):
    """Return a message saying how many of the two keywords occur in *content*.

    '找到了' when both *first* and *second* are present, '找到一个' when
    exactly one of them is present, and '都没找到' when neither is.
    """
    has_first = first in content
    has_second = second in content
    if has_first and has_second:
        return '找到了'
    if has_first or has_second:
        # Reaching this point means they are not both present, so exactly
        # one keyword was found.
        return '找到一个'
    return '都没找到'


if __name__ == '__main__':
    # Read the whole file at once; the context manager closes it afterwards.
    # No variable is named ``str`` here, so the builtin stays usable.
    with open('text1.txt') as f:
        s = f.read()
    print(search_keywords(s))
4,
# Exercise 4: predict a secondary-structure letter for each residue of a
# protein sequence from a per-residue pair of propensity values.

# Amino acid -> [first propensity, second propensity].  The first value
# decides 'H' and the second decides 'E' (see classify_propensity).
propensities = {
    'A': [1.45, 0.97], 'C': [0.77, 1.30], 'D': [0.98, 0.80], 'E': [1.53, 0.26],
    'F': [1.12, 1.28], 'G': [0.53, 0.81], 'H': [1.24, 0.71], 'I': [1.00, 1.60],
    'K': [1.07, 0.74], 'L': [1.34, 1.22], 'M': [1.20, 1.67], 'N': [0.73, 0.65],
    'P': [0.59, 0.62], 'Q': [1.17, 1.23], 'R': [0.79, 0.90], 'S': [0.79, 0.72],
    'T': [0.82, 1.20], 'V': [1.14, 1.65], 'W': [1.14, 1.19], 'Y': [0.61, 1.29],
}


def classify_propensity(helix, sheet):
    """Return 'H', 'E' or 'L' for one residue's pair of propensities.

    'H' when *helix* is at least 1 and larger than *sheet*; 'E' when *sheet*
    is at least 1 and larger than *helix*; 'L' in every other case.
    """
    if helix >= 1 and sheet < helix:
        return 'H'
    if sheet >= 1 and sheet > helix:
        return 'E'
    return 'L'


# Amino acid -> predicted structure letter ('H', 'E' or 'L').
dict_stru = {
    aa: classify_propensity(helix, sheet)
    for aa, (helix, sheet) in propensities.items()
}


def predict_structure(sequence):
    """Return the predicted structure string for *sequence*.

    The lookup is case-insensitive.  A character that is not one of the
    amino acids in ``dict_stru`` yields '?' instead of raising KeyError, so
    the output stays aligned with the input.
    """
    return ''.join(dict_stru.get(aa, '?') for aa in sequence.upper())


if __name__ == '__main__':
    # Keep prompting until the user enters an empty line or input ends (EOF).
    while True:
        try:
            in_put = input('请输入氨基酸:').strip()
        except EOFError:
            break
        if not in_put:
            break
        # End with a newline so the next prompt starts on its own line.
        print(predict_structure(in_put))
5,
# Exercise 5: rewrite a protein sequence so that residues whose table value
# is above a threshold are upper case and all other residues are lower case,
# then save the result to a file.

# Amino acid (upper-case one-letter code) -> numeric value compared against
# the threshold in mark_accessibility.
dict_table = {
    'A': 0.48, 'R': 0.84, 'D': 0.81, 'N': 0.82, 'C': 0.32,
    'E': 0.93, 'Q': 0.81, 'G': 0.51, 'H': 0.66, 'I': 0.39,
    'L': 0.41, 'K': 0.93, 'M': 0.44, 'F': 0.42, 'P': 0.78,
    'S': 0.70, 'T': 0.71, 'W': 0.49, 'Y': 0.67, 'V': 0.40,
}


def mark_accessibility(sequence, threshold=0.7):
    """Re-case *sequence* according to ``dict_table``.

    Returns ``(output_seq, unrecognized)``: *output_seq* contains each
    residue found in ``dict_table`` in upper case when its value is strictly
    greater than *threshold* and in lower case otherwise; *unrecognized*
    lists, in order, the characters that are not keys of ``dict_table``
    (they are left out of *output_seq*).
    """
    marked = []
    unrecognized = []
    for aa in sequence:
        if aa not in dict_table:
            unrecognized.append(aa)
        elif dict_table[aa] > threshold:
            marked.append(aa.upper())
        else:
            marked.append(aa.lower())
    return ''.join(marked), unrecognized


if __name__ == '__main__':
    # Concatenate all non-header lines of the FASTA file into one string.
    with open('proteinseq.fasta') as f:
        input_seq = ''.join(
            line.strip() for line in f if not line.startswith('>')
        )
    print(input_seq)

    output_seq, unknown = mark_accessibility(input_seq)
    for aa in unknown:
        print('unrecognized character:', aa)
    print(output_seq)

    # The context manager guarantees the result is flushed and closed.
    with open('result.txt', 'w') as out:
        out.write(output_seq)
全部评论 (0)
还没有任何评论哟~
