Advertisement

9月28号Python生物信息学数据管理

阅读量:

第五章笔记

1,range(start, stop[, step]),不包括stop。

2,字典的形式为{键:值}。键必须是不可变对象(数字、字符串、元组),值可以是任意对象,也可以修改。

定义字典的方法:a,直接定义: dict = {'b':'n','c':10}。 b,逐个分配:>>>dict = {} >>>dict['b'] = 'n'。

3,if 是条件语句(不是循环),第二个及之后的分支用 elif,而不是再写一个 if。

4,列表转化为字符串用 ''.join(list)(列表中的元素必须都是字符串)。

字符串转化为列表:list(s) 按单个字符拆分,s.split() 按空白拆分。

5,使用 input('提示') 函数并把返回值赋给变量时,变量中只包含用户输入的内容,提示字符串不计入返回的字符串(也不计入其长度)。

自测题

1,

复制代码
 # Small lookup table mapping a few RNA codons to a human-readable meaning.
# The three stop codons share one spelling ('stop') so that callers can compare
# labels directly, and the glycine label is spelled out correctly.
codon_aa = {
    'UAA': 'stop',
    'UAG': 'stop',
    'UGA': 'stop',
    'AUG': 'Start',
    'GGG': 'Glycine',
}

# Look up one codon to show how dictionary access by key works.
print(codon_aa['UAA'])

2,

复制代码
 # Count start and stop codons in the three forward reading frames of an RNA.
# Codon -> one-letter amino acid code. Stop codons map to 'STOP'; AUG, GUG and
# UUG are all labelled 'START' in this table.
codon_table = {
    'GCU': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'CGU': 'R', 'CGC': 'R',
    'CGA': 'R', 'CGG': 'R', 'AGA': 'R', 'AGG': 'R', 'UCU': 'S', 'UCC': 'S',
    'UCA': 'S', 'UCG': 'S', 'AGU': 'S', 'AGC': 'S', 'AUU': 'I', 'AUC': 'I',
    'AUA': 'I', 'UUA': 'L', 'CUU': 'L', 'CUC': 'L', 'CUA': 'L',
    'CUG': 'L', 'GGU': 'G', 'GGC': 'G', 'GGA': 'G', 'GGG': 'G', 'GUU': 'V',
    'GUC': 'V', 'GUA': 'V', 'ACU': 'T', 'ACC': 'T', 'ACA': 'T',
    'ACG': 'T', 'CCU': 'P', 'CCC': 'P', 'CCA': 'P', 'CCG': 'P', 'AAU': 'N',
    'AAC': 'N', 'GAU': 'D', 'GAC': 'D', 'UGU': 'C', 'UGC': 'C', 'CAA': 'Q',
    'CAG': 'Q', 'GAA': 'E', 'GAG': 'E', 'CAU': 'H', 'CAC': 'H', 'AAA': 'K',
    'AAG': 'K', 'UUU': 'F', 'UUC': 'F', 'UAU': 'Y', 'UAC': 'Y',
    'UGG': 'W',
    'UAG': 'STOP', 'UGA': 'STOP', 'UAA': 'STOP',
    'AUG': 'START', 'GUG': 'START', 'UUG': 'START',
}


def read_fasta_sequence(path):
    """Return the sequence stored in a FASTA file as one string.

    Header lines (those starting with '>') are skipped and surrounding
    whitespace is stripped from every sequence line before joining.
    The file is closed as soon as it has been read.
    """
    with open(path) as handle:
        return ''.join(
            line.strip() for line in handle if not line.startswith('>')
        )


def translate_frame(rna, frame):
    """Translate one reading frame of *rna* and count start/stop codons.

    Args:
        rna: RNA sequence using the letters found in ``codon_table``.
        frame: zero-based offset (0, 1 or 2) of the first codon.

    Returns:
        A ``(protein, start_count, stop_count)`` tuple. In ``protein`` a stop
        codon is written as '*', a start codon as '#', and any triplet that is
        not in ``codon_table`` (including a short trailing fragment) as '-'.
    """
    residues = []
    start_count = 0
    stop_count = 0
    for position in range(frame, len(rna), 3):
        meaning = codon_table.get(rna[position:position + 3])
        if meaning is None:
            residues.append('-')
        elif meaning == 'STOP':
            residues.append('*')
            stop_count += 1
        elif meaning == 'START':
            residues.append('#')
            start_count += 1
        else:
            residues.append(meaning)
    return ''.join(residues), start_count, stop_count


def report_reading_frames(fasta_path='A06662-RNA.fasta'):
    """Print the start and stop codon counts for each of the three frames."""
    rna = read_fasta_sequence(fasta_path)
    for frame in range(3):
        _protein, count_start, count_end = translate_frame(rna, frame)
        print('Reading frame ' + str(frame + 1))
        print('起始密码子:', count_start)
        print('终止密码子:', count_end)


if __name__ == '__main__':
    report_reading_frames()

3,

复制代码
 # Report whether the motifs 'MA' and 'OSS' occur in a text file.
def motif_message(text):
    """Return the message describing which of 'MA' and 'OSS' occur in *text*.

    Returns '找到了' when both motifs are present, '找到一个' when exactly one
    of them is present, and '都没找到' when neither is.
    """
    has_ma = 'MA' in text
    has_oss = 'OSS' in text
    if has_ma and has_oss:
        return '找到了'
    # The "both" case has already returned, so `or` here means exactly one.
    if has_ma or has_oss:
        return '找到一个'
    return '都没找到'


def report_motifs(path='text1.txt'):
    """Read *path* and print the motif message for its contents."""
    # Neither motif contains a space or newline, so searching the raw file
    # contents gives the same answer as searching the lines joined by spaces.
    with open(path) as handle:
        print(motif_message(handle.read()))


if __name__ == '__main__':
    report_motifs()

4,

复制代码
 # Map each amino acid to a structure letter and translate typed sequences.
# Two scores per amino acid. NOTE(review): presumably helix and strand
# propensities, given the 'H'/'E' labels derived below -- confirm the source.
_propensities = {
    'A': [1.45, 0.97], 'C': [0.77, 1.30], 'D': [0.98, 0.80], 'E': [1.53, 0.26],
    'F': [1.12, 1.28], 'G': [0.53, 0.81], 'H': [1.24, 0.71], 'I': [1.00, 1.60],
    'K': [1.07, 0.74], 'L': [1.34, 1.22], 'M': [1.20, 1.67], 'N': [0.73, 0.65],
    'P': [0.59, 0.62], 'Q': [1.17, 1.23], 'R': [0.79, 0.90], 'S': [0.79, 0.72],
    'T': [0.82, 1.20], 'V': [1.14, 1.65], 'W': [1.14, 1.19], 'Y': [0.61, 1.29],
}


def _classify(first, second):
    """Return 'H', 'E' or 'L' for a pair of scores.

    'H' when the first score is at least 1 and larger than the second,
    'E' when the second score is at least 1 and larger than the first,
    'L' otherwise (including ties and pairs where both are below 1).
    """
    if first >= 1 and second < first:
        return 'H'
    if second >= 1 and second > first:
        return 'E'
    return 'L'


# Amino acid -> structure letter. Built as a new dict so the score table is not
# overwritten while it is being iterated.
dict_stru = {
    residue: _classify(first, second)
    for residue, (first, second) in _propensities.items()
}


def predict_structure(sequence):
    """Return the structure letters for *sequence*, one per residue.

    Lower-case input is accepted. Characters that are not one of the twenty
    amino acids in ``dict_stru`` are rendered as '-' instead of raising
    ``KeyError``.
    """
    return ''.join(dict_stru.get(residue, '-') for residue in sequence.upper())


def run_structure_prompt():
    """Repeatedly prompt for a sequence and print its structure string.

    The loop ends cleanly on end-of-input or Ctrl-C.
    """
    while True:
        try:
            in_put = input('请输入氨基酸:')
        except (EOFError, KeyboardInterrupt):
            print()
            break
        print(predict_structure(in_put))


if __name__ == '__main__':
    run_structure_prompt()

5,

复制代码
 # Re-case a protein sequence according to a per-residue score table.
# One score per amino acid. NOTE(review): the meaning of the score is not
# stated in this snippet -- confirm against the exercise text.
dict_table = {
    'A': 0.48, 'R': 0.84, 'D': 0.81, 'N': 0.82, 'C': 0.32,
    'E': 0.93, 'Q': 0.81, 'G': 0.51, 'H': 0.66, 'I': 0.39,
    'L': 0.41, 'K': 0.93, 'M': 0.44, 'F': 0.42, 'P': 0.78,
    'S': 0.70, 'T': 0.71, 'W': 0.49, 'Y': 0.67, 'V': 0.40,
}


def recase_by_score(sequence, threshold=0.7):
    """Return *sequence* with each residue upper- or lower-cased by its score.

    Residues whose score in ``dict_table`` is strictly greater than
    *threshold* are written in upper case, all other known residues in lower
    case. Characters missing from ``dict_table`` (including lower-case input)
    are reported on stdout and left out of the result.
    """
    recased = []
    for residue in sequence:
        score = dict_table.get(residue)
        if score is None:
            print('unrecognized character:', residue)
        elif score > threshold:
            recased.append(residue.upper())
        else:
            recased.append(residue.lower())
    return ''.join(recased)


def recase_protein_file(in_path='proteinseq.fasta', out_path='result.txt'):
    """Read a FASTA file, print and re-case its sequence, and save the result.

    Header lines (starting with '>') are skipped. Both files are closed
    deterministically by the ``with`` blocks.
    """
    with open(in_path) as handle:
        input_seq = ''.join(
            line.strip() for line in handle if not line.startswith('>')
        )
    print(input_seq)

    output_seq = recase_by_score(input_seq)
    print(output_seq)

    with open(out_path, 'w') as handle:
        handle.write(output_seq)


if __name__ == '__main__':
    recase_protein_file()
复制代码
复制代码
复制代码

全部评论 (0)

还没有任何评论哟~