Advertisement

生物信息学-序列比对-DP-动态规划

阅读量:
复制代码
 def sequence_alignment(seq1, seq2, match, mismatch, gap):

    
     # 初始化得分矩阵和回溯矩阵
    
     rows = len(seq1) + 1
    
     cols = len(seq2) + 1
    
     score_matrix = []
    
     traceback_matrix = []
    
  
    
     # 初始化 score_matrix 和 traceback_matrix
    
     for _ in range(rows):
    
     score_row = [0] * cols
    
     traceback_row = [set() for _ in range(cols)]  # 使用集合保存多个方向
    
     score_matrix.append(score_row)
    
     traceback_matrix.append(traceback_row)
    
  
    
     # 填充得分矩阵的第一行和第一列
    
     for i in range(1, rows):
    
     score_matrix[i][0] = score_matrix[i-1][0] + gap
    
     traceback_matrix[i][0].add('↑')  # 上方
    
     for j in range(1, cols):
    
     score_matrix[0][j] = score_matrix[0][j-1] + gap
    
     traceback_matrix[0][j].add('←')  # 左侧
    
  
    
     # 填充得分矩阵和回溯矩阵的其余部分
    
     for i in range(1, rows):
    
     for j in range(1, cols):
    
         if seq1[i-1] == seq2[j-1]:
    
             diagonal_score = score_matrix[i-1][j-1] + match
    
             max_score = diagonal_score
    
             score_matrix[i][j] = max_score
    
             traceback_matrix[i][j].add('↖')  # 对角线
    
         else:
    
             mis_diagonal_score = score_matrix[i-1][j-1] + mismatch
    
             up_score = score_matrix[i-1][j] + gap
    
             left_score = score_matrix[i][j-1] + gap
    
  
    
             # 取三个方向的最大得分作为当前位置的得分,并记录回溯方向
    
             max_score = max(mis_diagonal_score, up_score, left_score)
    
             score_matrix[i][j] = max_score
    
             if max_score == mis_diagonal_score:
    
                 traceback_matrix[i][j].add('↖') # 对角线
    
             if max_score == up_score:
    
                 traceback_matrix[i][j].add('↑')  # 上方
    
             if max_score == left_score:
    
                 traceback_matrix[i][j].add('←')  # 左侧
    
  
    
     return score_matrix, traceback_matrix
    
  
    
 # 测试算法,输出得分矩阵和回溯矩阵
    
 seq1 = "GTCGTAGAATA"
    
 seq2 = "CACGTAGTA"
    
  
    
 score_matrix, traceback_matrix = sequence_alignment(seq1, seq2, 1, 1, 2)
    
  
    
 # 输出得分矩阵
    
 print("得分矩阵:")
    
 for row in score_matrix:
    
     print(row)
    
  
    
 # 输出回溯矩阵
    
 print("\n回溯矩阵:")
    
 for row in traceback_matrix:
    
     print(row)
    
    
    
    

全部评论 (0)

还没有任何评论哟~