在整个脚本中,您能否提供有关每个函数在脚本中如何工作的有用注释,以及它对对齐过程的重要性

 收藏

在整个脚本中,您能否提供有关每个函数在脚本中的工作方式的有用注释,以及它对对齐过程的重要性。此脚本中是否还有任何错误或改进此脚本的方法?任何帮助,将不胜感激。谢谢。同样在第46行,您可以描述数据类型,其中数据以行和列存储。

enter code here

#!/usr/bin/env python

file=input("Please provide fasta file with 2 sequences:")
match=float(input('What is the match score?:'))
missmatch=float(input('What is the missmatch score?:'))
gap=float(input('What is the gap cost?:'))

fasta=open(file,'r')
sequence_list=[]
sequence=''
for line in fasta:
    line = line.rstrip()
    if line.startswith(">"):
        sequence_list.append(sequence)
        sequence=''
        continue
    else:
        sequence += line

sequence_list.append(sequence)
sequence_list = sequence_list[1:]

sequence1 = sequence_list[0]
sequence2 = sequence_list[1]


def main():
    rows=len(sequence1)+1
    columns=len(sequence2)+1
    score_matrix, start_position = create_scoring_matrix(rows, columns)
    FirstSeq_WithGaps, SecondSeq_WithGaps = traceback(score_matrix, start_position)
    alignment_str, identical, gap, missmatches = alignment_string(FirstSeq_WithGaps, SecondSeq_WithGaps)
    alignment_length = len(FirstSeq_WithGaps)
    print ('\n')
    print (' Identities = {0}/{1} ({2:.1%}), ''Gaps = {3}/{4} ({5:.1%})'.format(identical, alignment_length, identical / alignment_length, gap, alignment_length, gap / alignment_length))
    print ('\n')
    for i in range(0, alignment_length, 60):
        sequence1_slice = FirstSeq_WithGaps[i:i+60]
        print('Query {0:<4}     {1}  {2:<4}'.format(i + 1, sequence1_slice, i + len(sequence1_slice)))
        print('               {0}'.format(alignment_str[i:i+60]))
        sequence2_slice = SecondSeq_WithGaps[i:i+60]
        print('Subject  {0:<4}  {1}  {2:<4}'.format(i + 1, sequence2_slice, i + len(sequence2_slice)))
        print('\n')

def create_scoring_matrix(rows, columns):
    score_matrix=[[0 for column in range(columns)] for row in range (rows)]
    max_score = 0;
    max_position = None
    for i in range (1, rows):
        for j in range(1, columns):
            score = calculate_score(score_matrix, i, j)
            if score > max_score:
                max_score = score
                max_position = (i, j)
            score_matrix[i][j] = score
    return score_matrix, max_position
    
def calculate_score(matrix, x, y):
    similarity = match if sequence1[x-1] == sequence2[y-1] else missmatch
    diagonal_score = matrix[x-1][y-1] + similarity
    up_score = matrix[x-1][y] + gap
    left_score = matrix[x][y-1] + gap
    return max(0, diagonal_score, up_score, left_score)
    
def traceback(score_matrix, start_position):
    END, DIAGONAL, UP, LEFT = range(4)
    FirstSeq_WithGaps = []
    SecondSeq_WithGaps = []
    x, y = start_position
    move = next_move(score_matrix, x, y)

    while move != END:
        if move == DIAGONAL:
            FirstSeq_WithGaps.append(sequence1[x-1])
            SecondSeq_WithGaps.append(sequence2[y-1])
            x -= 1
            y -= 1
        elif move == UP:
            FirstSeq_WithGaps.append(sequence1[x-1])
            SecondSeq_WithGaps.append('-')
            x -= 1
        else:
            FirstSeq_WithGaps.append('-')
            SecondSeq_WithGaps.append(sequence2[y-1])
            y -= 1
        move = next_move(score_matrix, x, y)
    return ''.join(reversed(FirstSeq_WithGaps)),''.join(reversed(SecondSeq_WithGaps))

def next_move(score_matrix, x, y,):
    diagonal = score_matrix[x-1][y-1]

 

   up = score_matrix[x-1][y]
    left = score_matrix[x][y-1]
    if diagonal >= up and diagonal >= left:
        return 1 if diagonal !=0 else 0
    elif up > diagonal and up >= left:
        return 2 if up !=0 else 0
    elif left > diagonal and left > up:
        return 3 if left !=0 else 0
        
def alignment_string(FirstSeq_WithGaps, SecondSeq_WithGaps):
    identical, gap, missmatch = 0, 0, 0
    alignment_string = []
    for position1, position2 in zip(FirstSeq_WithGaps, SecondSeq_WithGaps):
        if position1 == position2:
            alignment_string.append('|')
            identical += 1
        elif '-' in (position1, position2):
            alignment_string.append(' ')
            gap += 1
        else:
            alignment_string.append(':')
            missmatch += 1
    return ''.join(alignment_string), identical, gap, missmatch
    
if __name__ == '__main__':
    main()
回复