This commit is contained in:
borrageiros 2024-07-07 19:57:44 +02:00
parent 6cd03d1c37
commit 9dab196e24

View File

@@ -5,29 +5,29 @@ import os
### FORWARD ### FORWARD
def generate_quality_first_bases_fw(): def generate_quality_first_bases_fw(read_length):
return ''.join(random.choice('?@ABCD') for _ in range(2)) return ''.join(random.choice('?@ABCD') for _ in range(int(read_length * 0.02)))
def generate_quality_next_bases_fw(): def generate_quality_next_bases_fw(read_length):
return ''.join(random.choice('ABCDEG') for _ in range(3)) return ''.join(random.choice('ABCDEG') for _ in range(int(read_length * 0.02)))
def generate_quality_middle_bases_fw(): def generate_quality_middle_bases_fw(read_length):
return ''.join(random.choice('CDEFGH') for _ in range(5)) return ''.join(random.choice('FGH') for _ in range(int(read_length * 0.10)))
def generate_quality_last_bases_fw(): def generate_quality_last_bases_fw(read_length):
return ''.join(random.choice('HI') for _ in range(200)) return ''.join(random.choice('HI') for _ in range((int(read_length * 0.60))))
def generate_quality_decreasing_200_220_fw(): def generate_quality_decreasing_200_220_fw(read_length):
return ''.join(random.choice('CDEFGH') for _ in range(30)) return ''.join(random.choice('CDEFGH') for _ in range(int(read_length * 0.10)))
def generate_quality_decreasing_220_260_fw(): def generate_quality_decreasing_220_260_fw(read_length):
return ''.join(random.choice('ABCDEG') for _ in range(25)) return ''.join(random.choice('ABCDEG') for _ in range(int(read_length * 0.10)))
def generate_quality_decreasing_260_290_fw(): def generate_quality_decreasing_260_290_fw(read_length):
return ''.join(random.choice('?@ABCD') for _ in range(25)) return ''.join(random.choice('?@ABCD') for _ in range(int(read_length * 0.04)))
def generate_quality_decreasing_290_300_fw(): def generate_quality_decreasing_290_300_fw(read_length):
return ''.join(random.choice('?@') for _ in range(10)) return ''.join(random.choice('?@AB') for _ in range(int(read_length * 0.02)))
def change_quality_fw(input_file, output_file): def change_quality_fw(input_file, output_file):
with open(input_file, 'r') as f_in, open(output_file, 'w') as f_out: with open(input_file, 'r') as f_in, open(output_file, 'w') as f_out:
@@ -40,45 +40,45 @@ def change_quality_fw(input_file, output_file):
f_out.write(line) f_out.write(line)
else: else:
new_quality = ( new_quality = (
generate_quality_first_bases_fw() + generate_quality_first_bases_fw(read_length) +
generate_quality_next_bases_fw() + generate_quality_next_bases_fw(read_length) +
generate_quality_middle_bases_fw() + generate_quality_middle_bases_fw(read_length) +
generate_quality_last_bases_fw() + generate_quality_last_bases_fw(read_length) +
generate_quality_decreasing_200_220_fw() + generate_quality_decreasing_200_220_fw(read_length) +
generate_quality_decreasing_220_260_fw() + generate_quality_decreasing_220_260_fw(read_length) +
generate_quality_decreasing_260_290_fw() + generate_quality_decreasing_260_290_fw(read_length) +
generate_quality_decreasing_290_300_fw() generate_quality_decreasing_290_300_fw(read_length)
) )
f_out.write(new_quality + '\n') f_out.write(new_quality + '\n')
### REVERSE ### REVERSE
def generate_quality_first_bases_rv(read_length): def generate_quality_first_bases_rv(read_length):
return ''.join(random.choice('?@ABCD') for _ in range(read_length * 0.01)) return ''.join(random.choice('?@ABCD') for _ in range(int(read_length * 0.01)))
def generate_quality_next_bases_rv(read_length): def generate_quality_next_bases_rv(read_length):
return ''.join(random.choice('ABCDEG') for _ in range(read_length * 0.02)) return ''.join(random.choice('ABCDEG') for _ in range(int(read_length * 0.02)))
def generate_quality_middle_bases_rv(read_length): def generate_quality_middle_bases_rv(read_length):
return ''.join(random.choice('CDEFGH') for _ in range(read_length * 0.02)) return ''.join(random.choice('CDEFGH') for _ in range(int(read_length * 0.02)))
def generate_quality_main_bases_rv(read_length): def generate_quality_main_bases_rv(read_length):
return ''.join(random.choice('HI') for _ in range((read_length * 0.10 * 6) + (read_length * 0.05))) return ''.join(random.choice('HI') for _ in range((int(read_length * 0.10) * 6) + (int(read_length * 0.05))))
def generate_quality_decreasing_175_200_rv(read_length): def generate_quality_decreasing_175_200_rv(read_length):
return ''.join(random.choice('CDEFGH') for _ in range(read_length * 0.10)) return ''.join(random.choice('CDEFGH') for _ in range(int(read_length * 0.10)))
def generate_quality_decreasing_200_220_rv(read_length): def generate_quality_decreasing_200_220_rv(read_length):
return ''.join(random.choice('ABCDEG') for _ in range(read_length * 0.10)) return ''.join(random.choice('ABCDEG') for _ in range(int(read_length * 0.10)))
def generate_quality_decreasing_220_260_rv(read_length): def generate_quality_decreasing_220_260_rv(read_length):
return ''.join(random.choice('?@ABCD') for _ in range(read_length * 0.05)) return ''.join(random.choice('?@ABCD') for _ in range(int(read_length * 0.05)))
def generate_quality_decreasing_260_290_rv(read_length): def generate_quality_decreasing_260_290_rv(read_length):
return ''.join(random.choice('?@ABC') for _ in range(read_length * 0.04)) return ''.join(random.choice('?@ABC') for _ in range(int(read_length * 0.04)))
def generate_quality_decreasing_290_300_rv(read_length): def generate_quality_decreasing_290_300_rv(read_length):
return ''.join(random.choice('?@AB') for _ in range(read_length * 0.01)) return ''.join(random.choice('?@AB') for _ in range(int(read_length * 0.01)))
def change_quality_rv(input_file, output_file): def change_quality_rv(input_file, output_file):
with open(input_file, 'r') as f_in, open(output_file, 'w') as f_out: with open(input_file, 'r') as f_in, open(output_file, 'w') as f_out:
@@ -90,16 +90,17 @@ def change_quality_rv(input_file, output_file):
elif line_num % 4 == 2: elif line_num % 4 == 2:
f_out.write(line) f_out.write(line)
else: else:
read_length = len(line.strip())
new_quality = ( new_quality = (
generate_quality_first_bases_rv() + generate_quality_first_bases_rv(read_length) +
generate_quality_next_bases_rv() + generate_quality_next_bases_rv(read_length) +
generate_quality_middle_bases_rv() + generate_quality_middle_bases_rv(read_length) +
generate_quality_main_bases_rv() + generate_quality_main_bases_rv(read_length) +
generate_quality_decreasing_175_200_rv() + generate_quality_decreasing_175_200_rv(read_length) +
generate_quality_decreasing_200_220_rv() + generate_quality_decreasing_200_220_rv(read_length) +
generate_quality_decreasing_220_260_rv() + generate_quality_decreasing_220_260_rv(read_length) +
generate_quality_decreasing_260_290_rv() + generate_quality_decreasing_260_290_rv(read_length) +
generate_quality_decreasing_290_300_rv() generate_quality_decreasing_290_300_rv(read_length)
) )
f_out.write(new_quality + '\n') f_out.write(new_quality + '\n')