This commit is contained in:
borrageiros 2024-07-07 19:57:44 +02:00
parent 6cd03d1c37
commit 9dab196e24

View File

@ -5,29 +5,29 @@ import os
### FORWARD
def generate_quality_first_bases_fw(read_length):
    """Return random quality characters for the opening segment of a forward read.

    The segment covers 2% of ``read_length`` (truncated to an integer) and each
    character is drawn independently from ``'?@ABCD'``.

    Args:
        read_length: Length of the read the quality string is built for.

    Returns:
        A string of ``int(read_length * 0.02)`` characters; empty when that
        count truncates to zero.
    """
    # int() truncates, so reads shorter than 50 bases get no characters here.
    segment_length = int(read_length * 0.02)
    return ''.join(random.choice('?@ABCD') for _ in range(segment_length))
def generate_quality_next_bases_fw(read_length):
    """Return random quality characters for the second segment of a forward read.

    The segment covers 2% of ``read_length`` (truncated to an integer) and each
    character is drawn independently from ``'ABCDEG'``.

    Args:
        read_length: Length of the read the quality string is built for.

    Returns:
        A string of ``int(read_length * 0.02)`` characters; empty when that
        count truncates to zero.
    """
    segment_length = int(read_length * 0.02)
    return ''.join(random.choice('ABCDEG') for _ in range(segment_length))
def generate_quality_middle_bases_fw(read_length):
    """Return random quality characters for the third segment of a forward read.

    The segment covers 10% of ``read_length`` (truncated to an integer) and
    each character is drawn independently from ``'FGH'``.

    Args:
        read_length: Length of the read the quality string is built for.

    Returns:
        A string of ``int(read_length * 0.10)`` characters; empty when that
        count truncates to zero.
    """
    segment_length = int(read_length * 0.10)
    return ''.join(random.choice('FGH') for _ in range(segment_length))
def generate_quality_last_bases_fw(read_length):
    """Return random quality characters for the longest segment of a forward read.

    The segment covers 60% of ``read_length`` (truncated to an integer) and
    each character is drawn independently from ``'HI'``.

    Args:
        read_length: Length of the read the quality string is built for.

    Returns:
        A string of ``int(read_length * 0.60)`` characters; empty when that
        count truncates to zero.
    """
    segment_length = int(read_length * 0.60)
    return ''.join(random.choice('HI') for _ in range(segment_length))
def generate_quality_decreasing_200_220_fw(read_length):
    """Return random quality characters for a 10% segment of a forward read.

    The numeric suffix in the name plays no part in the computation: the
    segment length is derived only from ``read_length``. Each character is
    drawn independently from ``'CDEFGH'``.

    Args:
        read_length: Length of the read the quality string is built for.

    Returns:
        A string of ``int(read_length * 0.10)`` characters; empty when that
        count truncates to zero.
    """
    segment_length = int(read_length * 0.10)
    return ''.join(random.choice('CDEFGH') for _ in range(segment_length))
def generate_quality_decreasing_220_260_fw(read_length):
    """Return random quality characters for a 10% segment of a forward read.

    The numeric suffix in the name plays no part in the computation: the
    segment length is derived only from ``read_length``. Each character is
    drawn independently from ``'ABCDEG'``.

    Args:
        read_length: Length of the read the quality string is built for.

    Returns:
        A string of ``int(read_length * 0.10)`` characters; empty when that
        count truncates to zero.
    """
    segment_length = int(read_length * 0.10)
    return ''.join(random.choice('ABCDEG') for _ in range(segment_length))
def generate_quality_decreasing_260_290_fw(read_length):
    """Return random quality characters for a 4% segment of a forward read.

    The numeric suffix in the name plays no part in the computation: the
    segment length is derived only from ``read_length``. Each character is
    drawn independently from ``'?@ABCD'``.

    Args:
        read_length: Length of the read the quality string is built for.

    Returns:
        A string of ``int(read_length * 0.04)`` characters; empty when that
        count truncates to zero.
    """
    segment_length = int(read_length * 0.04)
    return ''.join(random.choice('?@ABCD') for _ in range(segment_length))
def generate_quality_decreasing_290_300_fw(read_length):
    """Return random quality characters for a 2% segment of a forward read.

    The numeric suffix in the name plays no part in the computation: the
    segment length is derived only from ``read_length``. Each character is
    drawn independently from ``'?@AB'``.

    Args:
        read_length: Length of the read the quality string is built for.

    Returns:
        A string of ``int(read_length * 0.02)`` characters; empty when that
        count truncates to zero.
    """
    segment_length = int(read_length * 0.02)
    return ''.join(random.choice('?@AB') for _ in range(segment_length))
def change_quality_fw(input_file, output_file):
with open(input_file, 'r') as f_in, open(output_file, 'w') as f_out:
@ -40,45 +40,45 @@ def change_quality_fw(input_file, output_file):
f_out.write(line)
else:
new_quality = (
generate_quality_first_bases_fw() +
generate_quality_next_bases_fw() +
generate_quality_middle_bases_fw() +
generate_quality_last_bases_fw() +
generate_quality_decreasing_200_220_fw() +
generate_quality_decreasing_220_260_fw() +
generate_quality_decreasing_260_290_fw() +
generate_quality_decreasing_290_300_fw()
generate_quality_first_bases_fw(read_length) +
generate_quality_next_bases_fw(read_length) +
generate_quality_middle_bases_fw(read_length) +
generate_quality_last_bases_fw(read_length) +
generate_quality_decreasing_200_220_fw(read_length) +
generate_quality_decreasing_220_260_fw(read_length) +
generate_quality_decreasing_260_290_fw(read_length) +
generate_quality_decreasing_290_300_fw(read_length)
)
f_out.write(new_quality + '\n')
### REVERSE
def generate_quality_first_bases_rv(read_length):
    """Return random quality characters for the opening segment of a reverse read.

    The segment covers 1% of ``read_length`` and each character is drawn
    independently from ``'?@ABCD'``.

    Args:
        read_length: Length of the read the quality string is built for.

    Returns:
        A string of ``int(read_length * 0.01)`` characters; empty when that
        count truncates to zero.
    """
    # range() only accepts integers, so the float product must be truncated.
    segment_length = int(read_length * 0.01)
    return ''.join(random.choice('?@ABCD') for _ in range(segment_length))
def generate_quality_next_bases_rv(read_length):
    """Return random quality characters for the second segment of a reverse read.

    The segment covers 2% of ``read_length`` and each character is drawn
    independently from ``'ABCDEG'``.

    Args:
        read_length: Length of the read the quality string is built for.

    Returns:
        A string of ``int(read_length * 0.02)`` characters; empty when that
        count truncates to zero.
    """
    # range() only accepts integers, so the float product must be truncated.
    segment_length = int(read_length * 0.02)
    return ''.join(random.choice('ABCDEG') for _ in range(segment_length))
def generate_quality_middle_bases_rv(read_length):
    """Return random quality characters for the third segment of a reverse read.

    The segment covers 2% of ``read_length`` and each character is drawn
    independently from ``'CDEFGH'``.

    Args:
        read_length: Length of the read the quality string is built for.

    Returns:
        A string of ``int(read_length * 0.02)`` characters; empty when that
        count truncates to zero.
    """
    # range() only accepts integers, so the float product must be truncated.
    segment_length = int(read_length * 0.02)
    return ''.join(random.choice('CDEFGH') for _ in range(segment_length))
def generate_quality_main_bases_rv(read_length):
    """Return random quality characters for the longest segment of a reverse read.

    The segment length is six truncated 10% slices plus one truncated 5% slice
    of ``read_length``. Each character is drawn independently from ``'HI'``.

    Args:
        read_length: Length of the read the quality string is built for.

    Returns:
        A string of ``int(read_length * 0.10) * 6 + int(read_length * 0.05)``
        characters; empty when both terms truncate to zero.
    """
    # Each slice is truncated separately before summing, which is not the same
    # as truncating 65% of read_length once; range() needs an integer either way.
    tenth_slice = int(read_length * 0.10)
    twentieth_slice = int(read_length * 0.05)
    segment_length = tenth_slice * 6 + twentieth_slice
    return ''.join(random.choice('HI') for _ in range(segment_length))
def generate_quality_decreasing_175_200_rv(read_length):
    """Return random quality characters for a 10% segment of a reverse read.

    The numeric suffix in the name plays no part in the computation: the
    segment length is derived only from ``read_length``. Each character is
    drawn independently from ``'CDEFGH'``.

    Args:
        read_length: Length of the read the quality string is built for.

    Returns:
        A string of ``int(read_length * 0.10)`` characters; empty when that
        count truncates to zero.
    """
    # range() only accepts integers, so the float product must be truncated.
    segment_length = int(read_length * 0.10)
    return ''.join(random.choice('CDEFGH') for _ in range(segment_length))
def generate_quality_decreasing_200_220_rv(read_length):
    """Return random quality characters for a 10% segment of a reverse read.

    The numeric suffix in the name plays no part in the computation: the
    segment length is derived only from ``read_length``. Each character is
    drawn independently from ``'ABCDEG'``.

    Args:
        read_length: Length of the read the quality string is built for.

    Returns:
        A string of ``int(read_length * 0.10)`` characters; empty when that
        count truncates to zero.
    """
    # range() only accepts integers, so the float product must be truncated.
    segment_length = int(read_length * 0.10)
    return ''.join(random.choice('ABCDEG') for _ in range(segment_length))
def generate_quality_decreasing_220_260_rv(read_length):
    """Return random quality characters for a 5% segment of a reverse read.

    The numeric suffix in the name plays no part in the computation: the
    segment length is derived only from ``read_length``. Each character is
    drawn independently from ``'?@ABCD'``.

    Args:
        read_length: Length of the read the quality string is built for.

    Returns:
        A string of ``int(read_length * 0.05)`` characters; empty when that
        count truncates to zero.
    """
    # range() only accepts integers, so the float product must be truncated.
    segment_length = int(read_length * 0.05)
    return ''.join(random.choice('?@ABCD') for _ in range(segment_length))
def generate_quality_decreasing_260_290_rv(read_length):
    """Return random quality characters for a 4% segment of a reverse read.

    The numeric suffix in the name plays no part in the computation: the
    segment length is derived only from ``read_length``. Each character is
    drawn independently from ``'?@ABC'``.

    Args:
        read_length: Length of the read the quality string is built for.

    Returns:
        A string of ``int(read_length * 0.04)`` characters; empty when that
        count truncates to zero.
    """
    # range() only accepts integers, so the float product must be truncated.
    segment_length = int(read_length * 0.04)
    return ''.join(random.choice('?@ABC') for _ in range(segment_length))
def generate_quality_decreasing_290_300_rv(read_length):
    """Return random quality characters for a 1% segment of a reverse read.

    The numeric suffix in the name plays no part in the computation: the
    segment length is derived only from ``read_length``. Each character is
    drawn independently from ``'?@AB'``.

    Args:
        read_length: Length of the read the quality string is built for.

    Returns:
        A string of ``int(read_length * 0.01)`` characters; empty when that
        count truncates to zero.
    """
    # range() only accepts integers, so the float product must be truncated.
    segment_length = int(read_length * 0.01)
    return ''.join(random.choice('?@AB') for _ in range(segment_length))
def change_quality_rv(input_file, output_file):
with open(input_file, 'r') as f_in, open(output_file, 'w') as f_out:
@ -90,16 +90,17 @@ def change_quality_rv(input_file, output_file):
elif line_num % 4 == 2:
f_out.write(line)
else:
read_length = len(line.strip())
new_quality = (
generate_quality_first_bases_rv() +
generate_quality_next_bases_rv() +
generate_quality_middle_bases_rv() +
generate_quality_main_bases_rv() +
generate_quality_decreasing_175_200_rv() +
generate_quality_decreasing_200_220_rv() +
generate_quality_decreasing_220_260_rv() +
generate_quality_decreasing_260_290_rv() +
generate_quality_decreasing_290_300_rv()
generate_quality_first_bases_rv(read_length) +
generate_quality_next_bases_rv(read_length) +
generate_quality_middle_bases_rv(read_length) +
generate_quality_main_bases_rv(read_length) +
generate_quality_decreasing_175_200_rv(read_length) +
generate_quality_decreasing_200_220_rv(read_length) +
generate_quality_decreasing_220_260_rv(read_length) +
generate_quality_decreasing_260_290_rv(read_length) +
generate_quality_decreasing_290_300_rv(read_length)
)
f_out.write(new_quality + '\n')