diff --git a/change_quality.py b/change_quality.py index 84537ee..9d5f194 100644 --- a/change_quality.py +++ b/change_quality.py @@ -5,29 +5,29 @@ import os ### FORWARD -def generate_quality_first_bases_fw(): - return ''.join(random.choice('?@ABCD') for _ in range(2)) +def generate_quality_first_bases_fw(read_length): + return ''.join(random.choice('?@ABCD') for _ in range(int(read_length * 0.02))) -def generate_quality_next_bases_fw(): - return ''.join(random.choice('ABCDEG') for _ in range(3)) +def generate_quality_next_bases_fw(read_length): + return ''.join(random.choice('ABCDEG') for _ in range(int(read_length * 0.02))) -def generate_quality_middle_bases_fw(): - return ''.join(random.choice('CDEFGH') for _ in range(5)) +def generate_quality_middle_bases_fw(read_length): + return ''.join(random.choice('FGH') for _ in range(int(read_length * 0.10))) -def generate_quality_last_bases_fw(): - return ''.join(random.choice('HI') for _ in range(200)) +def generate_quality_last_bases_fw(read_length): + return ''.join(random.choice('HI') for _ in range((int(read_length * 0.60)))) -def generate_quality_decreasing_200_220_fw(): - return ''.join(random.choice('CDEFGH') for _ in range(30)) +def generate_quality_decreasing_200_220_fw(read_length): + return ''.join(random.choice('CDEFGH') for _ in range(int(read_length * 0.10))) -def generate_quality_decreasing_220_260_fw(): - return ''.join(random.choice('ABCDEG') for _ in range(25)) +def generate_quality_decreasing_220_260_fw(read_length): + return ''.join(random.choice('ABCDEG') for _ in range(int(read_length * 0.10))) -def generate_quality_decreasing_260_290_fw(): - return ''.join(random.choice('?@ABCD') for _ in range(25)) +def generate_quality_decreasing_260_290_fw(read_length): + return ''.join(random.choice('?@ABCD') for _ in range(int(read_length * 0.04))) -def generate_quality_decreasing_290_300_fw(): - return ''.join(random.choice('?@') for _ in range(10)) +def generate_quality_decreasing_290_300_fw(read_length): + return ''.join(random.choice('?@AB') for _ in range(int(read_length * 0.02))) def change_quality_fw(input_file, output_file): with open(input_file, 'r') as f_in, open(output_file, 'w') as f_out: @@ -40,45 +40,45 @@ def change_quality_fw(input_file, output_file): f_out.write(line) else: new_quality = ( - generate_quality_first_bases_fw() + - generate_quality_next_bases_fw() + - generate_quality_middle_bases_fw() + - generate_quality_last_bases_fw() + - generate_quality_decreasing_200_220_fw() + - generate_quality_decreasing_220_260_fw() + - generate_quality_decreasing_260_290_fw() + - generate_quality_decreasing_290_300_fw() + generate_quality_first_bases_fw(read_length) + + generate_quality_next_bases_fw(read_length) + + generate_quality_middle_bases_fw(read_length) + + generate_quality_last_bases_fw(read_length) + + generate_quality_decreasing_200_220_fw(read_length) + + generate_quality_decreasing_220_260_fw(read_length) + + generate_quality_decreasing_260_290_fw(read_length) + + generate_quality_decreasing_290_300_fw(read_length) ) f_out.write(new_quality + '\n') ### REVERSE def generate_quality_first_bases_rv(read_length): - return ''.join(random.choice('?@ABCD') for _ in range(read_length * 0.01)) + return ''.join(random.choice('?@ABCD') for _ in range(int(read_length * 0.01))) def generate_quality_next_bases_rv(read_length): - return ''.join(random.choice('ABCDEG') for _ in range(read_length * 0.02)) + return ''.join(random.choice('ABCDEG') for _ in range(int(read_length * 0.02))) def generate_quality_middle_bases_rv(read_length): - return ''.join(random.choice('CDEFGH') for _ in range(read_length * 0.02)) + return ''.join(random.choice('CDEFGH') for _ in range(int(read_length * 0.02))) def generate_quality_main_bases_rv(read_length): - return ''.join(random.choice('HI') for _ in range((read_length * 0.10 * 6) + (read_length * 0.05))) + return ''.join(random.choice('HI') for _ in range((int(read_length * 0.10) * 6) + (int(read_length * 0.05)))) def generate_quality_decreasing_175_200_rv(read_length): - return ''.join(random.choice('CDEFGH') for _ in range(read_length * 0.10)) + return ''.join(random.choice('CDEFGH') for _ in range(int(read_length * 0.10))) def generate_quality_decreasing_200_220_rv(read_length): - return ''.join(random.choice('ABCDEG') for _ in range(read_length * 0.10)) + return ''.join(random.choice('ABCDEG') for _ in range(int(read_length * 0.10))) def generate_quality_decreasing_220_260_rv(read_length): - return ''.join(random.choice('?@ABCD') for _ in range(read_length * 0.05)) + return ''.join(random.choice('?@ABCD') for _ in range(int(read_length * 0.05))) def generate_quality_decreasing_260_290_rv(read_length): - return ''.join(random.choice('?@ABC') for _ in range(read_length * 0.04)) + return ''.join(random.choice('?@ABC') for _ in range(int(read_length * 0.04))) def generate_quality_decreasing_290_300_rv(read_length): - return ''.join(random.choice('?@AB') for _ in range(read_length * 0.01)) + return ''.join(random.choice('?@AB') for _ in range(int(read_length * 0.01))) def change_quality_rv(input_file, output_file): with open(input_file, 'r') as f_in, open(output_file, 'w') as f_out: @@ -90,16 +90,17 @@ def change_quality_rv(input_file, output_file): elif line_num % 4 == 2: f_out.write(line) else: + read_length = len(line.strip()) new_quality = ( - generate_quality_first_bases_rv() + - generate_quality_next_bases_rv() + - generate_quality_middle_bases_rv() + - generate_quality_main_bases_rv() + - generate_quality_decreasing_175_200_rv() + - generate_quality_decreasing_200_220_rv() + - generate_quality_decreasing_220_260_rv() + - generate_quality_decreasing_260_290_rv() + - generate_quality_decreasing_290_300_rv() + generate_quality_first_bases_rv(read_length) + + generate_quality_next_bases_rv(read_length) + + generate_quality_middle_bases_rv(read_length) + + generate_quality_main_bases_rv(read_length) + + generate_quality_decreasing_175_200_rv(read_length) + + generate_quality_decreasing_200_220_rv(read_length) + + generate_quality_decreasing_220_260_rv(read_length) + + generate_quality_decreasing_260_290_rv(read_length) + + generate_quality_decreasing_290_300_rv(read_length) ) f_out.write(new_quality + '\n')