001、测试数据
# Example session showing the input data and where this script lives:
#
#   [root@PC1 test2]# ls
#   a.fa  test.py
#   [root@PC1 test2]# cat a.fa        ## test data
#   >OR4F5_ENSG00000186092_ENST00000641515_61_1038_2618
#   CCCAGATCTCTTCAGTTTTTATGCCTCATTCTGTGAAAATTGCTGTAGTCTCTTCCAGTTATGAAGAAGGTAACTGCAGAGGCTATTTCCTGGAATGAATCAACGAGTGAAACGAATAACTCTATGGTGACTGAATTCATTTTTCTGGGTCTCTCTGATTCTCAGGAACTCCAGACCTTCCTATTTATGTTGTTTTTT
#   >OR4F29_ENSG00000284733_ENST00000426406_20_955_995
#   AGCCCAGTTGGCTGGACCAATGGATGGAGAGAATCACTCAGTGGTATCTGAGTTTTTGTTTCTGGGACTCACTCATTCATGGGAGATCCAGCTCCTCCTCCTAGTGTTTTCCTCTGTGCTCTATGTGGCAAGCATTACTGGAAACATCCTCATTGTGTTTTCTGTGACCACTGACCCTCACTTAC
#   [root@PC1 test2]# cat test.py     ## conversion script

#!/usr/bin/python
"""Re-wrap a FASTA file so every sequence line holds a fixed number of bases.

Run as a script, it reads ``a.fa`` from the current directory and writes
``result.fa`` with ``len_perline`` bases per sequence line.

Records are streamed one at a time and written in input order, so records
that share a header are all kept instead of overwriting one another.
"""

len_perline = 20  # number of bases written per output line


def wrap_sequence(seq, width):
    """Split ``seq`` into consecutive chunks of at most ``width`` characters.

    Args:
        seq: the full sequence as a single string.
        width: maximum chunk length; must be a positive integer.

    Returns:
        A list of chunks in order. An empty ``seq`` yields an empty list.

    Raises:
        ValueError: if ``width`` is smaller than 1.
    """
    if width < 1:
        raise ValueError("width must be a positive integer")
    # Stepping an index avoids re-copying the remaining tail on every pass.
    return [seq[pos:pos + width] for pos in range(0, len(seq), width)]


def read_fasta(lines):
    """Yield ``(header, sequence)`` pairs from an iterable of FASTA lines.

    Args:
        lines: iterable of text lines (an open file works).

    Yields:
        Tuples of the stripped header line (including the leading ``>``) and
        the record's sequence lines stripped and joined into one string.

    Raises:
        ValueError: if sequence data appears before the first header line.
    """
    header = None
    parts = []
    for raw in lines:
        line = raw.strip()
        if not line:
            # Blank lines carry no sequence data.
            continue
        if line.startswith(">"):
            if header is not None:
                yield header, "".join(parts)
            header = line
            parts = []
        elif header is None:
            raise ValueError(
                "sequence data found before the first '>' header line")
        else:
            # Collect pieces and join once per record rather than growing a
            # string with repeated '+='.
            parts.append(line)
    if header is not None:
        yield header, "".join(parts)


def reformat_fasta(in_path, out_path, width=len_perline):
    """Rewrite the FASTA file ``in_path`` to ``out_path`` with wrapped lines.

    Args:
        in_path: path of the FASTA file to read.
        out_path: path of the file to create or overwrite.
        width: number of bases per output sequence line.
    """
    # 'with' closes both files even when reading or writing raises.
    with open(in_path, "r") as src, open(out_path, "w") as dst:
        for header, seq in read_fasta(src):
            dst.write(header + "\n")
            # A record without sequence still gets one (blank) sequence line,
            # matching what this script has always written for that case.
            for chunk in wrap_sequence(seq, width) or [""]:
                dst.write(chunk + "\n")


def main():
    """Convert ``a.fa`` into ``result.fa`` using ``len_perline`` bases per line."""
    reformat_fasta("a.fa", "result.fa", len_perline)


if __name__ == "__main__":
    main()
[root@PC1 test2]# python test.py          ## 执行程序
[root@PC1 test2]# ls
a.fa  result.fa  test.py
[root@PC1 test2]# cat result.fa           ## 查看结果
>OR4F5_ENSG00000186092_ENST00000641515_61_1038_2618
CCCAGATCTCTTCAGTTTTT
ATGCCTCATTCTGTGAAAAT
TGCTGTAGTCTCTTCCAGTT
ATGAAGAAGGTAACTGCAGA
GGCTATTTCCTGGAATGAAT
CAACGAGTGAAACGAATAAC
TCTATGGTGACTGAATTCAT
TTTTCTGGGTCTCTCTGATT
CTCAGGAACTCCAGACCTTC
CTATTTATGTTGTTTTTT
>OR4F29_ENSG00000284733_ENST00000426406_20_955_995
AGCCCAGTTGGCTGGACCAA
TGGATGGAGAGAATCACTCA
GTGGTATCTGAGTTTTTGTT
TCTGGGACTCACTCATTCAT
GGGAGATCCAGCTCCTCCTC
CTAGTGTTTTCCTCTGTGCT
CTATGTGGCAAGCATTACTG
GAAACATCCTCATTGTGTTT
TCTGTGACCACTGACCCTCA
CTTAC