File size: 1,887 Bytes
e4d73d6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
import unittest

from carbon_folding_demo.sequence import (
    SequenceInputError,
    analyze_sequence,
    clean_sequence,
    reverse_complement,
    translate_dna,
)


class SequenceTest(unittest.TestCase):
    """Checks for the helpers imported from ``carbon_folding_demo.sequence``.

    Covers cleaning of raw input, DNA translation, ORF detection on both
    strands, handling of protein input, and rejection of inputs that exceed
    the caller-supplied size limits.
    """

    def test_clean_sequence_removes_fasta_headers_and_spaces(self):
        # Header line, inner spaces and newlines must all disappear, and the
        # remaining bases come back upper-cased.
        raw_text = ">x\natg aaa taa\n"
        cleaned = clean_sequence(raw_text)
        self.assertEqual(cleaned, "ATGAAATAA")

    def test_translate_dna_handles_stop_and_unknown(self):
        # (input DNA, expected protein): a stop codon is expected as "*",
        # a codon of ambiguous bases as "X".
        cases = (
            ("ATGAAATAA", "MK*"),
            ("ATGNNNTAA", "MX*"),
        )
        for dna, expected_protein in cases:
            self.assertEqual(translate_dna(dna), expected_protein)

    def test_detects_forward_orf(self):
        result = analyze_sequence("CCCATGAAACCCGGGTAA", min_orf_aa=4)
        self.assertEqual(result.sequence_type, "dna")
        # Exactly one ORF is expected; check its count before indexing.
        self.assertEqual(len(result.orfs), 1)
        found = result.orfs[0]
        self.assertEqual(found.protein, "MKPG")
        self.assertEqual(found.strand, "+")
        self.assertTrue(found.complete)

    def test_detects_reverse_orf(self):
        # Embed the reverse complement of a coding sequence between short
        # flanks; the test expects the ORF to be reported on the "-" strand.
        minus_strand = reverse_complement("ATGAAACCCGGGTAA")
        padded = f"GGG{minus_strand}CCC"
        result = analyze_sequence(padded, min_orf_aa=4)
        self.assertEqual(len(result.orfs), 1)
        found = result.orfs[0]
        self.assertEqual(found.protein, "MKPG")
        self.assertEqual(found.strand, "-")

    def test_protein_input_gets_single_foldable_choice(self):
        # The 9-residue input is shorter than min_orf_aa=30, yet the test
        # still expects an entry for it, identified as "protein_input".
        residues = "MKTFFVLLL"
        result = analyze_sequence(residues, min_orf_aa=30)
        self.assertEqual(result.sequence_type, "protein")
        first = result.orfs[0]
        self.assertEqual(first.id, "protein_input")
        self.assertEqual(first.protein, "MKTFFVLLL")

    def test_rejects_oversized_inputs(self):
        # DNA input longer than dna_limit must be refused.
        self.assertRaises(
            SequenceInputError,
            analyze_sequence,
            "ATG" * 5000,
            dna_limit=100,
        )
        # Protein input longer than protein_limit must be refused.
        self.assertRaises(
            SequenceInputError,
            analyze_sequence,
            "M" * 2000,
            protein_limit=10,
        )


# Allow running this file directly as a script: unittest.main() loads the
# tests defined in this module and runs them.
if __name__ == "__main__":
    unittest.main()