# protein-folding-demo/tests/test_sequence.py
# edbeeching (HF Staff): Add Carbon protein folding demo
# commit e4d73d6 (verified)
import unittest
from carbon_folding_demo.sequence import (
SequenceInputError,
analyze_sequence,
clean_sequence,
reverse_complement,
translate_dna,
)
class SequenceTest(unittest.TestCase):
    """Unit tests for the helpers imported from ``carbon_folding_demo.sequence``.

    Exercises sequence cleaning, DNA translation, ORF detection on both
    strands, protein pass-through, and the input-size limits.
    """

    def test_clean_sequence_removes_fasta_headers_and_spaces(self):
        # The ">x" header line, the inner spaces and the newlines must all be
        # gone, and the remaining bases are expected in upper case.
        fasta_text = ">x\natg aaa taa\n"
        self.assertEqual(clean_sequence(fasta_text), "ATGAAATAA")

    def test_translate_dna_handles_stop_and_unknown(self):
        # (input DNA, expected protein): a TAA codon is expected as "*",
        # an NNN codon as "X".
        expectations = (
            ("ATGAAATAA", "MK*"),
            ("ATGNNNTAA", "MX*"),
        )
        for dna, protein in expectations:
            self.assertEqual(translate_dna(dna), protein)

    def test_detects_forward_orf(self):
        result = analyze_sequence("CCCATGAAACCCGGGTAA", min_orf_aa=4)
        self.assertEqual(result.sequence_type, "dna")

        found = result.orfs
        self.assertEqual(len(found), 1)

        # Only index into the list after its length has been asserted, so an
        # empty result fails as an assertion rather than an IndexError.
        orf = found[0]
        self.assertEqual(orf.protein, "MKPG")
        self.assertEqual(orf.strand, "+")
        self.assertTrue(orf.complete)

    def test_detects_reverse_orf(self):
        # Build the input from the reverse complement of a known ORF so the
        # single expected hit is reported on the "-" strand.
        reverse_only = reverse_complement("ATGAAACCCGGGTAA")
        padded = f"GGG{reverse_only}CCC"
        result = analyze_sequence(padded, min_orf_aa=4)

        found = result.orfs
        self.assertEqual(len(found), 1)

        orf = found[0]
        self.assertEqual(orf.protein, "MKPG")
        self.assertEqual(orf.strand, "-")

    def test_protein_input_gets_single_foldable_choice(self):
        # The 9-residue input is shorter than min_orf_aa=30, yet it is still
        # expected back as the "protein_input" entry.
        result = analyze_sequence("MKTFFVLLL", min_orf_aa=30)
        self.assertEqual(result.sequence_type, "protein")

        choice = result.orfs[0]
        self.assertEqual(choice.id, "protein_input")
        self.assertEqual(choice.protein, "MKTFFVLLL")

    def test_rejects_oversized_inputs(self):
        # 15,000 bases against a DNA limit of 100.
        self.assertRaises(
            SequenceInputError,
            analyze_sequence,
            "ATG" * 5000,
            dna_limit=100,
        )
        # 2,000 residues against a protein limit of 10.
        self.assertRaises(
            SequenceInputError,
            analyze_sequence,
            "M" * 2000,
            protein_limit=10,
        )
# Allow running this file directly (``python test_sequence.py``) in addition
# to discovery through a test runner.
if __name__ == "__main__":
    unittest.main()