""" Create CSV version of the demo database for easy import testing. """ import csv import os # Same sequences as in create_demo_db.py UTR5_BETAGLOBIN = ( "ACATTTGCTTCTGACACAACTGTGTTCACTAGCAACCTCAAACAGACACCATGGTGCATCTGAC" "TCCTGAGGAGAAGTCTGCCGTTACTGCCCTGTGGGGCAAGGTGAACGTGGATGAAGTTGGTGGT" )[:80] UTR5_EMCV = "GGGAAATAAGAGAGAAAAGAAGAGTAAGAAGAAATATAAGAGCCACCATG" KOZAK_TEV = "GCCACC" UTR3_BETAGLOBIN = ( "GCTCGCTTTCTTGCTGTCCAATTTCTATTAAAGGTTCCTTTGTTCCCTAAGTCCAACTACTA" "AACTGGGGGATATTATGAAGGGCCTTGAGCATCTGGATTCTGCCTAATAAAAAACATTTATT" "TTCATTGCAATGATGTATTTAAATTATTTCTGAATATTTTACTAAAAATAAATGTTTTTTAT" )[:100] UTR3_ALBUMIN = ( "AATAAAGATCTTTATTTTCATTAGATCTGTGTGTTGGTTTTTTGTGTGAATCGATAGTACTA" "AATACTTTTCAGACACCAGAAATGCAGAGCAGTTCAGAGGCAGAGCCATCTATTGCTTACAT" )[:100] POLYA_120 = "A" * 120 POLYA_60 = "A" * 60 CDS_EGFP = ( "ATGGTGAGCAAGGGCGAGGAGCTGTTCACCGGGGTGGTGCCCATCCTGGTCGAGCTGGACGGCGACGTAAACGGCCACAAGTTCAGCGTGTCCGGCGAGGGCGAGGGCGATGCCACCTACGGCAAGCTGACCCTGAAGTTCATCTGCACCACCGGCAAGCTGCCCGTGCCCTGGCCCACCCTCGTGACCACCCTGACCTACGGCGTGCAGTGCTTCAGCCGCTACCCCGACCACATGAAGCAGCACGACTTCTTCAAGTCCGCCATGCCCGAAGGCTACGTCCAGGAGCGCACCATCTTCTTCAAGGACGACGGCAACTACAAGACCCGCGCCGAGGTGAAGTTCGAGGGCGACACCCTGGTGAACCGCATCGAGCTGAAGGGCATCGACTTCAAGGAGGACGGCAACATCCTGGGGCACAAGCTGGAGTACAACTACAACAGCCACAACGTCTATATCATGGCCGACAAGCAGAAGAACGGCATCAAGGTGAACTTCAAGATCCGCCACAACATCGAGGACGGCAGCGTGCAGCTCGCCGACCACTACCAGCAGAACACCCCCATCGGCGACGGCCCCGTGCTGCTGCCCGACAACCACTACCTGAGCACCCAGTCCGCCCTGAGCAAAGACCCCAACGAGAAGCGCGATCACATGGTCCTGCTGGAGTTCGTGACCGCCGCCGGGATCACTCTCGGCATGGACGAGCTGTACAAG" "TAA" ) CDS_MCHERRY = ( "ATGGTGAGCAAGGGCGAGGAGGATAACATGGCCATCATCAAGGAGTTCATGCGCTTCAAGGTGCACATGGAGGGCTCCGTGAACGGCCACGAGTTCGAGATCGAGGGCGAGGGCGAGGGCCGCCCCTACGAGGGCACCCAGACCGCCAAGCTGAAGGTGACCAAGGGTGGCCCCCTGCCCTTCGCCTGGGACATCCTGTCCCCTCAGTTCATGTACGGCTCCAAGGCCTACGTGAAGCACCCCGCCGACATCCCCGACTACTTGAAGCTGTCCTTCCCCGAGGGCTTCAAGTGGGAGCGCGTGATGAACTTCGAGGACGGCGGCGTGGTGACCGTGACCCAGGACTCCTCCCTGCAGGACGGCGAGTTCATCTACAAGGTGAAGCTGCGCGGCACCAACTTCCCCTCCGACGGCCCCGTAATGCAGAAGAAGACCATGGGCTGGGAGGCCTCCTCCGAGCGGATGTACCCCGAGGACGGCGCCCTGAAGGGCGAGATCAAGCAGAGGCTGAAGCTGAAGGACGGCGGCCACTACGACGCTGAGGTCAAGACCACCTACAAGGCCAAGAAGCCCGTGCAGCTGCCCGGCGCCTACAACGTCAACATCAAGTTGGACATCACCTCCCACAACGAGGACTACACCATCGTGGAACAGTACGAACGCGCCGAGGGCCGCCACTCCACCGGCGGCATGGACGAGCTGTACAAG" "TAA" ) CDS_LUC2 = ( "ATGGAAGATGCCAAAAACATTAAGAAGGGCCCAGCGCCATTCTACCCACTCGAAGACGGGAC" "CGCCGGCGAGCAGCTGCACAAAGCCATGAAGCGCTACGCCCTGGTGCCCGGCACCATCGCCT" "TTACCGACGCACATATCGAGGTGGACATTACCTACGCCGAGTACTTCGAGATGAGCGTTCGG" "CTGGCAGAAGCTATGAAGCGCTATGGGCTGAATACAAACCATCGGATCGTGGTGTGCAGCGA" "GAATAGTCTGGAGAAGATCCTGCTGAACAAAGGCCTGCCTGTAGCCGGCCTTTTCCTCCTGG" "AAGAGCTGCGGCAGCAGTTCCAGAAGGCCCGGGAGCAGATGTTCACCTTCGTGCTCGATCTG" "GAGGAAATGACCGCCGAAGAGGCGATTGAGAATCTGGTATTCGAGCAGTATGGAATCGACCA" "TTATCTTGATAACCCACAATGCCTGCATGACCTGGTGCATCTGGAACCCCGAGGTCAATGTG" "GAAGAGTTCCTGGAAAAGCTGCTGAAGGACGGTATCATCATGTTCAGCATCCATGGTTATGG" "CTACATCCTGGGGCCCGGAACCAACTTCGATCTGGAGCGCATGATCAAGCGCGATGGGGAG" "GTGGATATGGCCCTGATTAAGGTGTCGATGGAGCAGGCCGGCATCGACCCCGATGAGGCCGG" "AGCCATTCGGCTGTACAAGCTGATGAAGGATAAG" "TAA" )[:900] while len(CDS_LUC2) % 3 != 0: CDS_LUC2 = CDS_LUC2[:-3] + "TAA" CDS_SPIKE_RBD = ( "ATGTTCGTGTTCCTGGTGCTGCTGCCCCTGGTGTCCTCCCAGGTGTGCAACCTGACCACCAG" "AACCCAGCTGCCCCCCGCCTACACCAACTCCTTCACCCGGGGCGTGTACTACCCCGACAAGG" "TGTTCCGCTCCTCCGTGCTGCACTCCACCCAGGACCTGTTTCTGCCCTTTTTCTCCAACGTG" "ACCTGGTTCCACGCCATCCACGTGTCCGGCACCAACGGCACAAAGCGGTTCGACAACCCCGTG" "CTGCCCTTCAACGACGGGGTGTACTTTGCCAGCACCGAGAAGTCCAACATCATCCGGGGCTG" "GATCTTCGGCACCACCCTGGACTCCAAGACCCAGTCCCTGCTGATCGTGAACAACGCCACCA" "ACGTGGTCATCAAGGTGTGCGAGTTCCAGTTCTGCAACGACCCCTTCCTGGGCGTCTACTAC" "CACAAGAACAACAAGTCCTGGATGGAGTCCGAGTTCCGGGTGTACTCCTCCGCCAACAACTG" "CACCTTTGAGTACGTGTCCCAGCCCTTTCTGATGGACCTGGAGGGCAAACAGGGCAACTTCA" "AGAACCTGCGCGAGTTTGTGTTTAAGAACATCGACGGCTACTTCAAGATCTACAGCAAGCAC" ) CDS_SPIKE_RBD = CDS_SPIKE_RBD[:len(CDS_SPIKE_RBD) - len(CDS_SPIKE_RBD) % 3] CDS_SPIKE_RBD = CDS_SPIKE_RBD[:-3] + "TAA" CDS_EPO = ( "ATGGGGGTGCACGAATGTCCCGCCTGGCTGTGGCTGCTGCTGTCGCTGCCGTTCTCTGTGCT" "GCCCGCCCGCGCCGTCCTCACCGTCAACTTCCCGCACCCTGCTTCCACGCCTCAGAGTCCTG" "GAGAGGTACCTCTTGGAGGCCAAGGAGGCCGAGAATATCACGACGGGCTGTGCTGAACACTGC" "AGCTTGAATGAGAATATCACGGTGCGCTTTCCACGCCTCATTTGCGACAGCTTTGTTCGTGG" "TCAGGCCGTGGTCAGCTCCGATGAGGTCTTCAGGGCCCCTGTCCTCCTGCAGCTGGAATCCT" "GGCAGCGTCTCAGCCCCTGCAGCCAGCCCTCCCAGCTGCCCTCAGCCACCTGTCCCGCCTGCT" "CCAGAGCCTGGAGAACTTCTACCAGCCTCTGGAGCAGCTCCAGGAAGTGATCCAGGAGATGAG" "CAAGCTGTCCGCCACGGCCGTGGAGGTCTTGGCCAGTAAGCCGGAG" "TAA" ) CDS_EPO = CDS_EPO[:len(CDS_EPO) - len(CDS_EPO) % 3] if not CDS_EPO.endswith("TAA") and not CDS_EPO.endswith("TAG") and not CDS_EPO.endswith("TGA"): CDS_EPO = CDS_EPO[:-3] + "TAA" CDS_FIX = ( "ATGCAGCGCGTGAACATGATCATGGCCTCCCTGTGGCTGTGCTTCGTGGCCCTGTGGCAGGC" "TGGCAACCCCAGAGAAGTACCTGTTCAAGAACGGCGACCAGCGGCCCAACAAGGAGATCCCCA" "AGAGCATCATCCTGGAGGAGTTCAAGGCCTTCTTCTCCACCTTCATCAACCGGAAGATGATCA" "AGCAGACCGACAAAGACCAGGTGATCAGCCTGGGCGGCAAGGACCAGGTGCTGATCCAGATGC" "AGCCCCAGGTGAGCAAGGACTTTGGCTTCAGCCTGTGCACCTGCCCCTGGGGCCACCCCAGCC" "CCTGCAGCAGCACATCCTGTACTTCCTGAACCAGAAGGCCAAACAGTTCCTGCTGCAGGACGAG" "AAGGTGAAGGGCATCAACCACTGCAAGGTGCGGGTGGCCCTGGAGCAGGACGGCAGCAAGGTG" ) CDS_FIX = CDS_FIX[:len(CDS_FIX) - len(CDS_FIX) % 3] CDS_FIX = CDS_FIX[:-3] + "TAA" FULL_MRNA_EGFP = UTR5_BETAGLOBIN + KOZAK_TEV + "ATG" + CDS_EGFP[3:] + UTR3_BETAGLOBIN + POLYA_120 FULL_MRNA_MCHERRY = UTR5_EMCV + CDS_MCHERRY + UTR3_ALBUMIN + POLYA_60 # Build CSV rows CSV_DATA = [ { 'id': 1, 'gene_name': 'eGFP-hBG-UTRs', 'five_prime_utr': UTR5_BETAGLOBIN, 'cds': CDS_EGFP, 'three_prime_utr': UTR3_BETAGLOBIN, 'poly_a_tail': POLYA_120, 'full_mrna': '', 'target_protein': 'Enhanced GFP', 'organism': 'Aequorea victoria', 'expression_system': 'HEK293T', 'gc_target_percent': 52.4, 'notes': 'Human beta-globin UTRs. Classic reporter construct for mRNA transfection.' }, { 'id': 2, 'gene_name': 'mCherry-AlbUTR', 'five_prime_utr': UTR5_BETAGLOBIN, 'cds': CDS_MCHERRY, 'three_prime_utr': UTR3_ALBUMIN, 'poly_a_tail': POLYA_120, 'full_mrna': '', 'target_protein': 'mCherry red fluorescent protein', 'organism': 'Discosoma sp.', 'expression_system': 'CHO', 'gc_target_percent': 50.1, 'notes': 'Albumin 3\'UTR for extended expression. Good cell viability.' }, { 'id': 3, 'gene_name': 'Luc2-reporter', 'five_prime_utr': UTR5_EMCV, 'cds': CDS_LUC2, 'three_prime_utr': UTR3_ALBUMIN, 'poly_a_tail': POLYA_60, 'full_mrna': '', 'target_protein': 'Firefly luciferase', 'organism': 'Photinus pyralis', 'expression_system': 'Huh-7', 'gc_target_percent': 53.8, 'notes': 'Bioluminescence reporter. Used for LNP screening.' }, { 'id': 4, 'gene_name': 'SpRBD-v1', 'five_prime_utr': UTR5_BETAGLOBIN, 'cds': CDS_SPIKE_RBD, 'three_prime_utr': UTR3_ALBUMIN, 'poly_a_tail': POLYA_120, 'full_mrna': '', 'target_protein': 'SARS-CoV-2 Spike RBD', 'organism': 'SARS-CoV-2', 'expression_system': 'HEK293T', 'gc_target_percent': 55.2, 'notes': 'Vaccine antigen candidate. Proline-stabilized RBD.' }, ] def create_csv(): output_path = os.path.join(os.path.dirname(__file__), 'mrna_sequences.csv') with open(output_path, 'w', newline='') as f: fieldnames = ['id', 'gene_name', 'five_prime_utr', 'cds', 'three_prime_utr', 'poly_a_tail', 'full_mrna', 'target_protein', 'organism', 'expression_system', 'gc_target_percent', 'notes'] writer = csv.DictWriter(f, fieldnames=fieldnames) writer.writeheader() writer.writerows(CSV_DATA) print(f'✓ Created {output_path}') print(f' {len(CSV_DATA)} sequences exported') return output_path if __name__ == '__main__': create_csv()