| """ |
| Create CSV version of the demo database for easy import testing. |
| """ |
| import csv |
| import os |
|
|
| |
# Beta-globin 5' leader used by the demo records: the first 80 nt of the
# source fragment below.
# NOTE(review): the retained 80 nt contain an ATG ("...CACCATGGTG...");
# confirm the slice is meant to extend that far.
_UTR5_BETAGLOBIN_SOURCE = "".join([
    "ACATTTGCTTCTGACACAACTGTGTTCACTAGCAACCTCAAACAGACACCATGGTGCATCTGAC",
    "TCCTGAGGAGAAGTCTGCCGTTACTGCCCTGTGGGGCAAGGTGAACGTGGATGAAGTTGGTGGT",
])
UTR5_BETAGLOBIN = _UTR5_BETAGLOBIN_SOURCE[:80]
|
|
# Short 5' leader; note that the literal itself ends in "GCCACC" + "ATG".
# NOTE(review): records pairing this leader with a CDS that also begins with
# ATG will carry two consecutive ATGs once assembled - confirm that is intended.
UTR5_EMCV = (
    "GGGAAATAAGAGAGAAAAGAAGAGTAAGAAGAAATATAAGA"
    "GCCACC"
    "ATG"
)

# Six-nucleotide element inserted between the 5' leader and the start codon.
KOZAK_TEV = "GCC" "ACC"
|
|
# Beta-globin 3' trailer: only the first 100 nt of the fragment are kept.
_UTR3_BETAGLOBIN_SOURCE = "".join([
    "GCTCGCTTTCTTGCTGTCCAATTTCTATTAAAGGTTCCTTTGTTCCCTAAGTCCAACTACTA",
    "AACTGGGGGATATTATGAAGGGCCTTGAGCATCTGGATTCTGCCTAATAAAAAACATTTATT",
    "TTCATTGCAATGATGTATTTAAATTATTTCTGAATATTTTACTAAAAATAAATGTTTTTTAT",
])
UTR3_BETAGLOBIN = _UTR3_BETAGLOBIN_SOURCE[:100]
|
|
# Albumin 3' trailer: only the first 100 nt of the fragment are kept.
_UTR3_ALBUMIN_SOURCE = "".join([
    "AATAAAGATCTTTATTTTCATTAGATCTGTGTGTTGGTTTTTTGTGTGAATCGATAGTACTA",
    "AATACTTTTCAGACACCAGAAATGCAGAGCAGTTCAGAGGCAGAGCCATCTATTGCTTACAT",
])
UTR3_ALBUMIN = _UTR3_ALBUMIN_SOURCE[:100]
|
|
# Poly(A) tails of the two lengths used by the demo records.
POLYA_120 = 120 * "A"
POLYA_60 = 60 * "A"
|
|
# eGFP coding sequence: open reading frame followed by a TAA stop codon.
_CDS_EGFP_ORF = "ATGGTGAGCAAGGGCGAGGAGCTGTTCACCGGGGTGGTGCCCATCCTGGTCGAGCTGGACGGCGACGTAAACGGCCACAAGTTCAGCGTGTCCGGCGAGGGCGAGGGCGATGCCACCTACGGCAAGCTGACCCTGAAGTTCATCTGCACCACCGGCAAGCTGCCCGTGCCCTGGCCCACCCTCGTGACCACCCTGACCTACGGCGTGCAGTGCTTCAGCCGCTACCCCGACCACATGAAGCAGCACGACTTCTTCAAGTCCGCCATGCCCGAAGGCTACGTCCAGGAGCGCACCATCTTCTTCAAGGACGACGGCAACTACAAGACCCGCGCCGAGGTGAAGTTCGAGGGCGACACCCTGGTGAACCGCATCGAGCTGAAGGGCATCGACTTCAAGGAGGACGGCAACATCCTGGGGCACAAGCTGGAGTACAACTACAACAGCCACAACGTCTATATCATGGCCGACAAGCAGAAGAACGGCATCAAGGTGAACTTCAAGATCCGCCACAACATCGAGGACGGCAGCGTGCAGCTCGCCGACCACTACCAGCAGAACACCCCCATCGGCGACGGCCCCGTGCTGCTGCCCGACAACCACTACCTGAGCACCCAGTCCGCCCTGAGCAAAGACCCCAACGAGAAGCGCGATCACATGGTCCTGCTGGAGTTCGTGACCGCCGCCGGGATCACTCTCGGCATGGACGAGCTGTACAAG"
CDS_EGFP = _CDS_EGFP_ORF + "TAA"
|
|
# mCherry coding sequence: open reading frame followed by a TAA stop codon.
_CDS_MCHERRY_ORF = "ATGGTGAGCAAGGGCGAGGAGGATAACATGGCCATCATCAAGGAGTTCATGCGCTTCAAGGTGCACATGGAGGGCTCCGTGAACGGCCACGAGTTCGAGATCGAGGGCGAGGGCGAGGGCCGCCCCTACGAGGGCACCCAGACCGCCAAGCTGAAGGTGACCAAGGGTGGCCCCCTGCCCTTCGCCTGGGACATCCTGTCCCCTCAGTTCATGTACGGCTCCAAGGCCTACGTGAAGCACCCCGCCGACATCCCCGACTACTTGAAGCTGTCCTTCCCCGAGGGCTTCAAGTGGGAGCGCGTGATGAACTTCGAGGACGGCGGCGTGGTGACCGTGACCCAGGACTCCTCCCTGCAGGACGGCGAGTTCATCTACAAGGTGAAGCTGCGCGGCACCAACTTCCCCTCCGACGGCCCCGTAATGCAGAAGAAGACCATGGGCTGGGAGGCCTCCTCCGAGCGGATGTACCCCGAGGACGGCGCCCTGAAGGGCGAGATCAAGCAGAGGCTGAAGCTGAAGGACGGCGGCCACTACGACGCTGAGGTCAAGACCACCTACAAGGCCAAGAAGCCCGTGCAGCTGCCCGGCGCCTACAACGTCAACATCAAGTTGGACATCACCTCCCACAACGAGGACTACACCATCGTGGAACAGTACGAACGCGCCGAGGGCCGCCACTCCACCGGCGGCATGGACGAGCTGTACAAG"
CDS_MCHERRY = _CDS_MCHERRY_ORF + "TAA"
|
|
# Luc2 reporter coding fragment for the demo data, capped at 900 nt.
CDS_LUC2 = (
    "ATGGAAGATGCCAAAAACATTAAGAAGGGCCCAGCGCCATTCTACCCACTCGAAGACGGGAC"
    "CGCCGGCGAGCAGCTGCACAAAGCCATGAAGCGCTACGCCCTGGTGCCCGGCACCATCGCCT"
    "TTACCGACGCACATATCGAGGTGGACATTACCTACGCCGAGTACTTCGAGATGAGCGTTCGG"
    "CTGGCAGAAGCTATGAAGCGCTATGGGCTGAATACAAACCATCGGATCGTGGTGTGCAGCGA"
    "GAATAGTCTGGAGAAGATCCTGCTGAACAAAGGCCTGCCTGTAGCCGGCCTTTTCCTCCTGG"
    "AAGAGCTGCGGCAGCAGTTCCAGAAGGCCCGGGAGCAGATGTTCACCTTCGTGCTCGATCTG"
    "GAGGAAATGACCGCCGAAGAGGCGATTGAGAATCTGGTATTCGAGCAGTATGGAATCGACCA"
    "TTATCTTGATAACCCACAATGCCTGCATGACCTGGTGCATCTGGAACCCCGAGGTCAATGTG"
    "GAAGAGTTCCTGGAAAAGCTGCTGAAGGACGGTATCATCATGTTCAGCATCCATGGTTATGG"
    "CTACATCCTGGGGCCCGGAACCAACTTCGATCTGGAGCGCATGATCAAGCGCGATGGGGAG"
    "GTGGATATGGCCCTGATTAAGGTGTCGATGGAGCAGGCCGGCATCGACCCCGATGAGGCCGG"
    "AGCCATTCGGCTGTACAAGCTGATGAAGGATAAG"
    "TAA"
)[:900]
# Keep the CDS in frame: drop any trailing partial codon first. Replacing the
# last three bases alone never changes the length, so it cannot repair a
# length that is not a multiple of three (looping on that condition would
# never terminate).
CDS_LUC2 = CDS_LUC2[:len(CDS_LUC2) - len(CDS_LUC2) % 3]
# After trimming, make sure the final codon is a stop codon.
if not CDS_LUC2.endswith(("TAA", "TAG", "TGA")):
    CDS_LUC2 = CDS_LUC2[:-3] + "TAA"
|
|
# Spike RBD coding fragment for the demo data. The raw fragment is cut back to
# whole codons and its final codon is then overwritten with a TAA stop.
_SPIKE_RBD_RAW = "".join([
    "ATGTTCGTGTTCCTGGTGCTGCTGCCCCTGGTGTCCTCCCAGGTGTGCAACCTGACCACCAG",
    "AACCCAGCTGCCCCCCGCCTACACCAACTCCTTCACCCGGGGCGTGTACTACCCCGACAAGG",
    "TGTTCCGCTCCTCCGTGCTGCACTCCACCCAGGACCTGTTTCTGCCCTTTTTCTCCAACGTG",
    "ACCTGGTTCCACGCCATCCACGTGTCCGGCACCAACGGCACAAAGCGGTTCGACAACCCCGTG",
    "CTGCCCTTCAACGACGGGGTGTACTTTGCCAGCACCGAGAAGTCCAACATCATCCGGGGCTG",
    "GATCTTCGGCACCACCCTGGACTCCAAGACCCAGTCCCTGCTGATCGTGAACAACGCCACCA",
    "ACGTGGTCATCAAGGTGTGCGAGTTCCAGTTCTGCAACGACCCCTTCCTGGGCGTCTACTAC",
    "CACAAGAACAACAAGTCCTGGATGGAGTCCGAGTTCCGGGTGTACTCCTCCGCCAACAACTG",
    "CACCTTTGAGTACGTGTCCCAGCCCTTTCTGATGGACCTGGAGGGCAAACAGGGCAACTTCA",
    "AGAACCTGCGCGAGTTTGTGTTTAAGAACATCGACGGCTACTTCAAGATCTACAGCAAGCAC",
])
# Number of complete codons available in the raw fragment.
_SPIKE_RBD_CODONS = len(_SPIKE_RBD_RAW) // 3
# Keep all but the last complete codon, then terminate with TAA.
CDS_SPIKE_RBD = _SPIKE_RBD_RAW[:(_SPIKE_RBD_CODONS - 1) * 3] + "TAA"
|
|
# EPO coding fragment for the demo data. Any trailing partial codon is removed;
# if the result does not already end in a stop codon, the last codon is
# replaced by TAA.
CDS_EPO = "".join([
    "ATGGGGGTGCACGAATGTCCCGCCTGGCTGTGGCTGCTGCTGTCGCTGCCGTTCTCTGTGCT",
    "GCCCGCCCGCGCCGTCCTCACCGTCAACTTCCCGCACCCTGCTTCCACGCCTCAGAGTCCTG",
    "GAGAGGTACCTCTTGGAGGCCAAGGAGGCCGAGAATATCACGACGGGCTGTGCTGAACACTGC",
    "AGCTTGAATGAGAATATCACGGTGCGCTTTCCACGCCTCATTTGCGACAGCTTTGTTCGTGG",
    "TCAGGCCGTGGTCAGCTCCGATGAGGTCTTCAGGGCCCCTGTCCTCCTGCAGCTGGAATCCT",
    "GGCAGCGTCTCAGCCCCTGCAGCCAGCCCTCCCAGCTGCCCTCAGCCACCTGTCCCGCCTGCT",
    "CCAGAGCCTGGAGAACTTCTACCAGCCTCTGGAGCAGCTCCAGGAAGTGATCCAGGAGATGAG",
    "CAAGCTGTCCGCCACGGCCGTGGAGGTCTTGGCCAGTAAGCCGGAG",
    "TAA",
])
_EPO_PARTIAL_CODON = len(CDS_EPO) % 3
if _EPO_PARTIAL_CODON:
    # Cut the dangling one or two bases so the length is a multiple of three.
    CDS_EPO = CDS_EPO[:-_EPO_PARTIAL_CODON]
if not CDS_EPO.endswith(("TAA", "TAG", "TGA")):
    CDS_EPO = CDS_EPO[:-3] + "TAA"
|
|
# FIX coding fragment for the demo data. The raw fragment is cut back to whole
# codons and its final codon is then overwritten with a TAA stop.
_CDS_FIX_RAW = "".join([
    "ATGCAGCGCGTGAACATGATCATGGCCTCCCTGTGGCTGTGCTTCGTGGCCCTGTGGCAGGC",
    "TGGCAACCCCAGAGAAGTACCTGTTCAAGAACGGCGACCAGCGGCCCAACAAGGAGATCCCCA",
    "AGAGCATCATCCTGGAGGAGTTCAAGGCCTTCTTCTCCACCTTCATCAACCGGAAGATGATCA",
    "AGCAGACCGACAAAGACCAGGTGATCAGCCTGGGCGGCAAGGACCAGGTGCTGATCCAGATGC",
    "AGCCCCAGGTGAGCAAGGACTTTGGCTTCAGCCTGTGCACCTGCCCCTGGGGCCACCCCAGCC",
    "CCTGCAGCAGCACATCCTGTACTTCCTGAACCAGAAGGCCAAACAGTTCCTGCTGCAGGACGAG",
    "AAGGTGAAGGGCATCAACCACTGCAAGGTGCGGGTGGCCCTGGAGCAGGACGGCAGCAAGGTG",
])
# Whole codons in the raw fragment; any leftover bases are discarded.
_CDS_FIX_CODONS, _ = divmod(len(_CDS_FIX_RAW), 3)
# Keep all but the last complete codon, then terminate with TAA.
CDS_FIX = _CDS_FIX_RAW[:3 * (_CDS_FIX_CODONS - 1)] + "TAA"
|
|
# Pre-assembled full-length transcripts, built 5' -> 3' from the parts above.
# The eGFP construct places KOZAK_TEV before an explicit "ATG" and then
# appends the CDS with its first three bases skipped.
FULL_MRNA_EGFP = "".join((
    UTR5_BETAGLOBIN,
    KOZAK_TEV,
    "ATG",
    CDS_EGFP[3:],
    UTR3_BETAGLOBIN,
    POLYA_120,
))
FULL_MRNA_MCHERRY = "".join((
    UTR5_EMCV,
    CDS_MCHERRY,
    UTR3_ALBUMIN,
    POLYA_60,
))
|
|
| |
# Rows written to the demo CSV. Each dict maps a CSV column name to its value;
# "full_mrna" is left as an empty string in every row.
CSV_DATA = [
    {
        "id": 1,
        "gene_name": "eGFP-hBG-UTRs",
        "five_prime_utr": UTR5_BETAGLOBIN,
        "cds": CDS_EGFP,
        "three_prime_utr": UTR3_BETAGLOBIN,
        "poly_a_tail": POLYA_120,
        "full_mrna": "",
        "target_protein": "Enhanced GFP",
        "organism": "Aequorea victoria",
        "expression_system": "HEK293T",
        "gc_target_percent": 52.4,
        "notes": "Human beta-globin UTRs. Classic reporter construct for mRNA transfection.",
    },
    {
        "id": 2,
        "gene_name": "mCherry-AlbUTR",
        "five_prime_utr": UTR5_BETAGLOBIN,
        "cds": CDS_MCHERRY,
        "three_prime_utr": UTR3_ALBUMIN,
        "poly_a_tail": POLYA_120,
        "full_mrna": "",
        "target_protein": "mCherry red fluorescent protein",
        "organism": "Discosoma sp.",
        "expression_system": "CHO",
        "gc_target_percent": 50.1,
        "notes": "Albumin 3'UTR for extended expression. Good cell viability.",
    },
    {
        "id": 3,
        "gene_name": "Luc2-reporter",
        "five_prime_utr": UTR5_EMCV,
        "cds": CDS_LUC2,
        "three_prime_utr": UTR3_ALBUMIN,
        "poly_a_tail": POLYA_60,
        "full_mrna": "",
        "target_protein": "Firefly luciferase",
        "organism": "Photinus pyralis",
        "expression_system": "Huh-7",
        "gc_target_percent": 53.8,
        "notes": "Bioluminescence reporter. Used for LNP screening.",
    },
    {
        "id": 4,
        "gene_name": "SpRBD-v1",
        "five_prime_utr": UTR5_BETAGLOBIN,
        "cds": CDS_SPIKE_RBD,
        "three_prime_utr": UTR3_ALBUMIN,
        "poly_a_tail": POLYA_120,
        "full_mrna": "",
        "target_protein": "SARS-CoV-2 Spike RBD",
        "organism": "SARS-CoV-2",
        "expression_system": "HEK293T",
        "gc_target_percent": 55.2,
        "notes": "Vaccine antigen candidate. Proline-stabilized RBD.",
    },
]
|
|
def create_csv(output_path=None, rows=None):
    """Write the demo mRNA records to a CSV file and return its path.

    Args:
        output_path: Destination file. When omitted, ``mrna_sequences.csv``
            is written next to this script.
        rows: Iterable of dicts keyed by the CSV column names. When omitted,
            the module-level ``CSV_DATA`` is exported. Keys missing from a
            row are written as empty cells by ``csv.DictWriter``.

    Returns:
        The path of the file that was written.

    Raises:
        ValueError: If a row contains a key that is not a known column
            (raised by ``csv.DictWriter``).
        OSError: If the destination cannot be opened for writing.
    """
    if output_path is None:
        output_path = os.path.join(os.path.dirname(__file__), 'mrna_sequences.csv')
    if rows is None:
        rows = CSV_DATA
    # Materialise once so generators work and the summary count is available.
    rows = list(rows)

    fieldnames = ['id', 'gene_name', 'five_prime_utr', 'cds', 'three_prime_utr',
                  'poly_a_tail', 'full_mrna', 'target_protein', 'organism',
                  'expression_system', 'gc_target_percent', 'notes']

    # newline='' leaves line-ending handling to the csv module; the explicit
    # encoding keeps the output independent of the platform's locale default.
    with open(output_path, 'w', newline='', encoding='utf-8') as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(rows)

    print(f'✓ Created {output_path}')
    print(f' {len(rows)} sequences exported')
    return output_path


if __name__ == '__main__':
    create_csv()
|
|