| --- |
| datasets: |
| - multimolecule/bprna-spot |
| language: rna |
| library_name: multimolecule |
| license: agpl-3.0 |
| pipeline: rna-secondary-structure |
| pipeline_tag: other |
| tags: |
| - Biology |
| - RNA |
| widget: |
| - example_title: microRNA 21 |
| output: |
| text: .(((..(((((.)))))))).. |
| pipeline_tag: rna-secondary-structure |
| sequence_type: ncRNA |
| task: rna-secondary-structure |
| text: UAGCUUAUCAGACUGAUGUUGA |
| - example_title: microRNA 146a |
| output: |
| text: '......................' |
| pipeline_tag: rna-secondary-structure |
| sequence_type: ncRNA |
| task: rna-secondary-structure |
| text: UGAGAACUGAAUUCCAUGGGUU |
| - example_title: microRNA 155 |
| output: |
| text: '......((.........)).....' |
| pipeline_tag: rna-secondary-structure |
| sequence_type: ncRNA |
| task: rna-secondary-structure |
| text: UUAAUGCUAAUCGUGAUAGGGGUU |
| - example_title: RNA component of mitochondrial RNA processing endoribonuclease |
| output: |
| text: '......................(((((((........((((((....))))))........)))))))((.....)).......................(((((((..)))))))....................................(((((.....))))).................((...........))....................(((((((..)))))))..........................................' |
| pipeline_tag: rna-secondary-structure |
| sequence_type: ncRNA |
| task: rna-secondary-structure |
| text: GGUUCGUGCUGAAGGCCUGUAUCCUAGGCUACACACUGAGGACUCUGUUCCUCCCCUUUCCGCCUAGGGGAAAGUCCCCGGACCUCGGGCAGAGAGUGCCACGUGCAUACGCACGUAGACAUUCCCCGCUUCCCACUCCAAAGUCCGCCAAGAAGCGUAUCCCGCUGAGCGGCGUGGCGCGGGGGCGUCAUCCGUCAGCUCCCUCUAGUUACGCAGGCAGUGCGUGUCCGCGCACCAACCACACGGGGCUCAUUCUCAGCGCGGCUGUAAAAAAAAA |
| - example_title: 7SK small nuclear RNA |
| output: |
| text: (((((((...((((((.......((((..))))........))))))(((................)))............................................((((..........(.....)........[[[[.))))...................(((((.]]]])))))...........................(...............................................)...........................))))))).....(((((((((.....)))))..))))....... |
| pipeline_tag: rna-secondary-structure |
| sequence_type: ncRNA |
| task: rna-secondary-structure |
| text: GGAUGUGAGGGCGAUCUGGCUGCGACAUCUGUCACCCCAUUGAUCGCCAGGGUUGAUUCGGCUGAUCUGGCUGGCUAGGCGGGUGUCCCCUUCCUCCCUCACCGCUCCAUGUGCGUCCCUCCCGAAGCUGCGCGCUCGGUCGAAGAGGACGACCAUCCCCGAUAGAGGAGGACCGGUCUUCGGUCAAGGGUAUACGAGUAGCUGCGCUCCCCUGCUAGAACCUCCAAACAAGCUCUCAAGGUCCAUUUGUAGGAGAACGUAGGGUAGUCAAGCUUCCAAGACUCCAGACACAUCCAAAUGAGGCGCUGCAUGUGGCAGUCUGCCUUUCUUUU |
| - example_title: telomerase RNA component |
| output: |
| text: '......................(((((((........((((((((.........................[[[[[[.............(((.............((((((...)))))).)))...[[[[[[[......[[[.........................))))))))..................{{)))))))....(((((((((((..........<<..{{{{{{{{.................]]].......]]]]]]].[[[[..((((.....))))........................))))))))))).}}}}}}}}}}.>>.........(((((...))))).............]]]].(((((((.......]]]]]]....((..........))...........)))))))............' |
| pipeline_tag: rna-secondary-structure |
| sequence_type: ncRNA |
| task: rna-secondary-structure |
| text: GGGUUGCGGAGGGUGGGCCUGGGAGGGGUGGUGGCCAUUUUUUGUCUAACCCUAACUGAGAAGGGCGUAGGCGCCGUGCUUUUGCUCCCCGCGCGCUGUUUUUCUCGCUGACUUUCAGCGGGCGGAAAAGCCUCGGCCUGCCGCCUUCCACCGUUCAUUCUAGAGCAAACAAAAAAUGUCAGCUGCUGGCCCGUUCGCCCCUCCCGGGGACCUGCGGCGGGUCGCCUGCCCAGCCCCCGAACCCCGCCUGGAGGCCGCGGUCGGCCCGGGGCUUCUCCGGAGGCACCCACUGCCACCGCGAAGAGUUGGGCUCUGUCAGCCGCGGGUCUCUCGGGGGCGAGGGCGAGGUUCAGGCCUUUCAGGCCGCAGGAAGAGGAACGGAGCGAGUCCCCGCGCGCGGCGCGAUUCCCUGAGCUGUGGGACGUGCACCCAGGACUCGGCUCACACAUGC |
| - example_title: vault RNA 2-1 |
| output: |
| text: .(((((((....(((....((.(...............((((.........))))...............(...)...).))....)))..))))))).......... |
| pipeline_tag: rna-secondary-structure |
| sequence_type: ncRNA |
| task: rna-secondary-structure |
| text: CGGGUCGGAGUUAGCUCAAGCGGUUACCUCCUCAUGCCGGACUUUCUAUCUGUCCAUCUCUGUGCUGGGGUUCGAGACCCGCGGGUGCUUACUGACCCUUUUAUGCAA |
| - example_title: brain cytoplasmic RNA 1 |
| output: |
| text: '.............(((....))).....((.....((..[[[........))..{{{))....((((...(((((((.........]]].))))))).....)))).....}}}......................................................................................' |
| pipeline_tag: rna-secondary-structure |
| sequence_type: ncRNA |
| task: rna-secondary-structure |
| text: GGCCGGGCGCGGUGGCUCACGCCUGUAAUCCCAGCUCUCAGGGAGGCUAAGAGGCGGGAGGAUAGCUUGAGCCCAGGAGUUCGAGACCUGCCUGGGCAAUAUAGCGAGACCCCGUUCUCCAGAAAAAGGAAAAAAAAAAACAAAAGACAAAAAAAAAAUAAGCGUAACUUCCCUCAAAGCAACAACCCCCCCCCCCCUUU |
| - example_title: HIV-1 TAR-WT |
| output: |
| text: (((..((((((((((.(((((...(((((...).))))))))))))))))))).))) |
| pipeline_tag: rna-secondary-structure |
| sequence_type: ncRNA |
| task: rna-secondary-structure |
| text: GGUCUCUCUGGUUAGACCAGAUCUGAGCCUGGGAGCUCUCUGGCUAACUAGGGAACC |
| - example_title: prion protein (Kanno blood group) |
| output: |
| text: '...(.........((...........[[[.[[...[[[[))]]]]...]]..]]]..........)' |
| pipeline_tag: rna-secondary-structure |
| sequence_type: mRNA |
| task: rna-secondary-structure |
| text: AUGGCGAACCUUGGCUGCUGGAUGCUGGUUCUCUUUGUGGCCACAUGGAGUGACCUGGGCCUCUGC |
| - example_title: interleukin 10 |
| output: |
| text: '..........................((((((([[...)))))))...]]....' |
| pipeline_tag: rna-secondary-structure |
| sequence_type: mRNA |
| task: rna-secondary-structure |
| text: AUGCACAGCUCAGCACUGCUCUGUUGCCUGGUCCUCCUGACUGGGGUGAGGGCC |
| - example_title: Zaire ebolavirus |
| output: |
| text: '.....................(((((((............(((((............)))))...........[[)))))))..]]..................((((....(((((.[[[..........)))))..................(((((.................))))).....................................................................]]]..........................)))).......................' |
| pipeline_tag: rna-secondary-structure |
| sequence_type: mRNA |
| task: rna-secondary-structure |
| text: AAUGUUCAAACACUUUGUGAAGCUCUGUUAGCUGAUGGUCUUGCUAAAGCAUUUCCUAGCAAUAUGAUGGUAGUCACAGAGCGUGAGCAAAAAGAAAGCUUAUUGCAUCAAGCAUCAUGGCACCACACAAGUGAUGAUUUUGGUGAGCAUGCCACAGUUAGAGGGAGUAGCUUUGUAACUGAUUUAGAGAAAUACAAUCUUGCAUUUAGAUAUGAGUUUACAGCACCUUUUAUAGAAUAUUGUAACCGUUGCUAUGGUGUUAAGAAUGUUUUUAAUUGGAUGCAUUAUACAAUCCCACAGUGUUAU |
| - example_title: SARS coronavirus |
| output: |
| text: '.................................((((((....(((......)))..))))))..............................................................((((((.................................))))))............................((((((((((..((............)).[)).)))))))).]..............................................(((((((((((((...............))))))))))))).(((((.....))))).......(((((...........)))))((((...................)))).......................................................((((((((((((((((..)))))))))))))))).......' |
| pipeline_tag: rna-secondary-structure |
| sequence_type: mRNA |
| task: rna-secondary-structure |
| text: AUGUUUAUUUUCUUAUUAUUUCUUACUCUCACUAGUGGUAGUGACCUUGACCGGUGCACCACUUUUGAUGAUGUUCAAGCUCCUAAUUACACUCAACAUACUUCAUCUAUGAGGGGGGUUUACUAUCCUGAUGAAAUUUUUAGAUCAGACACUCUUUAUUUAACUCAGGAUUUAUUUCUUCCAUUUUAUUCUAAUGUUACAGGGUUUCAUACUAUUAAUCAUACGUUUGACAACCCUGUCAUACCUUUUAAGGAUGGUAUUUAUUUUGCUGCCACAGAGAAAUCAAAUGUUGUCCGUGGUUGGGUUUUUGGUUCUACCAUGAACAACAAGUCACAGUCGGUGAUUAUUAUUAACAAUUCUACUAAUGUUGUUAUACGAGCAUGUAACUUUGAAUUGUGUGACAACCCUUUCUUUGCUGUUUCUAAACCCAUGGGUACACAGACACAUACUAUGAUAUUCGAUAAUGCAUUUAAAUGCACUUUCGAGUACAUAUCU |
| - example_title: insulin |
| output: |
| text: '..............((((..[[[[[[[......))))..........(((((.......)))))(((([......................)))).........(((((.......)))))......(((((............................)))))................].......]]]]]]].......................((((((.))))))(.......((.........))...).((((.....((((((((...))))))))...[[[[[.))))............]]]]].................' |
| pipeline_tag: rna-secondary-structure |
| sequence_type: mRNA |
| task: rna-secondary-structure |
| text: AUGGCCCUGUGGAUGCGCCUCCUGCCCCUGCUGGCGCUGCUGGCCCUCUGGGGACCUGACCCAGCCGCAGCCUUUGUGAACCAACACCUGUGCGGCUCACACCUGGUGGAAGCUCUCUACCUAGUGUGCGGGGAACGAGGCUUCUUCUACACACCCAAGACCCGCCGGGAGGCAGAGGACCUGCAGGUGGGGCAGGUGGAGCUGGGCGGGGGCCCUGGUGCAGGCAGCCUGCAGCCCUUGGCCCUGGAGGGGUCCCUGCAGAAGCGUGGCAUUGUGGAACAAUGCUGUACCAGCAUCUGCUCCCUCUACCAGCUGGAGAACUACUGCAACUAG |
| - example_title: cyclin dependent kinase inhibitor 2A |
| output: |
| text: '....................(((([[[[((((...)))).....))))...................(((((((......((((((...................[[[[[[[[[[.)))))).........(((.......)))................]]]]]]]]]]....(((((..................(((((((.........))))))).....)))))....))))))).........................................((........)).......................................................(((((....)))))...........(((.........(((...((((((..((((..................]]]].)))).......))))))..)))........)))...........' |
| pipeline_tag: rna-secondary-structure |
| sequence_type: mRNA |
| task: rna-secondary-structure |
| text: AUGGAGCCGGCGGCGGGGAGCAGCAUGGAGCCUUCGGCUGACUGGCUGGCCACGGCCGCGGCCCGGGGUCGGGUAGAGGAGGUGCGGGCGCUGCUGGAGGCGGGGGCGCUGCCCAACGCACCGAAUAGUUACGGUCGGAGGCCGAUCCAGGUCAUGAUGAUGGGCAGCGCCCGAGUGGCGGAGCUGCUGCUGCUCCACGGCGCGGAGCCCAACUGCGCCGACCCCGCCACUCUCACCCGACCCGUGCACGACGCUGCCCGGGAGGGCUUCCUGGACACGCUGGUGGUGCUGCACCGGGCCGGGGCGCGGCUGGACGUGCGCGAUGCCUGGGGCCGUCUGCCCGUGGACCUGGCUGAGGAGCUGGGCCAUCGCGAUGUCGCACGGUACCUGCGCGCGGCUGCGGGGGGCACCAGAGGCAGUAACCAUGCCCGCAUAGAUGCCGCGGAAGGUCCCUCAGACAUCCCCGAUUGA |
| - example_title: human papillomavirus type 16 E6 |
| output: |
| text: ..(((((...................[[[[[[[..[[[[.....................................{{{......((({{{{....[[[[(((((((((.........................................{{{{{{{{{{{{{..........(.(...................{{{{.......).)....................)))))))))..........))).........]]]].......}}}}....................(((((.....))))).......[[[[[...))))).]]]]]............]]]]]]]]]]].}}}}}}}}}}}}}.....................................................}}}}..(((((..}}}......)))))........................ |
| pipeline_tag: rna-secondary-structure |
| sequence_type: mRNA |
| task: rna-secondary-structure |
| text: AUGCACCAAAAGAGAACUGCAAUGUUUCAGGACCCACAGGAGCGACCCAGAAAGUUACCACAGUUAUGCACAGAGCUGCAAACAACUAUACAUGAUAUAAUAUUAGAAUGUGUGUACUGCAAGCAACAGUUACUGCGACGUGAGGUAUAUGACUUUGCUUUUCGGGAUUUAUGCAUAGUAUAUAGAGAUGGGAAUCCAUAUGCUGUAUGUGAUAAAUGUUUAAAGUUUUAUUCUAAAAUUAGUGAGUAUAGACAUUAUUGUUAUAGUUUGUAUGGAACAACAUUAGAACAGCAAUACAACAAACCGUUGUGUGAUUUGUUAAUUAGGUGUAUUAACUGUCAAAAGCCACUGUGUCCUGAAGAAAAGCAAAGACAUCUGGACAAAAAGCAAAGAUUCCAUAAUAUAAGGGGUCGGUGGACCGGUCGAUGUAUGUCUUGUUGCAGAUCAUCAAGAACACGUAGAGAAACCCAGCUGUAA |
| - example_title: NRAS proto-oncogene |
| output: |
| text: '.............................................(((((...............))))).............(((((.....)))))......(((((.....)))))............' |
| pipeline_tag: rna-secondary-structure |
| sequence_type: 5' UTR |
| task: rna-secondary-structure |
| text: GGGGCCGGAAGUGCCGCUCCUUGGUGGGGGCUGUUCAUGGCGGUUCCGGGGUCUCCAACAUUUUUCCCGGCUGUGGUCCUAAAUCUGUCCAAAGCAGAGGCAGUGGAGCUUGAGGUUCUUGCUGGUGUGAA |
| - example_title: amyloid beta precursor protein |
| output: |
| text: '.............................((((((......))))))((((((................................................................................))))))...........' |
| pipeline_tag: rna-secondary-structure |
| sequence_type: 5' UTR |
| task: rna-secondary-structure |
| text: GUCAGUUUCCUCGGCAGCGGUAGGCGAGAGCACGCGGAGGAGCGUGCGCGGGGGCCCCGGGAGACGGCGGCGGUGGCGGCGCGGGCAGAGCAAGGACGCGGCGGAUCCCACUCGCACAGCAGCGCACUCGGUGCCCCGCGCAGGGUCGCG |
| - example_title: RUNX family transcription factor 1 |
| output: |
| text: ((((((.........................((....(((((((..............((....))..)))))))...[[))........(...............)..((((((((]]..))))).))).............................((((((....))))))............)))))). |
| pipeline_tag: rna-secondary-structure |
| sequence_type: 5' UTR |
| task: rna-secondary-structure |
| text: ACUUCUUUGGGCCUCAUAAACAACCACAGAACCACAAGUUGGGUAGCCUGGCAGUGUCAGAAGUCUGAACCCAGCAUAGUGGUCAGCAGGCAGGACGAAUCACACUGAAUGCAAACCACAGGGUUUCGCAGCGUGGUAAAAGAAAUCAUUGAGUCCCCCGCCUUCAGAAGAGGGUGCAUUUUCAGGAGGAAGCG |
| - example_title: fragile X messenger ribonucleoprotein 1 |
| output: |
| text: '..............................((((.....))))....(((((........)))))....................((((((((............))))))))..........(......)...............................................................((......[[[[[))..(((((.............)))))....]]]]]..................' |
| pipeline_tag: rna-secondary-structure |
| sequence_type: 5' UTR |
| task: rna-secondary-structure |
| text: CUCAGUCAGGCGCUCAGCUCCGUUUCGGUUUCACUUCCGGUGGAGGGCCGCCUCUGAGCGGGCGGCGGGCCGACGGCGAGCGCGGGCGGCGGCGGUGACGGAGGCGCCGCUGCCAGGGGGCGUGCGGCAGCGCGGCGGCGGCGGCGGCGGCGGCGGCGGCGGAGGCGGCGGCGGCGGCGGCGGCGGCGGCGGCUGGGCCUCGAGCGCCCGCAGCCCACCUCUCGGGGGCGGGCUCCCGGCGCUAGCAGGGCUGAAGAGAAG |
| - example_title: MYC proto-oncogene |
| output: |
| text: ..(((((((.........))))))).......................(((((............))))).............(((((((((.(((((.....))))).)))))))))......(((((.............)))))....................................(...................................................................)((((................(((((((............(....)........))))))).[[[[[..........................))))]]]]].......... |
| pipeline_tag: rna-secondary-structure |
| sequence_type: 5' UTR |
| task: rna-secondary-structure |
| text: AACUCGCUGUAGUAAUUCCAGCGAGAGGCAGAGGGAGCGAGCGGGCGGCCGGCUAGGGUGGAAGAGCCGGGCGAGCAGAGCUGCGCUGCGGGCGUCCUGGGAAGGGAGAUCCGGAGCGAAUAGGGGGCUUCGCCUCUGGCCCAGCCCUCCCGCUGAUCCCCCAGCCAGCGGUCCGCAACCCUUGCCGCAUCCACGAAACUUUGCCCAUAGCAGCGGGCGGGCACUUUGCACUGGAACUUACAACACCCGAGCAAGGACGCGACUCUCCCGACGCGGGGAGGCUAUUCUGCCCAUUUGGGGACACUUCCCCGCCGCUGCCAGGACCCGCUUCUCUGAAAGGCUCUCCUUGCAGCUGCUUAGACG |
| - example_title: activating transcription factor 4 |
| output: |
| text: '........((((...........................(((....)))...................................((((((........................................................(.....)........................((..))))))))................................................................................)))).........' |
| pipeline_tag: rna-secondary-structure |
| sequence_type: 5' UTR |
| task: rna-secondary-structure |
| text: CAUUUCUACUUUGCCCGCCCACAGAUGUAGUUUUCUCUGCGCGUGUGCGUUUUCCCUCCUCCCCGCCCUCAGGGUCCACGGCCACCAUGGCGUAUUAGGGGCAGCAGUGCCUGCGGCAGCAUUGGCCUUUGCAGCGGCGGCAGCAGCACCAGGCUCUGCAGCGGCAACCCCCAGCGGCUUAAGCCAUGGCGCUUCUCACGGCAUUCAGCAGCAGCGUUGCUGUAACCGACAAAGACACCUUCGAAUUAAGCACAUUCCUCGAUUCCAGCAAAGCACCGCAAC |
| - example_title: Human GPI protein p137 |
| output: |
| text: '..............(((((.........................(((((..[[......)))))..[[[[[[[[.............(((((((((((...)))))))))))..........................((((..................))))........)))))......]]]]]]]]..........]]........(((......................))).............................................................' |
| pipeline_tag: rna-secondary-structure |
| sequence_type: 3' UTR |
| task: rna-secondary-structure |
| text: UUUUUAAAAGGAAAAGAUACCAAAUGCCUGCUGCUACCACCCUUUUCAAUUGCUAUGUUUUGAAAGGCACCAGUAUGUGUUUUAGAUUGAUUUAAAUGUUUCAUUUAAAUCACGGACAGUAGUUUCAGUUCUGAUGGUAUAAGCAAAACAAAUAAAACGUUUAUAAAAGUUGUAUCUUGAAACACUGGUGUUCAACAGCUAGCAGCUUAUGUGAUUCACCCCAUGCCACGUUAGUGUCACAAAUUUUAUGGUUUAUCUCCAGCAACAUUUCUCUAGUACUUGCACUUAUUAUCUGAAUUC |
| - example_title: nucleophosmin 1 |
| output: |
| text: ((([[[[[.............[[[[......))).....((((((((........(((...........))).............................((((.{{{{{.(((((.....................................................................................[...........[...................................))))).............))))............]...))))))))...]........]]]]........}}}}}...]]]]].. |
| pipeline_tag: rna-secondary-structure |
| sequence_type: 3' UTR |
| task: rna-secondary-structure |
| text: GAAAAUAGUUUAAACAAUUUGUUAAAAAAUUUUCCGUCUUAUUUCAUUUCUGUAACAGUUGAUAUCUGGCUGUCCUUUUUAUAAUGCAGAGUGAGAACUUUCCCUACCGUGUUUGAUAAAUGUUGUCCAGGUUCUAUUGCCAAGAAUGUGUUGUCCAAAAUGCCUGUUUAGUUUUUAAAGAUGGAACUCCACCCUUUGCUUGGUUUUAAGUAUGUAUGGAAUGUUAUGAUAGGACAUAGUAGUAGCGGUGGUCAGACAUGGAAAUGGUGGGGAGACAAAAAUAUACAUGUGAAAUAAAACUCAGUAUUUUAAUAAAGUAGCACGGUUUCUAUUGA |
| - example_title: superoxide dismutase 1 |
| output: |
| text: '......((((((((...))))))))........(((((((((..................(((((..........)))))..........(((((..(((((....................................................((.....)))))))[[))))).......((((...[)))).................................................)))))))))................................(((((.........................(..]..))))))...........]]..............' |
| pipeline_tag: rna-secondary-structure |
| sequence_type: 3' UTR |
| task: rna-secondary-structure |
| text: ACAUUCCCUUGGAUGUAGUCUGAGGCCCCUUAACUCAUCUGUUAUCCUGCUAGCUGUAGAAAUGUAUCCUGAUAAACAUUAAACACUGUAAUCUUAAAAGUGUAAUUGUGUGACUUUUUCAGAGUUGCUUUAAAGUACCUGUAGUGAGAAACUGAUUUAUGAUCACUUGGAAGAUUUGUAUAGUUUUAUAAAACUCAGUUAAAAUGUCUGUUUCAAUGACCUGUAUUUUGCCAGACUUAAAUCACAGAUGGGUAUUAAACUUGUCAGAAUUUCUUUGUCAUUCAAGCCUGUGAAUAAAAACCCUGUAUGGCACUUAUUAUGAGGCUAUUAAAAGAAUCCAAAUUCAAACUAAA |
| - example_title: hemoglobin subunit alpha 2 |
| output: |
| text: '.......(((.(.....................(((((........)))))............................................).))).........' |
| pipeline_tag: rna-secondary-structure |
| sequence_type: 3' UTR |
| task: rna-secondary-structure |
| text: CUGGAGCCUCGGUAGCCGUUCCUCCUGCCCGCUGGGCCUCCCAACGGGCCCUCCUCCCCUCCUUGCACCGGCCCUUCCUGGUCUUUGAAUAAAGUCUGAGUGGGCAGCA |
| - example_title: BRAF proto-oncogene |
| output: |
| text: '........................((((((.[[[[[[[[[[..........(((((((........))))))).............))))))..................................................((.......................))..((((((((((((.......))))))).)))))............................((((((((...)))))))).................................................................(..]]]]]]]]]])((................(((((((((......))))))))).....((((((....)))))).......))..................' |
| pipeline_tag: rna-secondary-structure |
| sequence_type: 3' UTR |
| task: rna-secondary-structure |
| text: AACAAAUGAGUGAGAGAGUUCAGGAGAGUAGCAACAAAAGGAAAAUAAAUGAACAUAUGUUUGCUUAUAUGUUAAAUUGAAUAAAAUACUCUCUUUUUUUUUAAGGUGAACCAAAGAACACUUGUGUGGUUAAAGACUAGAUAUAAUUUUUCCCCAAACUAAAAUUUAUACUUAACAUUGGAUUUUUAACAUCCAAGGGUUAAAAUACAUAGACAUUGCUAAAAAUUGGCAGAGCCUCUUCUAGAGGCUUUACUUUCUGUUCCGGGUUUGUAUCAUUCACUUGGUUAUUUUAAGUAGUAAACUUCAGUUUCUCAUGCAACUUUUGUUGCCAGCUAUCACAUGUCCACUAGGGACUCCAGAAGAAGACCCUACCUAUGCCUGUGUUUGCAGGUGAGAAGUUGGCAGUCGGUUAGCCUGGG |
| - example_title: H3 clustered histone 1 |
| output: |
| text: '.....(((.......................(((((((..)))))))..)))......' |
| pipeline_tag: rna-secondary-structure |
| sequence_type: 3' UTR |
| task: rna-secondary-structure |
| text: UUACUGUGGUCUCUCUGACGGUCCAAGCAAAGGCUCUUUUCAGAGCCACCACCUUUUC |
| --- |
| |
| # SPOT-RNA |
|
|
| Pre-trained model for RNA secondary structure prediction using two-dimensional deep neural networks and transfer learning. |
|
|
| ## Disclaimer |
|
|
| This is an UNOFFICIAL implementation of the [RNA secondary structure prediction using an ensemble of two-dimensional deep neural networks and transfer learning](https://doi.org/10.1038/s41467-019-13395-9) by Jaswinder Singh, et al. |
|
|
| The OFFICIAL repository of SPOT-RNA is at [jaswindersingh2/SPOT-RNA](https://github.com/jaswindersingh2/SPOT-RNA). |
|
|
| > [!TIP] |
| > The MultiMolecule team has confirmed that the provided model and checkpoints are producing the same intermediate representations as the original implementation. |
|
|
| **The team releasing SPOT-RNA did not write a model card for this model, so this model card has been written by the MultiMolecule team.** |
|
|
| ## Model Details |
|
|
| SPOT-RNA is a 2D convolutional neural network for predicting RNA secondary structure (base-pair contact maps) from single RNA sequences. It predicts both canonical (Watson-Crick and wobble) and non-canonical base pairs, as well as base pairs involved in pseudoknots and other tertiary interactions. |
|
|
| The model uses: |
|
|
| - pairwise representation: outer concatenation of canonical nucleotide features into an `L x L x 8` feature matrix. |
| - convolutional blocks: 2D residual convolution blocks with LayerNorm, dropout, and checkpoint-matched ReLU/ELU activations. |
| - architecture paths: checkpoint-matched 2D-BLSTM or dilated-convolution paths where used by the released predictor. |
| - training strategy: transfer learning from bpRNA to high-resolution PDB RNA structures. |
|
|
| MultiMolecule provides SPOT-RNA as a single checkpoint, [`multimolecule/spotrna`](https://huggingface.co/multimolecule/spotrna). |
|
|
| ### Model Specification |
|
|
| | Num Parameters (M) | FLOPs (G) | MACs (G) | |
| | ------------------ | --------- | -------- | |
| | 17.46 | 8642.10 | 4302.16 | |
|
|
| ### Links |
|
|
| - **Code**: [multimolecule.spotrna](https://github.com/DLS5-Omics/multimolecule/tree/master/multimolecule/models/spotrna) |
| - **Weights**: [multimolecule/spotrna](https://huggingface.co/multimolecule/spotrna) |
| - **Data**: [multimolecule/bprna-spot](https://huggingface.co/datasets/multimolecule/bprna-spot) |
| - **Paper**: [RNA secondary structure prediction using an ensemble of two-dimensional deep neural networks and transfer learning](https://doi.org/10.1038/s41467-019-13395-9) |
| - **Developed by**: Jaswinder Singh, Jack Hanson, Kuldip Paliwal, Yaoqi Zhou |
| - **Original Repository**: [jaswindersingh2/SPOT-RNA](https://github.com/jaswindersingh2/SPOT-RNA) |
|
|
| ## Usage |
|
|
| The model file depends on the [`multimolecule`](https://multimolecule.danling.org) library. You can install it using pip: |
|
|
| ```bash |
| pip install multimolecule |
| ``` |
|
|
| ### Direct Use |
|
|
| #### RNA Secondary Structure Pipeline |
|
|
| You can use SPOT-RNA directly with the MultiMolecule secondary-structure pipeline: |
|
|
| ```python |
| import multimolecule # you must import multimolecule to register models |
| from transformers import pipeline |
| |
| predictor = pipeline("rna-secondary-structure", model="multimolecule/spotrna") |
| output = predictor("GGGCUAUUAGCUCAGUUGGUUAGAGCGCACCCCUGAUAAGGGUGAGGUCGCUGAUUCGAAUUCAGCAUAGCUCA") |
| ``` |
|
|
| #### PyTorch Inference |
|
|
| Here is how to use this model to predict RNA secondary structure in PyTorch: |
|
|
| ```python |
| import torch |
| from multimolecule import RnaTokenizer, SpotRnaModel |
| |
| tokenizer = RnaTokenizer.from_pretrained("multimolecule/spotrna") |
| model = SpotRnaModel.from_pretrained("multimolecule/spotrna") |
| |
| sequence = "GGGCUAUUAGCUCAGUUGGUUAGAGCGCACCCCUGAUAAGGGUGAGGUCGCUGAUUCGAAUUCAGCAUAGCUCA" |
| input = tokenizer(sequence, return_tensors="pt") |
| |
| output = model(**input) |
| contact_map = output.contact_map # (1, L, L) base-pair probability matrix |
| ``` |
|
|
| ## Training Details |
|
|
| SPOT-RNA was trained using a two-stage transfer learning approach on RNA secondary structure prediction. |
|
|
| ### Training Data |
|
|
| - initial training source: bpRNA-1m (Version 1.0) with 102,348 annotated RNAs. |
| - initial training filtering: CD-HIT-EST at 80% sequence identity, removal of RNAs with PDB structures, and maximum sequence length of 500 nucleotides. |
| - initial training corpus: 13,419 RNAs after preprocessing. |
| - initial training split: TR0 = 10,814, VL0 = 1,300, TS0 = 1,305. |
| - transfer-learning source: high-resolution PDB RNAs downloaded on March 2, 2019. |
| - transfer-learning filtering: resolution better than 3.5 Å and CD-HIT-EST at 80% sequence identity. |
| - transfer-learning corpus: 226 nonredundant RNAs after preprocessing. |
| - transfer-learning split before homology filtering: TR1 = 120, VL1 = 30, TS1 = 76. |
| - additional TS1 filtering: CD-HIT-EST against the training data at 80% identity, followed by BLAST-N against TR0 and TR1 with e-value cutoff 10. |
| - final TS1 benchmark: 67 RNAs. |
| - additional evaluation set: TS2 = 39 NMR-solved RNAs selected from 641 candidates after CD-HIT-EST filtering at 80% identity and BLAST-N filtering against TR0, TR1, and TS1. |
| - use of TS2: post-training evaluation only. |
|
|
| ### Training Procedure |
|
|
| #### Preprocessing |
|
|
| - input representation: one-hot `L x 4` matrix following the MultiMolecule tokenizer order. |
| - missing-value handling: invalid or missing residues encoded as `-1` in the original TensorFlow implementation before one-hot conversion. |
| - pairwise features: outer concatenation from `L x 4` to `L x L x 8`. |
| - input normalization: standardization to zero mean and unit variance using training-set statistics. |
| - structure labels: extracted from PDB coordinates with DSSR. |
| - reference NMR model: model 1. |
| - pseudoknot and motif definitions: bpRNA definitions from the paper. |
| - unknown-token handling: `N` tokens are excluded from the canonical four-base features before pairwise feature construction. |
|
|
| #### Pre-training |
|
|
| The paper states that training was run on Nvidia GTX TITAN X GPUs. |
|
|
| - training split: TR0. |
| - validation split: VL0. |
| - optimizer: Adam. |
| - regularization: 25% dropout before convolution layers and 50% dropout in hidden fully connected layers. |
| - hyperparameter search over `N_A`: 16 to 32 residual blocks. |
| - hyperparameter search over `D_RES`: 32 to 72 convolution channels. |
| - hyperparameter search over `D_BL`: 128 to 256 2D-BLSTM hidden units per direction. |
| - hyperparameter search over `N_B`: 0 to 4 fully connected blocks. |
| - hyperparameter search over `D_FC`: 256 to 512 fully connected hidden units. |
| - model selection: validation-performance model selection described in the paper. |
|
|
| #### Transfer Learning |
|
|
| The pretrained TR0 models were retrained on TR1 with the same architecture and optimization settings. |
|
|
| - initialization: start from the TR0-trained models. |
| - training split: TR1. |
| - validation split: VL1. |
| - frozen layers: none; all weights were updated. |
| - architecture and optimization settings: same as the TR0-trained models. |
| - model selection: validation-performance model selection described in the paper. |
| - decision rule: a single probability threshold chosen to optimize validation performance. |
|
|
| ## Citation |
|
|
| ```bibtex |
| @article{singh2019rna, |
| title = "{RNA} secondary structure prediction using an ensemble of two-dimensional deep neural networks and transfer learning", |
| author = "Singh, Jaswinder and Hanson, Jack and Paliwal, Kuldip and Zhou, Yaoqi", |
| journal = "Nature Communications", |
| doi = "10.1038/s41467-019-13395-9", |
| publisher = "Springer Science and Business Media LLC", |
| url = "https://doi.org/10.1038/s41467-019-13395-9", |
| volume = 10, |
| number = 1, |
| pages = "5407", |
| month = nov, |
| year = 2019, |
| copyright = "https://creativecommons.org/licenses/by/4.0", |
| language = "en" |
| } |
| ``` |
|
|
| > [!NOTE] |
| > The artifacts distributed in this repository are part of the MultiMolecule project. |
| > If you use MultiMolecule in your research, you must cite the MultiMolecule project as follows: |
|
|
| ```bibtex |
| @software{chen_2024_12638419, |
| author = {Chen, Zhiyuan and Zhu, Sophia Y.}, |
| title = {MultiMolecule}, |
| doi = {10.5281/zenodo.12638419}, |
| publisher = {Zenodo}, |
| url = {https://doi.org/10.5281/zenodo.12638419}, |
| year = 2024, |
| month = may, |
| day = 4 |
| } |
| ``` |
|
|
| ## Contact |
|
|
| Please use GitHub issues of [MultiMolecule](https://github.com/DLS5-Omics/multimolecule/issues) for any questions or comments on the model card. |
|
|
| Please contact the authors of the [SPOT-RNA paper](https://doi.org/10.1038/s41467-019-13395-9) for questions or comments on the paper/model. |
|
|
| ## License |
|
|
| This model is licensed under the [GNU Affero General Public License](license.md). |
|
|
| For additional terms and clarifications, please refer to our [License FAQ](license-faq.md). |
|
|
| ```spdx |
| SPDX-License-Identifier: AGPL-3.0-or-later |
| ``` |