File size: 2,871 Bytes
a22388d
 
 
08f6bb3
8de6892
a22388d
143f377
ea0ff0f
eac4f0d
ea0ff0f
 
 
08f6bb3
c4b8905
 
 
ba82a11
7793b11
 
 
 
 
 
143f377
ba82a11
02b1613
 
c0ce260
02b1613
ea0ff0f
da2e179
c3b15fe
143f377
a22388d
ea0ff0f
a22388d
5b5d366
74c63a3
6c288a5
74c63a3
f28a0df
a88f223
74c63a3
c7d800a
 
 
 
 
5b5d366
 
 
 
ea0ff0f
 
c0ce260
ea0ff0f
aae1a61
 
74c63a3
b1ca59a
74c63a3
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
import gradio as gr
import subprocess as sp
from sincfold.utils import ct2dot
import shutil 
import os

def fold(sequence):
    """Predict the secondary structure of an RNA sequence with sincFold.

    The input is normalised (all whitespace removed, upper-cased, ``T``
    replaced by ``U``), validated, and handed to the ``sincFold pred``
    command-line tool, which is asked to write its results into the
    ``output`` directory. That directory is wiped before every prediction.

    Args:
        sequence: Nucleotide sequence as typed by the user.

    Returns:
        A 4-tuple ``(ct_file, dotbracket, structure_draw, fasta_file)``:
        path of the predicted ``.ct`` file, the dot-bracket string (or a
        human-readable error message), path of the structure drawing, and
        path of a FASTA-like file holding the sequence and its dot-bracket.
        Entries that are not available are ``None``.
    """
    # str.split() with no argument drops every whitespace character, so
    # pasted text with tabs or Windows line endings is handled too.
    sequence = "".join((sequence or "").split()).upper().replace("T", "U")

    if len(sequence) > 512:
        return None, "Max sequence length is 512", None, None
    if len(sequence) < 3:
        return None, "Min sequence length is 3", None, None
    # The sequence comes from an untrusted web form and ends up on a command
    # line: refuse anything that is not plain letters (this also rejects
    # values starting with "-", which could be parsed as CLI options).
    if not (sequence.isascii() and sequence.isalpha()):
        return None, "Sequence must contain only letters", None, None

    # NOTE(review): "output" is shared by all requests, so concurrent
    # predictions can overwrite each other's files.
    shutil.rmtree("output", ignore_errors=True)
    ct_file = "output/pred_id.ct"
    structure_draw = "output/pred_id.png"
    fasta_file = "output/pred_id.fasta"

    # Longer sequences are drawn with a larger --draw-resolution value.
    if len(sequence) > 100:
        resolution = 10
    elif len(sequence) > 50:
        resolution = 5
    else:
        resolution = 1

    # Argument list instead of a shell string: the sequence is passed as a
    # single argv entry and is never interpreted by a shell.
    command = [
        "sincFold", "pred", sequence,
        "-o", "output",
        "--draw",
        "--draw-resolution", str(resolution),
    ]
    try:
        sp.run(command)
    except OSError:
        # Raised when the executable is missing or cannot be started.
        return None, "sincFold could not be executed", None, None

    if not os.path.isfile(ct_file):
        # Without the .ct file there is nothing to convert or to return.
        return None, "sincFold prediction failed", None, None

    dotbracket = ct2dot(ct_file)
    if not dotbracket:
        # No drawing is returned when the conversion fails; the message is
        # shown in place of the dot-bracket string.
        structure_draw = None
        dotbracket = "CT to dotbracket conversion failed (.ct file should be ok)"
    elif not os.path.isfile(structure_draw):
        # Do not hand a non-existent image path to the caller.
        structure_draw = None

    with open(fasta_file, "w") as f:
        f.write(f">seq\n{sequence}\n{dotbracket}")

    return ct_file, dotbracket, structure_draw, fasta_file


def run():
    """Build the sincFold Gradio interface and serve it.

    The interface wires one text input (the RNA sequence) to ``fold`` and
    shows its four results: the model output file, the dot-bracket string,
    the structure image and the FASTA file. The server listens on
    ``0.0.0.0`` port 7860.
    """
    page_title = 'sincFold: an end-to-end deep learning approach for RNA structure prediction'
    # Adjacent literals are concatenated into one description string.
    page_description = (
        'sincFold is an end-to-end deep learning approach that predicts the nucleotides contact matrix using only the RNA sequence as input. '
        'This demo provides a sincFold model pre-trained with known RNAs secondary structures. '
        'Input RNA sequences of up to 512 nucleotides can be processed with this demo. '
        'To process sequences longer than 512 nucleotides you can either install the package (via pip) or clone the entire repository.'
        '\n\nWebdemo: https://sinc.unl.edu.ar/web-demo/sincFold/'
        '\nSource-code: https://github.com/sinc-lab/sincFold.'
        '\nPaper: https://academic.oup.com/bib/article/25/4/bbae271/7690295'
    )

    # One single-column row per clickable example.
    example_rows = [
        ["AACCGGGUCAGGUCCGGAAGGAAGCAGCCCUAA"],
        ["CCACGGCGACUAUAUCCCUGGUGUUCACCUCUUCCCAUUCCGAACAGAGUCGUUAAGCCCAGGAGAGCCGAUGGUACUGCUUUAUUGCGGGAGAGUAGGUCGUCGCCGAGU"],
        ["GAUAAACCUUUAGCAAUAAACGAAAGUUUAACUAAGCCAUACUAACCCCAGGGUUGGUCAAUUUCGUGCCAGCCACCGCGGUCACACGAUUAACCCAAGCCAAUAGAAAUCGGCGUAAAGAGUGUUUUAGAUCAAUCCCCCAAUAAAGCUAAAAUUCACCUG"],
    ]

    sequence_box = gr.Textbox(
        label="RNA Sequence",
        value="GAUAAACCUUUAGCAAUAAACGAAAGUUUAACUAAGCCAUACUAACCCCAGGGUUGGUCAAUUUCGUGCCAGCCACCGCGGUCACACGAUUAACCCAAGCCAAUAGAAAUCGGCGUAAAGAGUGUUUUAGAUCAAUCCCCCAAUAAAGCUAAAAUUCACCUG",
    )

    # Same order as the tuple returned by fold().
    result_widgets = [
        gr.File(label="Model output"),
        gr.Textbox(label="Dot-bracket"),
        gr.Image(label="Structure"),
        gr.File(label="Fasta format"),
    ]

    app = gr.Interface(
        fn=fold,
        inputs=[sequence_box],
        outputs=result_widgets,
        examples=example_rows,
        title=page_title,
        description=page_description,
        allow_flagging="never",
    )

    app.launch(server_name="0.0.0.0", server_port=7860)


# Start the web demo only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    run()