# sincFold / app.py
# lbugnon's picture
# Update app.py
# a88f223 verified
import gradio as gr
import subprocess as sp
from sincfold.utils import ct2dot
import shutil
import os
def fold(sequence):
    """Predict the secondary structure of an RNA sequence with the sincFold CLI.

    The raw input is upper-cased, stripped of every whitespace character and
    has ``T`` replaced by ``U`` before it is passed to ``sincFold pred``.
    Results are written into the ``output`` directory, which is removed at
    the start of every accepted request.

    Args:
        sequence: Nucleotide string as typed or pasted by the user.

    Returns:
        A 4-tuple ``(ct_file, dotbracket, structure_draw, fasta_file)`` in the
        order of the Gradio outputs. When the input is rejected or no ``.ct``
        file is produced, the three path slots are empty strings and the
        second element carries the error message. ``structure_draw`` is
        ``None`` when the dot-bracket conversion fails or no image exists.
    """
    # str.split() with no argument drops spaces, newlines, tabs and "\r",
    # so pasted multi-line text is measured by its nucleotides only.
    sequence = "".join(sequence.upper().split()).replace("T", "U")

    if len(sequence) > 512:
        return "", "Max sequence length is 512", "", ""
    if len(sequence) < 3:
        return "", "Min sequence length is 3", "", ""

    # Start from a clean directory so stale files from a previous request
    # can never be mistaken for the result of this one.
    shutil.rmtree("output", ignore_errors=True)

    ct_file = "output/pred_id.ct"
    structure_draw = "output/pred_id.png"
    fasta_file = "output/pred_id.fasta"

    if len(sequence) > 100:
        resolution = 10
    elif len(sequence) > 50:
        resolution = 5
    else:
        resolution = 1

    # The sequence is untrusted user input: pass it as a single argv entry
    # (no shell) so characters such as ";", "|" or "$(...)" are never
    # interpreted as shell syntax.
    command = [
        "sincFold", "pred", sequence,
        "-o", "output",
        "--draw",
        "--draw-resolution", str(resolution),
    ]
    try:
        sp.run(command, check=False)
    except OSError:
        # Executable missing or not runnable; without a shell this surfaces
        # as an exception instead of a non-zero exit status.
        return "", "sincFold prediction failed (could not run sincFold)", "", ""

    # The directory was wiped above, so a .ct file can only come from this run.
    if not os.path.isfile(ct_file):
        return "", "sincFold prediction failed (no .ct file was produced)", "", ""

    dotbracket = ct2dot(ct_file)
    if not dotbracket:
        structure_draw = None
        dotbracket = "CT to dotbracket conversion failed (.ct file should be ok)"
    elif not os.path.isfile(structure_draw):
        # Do not hand a non-existent path to the image output.
        structure_draw = None

    with open(fasta_file, "w") as f:
        f.write(f">seq\n{sequence}\n{dotbracket}")

    return ct_file, dotbracket, structure_draw, fasta_file
def run():
    """Assemble the Gradio interface around ``fold`` and start serving it.

    The interface takes one text box (the RNA sequence) and shows four
    outputs: the model output file, the dot-bracket string, the structure
    image and a FASTA file. The server listens on all interfaces, port 7860.
    """
    page_title = 'sincFold: an end-to-end deep learning approach for RNA structure prediction'
    page_description = (
        'sincFold is an end-to-end deep learning approach that predicts the nucleotides contact matrix using only the RNA sequence as input. '
        'This demo provides a sincFold model pre-trained with known RNAs secondary structures. '
        'Input RNA sequences of up to 512 nucleotides can be processed with this demo. '
        'To process sequences longer than 512 nucleotides you can either install the package (via pip) or clone the entire repository.'
        '\n\nWebdemo: https://sinc.unl.edu.ar/web-demo/sincFold/'
        '\nSource-code: https://github.com/sinc-lab/sincFold.'
        '\nPaper: https://academic.oup.com/bib/article/25/4/bbae271/7690295'
    )

    # One single-element row per example, matching the single input widget.
    sample_sequences = [
        ["AACCGGGUCAGGUCCGGAAGGAAGCAGCCCUAA"],
        ["CCACGGCGACUAUAUCCCUGGUGUUCACCUCUUCCCAUUCCGAACAGAGUCGUUAAGCCCAGGAGAGCCGAUGGUACUGCUUUAUUGCGGGAGAGUAGGUCGUCGCCGAGU"],
        ["GAUAAACCUUUAGCAAUAAACGAAAGUUUAACUAAGCCAUACUAACCCCAGGGUUGGUCAAUUUCGUGCCAGCCACCGCGGUCACACGAUUAACCCAAGCCAAUAGAAAUCGGCGUAAAGAGUGUUUUAGAUCAAUCCCCCAAUAAAGCUAAAAUUCACCUG"],
    ]

    sequence_box = gr.Textbox(
        label="RNA Sequence",
        value="GAUAAACCUUUAGCAAUAAACGAAAGUUUAACUAAGCCAUACUAACCCCAGGGUUGGUCAAUUUCGUGCCAGCCACCGCGGUCACACGAUUAACCCAAGCCAAUAGAAAUCGGCGUAAAGAGUGUUUUAGAUCAAUCCCCCAAUAAAGCUAAAAUUCACCUG",
    )

    # Order must match the tuple returned by fold().
    result_widgets = [
        gr.File(label="Model output"),
        gr.Textbox(label="Dot-bracket"),
        gr.Image(label="Structure"),
        gr.File(label="Fasta format"),
    ]

    app = gr.Interface(
        fn=fold,
        title=page_title,
        description=page_description,
        examples=sample_sequences,
        inputs=[sequence_box],
        outputs=result_widgets,
        allow_flagging="never",
    )
    app.launch(server_name="0.0.0.0", server_port=7860)
# Launch the demo only when this file is executed as a script, so that
# importing the module does not start the server.
if __name__ == "__main__":
    run()