File size: 4,160 Bytes
6b844e3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
import argparse
import subprocess
from typing import Generator, Tuple, List, Iterable, Optional, Dict


def RNAsubopt(
    seq: str, RNAsubopt_bin: str = "RNAsubopt", delta_energy: float = 5.0
) -> List[Tuple[str, float]]:
    """
    Uses RNAsubopt to return a list of secondary structures
    and their associated energies in kcal/mol.

    Args:
        seq: RNA sequence; it is written to the program's standard input.
        RNAsubopt_bin: Name or path of the RNAsubopt executable.
        delta_energy: Value passed to the ``--deltaEnergy`` option.

    Returns:
        One ``(structure, energy)`` tuple per non-blank output line after
        the first line (the first line is skipped). An empty list is
        returned, and the error printed, when the program cannot be started
        or exits with a non-zero status.
    """
    # An argument list plus stdin (no shell string) means neither the
    # sequence nor the binary name is ever interpreted by a shell.
    cmd = [RNAsubopt_bin, "--deltaEnergy", str(delta_energy)]
    try:
        out = subprocess.check_output(cmd, input=(seq + "\n").encode()).decode()
    except (subprocess.CalledProcessError, OSError) as exc:
        # OSError covers a missing/non-executable binary, which the previous
        # shell-based invocation reported as a non-zero exit status.
        print(exc)
        return []

    processed = []
    for line in out.splitlines()[1:]:
        fields = line.split()
        if len(fields) < 2:
            # Blank or incomplete line: nothing to parse.
            continue
        processed.append((str(fields[0]), float(fields[1])))
    return processed

def RNAsample(
    seqs: List[str], RNAfold_bin: str = "RNAsubopt", temperature: float = 37.0, num_structs: int = 5, maxBPspan: int = 0
) -> List[List[str]]:
    """
    Uses RNAsubopt to sample num_structs structures from the Boltzmann distribution.
    Accepts a list of sequences, and calls RNAsubopt in a batched way on them.

    Args:
        seqs: Sequences to sample for; they are sent newline-separated on stdin.
        RNAfold_bin: Name or path of the executable to run. Despite the
            parameter name, the default is ``"RNAsubopt"``.
        temperature: Value passed with the ``-T`` option.
        num_structs: Value passed as ``--stochBT``; the number of output
            lines expected per sequence after its first line.
        maxBPspan: When non-zero, passed as ``--maxBPspan``.

    Returns:
        For each input sequence, the ``num_structs`` output lines that follow
        the first line of its group. An empty list is returned when ``seqs``
        is empty, or (after printing the error) when the program exits with a
        non-zero status.

    Raises:
        AssertionError: If the number of output lines is not
            ``(num_structs + 1) * len(seqs)``.
    """
    if not seqs:
        # Nothing to fold; avoid spawning a process for an empty batch.
        return []

    # NOTE(review): "-T <value>" is passed as a single argv token, exactly as
    # before; confirm against the tool's option parser before splitting it.
    cmd = [RNAfold_bin, f"--stochBT={num_structs}", f"-T {temperature}"]
    if maxBPspan != 0:
        cmd.append(f"--maxBPspan={maxBPspan}")

    try:
        raw = subprocess.check_output(cmd, input="\n".join(seqs).encode("ascii"))
    except subprocess.CalledProcessError as exc:
        print(exc)
        return []

    out = raw.decode().split("\n")[:-1]  # drop the empty string after the final newline

    group = num_structs + 1  # one leading line plus num_structs samples per sequence
    if len(out) != group * len(seqs):
        # Explicit raise (not `assert`) so the check survives `python -O`.
        raise AssertionError(
            f"expected {group * len(seqs)} output lines, got {len(out)}"
        )

    return [out[group * i + 1:group * (i + 1)] for i in range(len(seqs))]



def RNAfold(
    seqs: List[str], RNAfold_bin: str = "RNAfold", temperature: float = 37.0, maxBPspan: int = 0, commands_file: str = ""
) -> List[List]:
    """
    Uses RNAfold to return MFE energy. Accepts a list of sequences, and calls
    RNAfold in a batched way on them.

    Args:
        seqs: Sequences to fold; they are sent newline-separated on stdin.
        RNAfold_bin: Name or path of the RNAfold executable.
        temperature: Value passed with the ``-T`` option.
        maxBPspan: When non-zero, passed as ``--maxBPspan``.
        commands_file: When non-empty, passed as ``--commands``.

    Returns:
        A list with one ``[structure, energy]`` pair per sequence, parsed
        from every second output line: the first whitespace-separated token
        is the structure and the remainder, stripped of parentheses and
        spaces, is converted to ``float``. An empty list is returned when
        ``seqs`` is empty, or (after printing the error) when the program
        exits with a non-zero status.
    """
    if not seqs:
        # Nothing to fold; avoid spawning a process for an empty batch.
        return []

    # NOTE(review): "-j 8" and "-T <value>" are each passed as a single argv
    # token, exactly as before; confirm against the tool's option parser
    # before splitting them.
    cmd = [RNAfold_bin, "-j 8", "--noPS", f"-T {temperature}"]
    if commands_file != "":
        cmd.append("--commands=" + commands_file)
    if maxBPspan != 0:
        cmd.append(f"--maxBPspan={maxBPspan}")

    try:
        raw = subprocess.check_output(cmd, input="\n".join(seqs).encode("ascii"))
    except subprocess.CalledProcessError as exc:
        print(exc)
        return []

    out = raw.decode().split("\n")

    ret_value = []
    # Output alternates: a first line per sequence, then the line holding
    # the structure and its energy; only the latter (odd indices) is parsed.
    for line in out[1::2]:
        fields = line.split(maxsplit=1)
        ret_value.append([fields[0], float(fields[1].strip('() '))])
    return ret_value


def RNA_partition_function(
    seqs: List[str], constraints: List[str], RNAfold_bin: str = "RNAfold", temperature: float = 37.0, commands_file: str = ""
) -> List[float]:
    """
    Uses RNAfold to compute partition function with constraints on the structure

    Args:
        seqs: Sequences to process.
        constraints: One constraint string per sequence; each is sent on the
            line following its sequence.
        RNAfold_bin: Name or path of the RNAfold executable.
        temperature: Value passed with the ``-T`` option.
        commands_file: When non-empty, passed as ``--commands``.

    Returns:
        One float per sequence: the second-to-last space-separated field of
        the third line in each group of four output lines (presumably the
        ensemble free energy -- confirm against the RNAfold output format).
        An empty list is returned when ``seqs`` is empty, or (after printing
        the error) when the program exits with a non-zero status.

    Raises:
        AssertionError: If ``seqs`` and ``constraints`` differ in length.
    """
    if len(seqs) != len(constraints):
        # Explicit raise (not `assert`) so the check survives `python -O`.
        raise AssertionError(
            f"got {len(seqs)} sequences but {len(constraints)} constraints"
        )
    if not seqs:
        # Nothing to process; avoid spawning a process for an empty batch.
        return []

    # Each record is the sequence followed by its constraint on the next line.
    records = [seq + "\n" + constraint for seq, constraint in zip(seqs, constraints)]

    # NOTE(review): "-j 8" and "-T <value>" are each passed as a single argv
    # token, exactly as before; confirm against the tool's option parser
    # before splitting them.
    cmd = [RNAfold_bin, "-j 8", "--noPS", "--noDP", "-p0", "-C", f"-T {temperature}"]
    if commands_file != "":
        cmd.append("--commands=" + commands_file)

    try:
        raw = subprocess.check_output(cmd, input="\n".join(records).encode("ascii"))
    except subprocess.CalledProcessError as exc:
        print(exc)
        return []

    out = raw.decode().split("\n")

    # Take the third line in each group of 4 lines.
    return [float(line.split(sep=" ")[-2]) for line in out[2::4]]




if __name__ == "__main__":
    # Command-line entry point: read an RNA sequence from --seq (a built-in
    # example sequence is used when the option is omitted), run RNAsubopt on
    # it with the default settings, and print the resulting list.
    cli = argparse.ArgumentParser()
    cli.add_argument("--seq", type=str, default="AGUAGUCUAGUAUGACUGUA", help="RNA sequence to process")
    cli_args = cli.parse_args()
    print(RNAsubopt(seq=cli_args.seq))