import argparse
import shlex
import subprocess
from typing import Generator, Tuple, List, Iterable, Optional, Dict, Union


def _run_batched(cmd: List[str], lines: Iterable[str]) -> str:
    """Run *cmd*, feed *lines* (newline-joined, ASCII-encoded) on stdin, return decoded stdout."""
    payload = "\n".join(lines).encode("ascii")
    return subprocess.check_output(cmd, input=payload).decode()


def RNAsubopt(
    seq: str, RNAsubopt_bin: str = "RNAsubopt", delta_energy: float = 5.0
) -> List[Tuple[str, float]]:
    """
    Use RNAsubopt to list secondary structures and their energies.

    Args:
        seq: Sequence handed to the program on stdin (one line).
        RNAsubopt_bin: Executable to run. It is tokenised with ``shlex.split``
            so a value that carries extra flags keeps working.
        delta_energy: Value passed to ``--deltaEnergy``.

    Returns:
        A list of ``(structure, energy)`` tuples parsed from every output line
        except the first (assumed to be a header) and the trailing empty one.
        The original docstring gives the energies as kcal/mol. An empty list
        is returned when the program cannot be started or exits non-zero.
    """
    # The sequence goes to the child through stdin and never through a shell,
    # so shell metacharacters inside ``seq`` cannot be executed.
    cmd = shlex.split(RNAsubopt_bin) + ["--deltaEnergy", str(delta_energy)]
    try:
        out = subprocess.check_output(cmd, input=(seq + "\n").encode()).decode()
    except (subprocess.CalledProcessError, OSError) as exc:
        # OSError covers a missing or non-executable binary, which the former
        # shell-based invocation also reported by returning [].
        print(exc)
        return []
    records = (row.split() for row in out.split("\n")[1:-1])
    return [(str(fields[0]), float(fields[1])) for fields in records]


def RNAsample(
    seqs: List[str],
    RNAfold_bin: str = "RNAsubopt",
    temperature: float = 37.0,
    num_structs: int = 5,
    maxBPspan: int = 0,
) -> List[List[str]]:
    """
    Use RNAsubopt to sample structures for a batch of sequences.

    The original docstring states that the structures are sampled from the
    Boltzmann distribution.

    Args:
        seqs: Sequences, sent to the program on stdin one per line.
        RNAfold_bin: Executable to run (despite its name it defaults to
            ``"RNAsubopt"``).
        temperature: Value passed with ``-T``.
        num_structs: Value passed as ``--stochBT=<num_structs>``; also the
            number of structure lines expected per sequence.
        maxBPspan: Passed as ``--maxBPspan=<value>`` when non-zero.

    Returns:
        One list of ``num_structs`` output lines per input sequence, or an
        empty list when the program exits with a non-zero status.

    Raises:
        AssertionError: If the number of output lines is not
            ``(num_structs + 1) * len(seqs)``.
    """
    cmd = [RNAfold_bin, f"--stochBT={num_structs}", f"-T {temperature}"]
    if maxBPspan:
        cmd.append(f"--maxBPspan={maxBPspan}")
    try:
        out = _run_batched(cmd, seqs)
    except subprocess.CalledProcessError as exc:
        print(exc)
        return []
    lines = out.split("\n")[:-1]  # drop the empty string after the final newline
    group = num_structs + 1  # one echoed sequence line + num_structs structures
    assert len(lines) == group * len(seqs), (
        f"expected {group * len(seqs)} output lines, got {len(lines)}"
    )
    return [lines[group * i + 1 : group * (i + 1)] for i in range(len(seqs))]


def RNAfold(
    seqs: List[str],
    RNAfold_bin: str = "RNAfold",
    temperature: float = 37.0,
    maxBPspan: int = 0,
    commands_file: str = "",
) -> List[List[Union[str, float]]]:
    """
    Use RNAfold to obtain a structure and MFE energy for each sequence.

    Args:
        seqs: Sequences, sent to the program on stdin one per line.
        RNAfold_bin: Executable to run.
        temperature: Value passed with ``-T``.
        maxBPspan: Passed as ``--maxBPspan=<value>`` when non-zero.
        commands_file: Passed as ``--commands=<path>`` when non-empty.

    Returns:
        A list with one ``[structure, energy]`` pair per sequence, read from
        every second output line (assumes two output lines per sequence: the
        sequence, then ``structure (energy)``). An empty list is returned when
        the program exits with a non-zero status.
    """
    cmd = [RNAfold_bin, "-j 8", "--noPS", f"-T {temperature}"]
    if commands_file:
        cmd.append("--commands=" + commands_file)
    if maxBPspan:
        cmd.append(f"--maxBPspan={maxBPspan}")
    try:
        out = _run_batched(cmd, seqs).split("\n")
    except subprocess.CalledProcessError as exc:
        print(exc)
        return []
    results = []
    for row in out[1::2]:
        fields = row.split(maxsplit=1)
        # The energy token looks like "( -1.20)"; strip parentheses and padding.
        results.append([fields[0], float(fields[1].strip("() "))])
    return results


def RNA_partition_function(
    seqs: List[str],
    constraints: List[str],
    RNAfold_bin: str = "RNAfold",
    temperature: float = 37.0,
    commands_file: str = "",
) -> List[float]:
    """
    Use RNAfold to compute the partition function under structure constraints.

    Args:
        seqs: Sequences to evaluate.
        constraints: One constraint string per sequence; each is sent on the
            line directly after its sequence.
        RNAfold_bin: Executable to run.
        temperature: Value passed with ``-T``.
        commands_file: Passed as ``--commands=<path>`` when non-empty.

    Returns:
        One float per sequence: the second-to-last space-separated token of
        the third line in each group of four output lines (assumes RNAfold
        prints four lines per sequence with these flags). An empty list is
        returned when the program exits with a non-zero status.

    Raises:
        AssertionError: If ``seqs`` and ``constraints`` differ in length.
    """
    assert len(seqs) == len(constraints), "seqs and constraints must have equal length"
    cmd = [RNAfold_bin, "-j 8", "--noPS", "--noDP", "-p0", "-C", f"-T {temperature}"]
    if commands_file:
        cmd.append("--commands=" + commands_file)
    # Interleave: sequence line, then its constraint line.
    stdin_lines = [text for pair in zip(seqs, constraints) for text in pair]
    try:
        out = _run_batched(cmd, stdin_lines).split("\n")
    except subprocess.CalledProcessError as exc:
        print(exc)
        return []
    return [float(row.split(sep=" ")[-2]) for row in out[2::4]]


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--seq",
        type=str,
        default="AGUAGUCUAGUAUGACUGUA",
        help="RNA sequence to process",
    )
    args = parser.parse_args()
    output = RNAsubopt(seq=args.seq)
    print(output)