| | |
| | |
| | """ |
| | Title : drawRNA.py |
| | project : web |
| | Created by: julse |
| | Created on: 2025/7/4 14:24 |
| | des: TODO |
| | """ |
| |
|
| | import sys |
| | import os |
| | import time |
| |
|
| | import pandas as pd |
| | import numpy as np |
| |
|
| | import gradio as gr |
| | import pandas as pd |
| | import numpy as np |
| | import os |
| | import tempfile |
| | import subprocess |
| | from PIL import Image |
| |
|
| | |
# Ordered palette: the six highlight colours used for the drawn mRNA regions.
COLORS = [
    "#FF0000",  # red
    "#0000FF",  # blue
    "#FFC0CB",  # pink
    "#FFA500",  # orange
    "#FFFF00",  # yellow
    "#800080",  # purple
]

# Region name -> hex colour.
COLOR_MAP = dict(
    UTR5="#FF0000",
    CDS_start="#0000FF",
    CDS_mid="#00FF00",
    CDS_end="#FFC0CB",
    UTR3="#FFA500",
    start_codon="#FFFF00",
    stop_codon="#800080",
    intron="#A9A9A9",
    exon="#90EE90",
)
| |
|
def get_bases_index(utr5, cds, utr3):
    """Compute the 1-based positions of each highlighted mRNA region.

    Positions refer to the concatenated sequence ``utr5 + cds + utr3``.
    Every window is capped at 300 nt around the start/stop codon.

    Args:
        utr5: 5'UTR sequence (only its length is used).
        cds: coding sequence, start codon first and stop codon last
            (only its length is used).
        utr3: 3'UTR sequence (only its length is used).

    Returns:
        A 6-tuple of comma-separated position strings, in this order:
        5'UTR window, CDS-start window (after the start codon), CDS-end
        window (before the stop codon), 3'UTR window, start codon, stop
        codon. An empty region yields an empty string. For a CDS shorter
        than the window the CDS-end window reaches back to the first CDS
        base, so it can overlap the start codon and the CDS-start window.
    """
    window = 300
    first_cds = len(utr5)            # number of bases before the CDS
    last_cds = first_cds + len(cds)  # 1-based position of the last CDS base

    def as_csv(first, stop_exclusive):
        # Render the positions first .. stop_exclusive - 1 as "a,b,c".
        return ",".join(str(pos) for pos in range(first, stop_exclusive))

    utr5_csv = as_csv(max(0, first_cds - window) + 1, first_cds + 1)
    cds_head_csv = as_csv(first_cds + 4,
                          min(first_cds + window, last_cds - 3) + 1)
    cds_tail_csv = as_csv(max(first_cds, last_cds - window) + 1, last_cds - 2)
    utr3_csv = as_csv(last_cds + 1,
                      min(last_cds + window + 1, last_cds + len(utr3) + 1))
    start_codon_csv = as_csv(first_cds + 1, first_cds + 4)
    stop_codon_csv = as_csv(last_cds - 2, last_cds + 1)

    return (
        utr5_csv,
        cds_head_csv,
        cds_tail_csv,
        utr3_csv,
        start_codon_csv,
        stop_codon_csv,
    )
| |
|
| |
|
def calc_mfe(seq):
    """Fold ``seq`` with ViennaRNA and return ``(structure, mfe)``.

    The ``RNA`` module is imported lazily so the rest of the file can be
    imported without ViennaRNA installed.
    """
    import RNA

    compound = RNA.fold_compound(seq)
    structure, energy = compound.mfe()
    return structure, energy
| |
|
| |
|
def dbn_to_tuple(dbn, c1_region=None, c2_region=None):
    """Extract base pairs from a dot-bracket string.

    Args:
        dbn: dot-bracket notation; only ``(`` and ``)`` are interpreted,
            every other character is treated as unpaired.
        c1_region: optional collection of 1-based positions allowed for the
            opening base of a pair.
        c2_region: optional collection of 1-based positions allowed for the
            closing base of a pair.

    Returns:
        Dict mapping the 1-based closing position to its 1-based opening
        partner. When either region is missing or empty, every pair is
        returned; otherwise only pairs whose opening base lies in
        ``c1_region`` and whose closing base lies in ``c2_region``.

    Raises:
        IndexError: if a ``)`` has no matching ``(``. Unclosed ``(`` are
            silently ignored.
    """
    unfiltered = (
        c1_region is None or len(c1_region) == 0
        or c2_region is None or len(c2_region) == 0
    )
    # Sets make each membership test O(1) instead of a scan per base pair.
    opening_allowed = None if unfiltered else set(c1_region)
    closing_allowed = None if unfiltered else set(c2_region)

    open_stack = []
    pairs = {}
    for pos, symbol in enumerate(dbn, start=1):
        if symbol == '(':
            open_stack.append(pos)
        elif symbol == ')':
            partner = open_stack.pop()
            if unfiltered or (pos in closing_allowed and partner in opening_allowed):
                pairs[pos] = partner
    return pairs
| |
|
| |
|
def run_cmd(command, output_file):
    """Run an external drawing command and check that it produced a file.

    Args:
        command: argument list handed to ``subprocess.run``.
        output_file: path the command is expected to write.

    Raises:
        RuntimeError: the command exited with a non-zero status; the message
            carries the command's stderr.
        FileNotFoundError: the command succeeded but ``output_file`` does
            not exist.
    """
    result = subprocess.run(command, capture_output=True, text=True)

    if result.returncode != 0:
        error_msg = f"执行VARNA命令时出错:\n{result.stderr}"
        # Best-effort cleanup: a failed command often never created the file,
        # and a cleanup error must not hide the real failure reported below.
        try:
            os.unlink(output_file)
        except OSError:
            pass
        raise RuntimeError(error_msg)

    if not os.path.exists(output_file):
        raise FileNotFoundError("未能生成结构图文件")
| |
|
| |
|
| |
|
| |
|
def _parse_region_positions(spec):
    """Turn a region spec into a list of int positions.

    Accepts a comma-separated string (optionally wrapped in brackets or
    parentheses, possibly empty), a single int, or an iterable of ints.
    """
    if isinstance(spec, str):
        tokens = spec.strip().strip('()[]').split(',')
        return [int(token) for token in tokens if token.strip()]
    if isinstance(spec, int):
        return [spec]
    return [int(pos) for pos in spec]


def run_draw_rna_advanced(full_sequence, structure, utr5_range, start_codon_range,
                          stop_codon_range, utr3_range, start_codon, stop_codon,
                          focus_region, auxBPs, output_file, algorithm, title=''):
    """Draw a structure with per-region colouring and save it as an image.

    Args:
        full_sequence: nucleotide sequence to draw.
        structure: dot-bracket structure for ``full_sequence``.
        utr5_range, start_codon_range, stop_codon_range, utr3_range,
        start_codon, stop_codon: 1-based positions of each region, given as
            comma-separated strings (as produced by ``get_bases_index``) or
            as iterables of ints. Empty regions are allowed.
        focus_region: accepted for interface compatibility; not used here.
        auxBPs: accepted for interface compatibility; not used here.
        output_file: path the figure is saved to (300 dpi).
        algorithm: forwarded unchanged as ``draw_struct``'s ``line`` keyword
            (the caller in this file passes a bool).
        title: accepted for interface compatibility; not used here.

    Regions are painted in a fixed order and later regions override earlier
    ones on overlapping positions: 5'UTR, 3'UTR, CDS start, CDS end, start
    codon, stop codon.
    """
    import matplotlib.pyplot as plt
    from matplotlib.colors import ListedColormap
    from draw_rna.ipynb_draw import draw_struct

    # (region key, colour, legend label). The list index is the integer value
    # written into the per-base colour vector and looked up in the colormap.
    region_styles = [
        ('default', '#808080', None),
        ('UTR5', '#FF0000', "5'UTR"),
        ('CDS_start', '#0000FF', "CDS Start"),
        ('CDS_end', '#FFC0CB', "CDS End"),
        ('UTR3', '#FFA500', "3'UTR"),
        ('start_codon', '#FFFF00', "Start Codon"),
        ('stop_codon', '#800080', "Stop Codon"),
    ]
    region_to_value = {key: value for value, (key, _, _) in enumerate(region_styles)}
    custom_cmap = ListedColormap([color for _, color, _ in region_styles])

    paint_order = [
        ('UTR5', utr5_range),
        ('UTR3', utr3_range),
        ('CDS_start', start_codon_range),
        ('CDS_end', stop_codon_range),
        ('start_codon', start_codon),
        ('stop_codon', stop_codon),
    ]
    colors = [region_to_value['default']] * len(full_sequence)
    for region, spec in paint_order:
        value = region_to_value[region]
        # Region specs are parsed explicitly instead of being eval()'d: an
        # empty region ("") or a single position ("5") would otherwise fail.
        for pos in _parse_region_positions(spec):
            colors[pos - 1] = value

    draw_struct(
        full_sequence,
        structure,
        c=colors,
        cmap=custom_cmap,
        vmin=0,
        vmax=len(region_styles) - 1,
        line=algorithm,
    )

    legend_elements = [
        plt.Rectangle((0, 0), 1, 1, facecolor=color, edgecolor='black', label=label)
        for _, color, label in region_styles
        if label is not None
    ]
    plt.legend(handles=legend_elements, loc='upper left',
               bbox_to_anchor=(1.05, 1), fontsize=10)

    try:
        plt.savefig(output_file, dpi=300, bbox_inches='tight')
    finally:
        # Always release the figure, even when saving fails.
        plt.close()

    print(f"Successfully created: {output_file}")
| |
|
def draw_simple(utr5_seq, title=''):
    """Fold a sequence, draw its MFE structure and save it to a temp PNG.

    Args:
        utr5_seq: sequence to fold and draw (any region, despite the name).
        title: figure title.

    Returns:
        ``(output_file, mfe, structure)`` where ``output_file`` is the path
        of a temporary ``.png`` that is not deleted automatically.
    """
    structure, mfe = calc_mfe(utr5_seq)

    import matplotlib.pyplot as plt
    from draw_rna.ipynb_draw import draw_struct

    draw_struct(utr5_seq, structure)

    # Only the unique path is needed; the handle is released before saving so
    # the file is not held open while matplotlib writes to it.
    with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmpfile:
        output_file = tmpfile.name

    plt.title(title)
    try:
        plt.savefig(output_file,
                    dpi=300,
                    bbox_inches='tight',
                    facecolor='white',
                    edgecolor='none')
    finally:
        # Close the figure so repeated requests do not accumulate open
        # figures in a long-running process.
        plt.close()
    return output_file, mfe, structure
| |
|
| |
|
| |
|
| |
|
def _positions_from_csv(csv_text):
    """Parse a comma-separated position string (possibly empty) into ints."""
    return [int(token) for token in csv_text.split(',') if token.strip()]


def generate_rna_structure(utr5_seq, cds_seq, utr3_seq, structure, draw_2d=None):
    """Render the requested structure drawings for an mRNA.

    Args:
        utr5_seq: 5'UTR sequence.
        cds_seq: coding sequence.
        utr3_seq: 3'UTR sequence.
        structure: dot-bracket structure of the full mRNA; when empty it is
            computed with ``calc_mfe`` if the full-mRNA drawing is requested.
        draw_2d: collection of drawings to produce. Recognised entries are
            ``"Full mRNA"`` (``"mRNA"`` is accepted as an alias),
            ``"5'leader (30 nt)"``, ``"5'UTR"``, ``"CDS"`` and ``"3'UTR"``.
            Defaults to the full-mRNA drawing only.

    Returns:
        ``(img_paths, mfe, structure, message)``:
        ``img_paths`` is a list of ``(file_path, tag)`` tuples, ``mfe`` is
        the full-mRNA MFE when it was computed here (else ``None``),
        ``structure`` is the structure that was used, and ``message`` holds
        the per-region MFE lines or, on a sequence/structure length
        mismatch, the error text (with no images).
    """
    # The former default ["mRNA"] matched none of the options checked below,
    # so a default call drew nothing; treat it as the full-mRNA drawing.
    selected = ("Full mRNA",) if draw_2d is None else draw_2d

    message = ""
    full_sequence = utr5_seq + cds_seq + utr3_seq
    mfe = None
    img_paths = []

    if "Full mRNA" in selected or "mRNA" in selected:
        if not structure:
            structure, mfe = calc_mfe(full_sequence)

        if len(full_sequence) != len(structure):
            # Keep the 4-tuple shape so callers can always unpack the result.
            error = f"序列长度({len(full_sequence)})与结构长度({len(structure)})不匹配"
            return img_paths, mfe, structure, error

        (utr5_range, start_codon_range, stop_codon_range,
         utr3_range, start_codon, stop_codon) = get_bases_index(
            utr5_seq, cds_seq, utr3_seq
        )

        # 5' side: 5'UTR + start codon + CDS-start window.
        # 3' side: CDS-end window + 3'UTR + stop codon.
        # Parsed explicitly (no eval) so an empty UTR or window is harmless.
        head = (_positions_from_csv(utr5_range)
                + _positions_from_csv(start_codon)
                + _positions_from_csv(start_codon_range))
        tail = (_positions_from_csv(stop_codon_range)
                + _positions_from_csv(utr3_range)
                + _positions_from_csv(stop_codon))

        focus_region = (f'{min(head)}-{max(head)}:fill=#bcffdd;'
                        f'{min(tail)}-{max(tail)}:fill=#bcffdd')
        pairs = dbn_to_tuple(structure, c1_region=head, c2_region=tail)
        auxBPs = ';'.join(f'({key},{value}):color=#6ed86e' for key, value in pairs.items())

        for algorithm in ["line", "naview"]:
            with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmpfile:
                output_file = tmpfile.name
            img_paths.append((output_file, f'mRNA_{algorithm}'))

            # The renderer receives a flag that is True only for "line".
            use_line_layout = algorithm == "line"
            run_draw_rna_advanced(full_sequence, structure, utr5_range, start_codon_range,
                                  stop_codon_range, utr3_range, start_codon, stop_codon,
                                  focus_region, auxBPs, output_file, use_line_layout,
                                  title='mRNA')

    # (option / plot title, sequence to fold, gallery tag, label in message)
    sub_regions = [
        ("5'leader (30 nt)", full_sequence[:30], 'head_30', "head(30nt)"),
        ("5'UTR", utr5_seq, 'utr5', "5'UTR"),
        ("CDS", cds_seq, 'cds', "CDS"),
        ("3'UTR", utr3_seq, 'utr3', "3'UTR"),
    ]
    for option, sequence, tag, label in sub_regions:
        if option in selected:
            img_path, local_mfe, _ = draw_simple(sequence, title=option)
            img_paths.append((img_path, tag))
            message += f"\n{label} MFE={local_mfe:.2f} kcal/mol"

    return img_paths, mfe, structure, message
| |
|
| |
|
def visualize_rna(utr5_seq, cds_seq, utr3_seq, structure):
    """Gradio callback: draw the structure and format the MFE for display.

    Returns:
        ``(images, mfe_text, structure, message)`` matching the four output
        components wired to the submit button. ``mfe_text`` is ``None`` when
        no full-mRNA MFE was computed.
    """
    result = generate_rna_structure(utr5_seq, cds_seq, utr3_seq, structure)
    if isinstance(result, str):
        # generate_rna_structure may report a problem as a bare message
        # string instead of a 4-tuple; show it rather than fail on unpacking.
        return [], None, structure, result

    image_path, mfe, structure, message = result
    # Compare against None explicitly: an MFE of 0.0 is a valid value.
    mfe_text = f'MFE={mfe:.2f} kcal/mol' if mfe is not None else None
    return image_path, mfe_text, structure, message
| |
|
| |
|
def draw_rna_2d():
    """Build and return the Gradio Blocks app for RNA structure drawing.

    Layout: a left column with the three sequence boxes, the structure box
    and the submit button; a right column with the image gallery and two
    Markdown outputs (MFE and message); then two collapsed accordions with
    the colour legend and example inputs. The submit button calls
    ``visualize_rna``.
    """
    default_region = "AUGCCAUGAACAGCUAC"
    default_structure = "...........((((.((((.((((........)))).))))...)))).."
    example_rows = [
        [default_region, default_region, default_region, default_structure],
        [
            "GGGAAAUUUCCC",
            "AUGCCAUGAACAGCUAC",
            "UUUAAAGGGCCC",
            "((((....))))..(((.((((.......))))...))).."
        ],
    ]

    with gr.Blocks(title="RNA结构可视化") as demo:
        gr.Markdown("# RNA结构可视化工具")
        gr.Markdown("使用VARNA可视化RNA二级结构,并高亮显示不同区域")

        with gr.Row():
            with gr.Column(scale=1):
                utr5_box = gr.Textbox(label="5'UTR序列", value=default_region,
                                      placeholder="输入5'UTR序列...")
                cds_box = gr.Textbox(label="CDS序列", value=default_region,
                                     placeholder="输入CDS序列...")
                utr3_box = gr.Textbox(label="3'UTR序列", value=default_region,
                                      placeholder="输入3'UTR序列...")
                structure_box = gr.Textbox(
                    label="二级结构",
                    value=default_structure,
                    placeholder="输入点括号表示的二级结构..."
                )
                draw_button = gr.Button("生成结构图", variant="primary")

            with gr.Column():
                gallery = gr.Gallery(label="RNA结构图", interactive=False, object_fit="contain")
                mfe_view = gr.Markdown(label="MFE", value="")
                message_view = gr.Markdown(label="Message", value="")

        # Colour legend for the highlighted regions.
        with gr.Accordion("颜色说明", open=False):
            gr.Markdown("""
            | 颜色 | 区域 |
            |------|------|
            | <span style="color:red">■</span> 红色 | 5'UTR 区域 |
            | <span style="color:blue">■</span> 蓝色 | CDS起始区域 |
            | <span style="color:#FFC0CB">■</span> 粉色 | CDS终止区域 |
            | <span style="color:orange">■</span> 橙色 | 3'UTR 区域 |
            | <span style="color:yellow">■</span> 黄色 | 起始密码子 (AUG) |
            | <span style="color:purple">■</span> 紫色 | 终止密码子 (UAA, UAG, UGA) |
            """)

        # Clickable example inputs that fill the four text boxes.
        with gr.Accordion("示例数据", open=False):
            gr.Examples(
                examples=example_rows,
                inputs=[utr5_box, cds_box, utr3_box, structure_box]
            )

        # The structure box is also an output, so a computed structure is
        # written back into it.
        draw_button.click(
            visualize_rna,
            inputs=[utr5_box, cds_box, utr3_box, structure_box],
            outputs=[gallery, mfe_view, structure_box, message_view]
        )
    return demo
| |
|
| |
|
| | |
if __name__ == "__main__":
    # Script entry point: build the UI and serve it on port 8080 in debug mode.
    demo = draw_rna_2d()
    demo.launch(debug=True, server_port=8080)
| |
|