# Hugging Face Space source (Running on Zero); file size: 9,755 Bytes
import gradio as gr
import warnings
import os
import subprocess
from pathlib import Path
import shutil
import spaces
from space_utils.download_weights import download_ligandmpnn_weights
import random
import time
from space_utils.pipelines import *
from space_utils.handle_events import *
from space_utils.handle_files import *
# Fetch the model weights once at import time, before the UI is built.
# (Helper imported from space_utils.download_weights.)
download_ligandmpnn_weights()


def _fasta_dropdown_update(gen_dir):
    """Build the dropdown update listing the result files of a finished run.

    Args:
        gen_dir: Output directory of the last generation run, or a falsy
            value (e.g. ``None``) when no run has produced one yet.

    Returns:
        ``gr.update()`` (leave the dropdown untouched) when ``gen_dir`` is
        falsy. Otherwise a ``gr.update`` whose ``choices`` are the sorted
        entries of ``<gen_dir>/seqs``, or an empty list when that folder
        does not exist.
    """
    if not gen_dir:
        return gr.update()
    try:
        # os.listdir returns entries in arbitrary order; sort them so the
        # dropdown is stable from one run to the next.
        entries = sorted(os.listdir(os.path.join(gen_dir, "seqs")))
    except (FileNotFoundError, NotADirectoryError):
        # A run that failed or was cut short may leave no "seqs" folder;
        # show an empty dropdown instead of raising inside the event chain.
        entries = []
    return gr.update(choices=entries)


# NOTE(review): the nesting of the layout below was reconstructed from a
# copy of this file whose indentation had been flattened — confirm the
# placement of the run button / status box / result widgets against the
# deployed Space.
with gr.Blocks(title="") as demo:
    gr.Markdown("# LigandMPNN for Inverse Folding 🔙 ")

    # Introductory text next to the model figure.
    with gr.Row():
        gr.Markdown("""
Inverse Protein Folding is the step following protein backbone generation in protein design pipelines. It involves designing amino acid sequences that will fold into a given protein backbone structure.
<a href="https://www.science.org/doi/10.1126/science.add2187" target="_blank">ProteinMPNN</a> tackles this problem by using a message-passing neural network architecture to design sequences conditioned on backbone structure.
<a href="https://www.nature.com/articles/s41592-025-02626-1" target="_blank">LigandMPNN</a> extends this method to include ligand information, thus conditioning the designed sequence on the atomic context as well.
It shows increased recovery of native sequences in pockets interacting with small molecules, nucleotides and metals.
This space allows you to run inverse folding jobs using Hugging Face's hardware and download the results! It is based on <a href="https://github.com/dauparas/LigandMPNN/tree/main" target="_blank">LigandMPNN's original Github repository.</a>
<u>Image and Model Source</u>: Dauparas, J., Lee, G.R., Pecoraro, R. et al. Atomic context-conditioned protein sequence design using LigandMPNN. Nat Methods 22, 717–723 (2025). https://doi.org/10.1038/s41592-025-02626-1
""")
        gr.Image("assets/ligand_mpnn.png", width=600)

    gr.Markdown("## How to Use this Space")
    gr.Markdown("""
Refer to <a href="https://github.com/dauparas/LigandMPNN/tree/main" target="_blank">the original repo</a> for a detailed description of the available command line arguments. While essential parameters
such as number of designs to generate, temperature, and which chains to design can be easily controlled through the UI, more advanced parameters can still be specified under `Advanced Options`.
Please note that this space hard-codes the version of LigandMPNN weights so that trying to change the checkpoint using `--checkpoint_ligand_mpnn` will cause errors.
Batch generation allows to design sequences for multiple PDB files at once. Note that CLI options `--fixed_residues_multi`, `--redesigned_residues_multi` and the like that allow fine-grained
pdb-specific control over design parameters are not yet implemented. However, one can still specify which residues to fix for all PDBs in the batch at once using `--fixed_residues`.
This space pairs well with the <a href="https://huggingface.co/spaces/hugging-science/RFdiffusion3" target="_blank">RFD3 backbone generation space</a> as the PDB files in its output can be directly uploaded for batch generation.
""")

    with gr.Tabs():
        with gr.TabItem("Batch generation"):
            with gr.Row():
                # Left column: generation parameters.
                with gr.Column():
                    num_designs_per_batch_multi = gr.Number(
                        value=2,
                        label="Number of Designs per Batch",
                        precision=0,
                        minimum=1,
                        maximum=16,
                    )
                    num_batches_multi = gr.Number(
                        value=5,
                        label="Number of Batches",
                        precision=0,
                        minimum=1,
                        maximum=10,
                    )
                    temperature_multi = gr.Number(
                        label="Temperature",
                        value=0.3,
                        minimum=0,
                        maximum=1.0,
                    )
                    max_duration = gr.Number(
                        label="Max Duration (seconds)",
                        value=300,
                        minimum=1,
                        maximum=3600,
                    )
                    chains_to_design_multi = gr.Textbox(
                        label="Chains to Design (comma-separated)",
                        placeholder="e.g., 'A,B' to redesign chains A and B. all chains if left blank",
                    )
                    with gr.Accordion(label="Advanced Options", open=False):
                        extra_args_multi = gr.Textbox(
                            label="Additional CLI Arguments",
                            placeholder="e.g., --fixed_residues 'C1 C2 C3 C4 C5 C6 C7 C8 C9 C10' --parse_these_chains_only 'A,B'",
                            lines=3,
                            info="Add extra LigandMPNN CLI arguments here (optional)",
                        )

                # Right column: input structures and run controls.
                with gr.Column():
                    pdb_folder = gr.File(
                        label="Upload all PDB files",
                        file_count="multiple",
                        file_types=[".pdb"],
                    )
                    run_btn_multi = gr.Button("Run Generation", variant="primary")
                    runtextbox_multi = gr.Textbox(
                        label="Run status",
                        value="Waiting for generation run...",
                    )

            # Result widgets shown below the parameter/input row.
            output_file_multi = gr.File(
                label="Download LigandMPNN results as zip",
                visible=True,
            )
            select_fasta_to_show = gr.Dropdown(
                label="Select PDB file to visualize fasta results from",
                choices=[],
                interactive=True,
            )
            fastatextbox_multi = gr.Textbox(
                label="Visualize Fasta results",
                value="Waiting for generation run...",
            )

            # Holds the output directory of the latest run so that the
            # follow-up steps and the dropdown handler can read it.
            gen_directory_multi = gr.State(value=None)

            # Event chain: run the job, then show FASTA output, then refresh
            # the dropdown choices. The handlers (run_generation_folder,
            # display_fasta, display_specific_fasta) are not defined in this
            # file; presumably they come from the space_utils star imports.
            run_btn_multi.click(
                run_generation_folder,
                inputs=[
                    pdb_folder,
                    num_batches_multi,
                    num_designs_per_batch_multi,
                    chains_to_design_multi,
                    temperature_multi,
                    extra_args_multi,
                    max_duration,
                ],
                outputs=[runtextbox_multi, gen_directory_multi, output_file_multi],
            ).then(
                display_fasta,
                inputs=gen_directory_multi,
                outputs=fastatextbox_multi,
            ).then(
                _fasta_dropdown_update,
                inputs=gen_directory_multi,
                outputs=select_fasta_to_show,
            )

            # Picking an entry in the dropdown re-renders the FASTA textbox.
            select_fasta_to_show.change(
                display_specific_fasta,
                inputs=[gen_directory_multi, select_fasta_to_show],
                outputs=fastatextbox_multi,
            )
#with gr.TabItem("Single PDB generation"):
# with gr.Row():
# with gr.Column():
# num_designs_per_batch = gr.Number(
# value=2,
# label="Number of Designs per Batch",
# precision=0,
# minimum=1,
# maximum=16
# )
# num_batches= gr.Number(
# value=5,
# label="Number of Batches",
# precision=0,
# minimum=1,
# maximum=10
# )
# temperature = gr.Number(
# label="Temperature",
# value=0.3,
# minimum=0,
# maximum=1.0
# )
#
# chains_to_design = gr.Textbox(
# label="Chains to Design (comma-separated)",
# placeholder="e.g., 'A,B' to redesign chains A and B. all chains if left blank"
# )
#
# with gr.Accordion(label="Advanced Options", open=False):
# extra_args = gr.Textbox(
# label="Additional CLI Arguments",
# placeholder="e.g., --fixed_residues 'C1 C2 C3 C4 C5 C6 C7 C8 C9 C10' --parse_these_chains_only 'A,B'",
# lines=3,
# info="Add extra LigandMPNN CLI arguments here (optional)"
# )
#
# with gr.Column():
# pdb_file = gr.File(label="Upload PDB File", file_types=[".pdb"])
#
# run_btn = gr.Button("Run Generation", variant="primary")
# runtextbox = gr.Textbox(label="Run status", value="Waiting for generation run...")
#
#
# output_file = gr.File(label="Download LigandMPNN results as zip", visible=True)
# fastatextbox = gr.Textbox(label="Visualize Fasta results", value="Waiting for generation run...")
#
# gen_directory = gr.State(value=None)
#
#
# def generate(pdb_file, num_batches, num_designs_per_batch, chains_to_design, temperature, extra_args):
# if pdb_file is None:
# return gr.update(), gr.update(value=None)
# else:
# return run_generation_single_pdb(pdb_file.name, num_batches, num_designs_per_batch, chains_to_design, temperature, extra_args)
#
#
# run_btn.click(give_run_status_single, inputs=[pdb_file, num_batches, num_designs_per_batch], outputs=runtextbox).then(
# generate, inputs=[pdb_file, num_batches, num_designs_per_batch, chains_to_design, temperature, extra_args], outputs=[runtextbox, gen_directory]
# ).then(
# display_fasta, inputs=gen_directory, outputs=fastatextbox
# ).then(
# download_results_as_zip, inputs=gen_directory, outputs=output_file
# )
# Start the Gradio server only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()