# Spaces: hugging-science / LigandMPNN
# Running on Zero
# File size: 9,755 Bytes

import gradio as gr
import warnings
import os
import subprocess
from pathlib import Path
import shutil
import spaces
from space_utils.download_weights import download_ligandmpnn_weights
import random
import time
from space_utils.pipelines import *
from space_utils.handle_events import *
from space_utils.handle_files import *

# Runs at import time, before any UI component is created.
# NOTE(review): presumably fetches the LigandMPNN checkpoints (the helper lives in
# space_utils.download_weights) — confirm there what is downloaded and where.
download_ligandmpnn_weights()

with gr.Blocks(title="") as demo:


    # Page heading.
    gr.Markdown("# LigandMPNN for Inverse Folding 🔙 ")

    # Intro row: the background text and the figure are children of the same Row.
    with gr.Row():
        gr.Markdown("""
        Inverse Protein Folding is the step following protein backbone generation in protein design pipelines. It involves designing amino acid sequences that will fold into a given protein backbone structure. 
        <a href="https://www.science.org/doi/10.1126/science.add2187" target="_blank">ProteinMPNN</a> tackles this problem by using a message-passing neural network architecture to design sequences conditioned on backbone structure.
        
                    
        <a href="https://www.nature.com/articles/s41592-025-02626-1" target="_blank">LigandMPNN</a> extends this method to include ligand information, thus conditioning the designed sequence on the atomic context as well.
        It shows increased recovery of native sequences in pockets interacting with small molecules, nucleotides and metals.
        
        This space allows you to run inverse folding jobs using Hugging Face's hardware and download the results! It is based on <a href="https://github.com/dauparas/LigandMPNN/tree/main" target="_blank">LigandMPNN's original Github repository.</a>
                    
        <u>Image and Model Source</u>: Dauparas, J., Lee, G.R., Pecoraro, R. et al. Atomic context-conditioned protein sequence design using LigandMPNN. Nat Methods 22, 717–723 (2025). https://doi.org/10.1038/s41592-025-02626-1
    
        """)

        # Figure read from the relative path assets/ligand_mpnn.png, displayed with width=600.
        gr.Image("assets/ligand_mpnn.png", width=600)

    gr.Markdown("## How to Use this Space")

    # Usage notes: points to the upstream CLI documentation, states that the checkpoint
    # is hard-coded, and lists which multi-PDB CLI options are not implemented yet.
    gr.Markdown("""
                Refer to <a href="https://github.com/dauparas/LigandMPNN/tree/main" target="_blank">the original repo</a> for a detailed description of the available command line arguments. While essential parameters
                such as number of designs to generate, temperature, and which chains to design can be easily controlled through the UI, more advanced parameters can still be specified under `Advanced Options`.
                Please note that this space hard-codes the version of LigandMPNN weights so that trying to change the checkpoint using `--checkpoint_ligand_mpnn` will cause errors.
                
                Batch generation allows to design sequences for multiple PDB files at once. Note that CLI options `--fixed_residues_multi`, `--redesigned_residues_multi` and the like that allow fine-grained
                pdb-specific control over design parameters are not yet implemented. However, one can still specify which residues to fix for all PDBs in the batch at once using `--fixed_residues`.
                This space pairs well with the <a href="https://huggingface.co/spaces/hugging-science/RFdiffusion3" target="_blank">RFD3 backbone generation space</a> as the PDB files in its output can be directly uploaded for batch generation.
                """)

    def _fasta_choices_update(gen_dir):
        """Build the dropdown update listing the entries of ``<gen_dir>/seqs``.

        Args:
            gen_dir: Current value of the generation-directory state; falsy
                (e.g. ``None``) when no directory is available.

        Returns:
            ``gr.update()`` (leave the dropdown untouched) when ``gen_dir`` is
            falsy. Otherwise an update whose choices are the sorted entry names
            of ``<gen_dir>/seqs``, or an empty list if that folder is missing.
        """
        if not gen_dir:
            return gr.update()
        try:
            # os.listdir yields names in arbitrary order; sort for a stable menu.
            entries = sorted(os.listdir(os.path.join(gen_dir, "seqs")))
        except (FileNotFoundError, NotADirectoryError):
            # No "seqs" folder: empty the menu instead of raising inside the
            # event chain.
            entries = []
        return gr.update(choices=entries)

    with gr.Tabs():
        with gr.TabItem("Batch generation"):
            with gr.Row():
                # First column: numeric settings and free-text options.
                with gr.Column():
                    num_designs_per_batch_multi = gr.Number(
                        value=2,
                        label="Number of Designs per Batch",
                        precision=0,
                        minimum=1,
                        maximum=16,
                    )
                    num_batches_multi = gr.Number(
                        value=5,
                        label="Number of Batches",
                        precision=0,
                        minimum=1,
                        maximum=10,
                    )
                    temperature_multi = gr.Number(
                        label="Temperature",
                        value=0.3,
                        minimum=0,
                        maximum=1.0,
                    )

                    max_duration = gr.Number(
                        label="Max Duration (seconds)",
                        value=300,
                        minimum=1,
                        maximum=3600,
                    )

                    chains_to_design_multi = gr.Textbox(
                        label="Chains to Design (comma-separated)",
                        placeholder="e.g., 'A,B' to redesign chains A and B. all chains if left blank",
                    )

                    # Collapsed by default; holds raw extra CLI arguments.
                    with gr.Accordion(label="Advanced Options", open=False):
                        extra_args_multi = gr.Textbox(
                            label="Additional CLI Arguments",
                            placeholder="e.g., --fixed_residues 'C1 C2 C3 C4 C5 C6 C7 C8 C9 C10' --parse_these_chains_only 'A,B'",
                            lines=3,
                            info="Add extra LigandMPNN CLI arguments here (optional)",
                        )

                # Second column: multi-file upload restricted to .pdb files.
                with gr.Column():
                    pdb_folder = gr.File(
                        label="Upload all PDB files",
                        file_count="multiple",
                        file_types=[".pdb"],
                    )

            run_btn_multi = gr.Button("Run Generation", variant="primary")
            runtextbox_multi = gr.Textbox(label="Run status", value="Waiting for generation run...")

            output_file_multi = gr.File(label="Download LigandMPNN results as zip", visible=True)

            select_fasta_to_show = gr.Dropdown(
                label="Select PDB file to visualize fasta results from",
                choices=[],
                interactive=True,
            )
            fastatextbox_multi = gr.Textbox(label="Visualize Fasta results", value="Waiting for generation run...")

            # Holds the directory value returned by the run callback so the
            # follow-up steps below can read it.
            gen_directory_multi = gr.State(value=None)

            # Event chain for the run button:
            #   1. run_generation_folder -> status text, directory state, file output
            #   2. display_fasta         -> FASTA textbox
            #   3. refresh the dropdown choices from <gen_dir>/seqs
            run_btn_multi.click(
                run_generation_folder,
                inputs=[
                    pdb_folder,
                    num_batches_multi,
                    num_designs_per_batch_multi,
                    chains_to_design_multi,
                    temperature_multi,
                    extra_args_multi,
                    max_duration,
                ],
                outputs=[runtextbox_multi, gen_directory_multi, output_file_multi],
            ).then(
                display_fasta, inputs=gen_directory_multi, outputs=fastatextbox_multi
            ).then(
                _fasta_choices_update, inputs=gen_directory_multi, outputs=select_fasta_to_show
            )

            # Changing the dropdown selection re-renders the FASTA textbox.
            select_fasta_to_show.change(
                display_specific_fasta,
                inputs=[gen_directory_multi, select_fasta_to_show],
                outputs=fastatextbox_multi,
            )
            

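        # NOTE: the "Single PDB generation" tab below is disabled (commented out);
        # only the "Batch generation" tab is active.
        #with gr.TabItem("Single PDB generation"):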
        #    with gr.Row():
        #        with gr.Column():
        #            num_designs_per_batch = gr.Number(
        #                value=2,
        #                label="Number of Designs per Batch",
        #                precision=0,
        #                minimum=1,
        #                maximum=16
        #            )
        #            num_batches= gr.Number(
        #                value=5,
        #                label="Number of Batches",
        #                precision=0,
        #                minimum=1,
        #                maximum=10
        #            )
        #            temperature = gr.Number(
        #                label="Temperature",
        #                value=0.3,
        #                minimum=0,
        #                maximum=1.0
        #            )
#
        #            chains_to_design = gr.Textbox(
        #                label="Chains to Design (comma-separated)",
        #                placeholder="e.g., 'A,B' to redesign chains A and B. all chains if left blank"
        #            )
#
        #            with gr.Accordion(label="Advanced Options", open=False):
        #                extra_args = gr.Textbox(
        #                    label="Additional CLI Arguments",
        #                    placeholder="e.g., --fixed_residues 'C1 C2 C3 C4 C5 C6 C7 C8 C9 C10' --parse_these_chains_only 'A,B'",
        #                    lines=3,
        #                    info="Add extra LigandMPNN CLI arguments here (optional)"
        #                )
#
        #        with gr.Column():
        #            pdb_file = gr.File(label="Upload PDB File", file_types=[".pdb"])
#
        #    run_btn = gr.Button("Run Generation", variant="primary")
        #    runtextbox = gr.Textbox(label="Run status", value="Waiting for generation run...")
#
#
        #    output_file = gr.File(label="Download LigandMPNN results as zip", visible=True)
        #    fastatextbox = gr.Textbox(label="Visualize Fasta results", value="Waiting for generation run...")
#
        #    gen_directory = gr.State(value=None)
#
        #    
        #    def generate(pdb_file, num_batches, num_designs_per_batch, chains_to_design, temperature, extra_args):
        #        if pdb_file is None:
        #            return gr.update(), gr.update(value=None)
        #        else:
        #            return run_generation_single_pdb(pdb_file.name, num_batches, num_designs_per_batch, chains_to_design, temperature, extra_args)
#
#
        #    run_btn.click(give_run_status_single, inputs=[pdb_file, num_batches, num_designs_per_batch], outputs=runtextbox).then(
        #        generate, inputs=[pdb_file, num_batches, num_designs_per_batch, chains_to_design, temperature, extra_args], outputs=[runtextbox, gen_directory]
        #    ).then(
        #        display_fasta, inputs=gen_directory, outputs=fastatextbox
        #    ).then(
        #        download_results_as_zip, inputs=gen_directory, outputs=output_file
        #    )


# Launch the Gradio app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    demo.launch()