File size: 9,755 Bytes
d95502a
 
 
 
 
 
 
 
4b2b2a9
 
da26c56
 
 
d95502a
 
 
713e17a
4b2b2a9
 
 
 
713e17a
 
 
7da4c00
713e17a
 
7da4c00
713e17a
 
7da4c00
713e17a
 
 
 
 
 
 
 
 
 
7da4c00
695ce01
 
 
 
 
6124d80
713e17a
4b2b2a9
f8c3aaf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3d974bc
 
ff3199e
3d974bc
ff3199e
3d974bc
 
f8c3aaf
 
 
 
 
 
 
 
 
 
 
 
 
 
1ed35e3
f8c3aaf
 
 
 
 
 
 
 
 
 
 
6124d80
116a83c
f8c3aaf
 
f081204
 
f8c3aaf
 
f081204
 
 
6124d80
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4b2b2a9
 
d95502a
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
import gradio as gr
import warnings
import os
import subprocess
from pathlib import Path
import shutil
import spaces
from space_utils.download_weights import download_ligandmpnn_weights
import random
import time
from space_utils.pipelines import *
from space_utils.handle_events import *
from space_utils.handle_files import *

# Runs once at import time, before any UI component below is constructed.
# NOTE(review): presumably fetches the LigandMPNN checkpoint files used by the
# generation pipelines — confirm in space_utils.download_weights.
download_ligandmpnn_weights()

# Gradio front-end: an introduction/help section followed by a "Batch generation"
# tab that runs `run_generation_folder` on the uploaded PDB files.
# NOTE(review): `title` is an empty string; confirm this is intended rather than
# a leftover placeholder.
with gr.Blocks(title="") as demo:

    gr.Markdown("# LigandMPNN for Inverse Folding 🔙 ")

    # Introduction text on the left, model figure on the right.
    with gr.Row():
        gr.Markdown("""
        Inverse Protein Folding is the step following protein backbone generation in protein design pipelines. It involves designing amino acid sequences that will fold into a given protein backbone structure. 
        <a href="https://www.science.org/doi/10.1126/science.add2187" target="_blank">ProteinMPNN</a> tackles this problem by using a message-passing neural network architecture to design sequences conditioned on backbone structure.
        
                    
        <a href="https://www.nature.com/articles/s41592-025-02626-1" target="_blank">LigandMPNN</a> extends this method to include ligand information, thus conditioning the designed sequence on the atomic context as well.
        It shows increased recovery of native sequences in pockets interacting with small molecules, nucleotides and metals.
        
        This space allows you to run inverse folding jobs using Hugging Face's hardware and download the results! It is based on <a href="https://github.com/dauparas/LigandMPNN/tree/main" target="_blank">LigandMPNN's original Github repository.</a>
                    
        <u>Image and Model Source</u>: Dauparas, J., Lee, G.R., Pecoraro, R. et al. Atomic context-conditioned protein sequence design using LigandMPNN. Nat Methods 22, 717–723 (2025). https://doi.org/10.1038/s41592-025-02626-1
    
        """)

        gr.Image("assets/ligand_mpnn.png", width=600)

    gr.Markdown("## How to Use this Space")

    gr.Markdown("""
                Refer to <a href="https://github.com/dauparas/LigandMPNN/tree/main" target="_blank">the original repo</a> for a detailed description of the available command line arguments. While essential parameters
                such as number of designs to generate, temperature, and which chains to design can be easily controlled through the UI, more advanced parameters can still be specified under `Advanced Options`.
                Please note that this space hard-codes the version of LigandMPNN weights so that trying to change the checkpoint using `--checkpoint_ligand_mpnn` will cause errors.
                
                Batch generation allows to design sequences for multiple PDB files at once. Note that CLI options `--fixed_residues_multi`, `--redesigned_residues_multi` and the like that allow fine-grained
                pdb-specific control over design parameters are not yet implemented. However, one can still specify which residues to fix for all PDBs in the batch at once using `--fixed_residues`.
                This space pairs well with the <a href="https://huggingface.co/spaces/hugging-science/RFdiffusion3" target="_blank">RFD3 backbone generation space</a> as the PDB files in its output can be directly uploaded for batch generation.
                """)

    with gr.Tabs():
        with gr.TabItem("Batch generation"):
            with gr.Row():
                # Left column: run parameters forwarded to `run_generation_folder`.
                with gr.Column():
                    num_designs_per_batch_multi = gr.Number(
                        value=2,
                        label="Number of Designs per Batch",
                        precision=0,
                        minimum=1,
                        maximum=16
                    )
                    num_batches_multi = gr.Number(
                        value=5,
                        label="Number of Batches",
                        precision=0,
                        minimum=1,
                        maximum=10
                    )
                    temperature_multi = gr.Number(
                        label="Temperature",
                        value=0.3,
                        minimum=0,
                        maximum=1.0
                    )

                    # Passed as the last input of `run_generation_folder`.
                    # NOTE(review): presumably the time budget requested for the
                    # run — confirm how the handler uses it.
                    max_duration = gr.Number(
                        label="Max Duration (seconds)",
                        value=300,
                        minimum=1,
                        maximum=3600,
                    )

                    chains_to_design_multi = gr.Textbox(
                        label="Chains to Design (comma-separated)",
                        placeholder="e.g., 'A,B' to redesign chains A and B. all chains if left blank"
                    )

                    with gr.Accordion(label="Advanced Options", open=False):
                        extra_args_multi = gr.Textbox(
                            label="Additional CLI Arguments",
                            placeholder="e.g., --fixed_residues 'C1 C2 C3 C4 C5 C6 C7 C8 C9 C10' --parse_these_chains_only 'A,B'",
                            lines=3,
                            info="Add extra LigandMPNN CLI arguments here (optional)"
                        )

                # Right column: the PDB files to process (multiple uploads allowed).
                with gr.Column():
                    pdb_folder = gr.File(label="Upload all PDB files", file_count="multiple", file_types=[".pdb"])

            run_btn_multi = gr.Button("Run Generation", variant="primary")
            runtextbox_multi = gr.Textbox(label="Run status", value="Waiting for generation run...")

            output_file_multi = gr.File(label="Download LigandMPNN results as zip", visible=True)

            select_fasta_to_show = gr.Dropdown(label="Select PDB file to visualize fasta results from", choices=[], interactive=True)
            fastatextbox_multi = gr.Textbox(label="Visualize Fasta results", value="Waiting for generation run...")

            # Per-session state written by `run_generation_folder` (its second
            # output) and read by the follow-up handlers below.
            gen_directory_multi = gr.State(value=None)

            def _list_fasta_choices(gen_dir):
                """Build the dropdown update listing the entries of ``<gen_dir>/seqs``.

                Args:
                    gen_dir: Value held in ``gen_directory_multi``; falsy when no
                        generation directory is available.

                Returns:
                    ``gr.update()`` (leave the dropdown unchanged) when ``gen_dir``
                    is falsy; otherwise an update whose ``choices`` are the sorted
                    entries of ``<gen_dir>/seqs``, or an empty list when that
                    folder does not exist.
                """
                if not gen_dir:
                    return gr.update()
                seqs_dir = os.path.join(gen_dir, "seqs")
                # Listing a missing folder would raise FileNotFoundError inside
                # the event handler; show an empty menu instead.
                if not os.path.isdir(seqs_dir):
                    return gr.update(choices=[])
                # os.listdir returns entries in arbitrary order; sort them so the
                # menu is stable between runs.
                return gr.update(choices=sorted(os.listdir(seqs_dir)))

            # Run the generation, then show fasta output, then refresh the
            # dropdown with the files available for inspection.
            run_btn_multi.click(
                run_generation_folder,
                inputs=[
                    pdb_folder,
                    num_batches_multi,
                    num_designs_per_batch_multi,
                    chains_to_design_multi,
                    temperature_multi,
                    extra_args_multi,
                    max_duration,
                ],
                outputs=[runtextbox_multi, gen_directory_multi, output_file_multi],
            ).then(
                display_fasta, inputs=gen_directory_multi, outputs=fastatextbox_multi
            ).then(
                _list_fasta_choices, inputs=gen_directory_multi, outputs=select_fasta_to_show
            )

            # Selecting an entry in the dropdown re-renders the fasta textbox.
            select_fasta_to_show.change(display_specific_fasta, inputs=[gen_directory_multi, select_fasta_to_show], outputs=fastatextbox_multi)
            

        #with gr.TabItem("Single PDB generation"):
        #    with gr.Row():
        #        with gr.Column():
        #            num_designs_per_batch = gr.Number(
        #                value=2,
        #                label="Number of Designs per Batch",
        #                precision=0,
        #                minimum=1,
        #                maximum=16
        #            )
        #            num_batches= gr.Number(
        #                value=5,
        #                label="Number of Batches",
        #                precision=0,
        #                minimum=1,
        #                maximum=10
        #            )
        #            temperature = gr.Number(
        #                label="Temperature",
        #                value=0.3,
        #                minimum=0,
        #                maximum=1.0
        #            )
#
        #            chains_to_design = gr.Textbox(
        #                label="Chains to Design (comma-separated)",
        #                placeholder="e.g., 'A,B' to redesign chains A and B. all chains if left blank"
        #            )
#
        #            with gr.Accordion(label="Advanced Options", open=False):
        #                extra_args = gr.Textbox(
        #                    label="Additional CLI Arguments",
        #                    placeholder="e.g., --fixed_residues 'C1 C2 C3 C4 C5 C6 C7 C8 C9 C10' --parse_these_chains_only 'A,B'",
        #                    lines=3,
        #                    info="Add extra LigandMPNN CLI arguments here (optional)"
        #                )
#
        #        with gr.Column():
        #            pdb_file = gr.File(label="Upload PDB File", file_types=[".pdb"])
#
        #    run_btn = gr.Button("Run Generation", variant="primary")
        #    runtextbox = gr.Textbox(label="Run status", value="Waiting for generation run...")
#
#
        #    output_file = gr.File(label="Download LigandMPNN results as zip", visible=True)
        #    fastatextbox = gr.Textbox(label="Visualize Fasta results", value="Waiting for generation run...")
#
        #    gen_directory = gr.State(value=None)
#
        #    
        #    def generate(pdb_file, num_batches, num_designs_per_batch, chains_to_design, temperature, extra_args):
        #        if pdb_file is None:
        #            return gr.update(), gr.update(value=None)
        #        else:
        #            return run_generation_single_pdb(pdb_file.name, num_batches, num_designs_per_batch, chains_to_design, temperature, extra_args)
#
#
        #    run_btn.click(give_run_status_single, inputs=[pdb_file, num_batches, num_designs_per_batch], outputs=runtextbox).then(
        #        generate, inputs=[pdb_file, num_batches, num_designs_per_batch, chains_to_design, temperature, extra_args], outputs=[runtextbox, gen_directory]
        #    ).then(
        #        display_fasta, inputs=gen_directory, outputs=fastatextbox
        #    ).then(
        #        download_results_as_zip, inputs=gen_directory, outputs=output_file
        #    )


# Launch the Gradio app only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    demo.launch()