Spaces:

nninva
/

predict_protein_cmp_bind_aff

Sleeping

App Files Files Community

nninva committed on Mar 21, 2025

Commit

d96e3d2

verified ·

1 Parent(s): 0f8a299

Changing the interface to allow user input as FASTA/SMILES strings or files

Browse files

Files changed (1) hide show

app.py +102 -34

app.py CHANGED Viewed

@@ -18,36 +18,66 @@ scaler = joblib.load("scaler.pkl")
 pca = joblib.load("pca.pkl")
 svr = joblib.load("svr_model.pkl")
-def generate_protein_embedding(protein):
-    # Generate FASTA string
-    mol = Chem.MolFromPDBFile(protein)
-    if not mol:
-        print("Could not convert file to protein molecule")
-        return None
-    fasta = Chem.MolToFASTA(mol).splitlines()[1]
-    # Generate protein embedding
     esm_model.eval()
     with torch.no_grad():
         inputs = esm_tokenizer(fasta, return_tensors="pt", padding=True, truncation=True, max_length=1024).to(device)
         outputs = esm_model(**inputs)
-        embedding = outputs.last_hidden_state.mean(dim=1).cpu().numpy()  # Extract last layer and mean pooling
     return embedding
-def generate_ligand_embedding(ligand):
-    # Generate SMILES string
-    mol = Chem.MolFromMol2File(ligand)
-    if not mol:
-        print("Could not convert file to ligand molecule")
-        return None
-    smiles = Chem.MolToSmiles(mol)
-    # Generate ligand embedding
     chemberta_model.eval()
     with torch.no_grad():
         inputs = chemberta_tokenizer(smiles, return_tensors="pt", padding=True, truncation=True).to(device)
         outputs = chemberta_model(**inputs)
-        embedding = outputs.last_hidden_state.mean(dim=1).cpu().numpy()
     return embedding
 def value_conversion(logKa):
@@ -63,9 +93,13 @@ def value_conversion(logKa):
     else:
         return f"{Kd * 1e12:.4f} pM"  # Picomolar
-def predict_affinity(protein_file, ligand_file):
-    protein = generate_protein_embedding(protein_file)
-    ligand = generate_ligand_embedding(ligand_file)
     if protein is None:
         return "Unable to parse protein .pdb file"
     if ligand is None:
@@ -81,15 +115,49 @@ def predict_affinity(protein_file, ligand_file):
     affinity_value = value_conversion(log_prediction)
     return f"Predicted Binding Affinity:\nlogKa = {log_prediction:.4f}\nKd = {affinity_value}"
-# Gradio interface
-iface = gr.Interface(
-    fn=predict_affinity,
-    inputs=[gr.File(label="Protein .pdb file"), gr.File(label="Ligand .mol2 file")],
-    outputs="text",
-    title="Predict Protein-Ligand Binding Affinity",
-    description="Upload the protein and ligand files to predict the binding affinity of the protein-ligand complex.",
-)
-# Run Gradio App
-if __name__ == "__main__":
-    iface.launch()

 pca = joblib.load("pca.pkl")
 svr = joblib.load("svr_model.pkl")
+# def generate_protein_embedding(protein):
+#     # Generate FASTA string
+#     mol = Chem.MolFromPDBFile(protein)
+#     if not mol:
+#         print("Could not convert file to protein molecule")
+#         return None
+#     fasta = Chem.MolToFASTA(mol).splitlines()[1]
+#     # Generate protein embedding
+#     esm_model.eval()
+#     with torch.no_grad():
+#         inputs = esm_tokenizer(fasta, return_tensors="pt", padding=True, truncation=True, max_length=1024).to(device)
+#         outputs = esm_model(**inputs)
+#         embedding = outputs.last_hidden_state.mean(dim=1).cpu().numpy()  # Extract last layer and mean pooling
+#     return embedding
+def generate_protein_embedding(protein_input, input_type):
+    if input_type == "File":
+        mol = Chem.MolFromPDBFile(protein_input)
+        if not mol:
+            return None
+        fasta = Chem.MolToFASTA(mol).splitlines()[1]
+    else:
+        fasta = protein_input.strip()
     esm_model.eval()
     with torch.no_grad():
         inputs = esm_tokenizer(fasta, return_tensors="pt", padding=True, truncation=True, max_length=1024).to(device)
         outputs = esm_model(**inputs)
+        embedding = outputs.last_hidden_state.mean(dim=1).cpu().numpy()
     return embedding
+# def generate_ligand_embedding(ligand):
+#     # Generate SMILES string
+#     mol = Chem.MolFromMol2File(ligand)
+#     if not mol:
+#         print("Could not convert file to ligand molecule")
+#         return None
+#     smiles = Chem.MolToSmiles(mol)
+#     # Generate ligand embedding
+#     chemberta_model.eval()
+#     with torch.no_grad():
+#         inputs = chemberta_tokenizer(smiles, return_tensors="pt", padding=True, truncation=True).to(device)
+#         outputs = chemberta_model(**inputs)
+#         embedding = outputs.last_hidden_state.mean(dim=1).cpu().numpy()
+#     return embedding
+def generate_ligand_embedding(ligand_input, input_type):
+    if input_type == "File":
+        mol = Chem.MolFromMol2File(ligand_input)
+        if not mol:
+            return None
+        smiles = Chem.MolToSmiles(mol)
+    else:
+        smiles = ligand_input.strip()
     chemberta_model.eval()
     with torch.no_grad():
         inputs = chemberta_tokenizer(smiles, return_tensors="pt", padding=True, truncation=True).to(device)
         outputs = chemberta_model(**inputs)
+        embedding = outputs.last_hidden_state.mean(dim=1).cpu().numpy()
     return embedding
 def value_conversion(logKa):
     else:
         return f"{Kd * 1e12:.4f} pM"  # Picomolar
+# def predict_affinity(protein_file, ligand_file):
+#     protein = generate_protein_embedding(protein_file)
+#     ligand = generate_ligand_embedding(ligand_file)
+def predict_affinity(protein_input, protein_type, ligand_input, ligand_type):
+    protein = generate_protein_embedding(protein_input, protein_type)
+    ligand = generate_ligand_embedding(ligand_input, ligand_type)
     if protein is None:
         return "Unable to parse protein .pdb file"
     if ligand is None:
     affinity_value = value_conversion(log_prediction)
     return f"Predicted Binding Affinity:\nlogKa = {log_prediction:.4f}\nKd = {affinity_value}"
+# # Gradio interface
+# iface = gr.Interface(
+#     fn=predict_affinity,
+#     inputs=[gr.File(label="Protein .pdb file"), gr.File(label="Ligand .mol2 file")],
+#     outputs="text",
+#     title="Predict Protein-Ligand Binding Affinity",
+#     description="Upload the protein and ligand files to predict the binding affinity of the protein-ligand complex.",
+# )
+# # Run Gradio App
+# if __name__ == "__main__":
+#     iface.launch()
+def update_inputs(protein_type, ligand_type):
+    return (
+        gr.update(visible=(protein_type == "File")),  # Protein file input
+        gr.update(visible=(protein_type == "FASTA")),  # Protein FASTA input
+        gr.update(visible=(ligand_type == "File")),  # Ligand file input
+        gr.update(visible=(ligand_type == "SMILES"))  # Ligand SMILES input
+    )
+with gr.Blocks() as iface:
+    gr.Markdown("# Predict Protein-Ligand Binding Affinity")
+    gr.Markdown("Upload files or enter FASTA/SMILES strings to predict binding affinity.")
+    protein_type = gr.Radio(["File", "FASTA"], label="Protein Input Type", value="File")
+    protein_file = gr.File(label="Protein .pdb file", visible=True)
+    protein_fasta = gr.Textbox(label="Protein FASTA sequence", visible=False)
+    ligand_type = gr.Radio(["File", "SMILES"], label="Ligand Input Type", value="File")
+    ligand_file = gr.File(label="Ligand .mol2 file", visible=True)
+    ligand_smiles = gr.Textbox(label="Ligand SMILES string", visible=False)
+    output = gr.Textbox(label="Prediction Result", lines=3)
+    submit_btn = gr.Button("Predict")
+    submit_btn.click(
+        predict_affinity,
+        inputs=[protein_file, protein_type, ligand_file, ligand_type],
+        outputs=output
+    )
+    protein_type.change(update_inputs, inputs=[protein_type, ligand_type], outputs=[protein_file, protein_fasta, ligand_file, ligand_smiles])
+    ligand_type.change(update_inputs, inputs=[protein_type, ligand_type], outputs=[protein_file, protein_fasta, ligand_file, ligand_smiles])
+iface.launch()