nninva commited on
Commit
ea594ff
·
verified ·
1 Parent(s): fbf1ca3

first commit

Files changed (1) hide show
  1. app.py +95 -0
app.py ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import gradio as gr
import joblib
import torch
import numpy as np
from transformers import AutoTokenizer, AutoModel, EsmTokenizer, EsmModel
from rdkit import Chem

# Run on GPU when available; all models and tokenized inputs are moved here.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the pretrained models.
# ESM-2 (35M-parameter checkpoint) produces the protein embeddings.
esm = "facebook/esm2_t12_35M_UR50D" # generate protein embeddings
esm_tokenizer = EsmTokenizer.from_pretrained(esm)
esm_model = EsmModel.from_pretrained(esm).to(device)
# ChemBERTa produces the ligand (SMILES) embeddings.
chemberta = "DeepChem/ChemBERTa-10M-MTR" # generate ligand embeddings
chemberta_tokenizer = AutoTokenizer.from_pretrained(chemberta)
chemberta_model = AutoModel.from_pretrained(chemberta).to(device)
# Pre-fitted preprocessing + regressor loaded from local pickle files:
# scaler -> PCA -> SVR, applied in that order in predict_affinity.
scaler = joblib.load("scaler.pkl")
pca = joblib.load("pca.pkl")
svr = joblib.load("svr_model.pkl")
20
+
21
def generate_protein_embedding(protein):
    """Embed a protein .pdb file as a mean-pooled ESM-2 vector.

    Parameters
    ----------
    protein : str
        Path to a PDB file readable by RDKit.

    Returns
    -------
    numpy.ndarray or None
        A (1, hidden_size) array (mean over sequence positions of the
        last hidden layer), or None when the file cannot be parsed.
    """
    # Parse the structure and recover its sequence as FASTA text.
    structure = Chem.MolFromPDBFile(protein)
    if not structure:
        print("Could not convert file to protein molecule")
        return None
    # MolToFASTA emits a ">" header on the first line; the sequence
    # itself is on the second line.
    sequence = Chem.MolToFASTA(structure).splitlines()[1]

    # Tokenize (sequences longer than 1024 tokens are truncated) and
    # run a single forward pass without gradient tracking.
    esm_model.eval()
    tokens = esm_tokenizer(
        sequence, return_tensors="pt", padding=True, truncation=True, max_length=1024
    ).to(device)
    with torch.no_grad():
        hidden = esm_model(**tokens).last_hidden_state
    # Mean-pool over the sequence dimension and hand back a NumPy array.
    return hidden.mean(dim=1).cpu().numpy()
36
+
37
def generate_ligand_embedding(ligand):
    """Embed a ligand .mol2 file as a mean-pooled ChemBERTa vector.

    Parameters
    ----------
    ligand : str
        Path to a Mol2 file readable by RDKit.

    Returns
    -------
    numpy.ndarray or None
        A (1, hidden_size) array (mean over token positions of the
        last hidden layer), or None when the file cannot be parsed.
    """
    # Parse the molecule and serialize it to canonical SMILES.
    molecule = Chem.MolFromMol2File(ligand)
    if not molecule:
        print("Could not convert file to ligand molecule")
        return None
    smiles = Chem.MolToSmiles(molecule)

    # Tokenize the SMILES string and run one gradient-free forward pass.
    chemberta_model.eval()
    tokens = chemberta_tokenizer(
        smiles, return_tensors="pt", padding=True, truncation=True
    ).to(device)
    with torch.no_grad():
        hidden = chemberta_model(**tokens).last_hidden_state
    # Mean-pool over the token dimension and hand back a NumPy array.
    return hidden.mean(dim=1).cpu().numpy()
52
+
53
def value_conversion(logKa):
    """Convert a predicted logKa into a human-readable Kd string.

    Kd = 10 ** (-logKa); the result is rendered with four decimal
    places in the largest unit (mM, µM, nM, pM) that keeps the
    number >= 1 where possible.

    Parameters
    ----------
    logKa : float
        Predicted log10 association constant.

    Returns
    -------
    str
        Formatted dissociation constant, e.g. "31.6228 nM".
    """
    # Association -> dissociation constant: logKd = -logKa.
    Kd = 10 ** (-logKa)

    # Guard-clause ladder from the coarsest unit down to picomolar.
    if Kd >= 1e-3:
        return f"{Kd * 1e3:.4f} mM"  # Millimolar
    if Kd >= 1e-6:
        return f"{Kd * 1e6:.4f} µM"  # Micromolar
    if Kd >= 1e-9:
        return f"{Kd * 1e9:.4f} nM"  # Nanomolar
    return f"{Kd * 1e12:.4f} pM"  # Picomolar
65
+
66
def predict_affinity(protein_file, ligand_file):
    """Predict the binding affinity of a protein-ligand complex.

    Pipeline: embed both inputs, concatenate the embeddings, apply the
    pre-fitted scaler and PCA, then predict logKa with the SVR model.

    Parameters
    ----------
    protein_file : str
        Path to the protein .pdb file.
    ligand_file : str
        Path to the ligand .mol2 file.

    Returns
    -------
    str
        A multi-line result with logKa and the converted Kd, or an
        error message when either input cannot be parsed.
    """
    # Short-circuit on a bad protein before spending time embedding the ligand.
    protein = generate_protein_embedding(protein_file)
    if protein is None:
        return "Unable to parse protein .pdb file"
    ligand = generate_ligand_embedding(ligand_file)
    if ligand is None:
        # BUG FIX: previously said ".pdb file" — the ligand input is a .mol2 file.
        return "Unable to parse ligand .mol2 file"

    # Both embeddings are (1, hidden); concatenate along the feature axis.
    embedding = np.concatenate((protein, ligand), axis=1)

    # Apply scaling and PCA in the same order they were fitted.
    svr_input = scaler.transform(embedding)
    svr_input = pca.transform(svr_input)

    # Predict the log binding affinity and render it with a Kd conversion.
    log_prediction = svr.predict(svr_input)[0]
    affinity_value = value_conversion(log_prediction)
    return f"Predicted Binding Affinity:\nlogKa = {log_prediction:.4f}\nKd = {affinity_value}"
83
+
84
# Gradio interface: two file uploads in, one text result out.
iface = gr.Interface(
    fn=predict_affinity,
    inputs=[gr.File(label="Protein .pdb file"), gr.File(label="Ligand .mol2 file")],
    outputs="text",
    title="Predict Protein-Ligand Binding Affinity",
    description="Upload the protein and ligand files to predict the binding affinity of the protein-ligand complex.",
)

# Run Gradio App only when executed as a script (not on import).
if __name__ == "__main__":
    iface.launch()