Spaces:
Build error
Build error
Switching between different models + examples
Browse files
- app.py +37 -43
- models/DMS.pth +3 -0
- models/LGK-dupl.pth +3 -0
- models/TEM (bypos).pth +3 -0
- wrapper.py +31 -11
app.py
CHANGED
|
@@ -1,54 +1,32 @@
|
|
| 1 |
import gradio as gr
|
| 2 |
from Bio.PDB.PDBParser import PDBParser
|
| 3 |
-
from Bio.PDB.Polypeptide import is_aa, three_to_one
|
| 4 |
|
| 5 |
from wrapper import *
|
| 6 |
|
| 7 |
parser = PDBParser(PERMISSIVE=1)
|
| 8 |
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
value = value.upper()
|
| 12 |
-
if len(value) == 1:
|
| 13 |
-
try:
|
| 14 |
-
value = AA.one_to_three(value)
|
| 15 |
-
except Exception:
|
| 16 |
-
pass
|
| 17 |
-
if not AA.is_aa(value):
|
| 18 |
-
raise ValueError(f"'{value}' is not a valid amino acid")
|
| 19 |
-
return AA.three_to_one(value.upper())
|
| 20 |
-
|
| 21 |
-
def predict_solubility(pdb_code, chain, orig, loc, mut, version=None):
|
| 22 |
-
from code.predictor import EnsemblePredictor # expensive import left for after the argument check
|
| 23 |
try:
|
| 24 |
-
|
| 25 |
pdb_code, pdb_path = Type_PDB(pdb_code)
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
mut_list = parseList(mut,
|
| 29 |
|
| 30 |
if len(loc_list) != len(mut_list):
|
| 31 |
if len(mut_list) == 1:
|
| 32 |
mut_list *= len(loc_list)
|
| 33 |
else:
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
assessment, prediction = predictor.predict_change(pdb_path, chain, orig_list, loc_list, mut_list)
|
| 38 |
-
assessment_str = {'+': 'solubilizing', 'N': 'neutral', '-': 'desolubilizing'}[assessment]
|
| 39 |
-
|
| 40 |
-
# ternary gradient orange-black-blue
|
| 41 |
-
ORANGE = (255, 165, 0)
|
| 42 |
-
BLUESH = (100, 100, 255)
|
| 43 |
-
color = tuple((o*max(0, 1-prediction*2) + b*max(0, (2*prediction)**2-1) for o, b in zip(ORANGE, BLUESH)))
|
| 44 |
-
|
| 45 |
-
return f"Predicted solubility change: {prediction:.3f} <span style='color: rgb{color}'>({assessment_str})</span>"
|
| 46 |
except Exception as e:
|
| 47 |
return f"Error: {str(e)}"
|
| 48 |
|
| 49 |
_pdb_code = None
|
| 50 |
_chains = None
|
| 51 |
-
def validate_PDB(pdb_code):
|
| 52 |
global _pdb_code, _chains
|
| 53 |
|
| 54 |
if pdb_code != _pdb_code:
|
|
@@ -59,10 +37,14 @@ def validate_PDB(pdb_code):
|
|
| 59 |
_chains = [ch.id for ch in structure[0]]
|
| 60 |
return gr.update(choices=_chains, value=_chains[0])
|
| 61 |
except Exception as e:
|
| 62 |
-
# raise argparse.ArgumentTypeError(e)
|
| 63 |
raise gr.Error(str(e))
|
| 64 |
return gr.update(choices=_chains)
|
| 65 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 66 |
|
| 67 |
# --- Gradio Interface ---
|
| 68 |
with gr.Blocks(
|
|
@@ -82,7 +64,6 @@ with gr.Blocks(
|
|
| 82 |
placeholder="1EER",
|
| 83 |
max_length=4 # 12 # new PDB identifier has a shape of: pdb_00001abc https://proteopedia.org/w/PDB_code
|
| 84 |
)
|
| 85 |
-
# chain = gr.Radio(label="Chain", value="A", choices=["A", "B", "C"], max_length=1, max_lines=1, scale=0)
|
| 86 |
chain = gr.Radio(choices=[], label="Chain", scale=1)
|
| 87 |
|
| 88 |
with gr.Row():
|
|
@@ -90,21 +71,20 @@ with gr.Blocks(
|
|
| 90 |
orig = gr.Textbox(label="Wild-type residue(s)", placeholder="F,R", scale=0)
|
| 91 |
mut = gr.Textbox(label="Mutant residue(s)", placeholder="D[,A]", scale=0)
|
| 92 |
|
| 93 |
-
# with gr.
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
output = gr.HTML()
|
| 98 |
|
| 99 |
_pdb_code = None
|
| 100 |
-
pdb_code.blur(fn=validate_PDB, inputs=[pdb_code], outputs=chain)
|
| 101 |
-
pdb_code.submit(fn=validate_PDB, inputs=[pdb_code], outputs=chain)
|
| 102 |
|
| 103 |
predict_btn = gr.Button("Predict solubility effect", variant='primary', size='lg', scale=0)
|
| 104 |
# predict_btn.style(full_width=False)
|
| 105 |
dict_submit = {
|
| 106 |
'fn': predict_solubility,
|
| 107 |
-
'inputs': [pdb_code, chain, orig, loc, mut],
|
| 108 |
'outputs': [output]
|
| 109 |
}
|
| 110 |
# submit by entering in the text boxes or by the submit button
|
|
@@ -112,7 +92,21 @@ with gr.Blocks(
|
|
| 112 |
orig.submit(**dict_submit)
|
| 113 |
mut.submit(**dict_submit)
|
| 114 |
predict_btn.click(**dict_submit)
|
| 115 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 116 |
gr.Markdown(value="""
|
| 117 |
<br/>
|
| 118 |
|
|
@@ -120,6 +114,6 @@ with gr.Blocks(
|
|
| 120 |
> Velecký, J., Faldynová H., Hermosilla, P., Sandlerová, N., Dörr, M., Egersdorfová, S., Bornscheuer, U., Prokop, Z., Damborský, J., Mazurenko, S., 2025:
|
| 121 |
> SoluProtMut: Siamese Deep Learning for Solubility Effect Prediction in Protein Mutations and Experimental Validation.
|
| 122 |
> *In preparation.*
|
| 123 |
-
|
| 124 |
|
| 125 |
demo.launch()
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
from Bio.PDB.PDBParser import PDBParser
|
| 3 |
+
# from Bio.PDB.Polypeptide import is_aa, three_to_one
|
| 4 |
|
| 5 |
from wrapper import *
|
| 6 |
|
| 7 |
parser = PDBParser(PERMISSIVE=1)
|
| 8 |
|
| 9 |
+
def predict_solubility(pdb_code, chain, orig, loc, mut, model, version=None):
|
| 10 |
+
global MODELS
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
try:
|
|
|
|
| 12 |
pdb_code, pdb_path = Type_PDB(pdb_code)
|
| 13 |
+
loc_list = parseList(loc, Type_index)
|
| 14 |
+
orig_list = parseList(orig, Type_aminoAcid)
|
| 15 |
+
mut_list = parseList(mut, Type_aminoAcid)
|
| 16 |
|
| 17 |
if len(loc_list) != len(mut_list):
|
| 18 |
if len(mut_list) == 1:
|
| 19 |
mut_list *= len(loc_list)
|
| 20 |
else:
|
| 21 |
+
raise gr.Error("Inconsistent multi-point mutant specification")
|
| 22 |
+
|
| 23 |
+
return predict(pdb_path, chain, orig_list, loc_list, mut_list, weights=MODELS[model], rich_output=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
except Exception as e:
|
| 25 |
return f"Error: {str(e)}"
|
| 26 |
|
| 27 |
_pdb_code = None
|
| 28 |
_chains = None
|
| 29 |
+
def get_chains(pdb_code): #
|
| 30 |
global _pdb_code, _chains
|
| 31 |
|
| 32 |
if pdb_code != _pdb_code:
|
|
|
|
| 37 |
_chains = [ch.id for ch in structure[0]]
|
| 38 |
return gr.update(choices=_chains, value=_chains[0])
|
| 39 |
except Exception as e:
|
|
|
|
| 40 |
raise gr.Error(str(e))
|
| 41 |
return gr.update(choices=_chains)
|
| 42 |
|
| 43 |
+
from pathlib import Path
|
| 44 |
+
MODELS = list((Path(__file__).parent / "models").iterdir())
|
| 45 |
+
|
| 46 |
+
|
| 47 |
+
|
| 48 |
|
| 49 |
# --- Gradio Interface ---
|
| 50 |
with gr.Blocks(
|
|
|
|
| 64 |
placeholder="1EER",
|
| 65 |
max_length=4 # 12 # new PDB identifier has a shape of: pdb_00001abc https://proteopedia.org/w/PDB_code
|
| 66 |
)
|
|
|
|
| 67 |
chain = gr.Radio(choices=[], label="Chain", scale=1)
|
| 68 |
|
| 69 |
with gr.Row():
|
|
|
|
| 71 |
orig = gr.Textbox(label="Wild-type residue(s)", placeholder="F,R", scale=0)
|
| 72 |
mut = gr.Textbox(label="Mutant residue(s)", placeholder="D[,A]", scale=0)
|
| 73 |
|
| 74 |
+
# with gr.Accordion("Model selection"):
|
| 75 |
+
model = gr.Radio(choices=[m.stem for m in MODELS], label="Model selection", type="index", value=MODELS[0].stem)
|
| 76 |
+
|
|
|
|
| 77 |
output = gr.HTML()
|
| 78 |
|
| 79 |
_pdb_code = None
|
| 80 |
+
pdb_code.blur(fn=get_chains, inputs=[pdb_code], outputs=chain)
|
| 81 |
+
pdb_code.submit(fn=get_chains, inputs=[pdb_code], outputs=chain)
|
| 82 |
|
| 83 |
predict_btn = gr.Button("Predict solubility effect", variant='primary', size='lg', scale=0)
|
| 84 |
# predict_btn.style(full_width=False)
|
| 85 |
dict_submit = {
|
| 86 |
'fn': predict_solubility,
|
| 87 |
+
'inputs': [pdb_code, chain, orig, loc, mut, model],
|
| 88 |
'outputs': [output]
|
| 89 |
}
|
| 90 |
# submit by entering in the text boxes or by the submit button
|
|
|
|
| 92 |
orig.submit(**dict_submit)
|
| 93 |
mut.submit(**dict_submit)
|
| 94 |
predict_btn.click(**dict_submit)
|
| 95 |
+
|
| 96 |
+
examples = gr.Examples(
|
| 97 |
+
examples=[
|
| 98 |
+
["1EER", "48,150", "F,R", "D"],
|
| 99 |
+
["1EER", "13", "E", "K"],
|
| 100 |
+
# ["3QIB", "A,B,P,C,D", "YP7F,TP12S;YP7F;TP12S"],
|
| 101 |
+
# ["1KNE", "A,P", ';'.join([f"TP6{a}" for a in AMINO_ACID_CODES_1])]
|
| 102 |
+
],
|
| 103 |
+
# example_labels = ["1EER F48D,R150D"],
|
| 104 |
+
inputs=[pdb_code, loc, orig, mut],
|
| 105 |
+
label="Examples (click on a line to pre-fill the inputs)",
|
| 106 |
+
cache_examples=False
|
| 107 |
+
)
|
| 108 |
+
examples.load_input_event.then(fn=get_chains, inputs=pdb_code, outputs=chain)
|
| 109 |
+
|
| 110 |
gr.Markdown(value="""
|
| 111 |
<br/>
|
| 112 |
|
|
|
|
| 114 |
> Velecký, J., Faldynová H., Hermosilla, P., Sandlerová, N., Dörr, M., Egersdorfová, S., Bornscheuer, U., Prokop, Z., Damborský, J., Mazurenko, S., 2025:
|
| 115 |
> SoluProtMut: Siamese Deep Learning for Solubility Effect Prediction in Protein Mutations and Experimental Validation.
|
| 116 |
> *In preparation.*
|
| 117 |
+
""")
|
| 118 |
|
| 119 |
demo.launch()
|
models/DMS.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:318cb2e71583b8791f55dc7714baf08700934b8f3926d1a1b18d5b8dc10f11a1
|
| 3 |
+
size 53058100
|
models/LGK-dupl.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a961ce6fcce281000b8b81bcbfc92b8cfacd986717f5f852f07307783f8e1f37
|
| 3 |
+
size 53058100
|
models/TEM (bypos).pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0278374bfd523e2ee1f313a4b9a6719f2876131f1cf0e96154845c02ea57c67e
|
| 3 |
+
size 53058100
|
wrapper.py
CHANGED
|
@@ -2,17 +2,18 @@
|
|
| 2 |
#---------------------------------------------------------------------
|
| 3 |
#--- Predictor of a protein solubility change given a mutation ---
|
| 4 |
#--- by Jan Velecky velda@mail.muni.cz ---
|
| 5 |
-
#--- Loschmidt Laboratories, 2023-
|
| 6 |
#--- example use: python3 wrapper.py -h ---
|
| 7 |
#---------------------------------------------------------------------
|
| 8 |
import argparse
|
| 9 |
from functools import partial
|
| 10 |
|
| 11 |
import Bio.PDB.Polypeptide as AA
|
|
|
|
| 12 |
from code.data_preprocessing import get_PDB
|
| 13 |
|
| 14 |
# ---------------------------------------- DATA TYPES FOR ARGPARSE ----------------------------------------
|
| 15 |
-
def
|
| 16 |
try:
|
| 17 |
if not value[-1].isdigit(): # insertion code
|
| 18 |
ivalue = int(value[:-1])
|
|
@@ -52,7 +53,6 @@ def Type_PDB(pdb_code):
|
|
| 52 |
raise argparse.ArgumentTypeError(e)
|
| 53 |
return pdb_code, pdb_path
|
| 54 |
|
| 55 |
-
|
| 56 |
# ----------------------------------------------END OF DATA TYPES ------------------------------------------
|
| 57 |
|
| 58 |
argParser = argparse.ArgumentParser(add_help = True,
|
|
@@ -91,7 +91,7 @@ argParser.add_argument(
|
|
| 91 |
argParser.add_argument(
|
| 92 |
'loc',
|
| 93 |
metavar='location',
|
| 94 |
-
type=Type_listOf(
|
| 95 |
help="mutated position(s) integer[n]"
|
| 96 |
)
|
| 97 |
argParser.add_argument(
|
|
@@ -108,6 +108,32 @@ argParser.add_argument(
|
|
| 108 |
|
| 109 |
argParser.add_argument('--ver', default=None, help=argparse.SUPPRESS)
|
| 110 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 111 |
|
| 112 |
|
| 113 |
|
|
@@ -131,11 +157,5 @@ if __name__ == '__main__':
|
|
| 131 |
# modeling.VERBOSE_LEVEL = modeling.VERBOSE_VERBOSE
|
| 132 |
|
| 133 |
# ------------------------------------- PREPROCESSING & INFERENCE --------------------------------------
|
| 134 |
-
from code.predictor import EnsemblePredictor # expensive import left for after the argument check
|
| 135 |
-
|
| 136 |
-
pred_model = EnsemblePredictor(version=args.ver)
|
| 137 |
-
|
| 138 |
-
assesment, prediction = pred_model.predict_change(pdb_path, chain, args.orig, args.loc, args.mut)
|
| 139 |
-
assesment = {'+': 'solubilizing', 'N': 'neutral', '-': 'desolubilizing'}[assesment]
|
| 140 |
print()
|
| 141 |
-
print(
|
|
|
|
| 2 |
#---------------------------------------------------------------------
|
| 3 |
#--- Predictor of a protein solubility change given a mutation ---
|
| 4 |
#--- by Jan Velecky velda@mail.muni.cz ---
|
| 5 |
+
#--- Loschmidt Laboratories, 2023-25 ---
|
| 6 |
#--- example use: python3 wrapper.py -h ---
|
| 7 |
#---------------------------------------------------------------------
|
| 8 |
import argparse
|
| 9 |
from functools import partial
|
| 10 |
|
| 11 |
import Bio.PDB.Polypeptide as AA
|
| 12 |
+
|
| 13 |
from code.data_preprocessing import get_PDB
|
| 14 |
|
| 15 |
# ---------------------------------------- DATA TYPES FOR ARGPARSE ----------------------------------------
|
| 16 |
+
def Type_index(value): # test 1UCY 1A
|
| 17 |
try:
|
| 18 |
if not value[-1].isdigit(): # insertion code
|
| 19 |
ivalue = int(value[:-1])
|
|
|
|
| 53 |
raise argparse.ArgumentTypeError(e)
|
| 54 |
return pdb_code, pdb_path
|
| 55 |
|
|
|
|
| 56 |
# ----------------------------------------------END OF DATA TYPES ------------------------------------------
|
| 57 |
|
| 58 |
argParser = argparse.ArgumentParser(add_help = True,
|
|
|
|
| 91 |
argParser.add_argument(
|
| 92 |
'loc',
|
| 93 |
metavar='location',
|
| 94 |
+
type=Type_listOf(Type_index),
|
| 95 |
help="mutated position(s) integer[n]"
|
| 96 |
)
|
| 97 |
argParser.add_argument(
|
|
|
|
| 108 |
|
| 109 |
argParser.add_argument('--ver', default=None, help=argparse.SUPPRESS)
|
| 110 |
|
| 111 |
+
# ---------------------------------------------- DATA PREDICTION ------------------------------------------
|
| 112 |
+
|
| 113 |
+
def check_input(args):
|
| 114 |
+
pass
|
| 115 |
+
|
| 116 |
+
def predict(pdb_path, chain, orig, loc, mut, weights=None, version=None, rich_output=False):
|
| 117 |
+
from code.predictor import EnsemblePredictor # expensive import left for after the argument check
|
| 118 |
+
pred_model = EnsemblePredictor(weights=weights, version=version)
|
| 119 |
+
|
| 120 |
+
assesment, prediction = pred_model.predict_change(pdb_path, chain, orig, loc, mut)
|
| 121 |
+
assesment = {'+': 'solubilizing', 'N': 'neutral', '-': 'desolubilizing'}[assesment]
|
| 122 |
+
|
| 123 |
+
print(weights)
|
| 124 |
+
|
| 125 |
+
if rich_output:
|
| 126 |
+
# ternary gradient orange-black-blue
|
| 127 |
+
ORANGE = (255, 165, 0)
|
| 128 |
+
BLUESH = (100, 100, 255)
|
| 129 |
+
color = tuple((o*max(0, 1-prediction*2) + b*max(0, (2*prediction)**2-1) for o, b in zip(ORANGE, BLUESH)))
|
| 130 |
+
|
| 131 |
+
return f"Predicted solubility change: {prediction:.3f} <span style='color: rgb{color}'>({assesment})</span>"
|
| 132 |
+
else:
|
| 133 |
+
return "Predicted solubility change: %g (%s)" % (prediction, assesment)
|
| 134 |
+
|
| 135 |
+
|
| 136 |
+
|
| 137 |
|
| 138 |
|
| 139 |
|
|
|
|
| 157 |
# modeling.VERBOSE_LEVEL = modeling.VERBOSE_VERBOSE
|
| 158 |
|
| 159 |
# ------------------------------------- PREPROCESSING & INFERENCE --------------------------------------
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 160 |
print()
|
| 161 |
+
print(predict(pdb_path, chain, args.orig, args.loc, args.mut))
|