vvelda committed on
Commit
7eb3224
·
verified ·
1 Parent(s): 3068eb3

Switching between different models + examples

Browse files
Files changed (5) hide show
  1. app.py +37 -43
  2. models/DMS.pth +3 -0
  3. models/LGK-dupl.pth +3 -0
  4. models/TEM (bypos).pth +3 -0
  5. wrapper.py +31 -11
app.py CHANGED
@@ -1,54 +1,32 @@
1
  import gradio as gr
2
  from Bio.PDB.PDBParser import PDBParser
3
- from Bio.PDB.Polypeptide import is_aa, three_to_one
4
 
5
  from wrapper import *
6
 
7
  parser = PDBParser(PERMISSIVE=1)
8
 
9
- # --- Helper functions from wrapper.py ---
10
- def parse_amino_acid(value):
11
- value = value.upper()
12
- if len(value) == 1:
13
- try:
14
- value = AA.one_to_three(value)
15
- except Exception:
16
- pass
17
- if not AA.is_aa(value):
18
- raise ValueError(f"'{value}' is not a valid amino acid")
19
- return AA.three_to_one(value.upper())
20
-
21
- def predict_solubility(pdb_code, chain, orig, loc, mut, version=None):
22
- from code.predictor import EnsemblePredictor # expensive import left for after the argument check
23
  try:
24
-
25
  pdb_code, pdb_path = Type_PDB(pdb_code)
26
- orig_list = parseList(orig, parse_amino_acid)
27
- loc_list = parseList(loc, Type_Index)
28
- mut_list = parseList(mut, parse_amino_acid)
29
 
30
  if len(loc_list) != len(mut_list):
31
  if len(mut_list) == 1:
32
  mut_list *= len(loc_list)
33
  else:
34
- return "Error: Inconsistent multi-point mutant specification"
35
-
36
- predictor = EnsemblePredictor(version=version)
37
- assessment, prediction = predictor.predict_change(pdb_path, chain, orig_list, loc_list, mut_list)
38
- assessment_str = {'+': 'solubilizing', 'N': 'neutral', '-': 'desolubilizing'}[assessment]
39
-
40
- # ternary gradient orange-black-blue
41
- ORANGE = (255, 165, 0)
42
- BLUESH = (100, 100, 255)
43
- color = tuple((o*max(0, 1-prediction*2) + b*max(0, (2*prediction)**2-1) for o, b in zip(ORANGE, BLUESH)))
44
-
45
- return f"Predicted solubility change: {prediction:.3f} <span style='color: rgb{color}'>({assessment_str})</span>"
46
  except Exception as e:
47
  return f"Error: {str(e)}"
48
 
49
  _pdb_code = None
50
  _chains = None
51
- def validate_PDB(pdb_code):
52
  global _pdb_code, _chains
53
 
54
  if pdb_code != _pdb_code:
@@ -59,10 +37,14 @@ def validate_PDB(pdb_code):
59
  _chains = [ch.id for ch in structure[0]]
60
  return gr.update(choices=_chains, value=_chains[0])
61
  except Exception as e:
62
- # raise argparse.ArgumentTypeError(e)
63
  raise gr.Error(str(e))
64
  return gr.update(choices=_chains)
65
 
 
 
 
 
 
66
 
67
  # --- Gradio Interface ---
68
  with gr.Blocks(
@@ -82,7 +64,6 @@ with gr.Blocks(
82
  placeholder="1EER",
83
  max_length=4 # 12 # new PDB identifier has a shape of: pdb_00001abc https://proteopedia.org/w/PDB_code
84
  )
85
- # chain = gr.Radio(label="Chain", value="A", choices=["A", "B", "C"], max_length=1, max_lines=1, scale=0)
86
  chain = gr.Radio(choices=[], label="Chain", scale=1)
87
 
88
  with gr.Row():
@@ -90,21 +71,20 @@ with gr.Blocks(
90
  orig = gr.Textbox(label="Wild-type residue(s)", placeholder="F,R", scale=0)
91
  mut = gr.Textbox(label="Mutant residue(s)", placeholder="D[,A]", scale=0)
92
 
93
- # with gr.Row():
94
- # verbose = gr.Checkbox(label="Verbose Output")
95
- # version = gr.Textbox(label="Model Version (optional)", placeholder="v1.0")
96
-
97
  output = gr.HTML()
98
 
99
  _pdb_code = None
100
- pdb_code.blur(fn=validate_PDB, inputs=[pdb_code], outputs=chain)
101
- pdb_code.submit(fn=validate_PDB, inputs=[pdb_code], outputs=chain)
102
 
103
  predict_btn = gr.Button("Predict solubility effect", variant='primary', size='lg', scale=0)
104
  # predict_btn.style(full_width=False)
105
  dict_submit = {
106
  'fn': predict_solubility,
107
- 'inputs': [pdb_code, chain, orig, loc, mut],
108
  'outputs': [output]
109
  }
110
  # submit by entering in the text boxes or by the submit button
@@ -112,7 +92,21 @@ with gr.Blocks(
112
  orig.submit(**dict_submit)
113
  mut.submit(**dict_submit)
114
  predict_btn.click(**dict_submit)
115
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
116
  gr.Markdown(value="""
117
  <br/>
118
 
@@ -120,6 +114,6 @@ with gr.Blocks(
120
  > Velecký, J., Faldynová H., Hermosilla, P., Sandlerová, N., Dörr, M., Egersdorfová, S., Bornscheuer, U., Prokop, Z., Damborský, J., Mazurenko, S., 2025:
121
  > SoluProtMut: Siamese Deep Learning for Solubility Effect Prediction in Protein Mutations and Experimental Validation.
122
  > *In preparation.*
123
- """)
124
 
125
  demo.launch()
 
1
  import gradio as gr
2
  from Bio.PDB.PDBParser import PDBParser
3
+ # from Bio.PDB.Polypeptide import is_aa, three_to_one
4
 
5
  from wrapper import *
6
 
7
  parser = PDBParser(PERMISSIVE=1)
8
 
9
def predict_solubility(pdb_code, chain, orig, loc, mut, model, version=None):
    """Gradio callback: parse the form fields and return the prediction as HTML.

    Args:
        pdb_code: PDB identifier typed by the user; resolved by ``Type_PDB``
            into a ``(pdb_code, pdb_path)`` pair.
        chain: chain identifier, passed to ``predict`` unchanged.
        orig: wild-type residue(s) as text, parsed with ``Type_aminoAcid``.
        loc: mutated position(s) as text, parsed with ``Type_index``.
        mut: mutant residue(s) as text, parsed with ``Type_aminoAcid``.
            A single mutant residue is repeated for every position.
        model: index into the module-level ``MODELS`` list (the model radio
            is created with ``type="index"``).
        version: optional model version, forwarded to ``predict``.

    Returns:
        The string produced by ``predict(..., rich_output=True)``, or
        ``"Error: <message>"`` when parsing or prediction fails.

    Raises:
        gr.Error: when the number of mutant residues is neither one nor equal
            to the number of positions.
    """
    try:
        pdb_code, pdb_path = Type_PDB(pdb_code)
        loc_list = parseList(loc, Type_index)
        orig_list = parseList(orig, Type_aminoAcid)
        mut_list = parseList(mut, Type_aminoAcid)

        if len(loc_list) != len(mut_list):
            if len(mut_list) != 1:
                raise gr.Error("Inconsistent multi-point mutant specification")
            # one mutant residue given for several positions: apply it to all
            mut_list *= len(loc_list)

        return predict(
            pdb_path, chain, orig_list, loc_list, mut_list,
            weights=MODELS[model], version=version, rich_output=True,
        )
    except gr.Error:
        # gr.Error is itself an Exception; without this clause the handler
        # below would swallow it and Gradio would never display it.
        raise
    except Exception as e:
        return f"Error: {str(e)}"
26
 
27
  _pdb_code = None
28
  _chains = None
29
+ def get_chains(pdb_code): #
30
  global _pdb_code, _chains
31
 
32
  if pdb_code != _pdb_code:
 
37
  _chains = [ch.id for ch in structure[0]]
38
  return gr.update(choices=_chains, value=_chains[0])
39
  except Exception as e:
 
40
  raise gr.Error(str(e))
41
  return gr.update(choices=_chains)
42
 
43
from pathlib import Path

# Model checkpoints offered in the UI. iterdir() yields entries in arbitrary
# order, so the list is sorted to keep the radio order and the default (first)
# model stable between runs; anything that is not a *.pth file is ignored.
MODELS = sorted(
    p for p in (Path(__file__).parent / "models").iterdir()
    if p.suffix == ".pth"
)
45
+
46
+
47
+
48
 
49
  # --- Gradio Interface ---
50
  with gr.Blocks(
 
64
  placeholder="1EER",
65
  max_length=4 # 12 # new PDB identifier has a shape of: pdb_00001abc https://proteopedia.org/w/PDB_code
66
  )
 
67
  chain = gr.Radio(choices=[], label="Chain", scale=1)
68
 
69
  with gr.Row():
 
71
  orig = gr.Textbox(label="Wild-type residue(s)", placeholder="F,R", scale=0)
72
  mut = gr.Textbox(label="Mutant residue(s)", placeholder="D[,A]", scale=0)
73
 
74
+ # with gr.Accordion("Model selection"):
75
+ model = gr.Radio(choices=[m.stem for m in MODELS], label="Model selection", type="index", value=MODELS[0].stem)
76
+
 
77
  output = gr.HTML()
78
 
79
  _pdb_code = None
80
+ pdb_code.blur(fn=get_chains, inputs=[pdb_code], outputs=chain)
81
+ pdb_code.submit(fn=get_chains, inputs=[pdb_code], outputs=chain)
82
 
83
  predict_btn = gr.Button("Predict solubility effect", variant='primary', size='lg', scale=0)
84
  # predict_btn.style(full_width=False)
85
  dict_submit = {
86
  'fn': predict_solubility,
87
+ 'inputs': [pdb_code, chain, orig, loc, mut, model],
88
  'outputs': [output]
89
  }
90
  # submit by entering in the text boxes or by the submit button
 
92
  orig.submit(**dict_submit)
93
  mut.submit(**dict_submit)
94
  predict_btn.click(**dict_submit)
95
+
96
+ examples = gr.Examples(
97
+ examples=[
98
+ ["1EER", "48,150", "F,R", "D"],
99
+ ["1EER", "13", "E", "K"],
100
+ # ["3QIB", "A,B,P,C,D", "YP7F,TP12S;YP7F;TP12S"],
101
+ # ["1KNE", "A,P", ';'.join([f"TP6{a}" for a in AMINO_ACID_CODES_1])]
102
+ ],
103
+ # example_labels = ["1EER F48D,R150D"],
104
+ inputs=[pdb_code, loc, orig, mut],
105
+ label="Examples (click on a line to pre-fill the inputs)",
106
+ cache_examples=False
107
+ )
108
+ examples.load_input_event.then(fn=get_chains, inputs=pdb_code, outputs=chain)
109
+
110
  gr.Markdown(value="""
111
  <br/>
112
 
 
114
  > Velecký, J., Faldynová H., Hermosilla, P., Sandlerová, N., Dörr, M., Egersdorfová, S., Bornscheuer, U., Prokop, Z., Damborský, J., Mazurenko, S., 2025:
115
  > SoluProtMut: Siamese Deep Learning for Solubility Effect Prediction in Protein Mutations and Experimental Validation.
116
  > *In preparation.*
117
+ """)
118
 
119
  demo.launch()
models/DMS.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:318cb2e71583b8791f55dc7714baf08700934b8f3926d1a1b18d5b8dc10f11a1
3
+ size 53058100
models/LGK-dupl.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a961ce6fcce281000b8b81bcbfc92b8cfacd986717f5f852f07307783f8e1f37
3
+ size 53058100
models/TEM (bypos).pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0278374bfd523e2ee1f313a4b9a6719f2876131f1cf0e96154845c02ea57c67e
3
+ size 53058100
wrapper.py CHANGED
@@ -2,17 +2,18 @@
2
  #---------------------------------------------------------------------
3
  #--- Predictor of a protein solubility change given a mutation ---
4
  #--- by Jan Velecky velda@mail.muni.cz ---
5
- #--- Loschmidt Laboratories, 2023-24 ---
6
  #--- example use: python3 wrapper.py -h ---
7
  #---------------------------------------------------------------------
8
  import argparse
9
  from functools import partial
10
 
11
  import Bio.PDB.Polypeptide as AA
 
12
  from code.data_preprocessing import get_PDB
13
 
14
  # ---------------------------------------- DATA TYPES FOR ARGPARSE ----------------------------------------
15
- def Type_Index(value):
16
  try:
17
  if not value[-1].isdigit(): # insertion code
18
  ivalue = int(value[:-1])
@@ -52,7 +53,6 @@ def Type_PDB(pdb_code):
52
  raise argparse.ArgumentTypeError(e)
53
  return pdb_code, pdb_path
54
 
55
-
56
  # ----------------------------------------------END OF DATA TYPES ------------------------------------------
57
 
58
  argParser = argparse.ArgumentParser(add_help = True,
@@ -91,7 +91,7 @@ argParser.add_argument(
91
  argParser.add_argument(
92
  'loc',
93
  metavar='location',
94
- type=Type_listOf(int),
95
  help="mutated position(s) integer[n]"
96
  )
97
  argParser.add_argument(
@@ -108,6 +108,32 @@ argParser.add_argument(
108
 
109
  argParser.add_argument('--ver', default=None, help=argparse.SUPPRESS)
110
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
111
 
112
 
113
 
@@ -131,11 +157,5 @@ if __name__ == '__main__':
131
  # modeling.VERBOSE_LEVEL = modeling.VERBOSE_VERBOSE
132
 
133
  # ------------------------------------- PREPROCESSING & INFERENCE --------------------------------------
134
- from code.predictor import EnsemblePredictor # expensive import left for after the argument check
135
-
136
- pred_model = EnsemblePredictor(version=args.ver)
137
-
138
- assesment, prediction = pred_model.predict_change(pdb_path, chain, args.orig, args.loc, args.mut)
139
- assesment = {'+': 'solubilizing', 'N': 'neutral', '-': 'desolubilizing'}[assesment]
140
  print()
141
- print("Predicted solubility change: %g (%s)" % (prediction, assesment))
 
2
  #---------------------------------------------------------------------
3
  #--- Predictor of a protein solubility change given a mutation ---
4
  #--- by Jan Velecky velda@mail.muni.cz ---
5
+ #--- Loschmidt Laboratories, 2023-25 ---
6
  #--- example use: python3 wrapper.py -h ---
7
  #---------------------------------------------------------------------
8
  import argparse
9
  from functools import partial
10
 
11
  import Bio.PDB.Polypeptide as AA
12
+
13
  from code.data_preprocessing import get_PDB
14
 
15
  # ---------------------------------------- DATA TYPES FOR ARGPARSE ----------------------------------------
16
+ def Type_index(value): # test 1UCY 1A
17
  try:
18
  if not value[-1].isdigit(): # insertion code
19
  ivalue = int(value[:-1])
 
53
  raise argparse.ArgumentTypeError(e)
54
  return pdb_code, pdb_path
55
 
 
56
  # ----------------------------------------------END OF DATA TYPES ------------------------------------------
57
 
58
  argParser = argparse.ArgumentParser(add_help = True,
 
91
  argParser.add_argument(
92
  'loc',
93
  metavar='location',
94
+ type=Type_listOf(Type_index),
95
  help="mutated position(s) integer[n]"
96
  )
97
  argParser.add_argument(
 
108
 
109
  argParser.add_argument('--ver', default=None, help=argparse.SUPPRESS)
110
 
111
+ # ---------------------------------------------- DATA PREDICTION ------------------------------------------
112
+
113
def check_input(args):
    """Placeholder for validating the parsed arguments; currently does nothing."""
    pass
115
+
116
+ def predict(pdb_path, chain, orig, loc, mut, weights=None, version=None, rich_output=False):
117
+ from code.predictor import EnsemblePredictor # expensive import left for after the argument check
118
+ pred_model = EnsemblePredictor(weights=weights, version=version)
119
+
120
+ assesment, prediction = pred_model.predict_change(pdb_path, chain, orig, loc, mut)
121
+ assesment = {'+': 'solubilizing', 'N': 'neutral', '-': 'desolubilizing'}[assesment]
122
+
123
+ print(weights)
124
+
125
+ if rich_output:
126
+ # ternary gradient orange-black-blue
127
+ ORANGE = (255, 165, 0)
128
+ BLUESH = (100, 100, 255)
129
+ color = tuple((o*max(0, 1-prediction*2) + b*max(0, (2*prediction)**2-1) for o, b in zip(ORANGE, BLUESH)))
130
+
131
+ return f"Predicted solubility change: {prediction:.3f} <span style='color: rgb{color}'>({assesment})</span>"
132
+ else:
133
+ return "Predicted solubility change: %g (%s)" % (prediction, assesment)
134
+
135
+
136
+
137
 
138
 
139
 
 
157
  # modeling.VERBOSE_LEVEL = modeling.VERBOSE_VERBOSE
158
 
159
  # ------------------------------------- PREPROCESSING & INFERENCE --------------------------------------
 
 
 
 
 
 
160
  print()
161
+ print(predict(pdb_path, chain, args.orig, args.loc, args.mut))