vvelda committed on
Commit
eb602a3
·
verified ·
1 Parent(s): 7eb3224

Improvement of the input arguments check

Browse files
Files changed (2) hide show
  1. app.py +7 -11
  2. wrapper.py +21 -9
app.py CHANGED
@@ -1,6 +1,5 @@
1
  import gradio as gr
2
  from Bio.PDB.PDBParser import PDBParser
3
- # from Bio.PDB.Polypeptide import is_aa, three_to_one
4
 
5
  from wrapper import *
6
 
@@ -14,11 +13,7 @@ def predict_solubility(pdb_code, chain, orig, loc, mut, model, version=None):
14
  orig_list = parseList(orig, Type_aminoAcid)
15
  mut_list = parseList(mut, Type_aminoAcid)
16
 
17
- if len(loc_list) != len(mut_list):
18
- if len(mut_list) == 1:
19
- mut_list *= len(loc_list)
20
- else:
21
- raise gr.Error("Inconsistent multi-point mutant specification")
22
 
23
  return predict(pdb_path, chain, orig_list, loc_list, mut_list, weights=MODELS[model], rich_output=True)
24
  except Exception as e:
@@ -26,7 +21,7 @@ def predict_solubility(pdb_code, chain, orig, loc, mut, model, version=None):
26
 
27
  _pdb_code = None
28
  _chains = None
29
- def get_chains(pdb_code): #
30
  global _pdb_code, _chains
31
 
32
  if pdb_code != _pdb_code:
@@ -72,7 +67,9 @@ with gr.Blocks(
72
  mut = gr.Textbox(label="Mutant residue(s)", placeholder="D[,A]", scale=0)
73
 
74
  # with gr.Accordion("Model selection"):
75
- model = gr.Radio(choices=[m.stem for m in MODELS], label="Model selection", type="index", value=MODELS[0].stem)
 
 
76
 
77
  output = gr.HTML()
78
 
@@ -97,8 +94,7 @@ with gr.Blocks(
97
  examples=[
98
  ["1EER", "48,150", "F,R", "D"],
99
  ["1EER", "13", "E", "K"],
100
- # ["3QIB", "A,B,P,C,D", "YP7F,TP12S;YP7F;TP12S"],
101
- # ["1KNE", "A,P", ';'.join([f"TP6{a}" for a in AMINO_ACID_CODES_1])]
102
  ],
103
  # example_labels = ["1EER F48D,R150D"],
104
  inputs=[pdb_code, loc, orig, mut],
@@ -111,7 +107,7 @@ with gr.Blocks(
111
  <br/>
112
 
113
  **Acknowledgement**. Please, use the following citation to acknowledge the use of our tool:
114
- > Velecký, J., Faldynová H., Hermosilla, P., Sandlerová, N., Dörr, M., Egersdorfová, S., Bornscheuer, U., Prokop, Z., Damborský, J., Mazurenko, S., 2025:
115
  > SoluProtMut: Siamese Deep Learning for Solubility Effect Prediction in Protein Mutations and Experimental Validation.
116
  > *In preparation.*
117
  """)
 
1
  import gradio as gr
2
  from Bio.PDB.PDBParser import PDBParser
 
3
 
4
  from wrapper import *
5
 
 
13
  orig_list = parseList(orig, Type_aminoAcid)
14
  mut_list = parseList(mut, Type_aminoAcid)
15
 
16
+ mut_list = check_mutList(loc_list, orig_list, mut_list)
 
 
 
 
17
 
18
  return predict(pdb_path, chain, orig_list, loc_list, mut_list, weights=MODELS[model], rich_output=True)
19
  except Exception as e:
 
21
 
22
  _pdb_code = None
23
  _chains = None
24
+ def get_chains(pdb_code): # display chains stored in the PDB structure
25
  global _pdb_code, _chains
26
 
27
  if pdb_code != _pdb_code:
 
67
  mut = gr.Textbox(label="Mutant residue(s)", placeholder="D[,A]", scale=0)
68
 
69
  # with gr.Accordion("Model selection"):
70
+ model_names = [m.stem for m in MODELS]
71
+ model_names[0] += " (recommended)"
72
+ model = gr.Radio(choices=model_names, label="Model selection", type="index", value=model_names[0])
73
 
74
  output = gr.HTML()
75
 
 
94
  examples=[
95
  ["1EER", "48,150", "F,R", "D"],
96
  ["1EER", "13", "E", "K"],
97
+ # ["1z0q", "19", "F", "A"],
 
98
  ],
99
  # example_labels = ["1EER F48D,R150D"],
100
  inputs=[pdb_code, loc, orig, mut],
 
107
  <br/>
108
 
109
  **Acknowledgement**. Please, use the following citation to acknowledge the use of our tool:
110
+ > Velecký, J., Faldynová H., Hermosilla, P., Sandlerová, N., Doerr, M., Egersdorfová, S., Bornscheuer, U., Prokop, Z., Damborský, J., Mazurenko, S., 2025:
111
  > SoluProtMut: Siamese Deep Learning for Solubility Effect Prediction in Protein Mutations and Experimental Validation.
112
  > *In preparation.*
113
  """)
wrapper.py CHANGED
@@ -53,6 +53,15 @@ def Type_PDB(pdb_code):
53
  raise argparse.ArgumentTypeError(e)
54
  return pdb_code, pdb_path
55
 
 
 
 
 
 
 
 
 
 
56
  # ----------------------------------------------END OF DATA TYPES ------------------------------------------
57
 
58
  argParser = argparse.ArgumentParser(add_help = True,
@@ -117,8 +126,12 @@ def predict(pdb_path, chain, orig, loc, mut, weights=None, version=None, rich_ou
117
  from code.predictor import EnsemblePredictor # expensive import left for after the argument check
118
  pred_model = EnsemblePredictor(weights=weights, version=version)
119
 
120
- assesment, prediction = pred_model.predict_change(pdb_path, chain, orig, loc, mut)
121
- assesment = {'+': 'solubilizing', 'N': 'neutral', '-': 'desolubilizing'}[assesment]
 
 
 
 
122
 
123
  print(weights)
124
 
@@ -128,9 +141,9 @@ def predict(pdb_path, chain, orig, loc, mut, weights=None, version=None, rich_ou
128
  BLUESH = (100, 100, 255)
129
  color = tuple((o*max(0, 1-prediction*2) + b*max(0, (2*prediction)**2-1) for o, b in zip(ORANGE, BLUESH)))
130
 
131
- return f"Predicted solubility change: {prediction:.3f} <span style='color: rgb{color}'>({assesment})</span>"
132
  else:
133
- return "Predicted solubility change: %g (%s)" % (prediction, assesment)
134
 
135
 
136
 
@@ -144,11 +157,10 @@ if __name__ == '__main__':
144
  args = argParser.parse_args()
145
 
146
  # positional arguments parsing (argparse can't cope with nested positional arguments)
147
- if len(args.loc) != len(args.mut):
148
- if len(args.mut) == 1: # same target AA on all specified positions
149
- args.mut = args.mut * len(args.loc)
150
- else:
151
- argParser.error("Inconsistent multi-point mutant specification")
152
  pdb_code, pdb_path = args.input
153
  chain = args.chain
154
 
 
53
  raise argparse.ArgumentTypeError(e)
54
  return pdb_code, pdb_path
55
 
56
def check_mutList(loc_list, orig_list, mut_list):
    """Validate a multi-point mutant specification and return the mutant list.

    When exactly one mutant residue is given for several positions, that
    residue is applied to every position in ``loc_list``.

    Args:
        loc_list: mutated positions.
        orig_list: original residues, one per position.
        mut_list: mutant residues, either one per position or a single
            residue to be used at all positions.

    Returns:
        The mutant list, with the same length as ``loc_list``. A new list is
        returned when the single residue had to be repeated; the caller's
        ``mut_list`` is never modified.

    Raises:
        ValueError: if the three lists do not end up with equal lengths.
    """
    n_positions = len(loc_list)

    if len(mut_list) != n_positions and len(mut_list) == 1:
        # Apply the single mutation to all positions. Build a new list
        # instead of using ``*=`` so the caller's argument is left untouched
        # (also when the check below fails).
        mut_list = list(mut_list) * n_positions

    if n_positions == len(mut_list) == len(orig_list):
        return mut_list

    raise ValueError("Inconsistent multi-point mutant specification")
64
+
65
  # ----------------------------------------------END OF DATA TYPES ------------------------------------------
66
 
67
  argParser = argparse.ArgumentParser(add_help = True,
 
126
  from code.predictor import EnsemblePredictor # expensive import left for after the argument check
127
  pred_model = EnsemblePredictor(weights=weights, version=version)
128
 
129
+ try:
130
+ assesment, prediction = pred_model.predict_change(pdb_path, chain, orig, loc, mut)
131
+ except KeyError as e:
132
+ raise ValueError("Non-existing position in the PDB: %s" % str(e))
133
+
134
+ assesment = {'+': 'solubilizing ( > 0.5)', 'N': 'neutral (score ≈ 0.5)', '-': 'desolubilizing (score < 0.5)'}[assesment]
135
 
136
  print(weights)
137
 
 
141
  BLUESH = (100, 100, 255)
142
  color = tuple((o*max(0, 1-prediction*2) + b*max(0, (2*prediction)**2-1) for o, b in zip(ORANGE, BLUESH)))
143
 
144
+ return f"Predicted solubiliztation score: <span style='color: rgb{color}'>{prediction:.2f} {assesment}</span>"
145
  else:
146
+ return "Predicted solubiliztation score: %g (%s)" % (prediction, assesment)
147
 
148
 
149
 
 
157
  args = argParser.parse_args()
158
 
159
  # positional arguments parsing (argparse can't cope with nested positional arguments)
160
+ try:
161
+ args.mut = check_mutList(args.loc, args.orig, args.mut)
162
+ except Exception as e:
163
+ argParser.error(str(e))
 
164
  pdb_code, pdb_path = args.input
165
  chain = args.chain
166