wangjin2000 committed on
Commit
4f846da
·
verified ·
1 Parent(s): 5926c76

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +48 -2
app.py CHANGED
@@ -159,6 +159,7 @@ def compute_pseudo_perplexity(model, tokenizer, protein_seq, binder_seq):
159
 
160
  return pseudo_perplexity
161
 
 
162
  def compute_plddt_iptm(protein_seq, binder_seq):
163
  start = time.time()
164
  # always the ESMFold model
@@ -193,6 +194,50 @@ def compute_plddt_iptm(protein_seq, binder_seq):
193
  #print(f'compute_plddt_iptm time: {elapsed:.4f} seconds')
194
 
195
  return avg_plddt, ptm
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
196
 
197
  def generate_peptide_for_single_sequence(model, tokenizer, protein_seq, peptide_length = 15, top_k = 3, num_binders = 5, plddt_iptm_yes="no"):
198
  start = time.time()
@@ -230,8 +275,9 @@ def generate_peptide_for_single_sequence(model, tokenizer, protein_seq, peptide_
230
  ppl = compute_pseudo_perplexity(model, tokenizer, protein_seq, generated_binder)
231
 
232
  # Get PLDDT from ESMFold model
233
- if plddt_iptm_yes=="yes":
234
- plddt, iptm = compute_plddt_iptm(protein_seq, generated_binder) #too time-consuming
 
235
  else:
236
  plddt, iptm = [0, 0]
237
 
 
159
 
160
  return pseudo_perplexity
161
 
162
+ # compute pLDDT and ipTM from the ESMFold model directly; very slow
163
  def compute_plddt_iptm(protein_seq, binder_seq):
164
  start = time.time()
165
  # always the ESMFold model
 
194
  #print(f'compute_plddt_iptm time: {elapsed:.4f} seconds')
195
 
196
  return avg_plddt, ptm
197
+
198
+ # call the ESMFold API to get pLDDT
199
def get_plddt(protein_seq, binder_seq):
    """Fold ``protein_seq + binder_seq`` with the ESM Atlas API and average the B-factors.

    The two sequences are concatenated directly (no linker) and POSTed to the
    ESM Atlas ``foldSequence`` endpoint. The returned PDB text is saved to
    ``PDB-<first 3 + last 3 residues>.pdb`` in the current working directory,
    parsed with ``Bio.PDB.PDBParser``, and the B-factor column of every atom is
    averaged.

    Args:
        protein_seq: Amino-acid sequence of the target protein.
        binder_seq: Amino-acid sequence of the candidate binder peptide.

    Returns:
        A tuple ``(avg_plddt, ptm)``. ``avg_plddt`` is the per-atom mean of the
        B-factor column (presumably the pLDDT as reported by the API -- its
        scale is not established here; confirm against the API output).
        ``ptm`` is always ``0`` because this endpoint's PDB output is not
        parsed for a pTM/ipTM value.

    Raises:
        RuntimeError: If no usable PDB is obtained after three attempts, or if
            the parsed structure contains no atoms.
    """
    start = time.time()
    sequence = protein_seq + binder_seq

    url = "https://api.esmatlas.com/foldSequence/v1/pdb/"
    max_attempts = 3
    pdb_str = None
    for attempt in range(max_attempts):
        if attempt:
            # Brief pause before retrying a failed request.
            time.sleep(0.1)
        try:
            # NOTE(review): verify=False disables TLS certificate checking; it
            # is kept from the original code -- confirm it is still required.
            # The timeout prevents a stalled connection from hanging forever.
            response = requests.post(url, data=sequence, verify=False, timeout=120)
        except requests.RequestException as err:
            print(f"get_plddt: request attempt {attempt + 1} failed: {err}")
            continue
        # Only accept a successful response; an error body must never be
        # written to disk and parsed as if it were a structure.
        if response.ok and response.text != "INTERNAL SERVER ERROR":
            pdb_str = response.text
            break

    if pdb_str is None:
        raise RuntimeError(
            f"ESMFold API returned no structure after {max_attempts} attempts"
        )

    # Save a PDB file named after the first and last 3 residues of the sequence.
    # NOTE(review): different sequences sharing those residues overwrite each
    # other's file.
    name = sequence[:3] + sequence[-3:]
    outpath = Path.cwd() / f"PDB-{name}.pdb"
    outpath.write_text(pdb_str)

    # Read the B-factor column of every atom from the file just written.
    parser = Bio.PDB.PDBParser()
    structure = parser.get_structure("myStructureName", str(outpath))
    plddts = [atom.get_bfactor() for atom in structure.get_atoms()]
    if not plddts:
        raise RuntimeError(f"No atoms could be parsed from {outpath}")

    # Plain-Python mean: the B-factors are collected in a list, not an array.
    avg_plddt = sum(plddts) / len(plddts)
    ptm = 0

    end = time.time()
    elapsed = end - start
    print(f'get_plddt time: {elapsed:.4f} seconds')

    return avg_plddt, ptm
241
 
242
  def generate_peptide_for_single_sequence(model, tokenizer, protein_seq, peptide_length = 15, top_k = 3, num_binders = 5, plddt_iptm_yes="no"):
243
  start = time.time()
 
275
  ppl = compute_pseudo_perplexity(model, tokenizer, protein_seq, generated_binder)
276
 
277
  # Get PLDDT from ESMFold model
278
+ if plddt_iptm_yes=="yes":
279
+ plddt, iptm = get_plddt(protein_seq, binder_seq)
280
+ #plddt, iptm = compute_plddt_iptm(protein_seq, generated_binder) #too time-consuming
281
  else:
282
  plddt, iptm = [0, 0]
283