wangjin2000 committed on
Commit
4a62eed
·
verified ·
1 Parent(s): 6a8b48e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -10
app.py CHANGED
@@ -238,7 +238,7 @@ def predict_peptide(base_model_path, finetuned_model_path, input_seqs, peptide_l
238
 
239
  return results_df, PPC
240
 
241
- def predict_peptide_from_file(base_model_path, finetuned_model_path, file_obj, peptide_length=15, num_binders=4, top_k=3):
242
  # Load the model
243
  loaded_model = AutoModelForMaskedLM.from_pretrained(finetuned_model_path)
244
 
@@ -253,17 +253,23 @@ def predict_peptide_from_file(base_model_path, finetuned_model_path, file_obj, p
253
  results = []
254
 
255
  for i, row in input.iterrows():
256
- seq = row['Receptor Sequence']
257
- binders = generate_peptide_for_single_sequence(loaded_model, tokenizer, seq, peptide_length, top_k, num_binders)
258
- results_idf = pd.DataFrame(binders, columns=['Binder', 'PPL', 'pLDDT', 'iPTM'])
259
- peptide_lp = results_idf['Binder'][results_idf['PPL'].idxmin()] #Choosing the one with the lowest perplexity
 
 
 
 
 
 
 
 
260
 
261
- #for binder, ppl, plddt, iptm in binders:
262
- results.append([seq, peptide_lp])
263
- print("263: results: ", results)
264
- #peptide_lp = results_i['Binder'][results_df['PPL'].idxmin()] #Choosing the one with the lowest perplexity
265
 
266
- results_df = pd.DataFrame(results, columns=['Input Sequence', 'Binder', 'PPL', 'pLDDT', 'iPTM'])
267
 
268
  timestamp = datetime.now().strftime('%Y-%m-%d_%H')
269
  outpath = (
 
238
 
239
  return results_df, PPC
240
 
241
+ def predict_peptide_from_file(base_model_path, finetuned_model_path, file_obj, max_peptide_length=15, num_binders=4, top_k=3):
242
  # Load the model
243
  loaded_model = AutoModelForMaskedLM.from_pretrained(finetuned_model_path)
244
 
 
253
  results = []
254
 
255
  for i, row in input.iterrows():
256
+ protein_seq = row['Receptor Sequence']
257
+ peptide_seq = row['Binder']
258
+ peptide_length = min([len(peptide_seq) max_peptide_length]) # use the same length of ground truth peptide length for prediction limited to max_peptide_length
259
+
260
+ #get metrics for ground truth peptide
261
+ ppl_value = compute_pseudo_perplexity(loaded_model, tokenizer, protein_seq, peptide_seq)
262
+ plddt_value, iPTM_value = compute_plddt_iptm(protein_seq, peptide_seq)
263
+
264
+ results.append([seq, binder, ppl, plddt, iptm, 1]) # flag 1 for ground truth peptide
265
+
266
+ #predict peptides
267
+ binders = generate_peptide_for_single_sequence(loaded_model, tokenizer, protein_seq, peptide_length, top_k, num_binders)
268
 
269
+ for binder, ppl, plddt, iptm in binders:
270
+ results.append([seq, binder, ppl, plddt, iptm, 0]) # flag 0 for generated peptide
 
 
271
 
272
+ results_df = pd.DataFrame(results, columns=['Input Sequence', 'Binder', 'PPL', 'pLDDT', 'iPTM', 'GT_Flag'])
273
 
274
  timestamp = datetime.now().strftime('%Y-%m-%d_%H')
275
  outpath = (