wangjin2000 committed on
Commit
53edd52
·
verified ·
1 Parent(s): d1f6cab

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +33 -1
app.py CHANGED
@@ -210,7 +210,39 @@ def predict_peptide(base_model_path, finetuned_model_path, input_seqs, peptide_l
210
  print("Protein+peptide:", PPC)
211
 
212
  return results_df, PPC
213
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
214
  def suggest(option):
215
  if option == "Protein:P63279":
216
  suggestion = "MSGIALSRLAQERKAWRKDHPFGFVAVPTKNPDGTMNLMNWECAIPGKKGTPWEGGLFKLRMLFKDDYPSSPPKCKFEPPLFHPNVYPSGTVCLSILEEDKDWRPAITIKQILLGIQELLNEPNIQDPAQAEAYTIYCQNRVEYEKRVRAQAKKFAPS"
 
210
  print("Protein+peptide:", PPC)
211
 
212
  return results_df, PPC
213
+
214
def predict_peptide_from_file(base_model_path, finetuned_model_path, input_seqs, peptide_length=15, top_k=3, num_binders=4):
    """Generate peptide binders for one or more protein sequences.

    Loads the fine-tuned masked-LM from ``finetuned_model_path`` and the
    tokenizer from ``base_model_path``, calls
    ``generate_peptide_for_single_sequence`` for every input sequence, and
    joins the lowest-perplexity binder to its target protein with a
    20-glycine linker.

    NOTE(review): despite the name, nothing here reads a file; the caller is
    presumably expected to pass sequences already parsed from one -- confirm.

    Args:
        base_model_path: Location passed to ``AutoTokenizer.from_pretrained``.
        finetuned_model_path: Location passed to
            ``AutoModelForMaskedLM.from_pretrained``.
        input_seqs: A single protein sequence (``str``) or a ``list`` of
            sequences.
        peptide_length: Forwarded to ``generate_peptide_for_single_sequence``.
        top_k: Forwarded to ``generate_peptide_for_single_sequence``.
        num_binders: Forwarded to ``generate_peptide_for_single_sequence``.

    Returns:
        A ``(results_df, PPC)`` tuple. ``results_df`` has columns
        ``['Binder', 'Pseudo Perplexity']`` for a single sequence, or
        ``['Input Sequence', 'Binder', 'Pseudo Perplexity']`` for a list.
        ``PPC`` is ``protein + 'G' * 20 + best_binder``; for a list input the
        protein is the one that produced the overall best binder.

    Raises:
        TypeError: If ``input_seqs`` is neither ``str`` nor ``list``.
        ValueError: If no binders were generated.
    """
    # Reject unsupported input before paying for the model load; previously
    # this surfaced later as a NameError on the unbound results_df.
    if not isinstance(input_seqs, (str, list)):
        raise TypeError(
            "input_seqs must be a str or a list of str, got "
            + type(input_seqs).__name__
        )

    # Load the model
    loaded_model = AutoModelForMaskedLM.from_pretrained(finetuned_model_path)

    # Ensure the model is in evaluation mode
    loaded_model.eval()

    # Tokenization
    tokenizer = AutoTokenizer.from_pretrained(base_model_path)

    if isinstance(input_seqs, str):  # Single sequence
        binders = generate_peptide_for_single_sequence(
            loaded_model, tokenizer, input_seqs, peptide_length, top_k, num_binders
        )
        results_df = pd.DataFrame(binders, columns=['Binder', 'Pseudo Perplexity'])
    else:  # List of sequences
        results = [
            [seq, binder, ppl]
            for seq in input_seqs
            for binder, ppl in generate_peptide_for_single_sequence(
                loaded_model, tokenizer, seq, peptide_length, top_k, num_binders
            )
        ]
        results_df = pd.DataFrame(results, columns=['Input Sequence', 'Binder', 'Pseudo Perplexity'])
    print(results_df)

    if results_df.empty:
        raise ValueError("no binders were generated; cannot build the protein+peptide sequence")

    # Combine target protein and predicted peptide with 20 G amino acids.
    separator = 'G' * 20
    # Choose the row with the lowest pseudo perplexity.
    best_row = results_df.loc[results_df['Pseudo Perplexity'].idxmin()]
    peptide_lp = best_row['Binder']
    print("lowest perplesity:", peptide_lp)

    # For a list input the original concatenated the list itself with a str
    # (always a TypeError); pair the binder with the sequence it was made for.
    if isinstance(input_seqs, str):
        target_seq = input_seqs
    else:
        target_seq = best_row['Input Sequence']
    PPC = target_seq + separator + peptide_lp
    print("Protein+peptide:", PPC)

    return results_df, PPC
245
+
246
  def suggest(option):
247
  if option == "Protein:P63279":
248
  suggestion = "MSGIALSRLAQERKAWRKDHPFGFVAVPTKNPDGTMNLMNWECAIPGKKGTPWEGGLFKLRMLFKDDYPSSPPKCKFEPPLFHPNVYPSGTVCLSILEEDKDWRPAITIKQILLGIQELLNEPNIQDPAQAEAYTIYCQNRVEYEKRVRAQAKKFAPS"