wangjin2000 committed on
Commit
fb8afd6
·
verified ·
1 Parent(s): 7753a80

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -33
app.py CHANGED
@@ -228,43 +228,22 @@ def predict_peptide_from_file(base_model_path, finetuned_model_path, file_obj, p
228
 
229
  for i, row in input.iterrows():
230
  seq = row['Receptor Sequence']
231
- print("231, seq:", seq)
232
- results.append([seq])
233
- '''
234
- binders = generate_peptide_for_single_sequence(loaded_model, tokenizer, seq, peptide_length, top_k, num_binders)
235
- for binder, ppl in binders:
236
- results.append([seq, binder, ppl])
237
- results_df = pd.DataFrame(results, columns=['Input Sequence', 'Binder', 'Pseudo Perplexity'])
238
-
239
- eval_dataset = ProteinDataset(file_obj, tokenizer, peptide_length)
240
- print("eval_dataset_input_ids",eval_dataset[2]['input_ids'])
241
-
242
- #input_seqs = eval_dataset["input_ids"]
243
- #print("line 228 - input_seqs:",input_seqs)
244
 
245
- if isinstance(input_seqs, str): # Single sequence
246
- binders = generate_peptide_for_single_sequence(loaded_model, tokenizer, input_seqs, peptide_length, top_k, num_binders)
247
- results_df = pd.DataFrame(binders, columns=['Binder', 'Pseudo Perplexity'])
248
-
249
- elif isinstance(input_seqs, list): # List of sequences
250
- results = []
251
- for seq in input_seqs:
252
- binders = generate_peptide_for_single_sequence(loaded_model, tokenizer, seq, peptide_length, top_k, num_binders)
253
- for binder, ppl in binders:
254
- results.append([seq, binder, ppl])
255
- results_df = pd.DataFrame(results, columns=['Input Sequence', 'Binder', 'Pseudo Perplexity'])
256
  print(results_df)
257
 
258
- #combine target protein and predicted peptide with 20 G amino acids.
259
- separator = 'G' * 20
260
- peptide_lp = results_df['Binder'][results_df['Pseudo Perplexity'].idxmin()] #Choosing the one with the lowest perplexity
261
- print("lowest perplesity:", peptide_lp)
262
- PPC = input_seqs + separator + peptide_lp
263
- print("Protein+peptide:", PPC)
264
 
265
- return results_df, PPC
266
- '''
267
- return results, file_obj
 
 
268
 
269
  def suggest(option):
270
  if option == "Protein:P63279":
 
228
 
229
  for i, row in input.iterrows():
230
  seq = row['Receptor Sequence']
231
+ binders = generate_peptide_for_single_sequence(loaded_model, tokenizer, seq, peptide_length, top_k, num_binders)
232
+ for binder, ppl in binders:
233
+ results.append([seq, binder, ppl])
 
 
 
 
 
 
 
 
 
 
234
 
235
+ results_df = pd.DataFrame(results, columns=['Input Sequence', 'Binder', 'Pseudo Perplexity'])
 
 
 
 
 
 
 
 
 
 
236
  print(results_df)
237
 
238
+ outpath = (
239
+ Path.cwd() / "predicted_peptides.csv"
240
+ )
 
 
 
241
 
242
+ logging.info(f"Saving predicted_seq_file to:\t{outpath}")
243
+
244
+ results_df.to_csv(outpath,header=True, index=False)
245
+
246
+ return output, outpath
247
 
248
  def suggest(option):
249
  if option == "Protein:P63279":