Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
|
@@ -238,7 +238,7 @@ def predict_peptide(base_model_path, finetuned_model_path, input_seqs, peptide_l
|
|
| 238 |
|
| 239 |
return results_df, PPC
|
| 240 |
|
| 241 |
-
def predict_peptide_from_file(base_model_path, finetuned_model_path, file_obj,
|
| 242 |
# Load the model
|
| 243 |
loaded_model = AutoModelForMaskedLM.from_pretrained(finetuned_model_path)
|
| 244 |
|
|
@@ -253,17 +253,23 @@ def predict_peptide_from_file(base_model_path, finetuned_model_path, file_obj, p
|
|
| 253 |
results = []
|
| 254 |
|
| 255 |
for i, row in input.iterrows():
|
| 256 |
-
|
| 257 |
-
|
| 258 |
-
|
| 259 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 260 |
|
| 261 |
-
|
| 262 |
-
|
| 263 |
-
print("263: results: ", results)
|
| 264 |
-
#peptide_lp = results_i['Binder'][results_df['PPL'].idxmin()] #Choosing the one with the lowest perplexity
|
| 265 |
|
| 266 |
-
results_df = pd.DataFrame(results, columns=['Input Sequence', 'Binder', 'PPL', 'pLDDT', 'iPTM'])
|
| 267 |
|
| 268 |
timestamp = datetime.now().strftime('%Y-%m-%d_%H')
|
| 269 |
outpath = (
|
|
|
|
| 238 |
|
| 239 |
return results_df, PPC
|
| 240 |
|
| 241 |
+
def predict_peptide_from_file(base_model_path, finetuned_model_path, file_obj, max_peptide_length=15, num_binders=4, top_k=3):
|
| 242 |
# Load the model
|
| 243 |
loaded_model = AutoModelForMaskedLM.from_pretrained(finetuned_model_path)
|
| 244 |
|
|
|
|
| 253 |
results = []
|
| 254 |
|
| 255 |
for i, row in input.iterrows():
|
| 256 |
+
protein_seq = row['Receptor Sequence']
|
| 257 |
+
peptide_seq = row['Binder']
|
| 258 |
+
peptide_length = min(len(peptide_seq), max_peptide_length)  # predict at the ground-truth peptide's length, capped at max_peptide_length
|
| 259 |
+
|
| 260 |
+
#get metrics for ground truth peptide
|
| 261 |
+
ppl_value = compute_pseudo_perplexity(loaded_model, tokenizer, protein_seq, peptide_seq)
|
| 262 |
+
plddt_value, iPTM_value = compute_plddt_iptm(protein_seq, peptide_seq)
|
| 263 |
+
|
| 264 |
+
results.append([protein_seq, peptide_seq, ppl_value, plddt_value, iPTM_value, 1]) # flag 1 for ground truth peptide
|
| 265 |
+
|
| 266 |
+
#predict peptides
|
| 267 |
+
binders = generate_peptide_for_single_sequence(loaded_model, tokenizer, protein_seq, peptide_length, top_k, num_binders)
|
| 268 |
|
| 269 |
+
for binder, ppl, plddt, iptm in binders:
|
| 270 |
+
results.append([protein_seq, binder, ppl, plddt, iptm, 0]) # flag 0 for generated peptide
|
|
|
|
|
|
|
| 271 |
|
| 272 |
+
results_df = pd.DataFrame(results, columns=['Input Sequence', 'Binder', 'PPL', 'pLDDT', 'iPTM', 'GT_Flag'])
|
| 273 |
|
| 274 |
timestamp = datetime.now().strftime('%Y-%m-%d_%H')
|
| 275 |
outpath = (
|