Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
|
@@ -228,43 +228,22 @@ def predict_peptide_from_file(base_model_path, finetuned_model_path, file_obj, p
|
|
| 228 |
|
| 229 |
for i, row in input.iterrows():
|
| 230 |
seq = row['Receptor Sequence']
|
| 231 |
-
|
| 232 |
-
|
| 233 |
-
|
| 234 |
-
binders = generate_peptide_for_single_sequence(loaded_model, tokenizer, seq, peptide_length, top_k, num_binders)
|
| 235 |
-
for binder, ppl in binders:
|
| 236 |
-
results.append([seq, binder, ppl])
|
| 237 |
-
results_df = pd.DataFrame(results, columns=['Input Sequence', 'Binder', 'Pseudo Perplexity'])
|
| 238 |
-
|
| 239 |
-
eval_dataset = ProteinDataset(file_obj, tokenizer, peptide_length)
|
| 240 |
-
print("eval_dataset_input_ids",eval_dataset[2]['input_ids'])
|
| 241 |
-
|
| 242 |
-
#input_seqs = eval_dataset["input_ids"]
|
| 243 |
-
#print("line 228 - input_seqs:",input_seqs)
|
| 244 |
|
| 245 |
-
|
| 246 |
-
binders = generate_peptide_for_single_sequence(loaded_model, tokenizer, input_seqs, peptide_length, top_k, num_binders)
|
| 247 |
-
results_df = pd.DataFrame(binders, columns=['Binder', 'Pseudo Perplexity'])
|
| 248 |
-
|
| 249 |
-
elif isinstance(input_seqs, list): # List of sequences
|
| 250 |
-
results = []
|
| 251 |
-
for seq in input_seqs:
|
| 252 |
-
binders = generate_peptide_for_single_sequence(loaded_model, tokenizer, seq, peptide_length, top_k, num_binders)
|
| 253 |
-
for binder, ppl in binders:
|
| 254 |
-
results.append([seq, binder, ppl])
|
| 255 |
-
results_df = pd.DataFrame(results, columns=['Input Sequence', 'Binder', 'Pseudo Perplexity'])
|
| 256 |
print(results_df)
|
| 257 |
|
| 258 |
-
|
| 259 |
-
|
| 260 |
-
|
| 261 |
-
print("lowest perplesity:", peptide_lp)
|
| 262 |
-
PPC = input_seqs + separator + peptide_lp
|
| 263 |
-
print("Protein+peptide:", PPC)
|
| 264 |
|
| 265 |
-
|
| 266 |
-
|
| 267 |
-
|
|
|
|
|
|
|
| 268 |
|
| 269 |
def suggest(option):
|
| 270 |
if option == "Protein:P63279":
|
|
|
|
| 228 |
|
| 229 |
for i, row in input.iterrows():
|
| 230 |
seq = row['Receptor Sequence']
|
| 231 |
+
binders = generate_peptide_for_single_sequence(loaded_model, tokenizer, seq, peptide_length, top_k, num_binders)
|
| 232 |
+
for binder, ppl in binders:
|
| 233 |
+
results.append([seq, binder, ppl])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 234 |
|
| 235 |
+
results_df = pd.DataFrame(results, columns=['Input Sequence', 'Binder', 'Pseudo Perplexity'])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 236 |
print(results_df)
|
| 237 |
|
| 238 |
+
outpath = (
|
| 239 |
+
Path.cwd() / "predicted_peptides.csv"
|
| 240 |
+
)
|
|
|
|
|
|
|
|
|
|
| 241 |
|
| 242 |
+
logging.info(f"Saving predicted_seq_file to:\t{outpath}")
|
| 243 |
+
|
| 244 |
+
results_df.to_csv(outpath,header=True, index=False)
|
| 245 |
+
|
| 246 |
+
return output, outpath
|
| 247 |
|
| 248 |
def suggest(option):
|
| 249 |
if option == "Protein:P63279":
|