Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
|
@@ -210,7 +210,39 @@ def predict_peptide(base_model_path, finetuned_model_path, input_seqs, peptide_l
|
|
| 210 |
print("Protein+peptide:", PPC)
|
| 211 |
|
| 212 |
return results_df, PPC
|
| 213 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 214 |
def suggest(option):
|
| 215 |
if option == "Protein:P63279":
|
| 216 |
suggestion = "MSGIALSRLAQERKAWRKDHPFGFVAVPTKNPDGTMNLMNWECAIPGKKGTPWEGGLFKLRMLFKDDYPSSPPKCKFEPPLFHPNVYPSGTVCLSILEEDKDWRPAITIKQILLGIQELLNEPNIQDPAQAEAYTIYCQNRVEYEKRVRAQAKKFAPS"
|
|
|
|
| 210 |
print("Protein+peptide:", PPC)
|
| 211 |
|
| 212 |
return results_df, PPC
|
| 213 |
+
|
| 214 |
+
def predict_peptide_from_file(base_model_path, finetuned_model_path, input_seqs, peptide_length=15, top_k=3, num_binders=4):
    """Generate peptide binders and build a protein+linker+peptide string.

    Args:
        base_model_path: Identifier/path passed to
            ``AutoTokenizer.from_pretrained`` to load the tokenizer.
        finetuned_model_path: Identifier/path passed to
            ``AutoModelForMaskedLM.from_pretrained`` to load the model.
        input_seqs: Either a single sequence (``str``) or a ``list`` of
            sequences.
        peptide_length: Forwarded to ``generate_peptide_for_single_sequence``.
        top_k: Forwarded to ``generate_peptide_for_single_sequence``.
        num_binders: Forwarded to ``generate_peptide_for_single_sequence``.

    Returns:
        A tuple ``(results_df, PPC)``. ``results_df`` has the columns
        ``['Binder', 'Pseudo Perplexity']`` for a single sequence, or
        ``['Input Sequence', 'Binder', 'Pseudo Perplexity']`` for a list.
        ``PPC`` is the target sequence, 20 ``'G'`` characters, and the binder
        with the lowest pseudo perplexity, concatenated in that order. For a
        list input the target is the sequence that produced that binder.

    Raises:
        TypeError: If ``input_seqs`` is neither a ``str`` nor a ``list``.
        ValueError: If no binders were generated (for example, an empty list
            of input sequences).
    """
    # Validate up front so a bad argument fails before the (slow) model load
    # instead of surfacing later as an unbound ``results_df``.
    if not isinstance(input_seqs, (str, list)):
        raise TypeError(
            "input_seqs must be a str or a list of str, got "
            + type(input_seqs).__name__
        )

    # Load the model
    loaded_model = AutoModelForMaskedLM.from_pretrained(finetuned_model_path)

    # Ensure the model is in evaluation mode
    loaded_model.eval()

    # Tokenization
    tokenizer = AutoTokenizer.from_pretrained(base_model_path)

    if isinstance(input_seqs, str):  # Single sequence
        binders = generate_peptide_for_single_sequence(
            loaded_model, tokenizer, input_seqs, peptide_length, top_k, num_binders
        )
        results_df = pd.DataFrame(binders, columns=['Binder', 'Pseudo Perplexity'])
    else:  # List of sequences
        results = []
        for seq in input_seqs:
            binders = generate_peptide_for_single_sequence(
                loaded_model, tokenizer, seq, peptide_length, top_k, num_binders
            )
            # Each generated item is unpacked as a (binder, perplexity) pair.
            results.extend([seq, binder, ppl] for binder, ppl in binders)
        results_df = pd.DataFrame(
            results, columns=['Input Sequence', 'Binder', 'Pseudo Perplexity']
        )
    print(results_df)

    if results_df.empty:
        # idxmin() on an empty column would fail with an opaque message.
        raise ValueError("no binders were generated for the given input_seqs")

    # Join the target protein and the predicted peptide with a 20-residue
    # poly-G separator.
    separator = 'G' * 20

    # Choose the binder with the lowest pseudo perplexity.
    best_idx = results_df['Pseudo Perplexity'].idxmin()
    peptide_lp = results_df.loc[best_idx, 'Binder']
    print("lowest perplesity:", peptide_lp)

    # For a list input, ``input_seqs + separator`` would be list + str and
    # raise TypeError; pair the binder with the sequence it was generated for.
    if isinstance(input_seqs, str):
        target_seq = input_seqs
    else:
        target_seq = results_df.loc[best_idx, 'Input Sequence']

    PPC = target_seq + separator + peptide_lp
    print("Protein+peptide:", PPC)

    return results_df, PPC
|
| 245 |
+
|
| 246 |
def suggest(option):
|
| 247 |
if option == "Protein:P63279":
|
| 248 |
suggestion = "MSGIALSRLAQERKAWRKDHPFGFVAVPTKNPDGTMNLMNWECAIPGKKGTPWEGGLFKLRMLFKDDYPSSPPKCKFEPPLFHPNVYPSGTVCLSILEEDKDWRPAITIKQILLGIQELLNEPNIQDPAQAEAYTIYCQNRVEYEKRVRAQAKKFAPS"
|