Spaces:

wangjin2000
/

ESM2PPI

Paused

App Files Files Community

wangjin2000 committed on Oct 29, 2024

Commit

53edd52

verified ·

1 Parent(s): d1f6cab

Update app.py

Browse files

Files changed (1) hide show

app.py +33 -1

app.py CHANGED Viewed

@@ -210,7 +210,39 @@ def predict_peptide(base_model_path, finetuned_model_path, input_seqs, peptide_l
     print("Protein+peptide:", PPC)
     return results_df, PPC
 def suggest(option):
     if option == "Protein:P63279":
         suggestion = "MSGIALSRLAQERKAWRKDHPFGFVAVPTKNPDGTMNLMNWECAIPGKKGTPWEGGLFKLRMLFKDDYPSSPPKCKFEPPLFHPNVYPSGTVCLSILEEDKDWRPAITIKQILLGIQELLNEPNIQDPAQAEAYTIYCQNRVEYEKRVRAQAKKFAPS"

     print("Protein+peptide:", PPC)
     return results_df, PPC
+def predict_peptide_from_file(base_model_path, finetuned_model_path, input_seqs, peptide_length=15, top_k=3, num_binders=4):
+    """Generate peptide binders and build a protein+linker+peptide chain.
+
+    Loads the masked-LM from ``finetuned_model_path`` and the tokenizer from
+    ``base_model_path``, calls ``generate_peptide_for_single_sequence`` once
+    per input sequence, and selects the binder with the lowest pseudo
+    perplexity across all results.
+
+    Args:
+        base_model_path: Identifier passed to ``AutoTokenizer.from_pretrained``.
+        finetuned_model_path: Identifier passed to
+            ``AutoModelForMaskedLM.from_pretrained``.
+        input_seqs: A single sequence (``str``) or a ``list`` of sequences.
+        peptide_length: Forwarded to ``generate_peptide_for_single_sequence``.
+        top_k: Forwarded to ``generate_peptide_for_single_sequence``.
+        num_binders: Forwarded to ``generate_peptide_for_single_sequence``.
+
+    Returns:
+        A tuple ``(results_df, PPC)``. ``results_df`` has the columns
+        ``['Binder', 'Pseudo Perplexity']`` for ``str`` input and
+        ``['Input Sequence', 'Binder', 'Pseudo Perplexity']`` for ``list``
+        input. ``PPC`` is the target sequence, then 20 ``'G'`` characters,
+        then the lowest-perplexity binder. For ``list`` input the target is
+        the sequence that the selected binder was generated for.
+
+    Raises:
+        TypeError: If ``input_seqs`` is neither a ``str`` nor a ``list``.
+        ValueError: If no binders were generated, so none can be selected.
+    """
+    # Validate up front so an unsupported input fails with a clear message
+    # before the model load, instead of an unbound results_df further down.
+    if not isinstance(input_seqs, (str, list)):
+        raise TypeError(
+            f"input_seqs must be a str or a list of str, got {type(input_seqs).__name__}"
+        )
+    # Load the model
+    loaded_model = AutoModelForMaskedLM.from_pretrained(finetuned_model_path)
+    # Ensure the model is in evaluation mode
+    loaded_model.eval()
+    # Tokenization
+    tokenizer = AutoTokenizer.from_pretrained(base_model_path)
+    if isinstance(input_seqs, str):  # Single sequence
+        binders = generate_peptide_for_single_sequence(loaded_model, tokenizer, input_seqs, peptide_length, top_k, num_binders)
+        results_df = pd.DataFrame(binders, columns=['Binder', 'Pseudo Perplexity'])
+    else:  # List of sequences
+        results = []
+        for seq in input_seqs:
+            binders = generate_peptide_for_single_sequence(loaded_model, tokenizer, seq, peptide_length, top_k, num_binders)
+            for binder, ppl in binders:
+                results.append([seq, binder, ppl])
+        results_df = pd.DataFrame(results, columns=['Input Sequence', 'Binder', 'Pseudo Perplexity'])
+    print(results_df)
+    # idxmin() on an empty column fails with an opaque message; report the
+    # actual problem instead.
+    if results_df.empty:
+        raise ValueError("no binders were generated; cannot select the lowest-perplexity peptide")
+    # Combine target protein and predicted peptide with 20 G amino acids.
+    separator = 'G' * 20
+    best_idx = results_df['Pseudo Perplexity'].idxmin()  # Row with the lowest perplexity
+    peptide_lp = results_df.loc[best_idx, 'Binder']
+    print("lowest perplesity:", peptide_lp)
+    # For list input, pair the binder with the sequence it was generated for;
+    # concatenating the list itself with a str would raise TypeError.
+    if isinstance(input_seqs, str):
+        target_seq = input_seqs
+    else:
+        target_seq = results_df.loc[best_idx, 'Input Sequence']
+    PPC = target_seq + separator + peptide_lp
+    print("Protein+peptide:", PPC)
+    return results_df, PPC
 def suggest(option):
     if option == "Protein:P63279":
         suggestion = "MSGIALSRLAQERKAWRKDHPFGFVAVPTKNPDGTMNLMNWECAIPGKKGTPWEGGLFKLRMLFKDDYPSSPPKCKFEPPLFHPNVYPSGTVCLSILEEDKDWRPAITIKQILLGIQELLNEPNIQDPAQAEAYTIYCQNRVEYEKRVRAQAKKFAPS"