wnagleiofficial committed on
Commit
fe55e9c
·
1 Parent(s): 7bf7082
NeuroPredPLM/__pycache__/__init__.cpython-38.pyc ADDED
Binary file (162 Bytes). View file
 
NeuroPredPLM/__pycache__/model.cpython-38.pyc ADDED
Binary file (2.48 kB). View file
 
NeuroPredPLM/__pycache__/predict.cpython-38.pyc ADDED
Binary file (1.4 kB). View file
 
NeuroPredPLM/__pycache__/utils.cpython-38.pyc ADDED
Binary file (2.21 kB). View file
 
NeuroPredPLM/predict.py CHANGED
@@ -15,4 +15,17 @@ def predict(peptide_list, model_path, device='cpu'):
15
  att = att.cpu().numpy()
16
  out = {'Neuropeptide':pred[0][1], "Non-neuropeptide":pred[0][0]}
17
  return out
18
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
  att = att.cpu().numpy()
16
  out = {'Neuropeptide':pred[0][1], "Non-neuropeptide":pred[0][0]}
17
  return out
18
+
19
def batch_predict(peptide_list, cutoff, model_path, device='cpu'):
    """Classify a batch of peptides as neuropeptide or non-neuropeptide.

    Args:
        peptide_list: Sequence of records. The first two elements of each
            record are copied into the matching output row, and the whole
            list is passed to the model unchanged.
        cutoff: Probability threshold. A record is labelled 'Neuropeptide'
            only when its class-1 probability is strictly greater than this.
        model_path: Path of the saved state dict read with ``torch.load``.
        device: Device the model is moved to and evaluated on.

    Returns:
        One row per input record:
        ``[record[0], record[1], "<class-1 probability, 3 decimals>", label]``.
        An empty input yields an empty list.
    """
    # Nothing to score: skip building the model and reading the checkpoint.
    if not peptide_list:
        return []

    with torch.no_grad():
        neuroPred_model = EsmModel()
        neuroPred_model.eval()
        # Load onto the CPU first so the checkpoint opens on machines
        # without a GPU; the model is moved to `device` afterwards.
        state_dict = torch.load(model_path, map_location="cpu")
        neuroPred_model.load_state_dict(state_dict)
        neuroPred_model = neuroPred_model.to(device)
        # The second output (attention) is not part of the batch result, so
        # it is deliberately not copied back to the host.
        prob, _ = neuroPred_model(peptide_list, device)
        pred = torch.softmax(prob, dim=-1).cpu().tolist()

    return [
        [
            record[0],
            record[1],
            f"{scores[1]:.3f}",
            'Neuropeptide' if scores[1] > cutoff else 'Non-neuropeptide',
        ]
        for record, scores in zip(peptide_list, pred)
    ]
app.py CHANGED
@@ -1,5 +1,5 @@
1
  import torch
2
- from NeuroPredPLM.predict import predict
3
  import gradio as gr
4
  from io import StringIO
5
  from Bio import SeqIO
@@ -14,9 +14,64 @@ def classifier(peptide_seq):
14
  return neuropeptide_pred
15
  # {peptide_id:[Type:int(1->neuropeptide,0->non-neuropeptide), attention score:nd.array]}
16
 
17
- iface = gr.Interface(fn=classifier, inputs=gr.Textbox(
18
- label="Input peptide sequence",
19
- lines=3,
20
- value=">peptide-1\nIGLRLPNMLKF",
21
- ), outputs=gr.outputs.Label(num_top_classes=2), title="NeuroPred-PLM")
22
- iface.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import torch
2
+ from NeuroPredPLM.predict import predict, batch_predict
3
  import gradio as gr
4
  from io import StringIO
5
  from Bio import SeqIO
 
14
  return neuropeptide_pred
15
  # {peptide_id:[Type:int(1->neuropeptide,0->non-neuropeptide), attention score:nd.array]}
16
 
17
+
18
def batch_classifier(file, cutoff):
    """Run batch prediction on every record of an uploaded FASTA file.

    Args:
        file: Uploaded-file object; only its ``name`` attribute is read and
            handed to ``SeqIO.parse`` as the path to parse.
        cutoff: Threshold forwarded unchanged to ``batch_predict``.

    Returns:
        The value returned by ``batch_predict`` for the parsed records.
    """
    # Gather (record id, sequence string) pairs in file order.
    records = [
        (entry.id, str(entry.seq))
        for entry in SeqIO.parse(file.name, 'fasta')
    ]

    # Use the GPU when torch reports one, otherwise fall back to the CPU.
    if torch.cuda.is_available():
        device = "cuda"
    else:
        device = "cpu"

    return batch_predict(records, cutoff, './model.pth', device)
25
+
26
+ with gr.Blocks() as demo:
27
+ gr.Markdown(" ## NeuroPred-PLM")
28
+ gr.Markdown("In this work, we developed an interpretable and robust neuropeptide prediction model, named NeuroPred-PLM. First, we employed a language model (ESM) of proteins to obtain semantic representations of neuropeptides, which could reduce the complexity of feature engineering. Next, we adopted a multi-scale convolutional neural network to enhance the local feature representation of neuropeptide embeddings. To make the model interpretable, we proposed a global multi-head attention network that could be used to capture the position-wise contribution to neuropeptide prediction via the attention scores. In addition, NeuroPred-PLM was developed based on our newly constructed NeuroPep 2.0 database. Benchmarks based on the independent test set show that NeuroPred-PLM achieves superior predictive performance compared to other state-of-the-art predictors.")
29
+ with gr.Tab("Single Sequence Model"):
30
+ # cutoff = gr.Slider(0, 1, step=0.1, value=0.5, interactive=True)
31
+ with gr.Row():
32
+ with gr.Column(scale=2):
33
+ text_input = gr.Textbox(
34
+ label="Input single peptide sequence in the Fasta format",
35
+ lines=4,
36
+ value=">peptide-1\nIGLRLPNMLKF",
37
+ )
38
+ gr.Markdown("#### The input peptide sequence length should be between 5-100")
39
+ single_cutoff = gr.Slider(0, 1, step=0.1, value=0.5, interactive=True, label="Threshold")
40
+ text_button = gr.Button("Submit")
41
+ with gr.Column(scale=2):
42
+ text_output = gr.outputs.Label(num_top_classes=2, label='Output')
43
+ with gr.Tab("Batch Model"):
44
+ with gr.Row():
45
+ with gr.Column(scale=2):
46
+ input_file_fasta = gr.File()
47
+ # cutoff = gr.Slider(0, 1, step=0.1, value=0.5, interactive=True, label="threshold")
48
+ # image_button = gr.Button("Submit")
49
+ with gr.Column(scale=2):
50
+ batch_cutoff = gr.Slider(0, 1, step=0.1, value=0.5, interactive=True, label="Threshold")
51
+ gr.Markdown("### Note")
52
+ gr.Markdown("- Limit the number of input sequences to less than 100")
53
+ gr.Markdown("- The file should be the Fasta format")
54
+ gr.Markdown("- The input peptide sequence length should be between 5-100")
55
+ image_button = gr.Button("Submit")
56
+ with gr.Column():
57
+ # gr.Markdown(" ### Flip text or image files using this demo.")
58
+ frame_output = gr.DataFrame(headers=["Sequence Id", "Sequence", "Probability of neuropeptides", "Neuropeptide"],
59
+ datatype=["str", "str", "str", 'str'],)
60
+
61
+
62
+
63
+ with gr.Accordion("Citation"):
64
+ gr.Markdown("- Wang, L., Huang, C., Wang, M., Xue, Z., & Wang, Y. (2022). NeuroPred-PLM: an interpretable and robust model for neuropeptide prediction by protein language model. In preparation.")
65
+ gr.Markdown("- GitHub: https://github.com/ISYSLAB-HUST/NeuroPred-PLM")
66
+
67
+ with gr.Accordion("License"):
68
+ gr.Markdown("- Released under the [MIT license](https://github.com/ISYSLAB-HUST/NeuroPred-PLM/blob/main/LICENSE). ")
69
+
70
+ with gr.Accordion("Contact"):
71
+ gr.Markdown("- If you have any questions, comments, or would like to report a bug, please file a Github issue or contact me at wanglei94@hust.edu.cn.")
72
+
73
+ text_button.click(classifier, inputs=text_input, outputs=text_output)
74
+ image_button.click(batch_classifier, inputs=[input_file_fasta, batch_cutoff], outputs=frame_output)
75
+
76
+ demo.queue(4)
77
+ demo.launch()
test.fa ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ >peptide_1
2
+ IGLRLPNMLKF
3
+ >peptide_2
4
+ QAAQFKVWSASELVD
5
+ >peptide_3
6
+ LRSPKMMHKSGCFGRRLDRIGSLSGLGCNVLRKY