Spaces:
Runtime error
Runtime error
wnagleiofficial
committed on
Commit
·
e39cbff
1
Parent(s):
f556603
Add application file
Browse files- NeuroPredPLM/__init__.py +0 -0
- NeuroPredPLM/args.pt +3 -0
- NeuroPredPLM/model.py +55 -0
- NeuroPredPLM/predict.py +18 -0
- NeuroPredPLM/utils.py +50 -0
- README.md +1 -1
- app.py +22 -0
- model.pth +3 -0
- requirements.txt +4 -0
NeuroPredPLM/__init__.py
ADDED
|
File without changes
|
NeuroPredPLM/args.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:51bbe01a0f9d64a23fc40c16fae8454188cb5ff6e1b661114490ef7e90718df1
|
| 3 |
+
size 4271
|
NeuroPredPLM/model.py
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
main model
|
| 3 |
+
"""
|
| 4 |
+
import torch
|
| 5 |
+
from torch import nn
|
| 6 |
+
import numpy as np
|
| 7 |
+
import torch.nn.functional as F
|
| 8 |
+
from einops import rearrange
|
| 9 |
+
import os
|
| 10 |
+
|
| 11 |
+
from .utils import length_to_mask, load_model_and_alphabet_core
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
class EsmModel(nn.Module):
    """Neuropeptide classifier built on a pretrained ESM protein language model.

    Per-residue ESM embeddings are split into `head` chunks along the feature
    dimension, projected and convolved per chunk, pooled over residues with a
    learned per-head attention, and finally classified by a small MLP.
    """

    def __init__(self, hidden_size=64, num_labels=2, projection_size=24, head=12):
        # hidden_size: feature width of one per-head slice of the ESM embedding.
        #   forward() rearranges '(h d)' with h=head, so this assumes the ESM
        #   embedding dim equals head * hidden_size (12 * 64 = 768)
        #   — TODO confirm against args.pt.
        # num_labels: number of output classes (2: neuropeptide / not).
        # projection_size: channel width of the projection + conv stack.
        # head: number of attention heads used for residue pooling.
        super().__init__()

        # The ESM architecture args ship next to this file in args.pt; the
        # returned model has random weights — callers load a state dict later.
        basedir = os.path.abspath(os.path.dirname(__file__))
        self.esm, self.alphabet = load_model_and_alphabet_core(os.path.join(basedir, 'args.pt'))
        self.num_labels = num_labels
        self.head = head
        self.hidden_size = hidden_size
        # Project each per-head slice (hidden_size) down to projection_size.
        self.projection = nn.Linear(hidden_size, projection_size)
        self.cov_1 = nn.Conv1d(projection_size, projection_size, kernel_size=3, padding='same')
        self.cov_2 = nn.Conv1d(projection_size, int(projection_size/2), kernel_size=1, padding='same')
        # self.gating = nn.Linear(projection_size, projection_size)
        # One attention vector of width projection_size/2 per head.
        self.W = nn.Parameter(torch.randn((head, int(projection_size/2))))
        # self.mu = nn.Parameter(torch.randn((1, 768)))
        # Classifier over the concatenation of all pooled head features.
        self.fcn = nn.Sequential(nn.Linear(int(projection_size/2)*head, int(projection_size/2)),
                                 nn.ReLU(), nn.Linear(int(projection_size/2), num_labels))

    def forward(self, peptide_list, device='cpu'):
        """Score a batch of peptides.

        Args:
            peptide_list: list of (name, sequence) pairs, the format the ESM
                batch converter expects.
            device: torch device string the tokens and mask are moved to.

        Returns:
            (logits, att): class logits of shape (batch, num_labels) and the
            per-head residue attention weights of shape (batch, head, length).
        """
        peptide_length = [len(i[1]) for i in peptide_list]
        batch_converter = self.alphabet.get_batch_converter()
        _, _, batch_tokens = batch_converter(peptide_list)
        batch_tokens = batch_tokens.to(device)
        # Take representations from transformer layer 12 only.
        protein_dict = self.esm(batch_tokens, repr_layers=[12], return_contacts=False)
        # Drop the first token so positions align with residues
        # (presumably the ESM-prepended BOS/CLS token — verify).
        protein_embeddings = protein_dict["representations"][12][:, 1:, :]
        # Split the embedding dim into `head` chunks, folded into the batch dim.
        protein_embed = rearrange(protein_embeddings, 'b l (h d)-> (b h) l d', h=self.head)
        representations = self.projection(protein_embed)
        # Conv1d expects (batch, channels, length).
        representations = rearrange(representations, 'b l d -> b d l')
        representation_cov = F.relu(self.cov_1(representations))
        representation_cov = F.relu(self.cov_2(representation_cov))
        representations = rearrange(representation_cov, '(b h) d l -> b h l d', h=self.head)
        # Per-head scalar attention score at every residue position.
        att = torch.einsum('bhld,hd->bhl', representations, self.W)
        # Mask padding positions so softmax gives them zero weight.
        mask = length_to_mask(torch.tensor(peptide_length)).to(device)
        att = att.masked_fill(mask.unsqueeze(1)==0, -np.inf)
        att = F.softmax(att, dim=-1)
        # print(att)
        # Attention-weighted features, heads re-concatenated, summed over residues.
        representations = rearrange(representations * att.unsqueeze(-1), 'b h l d -> b l (h d)')
        representations = torch.sum(representations, dim=1)
        return self.fcn(representations), att
|
NeuroPredPLM/predict.py
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from .model import EsmModel
|
| 2 |
+
from .utils import load_hub_workaround
|
| 3 |
+
import torch
|
| 4 |
+
|
| 5 |
+
def predict(peptide_list, model_path, device='cpu'):
    """Score peptides with a pretrained NeuroPred-PLM model.

    Args:
        peptide_list: list of (name, sequence) pairs, as expected by the
            ESM batch converter.
        model_path: path to the saved EsmModel state dict.
        device: torch device string, e.g. 'cpu' or 'cuda'.

    Returns:
        dict mapping class names to softmax probabilities for the FIRST
        peptide in the batch. NOTE(review): attention weights and any
        peptides beyond the first are discarded — confirm this
        single-sequence contract with callers.
    """
    with torch.no_grad():
        neuroPred_model = EsmModel()
        neuroPred_model.eval()
        # state_dict = load_hub_workaround(MODEL_URL)
        state_dict = torch.load(model_path, map_location="cpu")
        neuroPred_model.load_state_dict(state_dict)
        neuroPred_model = neuroPred_model.to(device)
        prob, att = neuroPred_model(peptide_list, device)
        pred = torch.softmax(prob, dim=-1).cpu().tolist()
        # Fix: removed dead `att = att.cpu().numpy()` (result was never used)
        # and corrected the user-facing label typo "Neuroppetide".
        out = {'Neuropeptide': pred[0][1], "Non-neuropeptide": pred[0][0]}
    return out
|
NeuroPredPLM/utils.py
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
import esm
|
| 3 |
+
from argparse import Namespace
|
| 4 |
+
import pathlib
|
| 5 |
+
import urllib
|
| 6 |
+
|
| 7 |
+
def length_to_mask(length, max_len=None, dtype=None):
    """Turn a 1-D tensor of sequence lengths into a padding mask.

    Args:
        length: 1-D tensor of per-sequence lengths (shape B).
        max_len: width of the mask; falls back to length.max() when falsy.
        dtype: optional dtype to cast the boolean mask to.

    Returns:
        B x max_len tensor; True/1 at valid positions, False/0 at padding.
    """
    assert len(length.shape) == 1, 'Length shape should be 1 dimensional.'
    if not max_len:
        max_len = length.max().item()
    # Broadcast position indices (1, max_len) against lengths (B, 1).
    positions = torch.arange(max_len, device=length.device, dtype=length.dtype)
    mask = positions.unsqueeze(0) < length.unsqueeze(1)
    if dtype is not None:
        mask = mask.to(dtype=dtype)
    return mask
| 19 |
+
|
| 20 |
+
|
| 21 |
+
def load_model_and_alphabet_core(args_dict, regression_data=None):
    """Build an un-initialised ESM ProteinBertModel plus its alphabet.

    Args:
        args_dict: path to a torch-saved checkpoint whose "args" entry holds
            the original ESM command-line Namespace (architecture config).
        regression_data: unused; kept for signature compatibility with the
            upstream esm loader this function is adapted from.

    Returns:
        (model, alphabet) tuple. The model's weights are randomly
        initialised — the caller is expected to load a state dict.
    """
    checkpoint = torch.load(args_dict)
    alphabet = esm.Alphabet.from_architecture(checkpoint["args"].arch)

    # Upgrade legacy state: strip a leading "decoder_" prefix from arg names
    # so they match what ProteinBertModel expects.
    # Fix: removed the unused `prs` lambda and the pointless `model_type`
    # alias from the original.
    pra = lambda s: "".join(s.split("decoder_")[1:] if "decoder" in s else s)
    model_args = {pra(name): value for name, value in vars(checkpoint["args"]).items()}

    model = esm.ProteinBertModel(
        Namespace(**model_args),
        alphabet,
    )
    return model, alphabet
| 36 |
+
|
| 37 |
+
|
| 38 |
+
def load_hub_workaround(url):
    """Fetch a state dict from *url*, tolerating a known torch.hub quirk.

    Falls back to loading the already-downloaded checkpoint from the hub
    cache directory when torch.hub raises RuntimeError; network failures
    are re-raised with a friendlier message.
    """
    try:
        return torch.hub.load_state_dict_from_url(url, progress=False, map_location="cpu")
    except RuntimeError:
        # Pytorch version issue - see https://github.com/pytorch/pytorch/issues/43106
        checkpoint_name = pathlib.Path(url).name
        return torch.load(
            f"{torch.hub.get_dir()}/checkpoints/{checkpoint_name}",
            map_location="cpu",
        )
    except urllib.error.HTTPError:
        raise Exception(f"Could not load {url}, check your network!")
README.md
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
---
|
| 2 |
-
title: NeuroPred
|
| 3 |
emoji: 😻
|
| 4 |
colorFrom: yellow
|
| 5 |
colorTo: pink
|
|
|
|
| 1 |
---
|
| 2 |
+
title: NeuroPred-PLM
|
| 3 |
emoji: 😻
|
| 4 |
colorFrom: yellow
|
| 5 |
colorTo: pink
|
app.py
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
from NeuroPredPLM.predict import predict
|
| 3 |
+
import gradio as gr
|
| 4 |
+
from io import StringIO
|
| 5 |
+
from Bio import SeqIO
|
| 6 |
+
|
| 7 |
+
def classifier(peptide_seq):
    """Run NeuroPred-PLM on FASTA-formatted peptide sequence text.

    Args:
        peptide_seq: FASTA text from the Gradio textbox (one or more records).

    Returns:
        dict of {class name: probability} for the first record, as produced
        by NeuroPredPLM.predict.predict — suitable for a gr.Label output.
    """
    handle = StringIO(peptide_seq)
    # str() the Seq object: downstream tokenisation expects plain strings.
    data = [(record.id, str(record.seq)) for record in SeqIO.parse(handle, 'fasta')]
    device = "cuda" if torch.cuda.is_available() else "cpu"
    # BUG FIX: predict() has signature (peptide_list, model_path, device);
    # the original call predict(data, device) passed the device string as
    # the model path, so torch.load("cpu") crashed at runtime. Pass the
    # repo's checkpoint path explicitly.
    neuropeptide_pred = predict(data, "model.pth", device)
    return neuropeptide_pred
    # {peptide_id:[Type:int(1->neuropeptide,0->non-neuropeptide), attention score:nd.array]}
|
| 16 |
+
|
| 17 |
+
# Gradio UI: one FASTA textbox in, a two-class probability label out.
# Fix: gr.outputs.Label was deprecated in Gradio 3.x and removed in 4.x;
# use the top-level gr.Label component instead.
iface = gr.Interface(
    fn=classifier,
    inputs=gr.Textbox(
        label="Input peptide sequence",
        lines=3,
        value=">peptide-1\nIGLRLPNMLKF",
    ),
    outputs=gr.Label(num_top_classes=2),
    title="NeuroPred-PLM",
)
iface.launch()
|
model.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:154841aade40ce25f75ee9028046b361001c90de1cd2c6fd09ead97de076de8a
|
| 3 |
+
size 340609839
|
requirements.txt
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
torch
einops
numpy
biopython
fair-esm  # provides the `esm` module imported by NeuroPredPLM/utils.py
|