xiaoleon commited on
Commit
46b9840
·
1 Parent(s): 8a12c65

initial submission

Browse files
DeepPD/BERT/config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"hidden_size": 128, "hidden_act": "gelu", "initializer_range": 0.02, "vocab_size": 30522, "hidden_dropout_prob": 0.1, "num_attention_heads": 2, "type_vocab_size": 2, "max_position_embeddings": 512, "num_hidden_layers": 2, "intermediate_size": 512, "attention_probs_dropout_prob": 0.1}
DeepPD/BERT/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1e28abb3688c8927a0dc41d37b6b9d6e30c6c7419e5311d55ce30ed55843da91
3
+ size 17755352
DeepPD/BERT/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
DeepPD/ESM2/config.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "/tmp/facebook/esm2_t12_35M_UR50D",
3
+ "architectures": [
4
+ "EsmForMaskedLM"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.0,
7
+ "classifier_dropout": null,
8
+ "emb_layer_norm_before": false,
9
+ "esmfold_config": null,
10
+ "hidden_act": "gelu",
11
+ "hidden_dropout_prob": 0.0,
12
+ "hidden_size": 480,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 1920,
15
+ "is_folding_model": false,
16
+ "layer_norm_eps": 1e-05,
17
+ "mask_token_id": 32,
18
+ "max_position_embeddings": 1026,
19
+ "model_type": "esm",
20
+ "num_attention_heads": 20,
21
+ "num_hidden_layers": 12,
22
+ "pad_token_id": 1,
23
+ "position_embedding_type": "rotary",
24
+ "token_dropout": true,
25
+ "torch_dtype": "float32",
26
+ "transformers_version": "4.25.0.dev0",
27
+ "use_cache": true,
28
+ "vocab_list": null,
29
+ "vocab_size": 33
30
+ }
DeepPD/ESM2/esm2_t12_35M_UR50D-contact-regression.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:16641e05d830d0ce863dd152dbb8c2f3ddfa3c3ec2a66080152c8abad01d8585
3
+ size 1959
DeepPD/ESM2/esm2_t12_35M_UR50D.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7f21e80e61d16a71735163ef555d3009afb0c98da74c48e29df08606973cc55e
3
+ size 134095705
DeepPD/ESM2/model_index.json ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "StableDiffusionPipeline",
3
+ "_diffusers_version": "0.8.0",
4
+ "feature_extractor": [
5
+ "transformers",
6
+ "CLIPFeatureExtractor"
7
+ ],
8
+ "safety_checker": [
9
+ null,
10
+ null
11
+ ],
12
+ "scheduler": [
13
+ "diffusers",
14
+ "DDIMScheduler"
15
+ ],
16
+ "text_encoder": [
17
+ "transformers",
18
+ "CLIPTextModel"
19
+ ],
20
+ "tokenizer": [
21
+ "transformers",
22
+ "CLIPTokenizer"
23
+ ],
24
+ "unet": [
25
+ "diffusers",
26
+ "UNet2DConditionModel"
27
+ ],
28
+ "vae": [
29
+ "diffusers",
30
+ "AutoencoderKL"
31
+ ],
32
+ "requires_safety_checker": false
33
+ }
DeepPD/ESM2/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "<cls>",
3
+ "eos_token": "<eos>",
4
+ "mask_token": "<mask>",
5
+ "pad_token": "<pad>",
6
+ "unk_token": "<unk>"
7
+ }
DeepPD/ESM2/tokenizer_config.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "model_max_length": 1000000000000000019884624838656,
3
+ "tokenizer_class": "EsmTokenizer"
4
+ }
DeepPD/ESM2/vocab.txt ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <cls>
2
+ <pad>
3
+ <eos>
4
+ <unk>
5
+ L
6
+ A
7
+ G
8
+ V
9
+ S
10
+ E
11
+ R
12
+ T
13
+ I
14
+ D
15
+ P
16
+ K
17
+ Q
18
+ N
19
+ F
20
+ Y
21
+ M
22
+ H
23
+ W
24
+ C
25
+ X
26
+ B
27
+ U
28
+ Z
29
+ O
30
+ .
31
+ -
32
+ <null_1>
33
+ <mask>
DeepPD/__pycache__/config.cpython-38.pyc ADDED
Binary file (1.03 kB). View file
 
DeepPD/__pycache__/data_helper.cpython-38.pyc ADDED
Binary file (6.37 kB). View file
 
DeepPD/__pycache__/model.cpython-38.pyc ADDED
Binary file (5.86 kB). View file
 
DeepPD/__pycache__/predictor.cpython-38.pyc ADDED
Binary file (1.17 kB). View file
 
DeepPD/__pycache__/utils.cpython-38.pyc ADDED
Binary file (2.28 kB). View file
 
DeepPD/__pycache__/utils_etfc.cpython-38.pyc ADDED
Binary file (10.7 kB). View file
 
DeepPD/config.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import torch
3
+
4
class ArgsConfig:
    """Training / inference hyper-parameter container for DeepPD."""

    def __init__(self) -> None:
        # --- optimisation settings ---
        self.batch_size = 192
        self.embedding_size = 480
        self.epochs = 50
        self.kflod = 5  # (sic — kept for compatibility) number of CV folds
        self.max_len = 40
        self.lr = 1.5e-3
        self.weight_decay = 0
        self.is_autocast = False
        self.info_bottleneck = False
        self.dropout = 0.6
        self.IB_beta = 1e-3
        # --- run bookkeeping ---
        self.model_name = 'DeepPD_C'
        self.exp_nums = 0.0
        self.aa_dict = 'esm'  # 'protbert' / 'esm' / None
        self.info = f""  # free-form note describing the current training run
        # --- pretrained ESM2 checkpoint and which representation layer to read ---
        self.ems_path = './DeepPD/ESM2/esm2_t12_35M_UR50D.pt'
        self.esm_layer_idx = 12
        # --- data split / misc ---
        self.random_seed = 2023
        self.num_classes = 21
        self.split_size = 0.8
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
37
+
DeepPD/data_helper.py ADDED
@@ -0,0 +1,195 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import torch
3
+ import torch.nn.utils.rnn as rnn_utils
4
+
5
def Data2EqlTensor(lines, max_len):
    """Encode (id, peptide) pairs into an equal-length index tensor.

    Sequences shorter than ``max_len`` are kept as-is, except that the first
    short sequence is explicitly padded to ``max_len`` so that
    ``pad_sequence`` always produces width ``max_len`` even when every input
    is short. Sequences of length >= ``max_len`` are truncated to their first
    and last ``max_len // 2`` residues.

    Args:
        lines: iterable of ``(id, peptide_string)`` pairs.
        max_len: target sequence length.

    Returns:
        Tuple ``(data, ids)``: a ``[N, max_len]`` tensor of vocabulary
        indices and the list of input ids.
    """
    # ESM-style vocabulary; rare / non-standard residues map to the pad index.
    aa_dict = {'[PAD]': 1, 'L': 4, 'A': 5, 'G': 6, 'V': 7, 'S': 8, 'E': 9, 'R': 10, 'T': 11, 'I': 12, 'D': 13, 'P': 14, 'K': 15, 'Q': 16,
               'N': 17, 'F': 18, 'Y': 19, 'M': 20, 'H': 21, 'W': 22, 'C': 23, 'X': 1, 'B': 1, 'U': 1, 'Z': 1, 'O': 1}

    default_padding_value = 1
    dict_padding_value = aa_dict.get('[PAD]', default_padding_value)
    if '[PAD]' not in aa_dict:
        print(f"No padding value in the implicit dictionary, set to {default_padding_value} by default")
    # BUGFIX: report the padding value actually used (the dictionary one),
    # not the fallback default — the old log was misleading.
    print('padding_value:', dict_padding_value)

    long_pep_counter = 0
    pep_codes = []
    ids = []
    pad_flag = True  # pad only the FIRST short sequence up to max_len
    half = max_len // 2
    for pep_id, pep in lines:
        ids.append(pep_id)
        if len(pep) < max_len:
            current_pep = [aa_dict[aa] for aa in pep]
            if pad_flag:
                current_pep.extend([dict_padding_value] * (max_len - len(current_pep)))
                pad_flag = False
            pep_codes.append(torch.tensor(current_pep))
        else:
            # Keep the head and tail halves of over-length peptides.
            new_pep = pep[:half] + pep[len(pep) - half:]
            pep_codes.append(torch.tensor([aa_dict[aa] for aa in new_pep]))
            long_pep_counter += 1

    print("length>" + str(max_len) + ':', long_pep_counter)
    data = rnn_utils.pad_sequence(pep_codes, batch_first=True, padding_value=dict_padding_value)

    return data, ids
52
+
53
def Seqs2EqlTensor(file_path: str, max_len: int, AminoAcid_vocab=None):
    """Read a ``sequence,label`` file and encode it into equal-length tensors.

    Args:
        file_path: path to the data file, one ``<peptide>,<label>`` pair per line.
        max_len: maximum sequence length after conversion.
        AminoAcid_vocab: ``'esm'`` / ``'protbert'`` / ``None`` — which index
            mapping to use; ``None`` selects a simple ordinal dictionary.

    Returns:
        Tuple ``(data, labels)``: a ``[N, max_len]`` index tensor and a 1-D
        label tensor.
    """
    # Only the 20 standard amino acids keep distinct indices; the handful of
    # non-standard residues all map to the padding index. With the esm /
    # protbert dictionaries, nn.Embedding needs vocab_size = 25.
    if AminoAcid_vocab == 'esm':
        aa_dict = {'[PAD]': 1, 'L': 4, 'A': 5, 'G': 6, 'V': 7, 'S': 8, 'E': 9, 'R': 10, 'T': 11, 'I': 12, 'D': 13, 'P': 14, 'K': 15, 'Q': 16,
                   'N': 17, 'F': 18, 'Y': 19, 'M': 20, 'H': 21, 'W': 22, 'C': 23, 'X': 1, 'B': 1, 'U': 1, 'Z': 1, 'O': 1}
    elif AminoAcid_vocab == 'protbert':
        aa_dict = {'[PAD]': 0, 'L': 5, 'A': 6, 'G': 7, 'V': 8, 'E': 9, 'S': 10, 'I': 11, 'K': 12, 'R': 13, 'D': 14, 'T': 15,
                   'P': 16, 'N': 17, 'Q': 18, 'F': 19, 'Y': 20, 'M': 21, 'H': 22, 'C': 23, 'W': 24, 'X': 0, 'U': 0, 'B': 0, 'Z': 0, 'O': 0}
    else:
        aa_dict = {'[PAD]': 0, 'A': 1, 'C': 2, 'D': 3, 'E': 4, 'F': 5, 'G': 6, 'H': 7, 'I': 8, 'K': 9, 'L': 10, 'M': 11, 'N': 12, 'P': 13, 'Q': 14, 'R': 15,
                   'S': 16, 'T': 17, 'V': 18, 'W': 19, 'Y': 20, 'U': 0, 'X': 0, 'J': 0}

    default_padding_value = 0
    dict_padding_value = aa_dict.get('[PAD]', default_padding_value)
    if '[PAD]' not in aa_dict:
        print(f"No padding value in the implicit dictionary, set to {default_padding_value} by default")

    with open(file_path, 'r') as inf:
        lines = inf.read().splitlines()

    long_pep_counter = 0
    pep_codes = []
    labels = []
    pos_count = 0
    neg_count = 0
    half = max_len // 2
    for line in lines:
        pep, label = line.split(",")
        labels.append(int(label))
        if int(label) == 1:
            pos_count += 1
        else:
            neg_count += 1

        seq_len = len(pep)
        if seq_len <= max_len:
            # Characters not in the dictionary are silently dropped.
            current_pep = [aa_dict[aa.upper()] for aa in pep if aa.upper() in aa_dict]
            pep_codes.append(torch.tensor(current_pep))
        else:
            # Keep the head and tail halves of over-length peptides.
            new_pep = pep[:half] + pep[seq_len - half:]
            # BUGFIX: apply the same case-folding / membership filtering as
            # the short-sequence branch — previously a lowercase or unknown
            # residue in a long sequence raised KeyError.
            current_pep = [aa_dict[aa.upper()] for aa in new_pep if aa.upper() in aa_dict]
            pep_codes.append(torch.tensor(current_pep))
            long_pep_counter += 1

    print("length > {}:{},postive sample:{},negative sample:{}".format(max_len, long_pep_counter, pos_count, neg_count))
    data = rnn_utils.pad_sequence(pep_codes, batch_first=True, padding_value=dict_padding_value)
    return data, torch.tensor(labels)
121
+
122
def Numseq2OneHot(numseq):
    """Convert a batch of index sequences into 20-dim one-hot encodings.

    Each index v lights up column (v - 1); an index of 0 therefore wraps to
    the LAST column (Python -1 indexing), matching the original scheme.
    """
    encoded = []
    for seq in numseq:
        indices = seq.cpu().numpy()
        onehot = torch.zeros(len(indices), 20)
        for pos, val in enumerate(indices):
            onehot[pos][val - 1] = 1
        encoded.append(np.array(onehot))

    return torch.tensor(np.array(encoded))
133
+
134
def index_alignment(batch, condition_num=0, subtraction_num1=4, subtraction_num2=1):
    """Align another protein-language-model's dictionary indices with the
    default dictionary: amino-acid indices end up in [1, 20] and [PAD]
    becomes 0 or 1.

    For the "esm" model use condition_num=1, subtraction_num1=3,
    subtraction_num2=1; for "protbert" use condition_num=0, subtraction_num1=4.

    Args:
        batch: [batch_size, seq_len] integer tensor.
        condition_num: the [PAD] index of the source dictionary (0 or 1).
        subtraction_num1: offset subtracted from non-[PAD] entries.
        subtraction_num2: offset subtracted from [PAD] entries (only when
            condition_num == 1).

    Returns:
        [batch_size, seq_len] tensor of realigned indices.

    Raises:
        ValueError: if condition_num is not 0 or 1. (Previously this path
        left `output` unbound and raised an opaque UnboundLocalError.)
    """
    condition = batch == condition_num
    # Per-element offset for non-[PAD] entries.
    subtraction = torch.full_like(batch, subtraction_num1)
    if condition_num == 0:
        # [PAD] entries (already 0) stay put; everything else shifts down.
        return torch.where(condition, batch, batch - subtraction)
    if condition_num == 1:
        # [PAD] entries shift by subtraction_num2, the rest by subtraction_num1.
        subtraction_pad = torch.full_like(batch, subtraction_num2)
        return torch.where(condition, batch - subtraction_pad, batch - subtraction)
    raise ValueError(f"condition_num must be 0 or 1, got {condition_num}")
160
+
161
# BLOSUM62 substitution matrix, keyed by the *default* vocabulary index of
# each amino acid (stringified int, see the default aa_dict in
# Seqs2EqlTensor). Column order of the 20 scores is
# A R N D C Q E G H I L K M F P S T W Y V; key '0' is the all-zero row used
# for [PAD] / unknown residues.
blosum62 = {
    '1': [4, -1, -2, -2, 0, -1, -1, 0, -2, -1, -1, -1, -1, -2, -1, 1, 0, -3, -2, 0],  # A
    '15': [-1, 5, 0, -2, -3, 1, 0, -2, 0, -3, -2, 2, -1, -3, -2, -1, -1, -3, -2, -3],  # R
    '12': [-2, 0, 6, 1, -3, 0, 0, 0, 1, -3, -3, 0, -2, -3, -2, 1, 0, -4, -2, -3],  # N
    '3': [-2, -2, 1, 6, -3, 0, 2, -1, -1, -3, -4, -1, -3, -3, -1, 0, -1, -4, -3, -3],  # D
    '2': [0, -3, -3, -3, 9, -3, -4, -3, -3, -1, -1, -3, -1, -2, -3, -1, -1, -2, -2, -1],  # C
    '14': [-1, 1, 0, 0, -3, 5, 2, -2, 0, -3, -2, 1, 0, -3, -1, 0, -1, -2, -1, -2],  # Q
    '4': [-1, 0, 0, 2, -4, 2, 5, -2, 0, -3, -3, 1, -2, -3, -1, 0, -1, -3, -2, -2],  # E
    '6': [0, -2, 0, -1, -3, -2, -2, 6, -2, -4, -4, -2, -3, -3, -2, 0, -2, -2, -3, -3],  # G
    '7': [-2, 0, 1, -1, -3, 0, 0, -2, 8, -3, -3, -1, -2, -1, -2, -1, -2, -2, 2, -3],  # H
    '8': [-1, -3, -3, -3, -1, -3, -3, -4, -3, 4, 2, -3, 1, 0, -3, -2, -1, -3, -1, 3],  # I
    '10': [-1, -2, -3, -4, -1, -2, -3, -4, -3, 2, 4, -2, 2, 0, -3, -2, -1, -2, -1, 1],  # L
    '9': [-1, 2, 0, -1, -3, 1, 1, -2, -1, -3, -2, 5, -1, -3, -1, 0, -1, -3, -2, -2],  # K
    '11': [-1, -1, -2, -3, -1, 0, -2, -3, -2, 1, 2, -1, 5, 0, -2, -1, -1, -1, -1, 1],  # M
    '5': [-2, -3, -3, -3, -2, -3, -3, -3, -1, 0, 0, -3, 0, 6, -4, -2, -2, 1, 3, -1],  # F
    '13': [-1, -2, -2, -1, -3, -1, -1, -2, -2, -3, -3, -1, -2, -4, 7, -1, -1, -4, -3, -2],  # P
    '16': [1, -1, 1, 0, -1, 0, 0, 0, -1, -2, -2, 0, -1, -2, -1, 4, 1, -3, -2, -2],  # S
    '17': [0, -1, 0, -1, -1, -1, -1, -2, -2, -1, -1, -1, -1, -2, -1, 1, 5, -2, -2, 0],  # T
    '19': [-3, -3, -4, -4, -2, -2, -3, -2, -2, -3, -2, -3, -1, 1, -4, -3, -2, 11, 2, -3],  # W
    '20': [-2, -2, -2, -3, -2, -1, -2, -3, 2, -1, -1, -2, -1, 3, -3, -2, -2, 2, 7, -1],  # Y
    '18': [0, -3, -3, -3, -1, -2, -2, -3, -3, 3, 1, -2, 1, -1, -2, -2, 0, -3, -1, 4],  # V
    '0': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],  # -
}
184
+
185
def get_blosum62(seq):
    """Look up the BLOSUM62 row for every index in one sequence tensor.

    Returns a [seq_len, 20] numpy array (None rows if an index is unknown,
    same as the original dict.get behaviour).
    """
    rows = [blosum62.get(str(idx)) for idx in seq.tolist()]
    return np.array(rows)
190
+
191
def seqs2blosum62(sequences):
    """Stack per-sequence BLOSUM62 encodings into a [B, S, 20] double tensor."""
    stacked = np.array([get_blosum62(seq) for seq in sequences], dtype=float)
    return torch.from_numpy(stacked)
DeepPD/model.py ADDED
@@ -0,0 +1,226 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn as nn
3
+ import torch.nn.functional as F
4
+ from DeepPD.utils import CBAMBlock,Res_Net
5
+ from DeepPD.data_helper import Numseq2OneHot
6
+ from transformers import BertModel
7
+
8
+ bert_wight = BertModel.from_pretrained("./DeepPD/BERT")
9
class MyModel(nn.Module):
    """Two-branch peptide classifier.

    Branch 1: 108-d token embedding concatenated with a 20-d one-hot code,
    passed through a Transformer encoder, a 2-D conv / residual / CBAM stack
    and a bidirectional GRU.
    Branch 2: the pretrained BERT encoder (module-level ``bert_wight``)
    followed by the same (shared) GRU.
    The concatenated branch features (4200-d) go through an MLP to 2 logits.
    """

    def __init__(self):
        super().__init__()
        # NOTE(review): despite its name, `batch_size` is used as the channel
        # count of the 2-D conv stack (64), not the data-loader batch size —
        # confirm before renaming.
        batch_size = 64
        vocab_size = 21
        self.hidden_dim = 25   # GRU hidden size per direction
        self.gru_emb = 128     # GRU input width (= 108 embedding + 20 one-hot)
        self.emb_dim = 108

        # Pretrained BERT weights, loaded once at module import time.
        self.model = bert_wight
        self.gru = nn.GRU(self.gru_emb, self.hidden_dim, num_layers=2,
                    bidirectional=True,dropout=0.1)
        self.embedding = nn.Embedding(vocab_size, self.emb_dim, padding_idx=0)
        self.encoder_layer = nn.TransformerEncoderLayer(d_model=128, nhead=8)
        self.transformer_encoder = nn.TransformerEncoder(self.encoder_layer, num_layers=1)

        self.resnet = Res_Net(batch_size)
        self.cbamBlock = CBAMBlock(batch_size)

        # 1 -> 64 channels before the residual/CBAM stack, back to 1 after.
        self.convblock1 = nn.Sequential(
            nn.Conv2d(1,batch_size,1),
            nn.BatchNorm2d(batch_size),
            nn.LeakyReLU()
        )
        self.convblock2 = nn.Sequential(
            nn.Conv2d(batch_size,1,1),
            nn.BatchNorm2d(1),
            nn.LeakyReLU()
        )

        # 4200 = 2100 (embedding branch) + 2100 (BERT branch); see forward().
        self.fc = nn.Sequential( nn.Linear(4200,512),
                                 nn.BatchNorm1d(512),
                                 nn.LeakyReLU(),
                                 nn.Linear(512,32),
                                 nn.BatchNorm1d(32),
                                 nn.LeakyReLU(),
                                 nn.Linear(32,2))

    def forward(self, x):
        """x: [B, 40] token indices -> [B, 2] raw logits."""
        xx = self.embedding(x)              # [B, 40, 108]
        z = Numseq2OneHot(x)                # [B, 40, 20]
        z = z.type_as(xx)
        out = torch.cat([xx,z],2)           # [B, 40, 128]
        out = self.transformer_encoder(out)

        out = out.unsqueeze(1)
        out = self.convblock1(out)          # [B, 64, 40, 128]
        out = self.resnet(out)
        out = self.resnet(out)              # same residual block applied twice (shared weights)
        out = self.cbamBlock(out)
        out = self.convblock2(out)          # [B, 1, 40, 128]
        out = out.squeeze(1)
        out = out.permute(1,0,2)            # [40, B, 128] — GRU is seq-first here
        out,hn = self.gru(out)
        out = out.permute(1,0,2)            # [B, 40, 50]
        hn = hn.permute(1,0,2)              # [B, 4, 25]
        out = out.reshape(out.shape[0],-1)  # [B, 2000]
        hn = hn.reshape(hn.shape[0],-1)     # [B, 100]
        out = torch.cat([out,hn],1)         # [B, 2100]

        out1 = self.model(x)[0]             # BERT last hidden state, [B, 40, 128]
        out1 = out1.permute(1,0,2)          # [40, B, 128]
        out1,hn1 = self.gru(out1)           # NOTE: GRU shared with the other branch
        out1 = out1.permute(1,0,2)          # [B, 40, 50]
        hn1= hn1.permute(1,0,2)             # [B, 4, 25]
        out1 = out1.reshape(out1.shape[0],-1)  # [B, 2000]
        hn1 = hn1.reshape(hn1.shape[0],-1)     # [B, 100]
        out1 = torch.cat([out1,hn1],1)      # [B, 2100]

        out = torch.cat([out1,out],1)       # [B, 4200]
        out = self.fc(out)

        return out
82
+
83
+
84
+ from DeepPD.utils_etfc import *
85
+ import torch,esm
86
+ import torch.nn as nn
87
+ from DeepPD.data_helper import index_alignment,seqs2blosum62
88
+ import torch.nn.functional as f
89
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
90
class DeepPD(nn.Module):
    """DeepPD peptide-detectability model combining three feature streams.

    A: learned embedding + positional encoding + self-attention, with a
       residual add of the frozen ESM2 per-residue representation;
    B: BLOSUM62 evolutionary profile of the sequence;
    C: a linear projection of the ESM2 representation.
    A||B goes through multi-kernel 1-D convolutions and a BiGRU; the result
    is concatenated with C and classified, optionally through a variational
    information-bottleneck head (ToxIBTL-style).
    """

    def __init__(self, vocab_size:int, embedding_size:int, fan_layer_num:int, num_heads:int,encoder_layer_num:int=1,seq_len: int=40,
                 output_size:int=2, layer_idx=None,esm_path=None,dropout:float=0.6, max_pool: int=4,Contrastive_Learning=False,info_bottleneck=False):
        super(DeepPD, self).__init__()

        self.vocab_size = vocab_size
        self.embedding_size = embedding_size
        self.output_size = output_size
        self.seq_len = seq_len
        self.dropout = dropout
        self.dropout_layer = nn.Dropout(self.dropout)
        self.encoder_layer_num = encoder_layer_num
        self.fan_layer_num = fan_layer_num
        self.num_heads = num_heads
        self.max_pool = max_pool
        self.ctl = Contrastive_Learning
        self.info_bottleneck = info_bottleneck

        # Pretrained ESM2, kept in eval mode and run under no_grad in forward;
        # layer_idx selects which representation layer to read.
        self.ESMmodel,_ = esm.pretrained.load_model_and_alphabet_local(esm_path)
        self.ESMmodel.eval()
        self.layer_idx = layer_idx

        # Parallel 1-D convolutions with different kernel sizes over the
        # concatenated A||B features ("same" padding keeps the length).
        self.out_chs = 64
        self.kernel_sizes = [3,7]
        self.all_conv = nn.ModuleList([
            nn.Sequential(
                nn.Conv1d(self.embedding_size+20,out_channels=self.out_chs,kernel_size=self.kernel_sizes[i],padding=(self.kernel_sizes[i]-1)//2), #padding=(self.kernel_sizes[i]-1)//2,
                nn.BatchNorm1d(self.out_chs),
                nn.LeakyReLU()
            )
            for i in range(len(self.kernel_sizes))
        ])

        self.hidden_dim = 64
        self.gru = nn.GRU(self.out_chs*2, self.hidden_dim, num_layers=2, batch_first=True,
                          bidirectional=True,dropout=0.25)

        self.embed = nn.Embedding(self.vocab_size, self.embedding_size)
        # self.encoder_layer = nn.TransformerEncoderLayer(d_model=self.embedding_size,nhead=self.num_heads,dropout=self.dropout)
        # self.transformer_encoder = nn.TransformerEncoder(self.encoder_layer, num_layers=1)
        # self.MaxPool1d = nn.MaxPool1d(kernel_size=self.max_pool)  # stride defaults to kernel_size

        self.pos_encoding = PositionalEncoding(num_hiddens=self.embedding_size,dropout=self.dropout)
        self.attention_encode = AttentionEncode(self.dropout, self.embedding_size, self.num_heads,seq_len=self.seq_len,ffn=False)

        # Flattened width fed to the heads: 40 * (GRU output 128 + projection 64).
        shape = int(40*(64*2+64)) # +64
        # self.fan = FAN_encode(self.dropout, shape)

        # Variational information-bottleneck head (used when info_bottleneck=True).
        z_dim = 1024
        self.enc_mean = nn.Linear(shape,z_dim)
        self.enc_std = nn.Linear(shape,z_dim)
        self.dec = nn.Sequential(
            nn.Linear(z_dim,128),
            nn.BatchNorm1d(128),
            nn.LeakyReLU(),
            nn.Linear(128,self.output_size)
        )

        # Plain MLP head (used when info_bottleneck=False) and feature-C projection.
        self.proj_layer = nn.Linear(self.embedding_size,self.out_chs)
        self.fc = nn.Sequential(
            nn.Linear(shape,z_dim),
            nn.BatchNorm1d(z_dim),
            nn.LeakyReLU(),
            nn.Linear(z_dim,128),
            nn.BatchNorm1d(128),
            nn.LeakyReLU(),
            nn.Linear(128,self.output_size)
        )

    def CNN1DNet(self,x):
        """Apply every parallel conv branch to x [B, C, S] and concatenate
        the outputs along the channel axis -> [B, out_chs * n_kernels, S]."""
        for i in range(len(self.kernel_sizes)):
            conv = self.all_conv[i]
            conv_x = conv(x)
            # conv_x = self.MaxPool1d(conv_x)
            if i == 0:
                all_feats = conv_x
            else:
                all_feats = torch.cat([all_feats,conv_x],dim=1)
        return all_feats

    def forward(self, x):
        """x: [B, S=40] ESM-vocabulary token indices.

        Returns (logits, enc_mean, enc_std) when info_bottleneck is set,
        otherwise (logits, logits, logits) so callers can unpack uniformly.
        """
        # Frozen ESM2 per-residue embedding.
        with torch.no_grad():
            results = self.ESMmodel(x, repr_layers=[self.layer_idx], return_contacts=False)
        esm_x = results["representations"][self.layer_idx]  # [B, S, 480] for t12 (640/1280 for larger ESM2 variants)

        # Remap ESM indices onto the default dictionary before embedding /
        # BLOSUM lookup (esm: condition_num=1, offsets 3/1).
        x = index_alignment(x,condition_num=1,subtraction_num1=3,subtraction_num2=1)
        # Feature A: embedding + positional encoding + self-attention.
        embed_x = self.embed(x)  # [B, S, embedding_size]
        pos_x = self.pos_encoding(embed_x * math.sqrt(self.embedding_size))  # [B, S, embedding_size]
        encoding_x = pos_x  # [B, S, 480]

        for _ in range(self.encoder_layer_num):
            encoding_x = self.attention_encode(encoding_x)
        encoding_x += embed_x        # residual add of the raw embedding
        featA = encoding_x + esm_x   # residual add of the ESM representation

        # Feature B: BLOSUM62 evolutionary profile.
        pssm = seqs2blosum62(x).to(device)  # [B, S, 20]
        featB = pssm.type_as(embed_x)
        featAB = torch.cat([featA,featB],dim=2)  # [B, S, 480+20]

        cnn_input = featAB.permute(0, 2, 1)   # [B, H, S]
        cnn_output = self.CNN1DNet(cnn_input) # [B, out_chs*2, S]
        out = self.dropout_layer(cnn_output)
        # out = self.dropout_layer(featA)
        out = out.permute(0,2,1)  # [B, S, out_chs*2]
        out,_ = self.gru(out)

        out = self.dropout_layer(out)
        final_featAB = out.reshape(x.size(0),-1)  # [B, S * hidden_dim * 2]

        # Feature C: linear projection of the ESM representation.
        featC = self.proj_layer(esm_x)
        featC = self.dropout_layer(featC)
        featC = featC.reshape(featC.shape[0],-1)

        feat = torch.cat([final_featAB,featC],1)
        final_feat = self.dropout_layer(feat)  # [B, S*(64*2+64)]
        # final_feat = final_featAB
        # final_feat = featC

        if self.info_bottleneck:
            # ToxIBTL-style variational prediction head: z ~ N(mean, std)
            # via the reparameterisation trick.
            enc_mean, enc_std = self.enc_mean(final_feat), f.softplus(self.enc_std(final_feat)-5)
            eps = torch.randn_like(enc_std)
            IB_out = enc_mean + enc_std*eps
            logits = self.dec(IB_out)
            return logits,enc_mean,enc_std
            # return featA,featB,featAB,final_featAB,featC,enc_mean
        else:
            # Plain fully-connected head.
            logits = self.fc(final_feat)
            return logits,logits,logits
            # return featA,featB,featAB,final_featAB,featC,logits
DeepPD/predictor.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from DeepPD.model import MyModel,DeepPD
2
+ import torch
3
+ import torch.nn as nn
4
+ from DeepPD.config import ArgsConfig
5
+
6
+ args = ArgsConfig()
7
+
8
+ softmax = nn.Softmax(1)
9
def predict(seqs, data, model_path, threshold=0.5, device=args.device):
    """Run the DeepPD classifier on pre-encoded sequences and format results.

    Args:
        seqs: [N, max_len] index tensor (already encoded).
        data: iterable of records whose first two fields are carried through
            to the output (e.g. id and raw sequence).
        model_path: path to a saved state_dict.
        threshold: probability cut-off for the 'Peptide' call.
        device: torch device used for inference.

    Returns:
        List of ``[field0, field1, "prob", "Peptide"/"Non-Peptide"]`` rows.
    """
    with torch.no_grad():
        model = DeepPD(vocab_size=21, embedding_size=args.embedding_size,
                       esm_path=args.ems_path, layer_idx=args.esm_layer_idx,
                       seq_len=args.max_len, dropout=args.dropout,
                       fan_layer_num=1, num_heads=8, encoder_layer_num=1,
                       Contrastive_Learning=False,
                       info_bottleneck=args.info_bottleneck).to(args.device)
        model.eval()
        # strict=False: tolerate missing/unexpected keys in the checkpoint.
        state_dict = torch.load(model_path, map_location=device)
        model.load_state_dict(state_dict, strict=False)
        model.to(device)
        seqs = seqs.to(device)
        logits, _, _ = model(seqs)
        prob = softmax(logits)[:, 1]

        results = []
        for record, p in zip(data, prob):
            verdict = 'Peptide' if p > threshold else 'Non-Peptide'
            results.append([record[0], record[1], f"{p:.3f}", verdict])

        return results
DeepPD/utils.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn as nn
3
+
4
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
5
+
6
class Res_Net(nn.Module):
    """Residual 2-D conv block: conv1/bn1 applied twice with a skip connection.

    NOTE(review): conv1 and bn1 are each applied twice in forward (shared
    weights) — possibly meant to be two distinct layers; preserved as-is.
    conv2, conv3, cbamBlock and relu1 are constructed but never used in
    forward; they are kept so parameter initialisation and state_dict keys
    stay identical.
    """

    def __init__(self, input_cha):
        super(Res_Net, self).__init__()
        self.conv1 = nn.Conv2d(input_cha, input_cha, 3, padding=1)
        self.conv2 = nn.Conv2d(input_cha, input_cha, 5, padding=2)
        self.conv3 = nn.Conv2d(input_cha, input_cha, 7, padding=3)

        self.cbamBlock = CBAMBlock(input_cha)

        self.bn1 = nn.BatchNorm2d(input_cha)
        self.relu1 = nn.ReLU()
        self.relu2 = nn.LeakyReLU()

    def forward(self, x):
        residual = x

        h = self.relu2(self.bn1(self.conv1(x)))
        h = self.bn1(self.conv1(h))
        h = h + residual
        return self.relu2(h)
32
+
33
+ class CBAMBlock(nn.Module):
34
+ def __init__(self, channel, reduction=16):
35
+ super(CBAMBlock, self).__init__()
36
+ self.avg_pool = nn.AdaptiveAvgPool2d(1)
37
+ self.max_pool = nn.AdaptiveMaxPool2d(1)
38
+
39
+ self.channel_excitation = nn.Sequential(nn.Linear(channel,int(channel//reduction),bias=False),
40
+ nn.ReLU(inplace=True),
41
+ nn.Linear(int(channel//reduction),channel,bias=False),
42
+ )
43
+ self.sigmoid = nn.Sigmoid()
44
+
45
+ self.spatial_excitation = nn.Sequential(nn.Conv2d(2, 1, kernel_size=7,
46
+ stride=1, padding=3, bias=False),
47
+ )
48
+
49
+
50
+ def forward(self, x):
51
+ bahs, chs, _, _ = x.size() #16 16 24 42
52
+
53
+ # Returns a new tensor with the same data as the self tensor but of a different size.
54
+ chn_avg = self.avg_pool(x).view(bahs, chs)
55
+ chn_avg = self.channel_excitation(chn_avg).view(bahs, chs, 1, 1)
56
+ chn_max = self.max_pool(x).view(bahs, chs)
57
+ chn_max = self.channel_excitation(chn_max).view(bahs, chs, 1, 1)
58
+ chn_add=chn_avg+chn_max
59
+ chn_add=self.sigmoid(chn_add)
60
+
61
+ chn_cbam = torch.mul(x, chn_add)
62
+
63
+ avg_out = torch.mean(chn_cbam, dim=1, keepdim=True)
64
+ max_out, _ = torch.max(chn_cbam, dim=1, keepdim=True)
65
+ cat = torch.cat([avg_out, max_out], dim=1)
66
+
67
+ spa_add = self.spatial_excitation(cat)
68
+ spa_add = self.sigmoid(spa_add)
69
+ spa_cbam = torch.mul(chn_cbam, spa_add)
70
+
71
+ return spa_cbam
DeepPD/utils_etfc.py ADDED
@@ -0,0 +1,367 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import math
2
+ import torch
3
+ from torch import nn
4
+
5
+
6
class AddNorm(nn.Module):
    """Residual connection followed by layer normalization."""

    def __init__(self, normalized, dropout):
        super(AddNorm, self).__init__()
        self.dropout = nn.Dropout(dropout)
        self.ln = nn.LayerNorm(normalized)

    def forward(self, x, y):
        # Dropout applies to the sublayer output only; then add & normalize.
        residual_sum = x + self.dropout(y)
        return self.ln(residual_sum)
16
+
17
+
18
class PositionWiseFFN(nn.Module):
    """Position-wise feed-forward network: Linear -> ReLU -> Linear, mapping
    back to the input width so it can sit inside a residual block.

    Applied independently at every position (the linear layers act on the
    last axis only).
    """

    def __init__(self, ffn_input, ffn_hiddens,mlp_bias=True):
        super(PositionWiseFFN, self).__init__()
        self.ffn = nn.Sequential(
            nn.Linear(ffn_input, ffn_hiddens, bias=mlp_bias),
            nn.ReLU(),
            nn.Linear(ffn_hiddens, ffn_input, bias=mlp_bias),
        )

    def forward(self, x):
        return self.ffn(x)
31
+
32
+ from torch.autograd import Variable
33
class PositionalEncoding1(nn.Module):
    """Sinusoidal positional encoding (log-space frequency computation).

    Adds ``pe[:, :seq_len]`` to the input and applies dropout. Even feature
    columns get sin, odd columns cos, with frequencies decaying as
    ``10000^(-2i/d_model)``.
    """

    def __init__(self, d_model, dropout, max_len=5000):
        super(PositionalEncoding1, self).__init__()
        self.dropout = nn.Dropout(p=dropout)

        # Compute the positional encodings once in log space.
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2) *
                             -(math.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        pe = pe.unsqueeze(0)
        self.register_buffer('pe', pe)

    def forward(self, x):
        # Buffers never require grad, so the deprecated
        # torch.autograd.Variable(..., requires_grad=False) wrapper used
        # previously is unnecessary; the computation is identical.
        x = x + self.pe[:, :x.size(1)]
        return self.dropout(x)
53
+
54
class PositionalEncoding(nn.Module):
    """Sinusoidal positional encoding (direct-power frequency computation)."""

    def __init__(self, num_hiddens, dropout, max_len=1000):
        super(PositionalEncoding, self).__init__()
        self.dropout = nn.Dropout(dropout)
        # Precompute a long-enough table P of shape [1, max_len, num_hiddens].
        self.P = torch.zeros((1, max_len, num_hiddens))
        positions = torch.arange(max_len, dtype=torch.float32).reshape(-1, 1)
        freqs = torch.pow(10000, torch.arange(0, num_hiddens, 2,
                                              dtype=torch.float32) / num_hiddens)
        angles = positions / freqs
        self.P[:, :, 0::2] = torch.sin(angles)
        self.P[:, :, 1::2] = torch.cos(angles)

    def forward(self, X):
        # NOTE(review): P is a plain attribute, not a registered buffer, so
        # it is copied to X's device on every call — confirm this is intended.
        X = X + self.P[:, :X.shape[1], :].to(X.device)
        return self.dropout(X)
70
+
71
+
72
class AttentionEncode(nn.Module):
    """Multi-head self-attention block with add&norm (optional FFN sub-layer).

    NOTE(review): nn.MultiheadAttention is constructed without batch_first,
    so it interprets dim 0 as the sequence axis, while DeepPD.forward passes
    [batch, seq, embed] — confirm the intended layout.
    """

    def __init__(self, dropout, embedding_size, num_heads,seq_len: int=40,ffn=False):
        super(AttentionEncode, self).__init__()
        self.dropout = dropout
        self.embedding_size = embedding_size
        self.num_heads = num_heads
        self.seq_len = seq_len
        self.is_ffn = ffn

        # NOTE(review): attention dropout is hard-coded to 0.6 rather than
        # using the `dropout` argument — confirm whether this is intentional.
        self.att = nn.MultiheadAttention(embed_dim=self.embedding_size,
                                         num_heads=num_heads,
                                         dropout=0.6
                                         )

        # Add & layer-norm over the last two axes [seq_len, embedding_size].
        self.addNorm = AddNorm(normalized=[self.seq_len, self.embedding_size], dropout=self.dropout)

        self.FFN = PositionWiseFFN(ffn_input=self.embedding_size, ffn_hiddens=self.embedding_size*2)

    def forward(self, x):
        bs,_,_ = x.size()
        # Self-attention (query = key = value = x) followed by residual + norm.
        MHAtt, _ = self.att(x, x, x)
        MHAtt_encode = self.addNorm(x, MHAtt)

        if self.is_ffn:
            # Optional position-wise FFN sub-layer with its own add&norm.
            ffn_in = MHAtt_encode  # [bs, seq_len, feat_dims]
            ffn_out = self.FFN(ffn_in)
            MHAtt_encode = self.addNorm(ffn_in,ffn_out)

        return MHAtt_encode
102
+
103
+
104
class FAN_encode(nn.Module):
    """Feed-forward + add&norm encoder applied to a flattened feature vector.

    Expects x of shape [B, 1, shape]; self.ln is constructed but unused in
    forward (kept so state_dict keys stay identical).
    """

    def __init__(self, dropout, shape):
        super(FAN_encode, self).__init__()
        self.dropout = dropout
        self.addNorm = AddNorm(normalized=[1, shape], dropout=self.dropout)
        self.FFN = PositionWiseFFN(ffn_input=shape, ffn_hiddens=(2 * shape))
        self.ln = nn.LayerNorm(shape)

    def forward(self, x):
        # Residual FFN: add&norm around the position-wise feed-forward output.
        ffn_out = self.FFN(x)
        return self.addNorm(x, ffn_out)
119
+
120
+ class ffn_norm(nn.Module):
121
+ # 可接受二维输入和一维输入
122
+ def __init__(self,input_dims:int,hidden_dims:int,dropout:float,bias:bool=True):
123
+ super(ffn_norm,self).__init__()
124
+
125
+ self.inps_dims = input_dims
126
+ self.hidden_dims = hidden_dims
127
+ self.dropout = nn.Dropout(dropout)
128
+ self.ffn_bias = bias
129
+ self.ffn = nn.Sequential(
130
+ nn.Linear(self.inps_dims, self.hidden_dims, bias=self.ffn_bias),
131
+ nn.LeakyReLU(),
132
+ nn.Linear(self.hidden_dims, self.inps_dims, bias=self.ffn_bias),
133
+ )
134
+
135
+ self.ln = nn.LayerNorm(self.inps_dims)
136
+
137
+ def forward(self,x):
138
+ # x:[B,S,H] OR [B,shape],shape:S*H
139
+ ffn_out = self.ffn(x)
140
+ norm_out = self.ln(x + self.dropout(ffn_out))
141
+
142
+ return norm_out
143
+
144
+
145
def sequence_mask(X, valid_len, value=0.):
    """Overwrite (in place) entries of X beyond each row's valid length.

    Args:
        X: [N, L, ...] tensor; positions >= valid_len[i] in row i are filled.
        valid_len: [N] tensor of per-row valid lengths.
        value: fill value for the masked positions.

    Returns:
        The same tensor X (mutated in place).
    """
    lengths = valid_len.float()
    max_positions = X.size(1)
    position_ids = torch.arange(max_positions, dtype=torch.float32, device=X.device)
    keep = position_ids[None, :] < lengths[:, None].to(X.device)
    X[~keep] = value
    return X
152
+
153
+
154
def masked_softmax(X, valid_lens):
    """Softmax over the last axis of a 3-D tensor, masking invalid positions.

    valid_lens may be None (plain softmax), 1-D (one length per batch row,
    broadcast across all queries) or 2-D (one length per query).
    """
    if valid_lens is None:
        return nn.functional.softmax(X, dim=-1)

    shape = X.shape
    if valid_lens.dim() == 1:
        # One length per batch item: repeat it for every query position.
        valid_lens = torch.repeat_interleave(valid_lens, shape[1])
    else:
        valid_lens = valid_lens.reshape(-1)
    # Masked scores become a large negative number so their softmax
    # probability is effectively zero.
    X = sequence_mask(X.reshape(-1, shape[-1]), valid_lens, value=-1e6)
    return nn.functional.softmax(X.reshape(shape), dim=-1)
167
+
168
+
169
+ # class AdditiveAttention(nn.Module):
170
+ # """加性注意⼒"""
171
+ #
172
+ # def __init__(self, key_size, query_size, num_hiddens, dropout):
173
+ # super(AdditiveAttention, self).__init__()
174
+ # self.W_k = nn.Linear(key_size, num_hiddens, bias=False)
175
+ # self.W_q = nn.Linear(query_size, num_hiddens, bias=False)
176
+ # self.w_v = nn.Linear(num_hiddens, 1, bias=False)
177
+ # self.dropout = nn.Dropout(dropout)
178
+ #
179
+ # def forward(self, queries, keys, values, valid_lens):
180
+ # queries, keys = self.W_q(queries), self.W_k(keys)
181
+ # # 在维度扩展后,
182
+ # # queries的形状:(batch_size,查询的个数,1,num_hidden)
183
+ # # key的形状:(batch_size,1,“键-值”对的个数,num_hiddens)
184
+ # # 使⽤⼴播⽅式进⾏求和
185
+ # features = queries.unsqueeze(2) + keys.unsqueeze(1)
186
+ # features = torch.tanh(features)
187
+ # # self.w_v仅有⼀个输出,因此从形状中移除最后那个维度。
188
+ # # scores的形状:(batch_size,查询的个数,“键-值”对的个数)
189
+ # scores = self.w_v(features).squeeze(-1)
190
+ # attention_weights = masked_softmax(scores, valid_lens)
191
+ # # values的形状:(batch_size,“键-值”对的个数,值的维度)
192
+ # return torch.bmm(self.dropout(attention_weights), values)
193
+
194
+
195
class AdditiveAttention(nn.Module):
    """Attention block.

    NOTE(review): despite the class name, this implements a scaled
    dot-product attention variant (torch.bmm of projected queries/keys,
    scaled by sqrt(d)), not additive (Bahdanau) attention.
    """

    def __init__(self, input_size, value_size, num_hiddens, dropout):
        super(AdditiveAttention, self).__init__()
        self.W_k = nn.Linear(input_size, num_hiddens, bias=False)
        self.W_q = nn.Linear(input_size, num_hiddens, bias=False)
        self.w_v = nn.Linear(input_size, num_hiddens, bias=False)
        # NOTE(review): the 50 hard-codes the number of keys — this layer
        # only works when the key sequence length is exactly 50; confirm.
        self.w_o = nn.Linear(50, value_size, bias=False)
        self.dropout = nn.Dropout(dropout)

    def forward(self, queries, keys, values, valid_lens=None):
        # Project queries and keys into the num_hiddens space.
        queries, keys = self.W_q(queries), self.W_k(keys)
        d = queries.shape[-1]
        # scores: (batch, #queries, #keys), scaled dot product.
        scores = torch.bmm(queries, keys.transpose(1, 2)) / math.sqrt(d)
        # Mix across the key axis (fixed at 50) with w_o, then permute to
        # (batch, value_size, #queries) before the masked softmax over the
        # last axis.
        scores = self.w_o(scores).permute(0, 2, 1)
        attention_weights = masked_softmax(scores, valid_lens)

        # Project the values before the weighted sum.
        values = self.w_v(values)
        # Returns (weighted values, attention weights); bmm requires
        # #queries == #values here — presumably self-attention. TODO confirm.
        return torch.bmm(self.dropout(attention_weights), values), attention_weights
227
+
228
+
229
class MultiHeadAttention(nn.Module):
    """Multi-head attention (d2l-style implementation over DotProductAttention)."""

    def __init__(self, key_size, query_size, value_size, num_hiddens,
                 num_heads, dropout, bias=False):
        super(MultiHeadAttention, self).__init__()
        self.num_heads = num_heads
        self.attention = DotProductAttention(dropout)
        self.W_q = nn.Linear(query_size, num_hiddens, bias=bias)
        self.W_k = nn.Linear(key_size, num_hiddens, bias=bias)
        self.W_v = nn.Linear(value_size, num_hiddens, bias=bias)
        self.W_o = nn.Linear(num_hiddens, num_hiddens, bias=bias)

    def forward(self, queries, keys, values, valid_lens=None):
        # queries/keys/values: (batch_size, #items, num_hiddens)
        # valid_lens: (batch_size,) or (batch_size, #queries)
        # After transpose_qkv each becomes
        # (batch_size*num_heads, #items, num_hiddens/num_heads).
        queries = transpose_qkv(self.W_q(queries), self.num_heads)
        keys = transpose_qkv(self.W_k(keys), self.num_heads)
        values = transpose_qkv(self.W_v(values), self.num_heads)

        if valid_lens is not None:
            # Copy each length num_heads times along axis 0 so every head
            # sees the same mask.
            valid_lens = torch.repeat_interleave(valid_lens, repeats=self.num_heads, dim=0)

        # output: (batch_size*num_heads, #queries, num_hiddens/num_heads)
        output = self.attention(queries, keys, values, valid_lens)

        # output_concat: (batch_size, #queries, num_hiddens)
        output_concat = transpose_output(output, self.num_heads)
        return self.W_o(output_concat)
265
+
266
+
267
def transpose_qkv(X, num_heads):
    """Split the feature axis into heads for parallel attention.

    Input X: (batch, n, num_hiddens).
    Output:  (batch * num_heads, n, num_hiddens / num_heads).
    """
    batch, n = X.shape[0], X.shape[1]
    # (batch, n, num_heads, head_dim) -> (batch, num_heads, n, head_dim)
    X = X.reshape(batch, n, num_heads, -1).permute(0, 2, 1, 3)
    # Fold batch and heads into one leading axis.
    return X.reshape(-1, n, X.shape[3])
281
+
282
+
283
def transpose_output(X, num_heads):
    """Inverse of transpose_qkv: merge the heads back into the feature axis.

    Input X: (batch * num_heads, n, head_dim).
    Output:  (batch, n, num_heads * head_dim).
    """
    n, head_dim = X.shape[1], X.shape[2]
    # (batch, num_heads, n, head_dim) -> (batch, n, num_heads, head_dim)
    X = X.reshape(-1, num_heads, n, head_dim).permute(0, 2, 1, 3)
    return X.reshape(X.shape[0], n, -1)
288
+
289
+
290
class DotProductAttention(nn.Module):
    """Scaled dot-product attention."""

    def __init__(self, dropout):
        super(DotProductAttention, self).__init__()
        self.dropout = nn.Dropout(dropout)

    # queries: (batch_size, #queries, d)
    # keys: (batch_size, #key-value pairs, d)
    # values: (batch_size, #key-value pairs, value dim)
    # valid_lens: (batch_size,) or (batch_size, #queries)
    def forward(self, queries, keys, values, valid_lens=None):
        d = queries.shape[-1]
        # Swap the last two dims of keys, then scale the dot products by sqrt(d).
        scores = torch.bmm(queries, keys.transpose(1, 2)) / math.sqrt(d)
        # masked_softmax ignores positions past each row's valid length.
        attention_weights = masked_softmax(scores, valid_lens)
        # Dropout on the attention weights, then the weighted sum of values.
        return torch.bmm(self.dropout(attention_weights), values)
307
+
308
+
309
class MASK_AttentionEncode(nn.Module):
    """Self-attention encoder block with optional length masking (no FFN sublayer)."""

    def __init__(self, dropout, embedding_size, num_heads):
        super(MASK_AttentionEncode, self).__init__()
        self.dropout = dropout
        self.embedding_size = embedding_size
        self.num_heads = num_heads

        self.at1 = MultiHeadAttention(key_size=self.embedding_size,
                                      query_size=self.embedding_size,
                                      value_size=self.embedding_size,
                                      num_hiddens=self.embedding_size,
                                      num_heads=self.num_heads,
                                      dropout=self.dropout)
        # NOTE(review): the AddNorm normalized shape hard-codes a sequence
        # length of 50 — inputs must be (batch, 50, embedding_size).
        self.addNorm = AddNorm(normalized=[50, self.embedding_size], dropout=self.dropout)

        # NOTE(review): this FFN is never used in forward, and its keyword
        # names (ffn_num_input/...) differ from the ffn_input/ffn_hiddens
        # names used for PositionWiseFFN elsewhere in this file — verify
        # against the PositionWiseFFN signature; one of the two spellings
        # must raise a TypeError at construction.
        self.FFN = PositionWiseFFN(ffn_num_input=64, ffn_num_hiddens=192, ffn_num_outputs=64)

    def forward(self, x, y=None):
        # y is forwarded to the attention as valid_lens (optional mask lengths).
        Multi = self.at1(x, x, x, y)
        # Residual connection + normalization; the FFN sublayer is disabled.
        Multi_encode = self.addNorm(x, Multi)

        return Multi_encode
335
+
336
+
337
class transformer_encode(nn.Module):
    """Transformer encoder block: self-attention + position-wise FFN, each with Add & Norm."""

    def __init__(self, dropout, embedding, num_heads):
        super(transformer_encode, self).__init__()
        self.dropout = dropout
        self.embedding_size = embedding
        self.num_heads = num_heads
        # NOTE(review): self.attention (hard-coded embed_dim=192, 8 heads,
        # dropout 0.6) is constructed but never used in forward — candidate
        # for removal.
        self.attention = nn.MultiheadAttention(embed_dim=192,
                                               num_heads=8,
                                               dropout=0.6
                                               )
        self.at1 = MultiHeadAttention(key_size=self.embedding_size,
                                      query_size=self.embedding_size,
                                      value_size=self.embedding_size,
                                      num_hiddens=self.embedding_size,
                                      num_heads=self.num_heads,
                                      dropout=self.dropout)

        # NOTE(review): normalized shape hard-codes sequence length 50, and
        # the same AddNorm instance serves both sublayers (shared LayerNorm
        # parameters) — confirm this weight sharing is intentional.
        self.addNorm = AddNorm(normalized=[50, self.embedding_size], dropout=self.dropout)

        # NOTE(review): keyword names ffn_num_* differ from the
        # ffn_input/ffn_hiddens spelling used for PositionWiseFFN elsewhere
        # in this file — verify against the PositionWiseFFN signature.
        self.ffn = PositionWiseFFN(ffn_num_input=self.embedding_size, ffn_num_hiddens=2*self.embedding_size,
                                   ffn_num_outputs=self.embedding_size)

    def forward(self, x, valid=None):
        # Self-attention with optional valid-length masking.
        Multi = self.at1(x, x, x, valid)
        Multi_encode = self.addNorm(x, Multi)

        # Position-wise FFN sublayer with its own residual + norm.
        encode_output = self.addNorm(Multi_encode, self.ffn(Multi_encode))

        return encode_output
app.ipynb ADDED
@@ -0,0 +1,205 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "from Bio import SeqIO\n",
10
+ "from DeepPD.data_helper import Data2EqlTensor,Seqs2EqlTensor"
11
+ ]
12
+ },
13
+ {
14
+ "cell_type": "code",
15
+ "execution_count": 2,
16
+ "metadata": {},
17
+ "outputs": [
18
+ {
19
+ "data": {
20
+ "text/plain": [
21
+ "('LLSEVEELNMSLTALREK', 18)"
22
+ ]
23
+ },
24
+ "execution_count": 2,
25
+ "metadata": {},
26
+ "output_type": "execute_result"
27
+ }
28
+ ],
29
+ "source": [
30
+ "file_path = './homo_test.fa'\n",
31
+ "data = []\n",
32
+ "for record in SeqIO.parse(file_path, 'fasta'):\n",
33
+ " data.append((record.id, str(record.seq)))\n",
34
+ "\n",
35
+ "data[0][1],len(data[0][1])"
36
+ ]
37
+ },
38
+ {
39
+ "cell_type": "code",
40
+ "execution_count": 3,
41
+ "metadata": {},
42
+ "outputs": [
43
+ {
44
+ "name": "stdout",
45
+ "output_type": "stream",
46
+ "text": [
47
+ "default_padding_value: 1\n",
48
+ "length>40: 0\n"
49
+ ]
50
+ },
51
+ {
52
+ "data": {
53
+ "text/plain": [
54
+ "torch.Size([6, 40])"
55
+ ]
56
+ },
57
+ "execution_count": 3,
58
+ "metadata": {},
59
+ "output_type": "execute_result"
60
+ }
61
+ ],
62
+ "source": [
63
+ "seqs,ids = Data2EqlTensor(data,40)\n",
64
+ "seqs.shape"
65
+ ]
66
+ },
67
+ {
68
+ "cell_type": "code",
69
+ "execution_count": 4,
70
+ "metadata": {},
71
+ "outputs": [
72
+ {
73
+ "data": {
74
+ "text/plain": [
75
+ "tensor([[ 4, 4, 8, 9, 7, 9, 9, 4, 17, 20, 8, 4, 11, 5, 4, 10, 9, 15,\n",
76
+ " 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n",
77
+ " 1, 1, 1, 1],\n",
78
+ " [11, 5, 21, 19, 6, 8, 4, 14, 16, 15, 8, 21, 6, 10, 1, 1, 1, 1,\n",
79
+ " 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n",
80
+ " 1, 1, 1, 1],\n",
81
+ " [ 7, 17, 18, 21, 18, 12, 4, 18, 17, 17, 7, 13, 6, 21, 4, 19, 9, 4,\n",
82
+ " 13, 6, 10, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n",
83
+ " 1, 1, 1, 1],\n",
84
+ " [17, 16, 22, 16, 4, 8, 5, 13, 13, 4, 15, 15, 1, 1, 1, 1, 1, 1,\n",
85
+ " 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n",
86
+ " 1, 1, 1, 1],\n",
87
+ " [ 7, 4, 7, 5, 4, 19, 9, 9, 14, 9, 15, 14, 17, 8, 5, 4, 13, 18,\n",
88
+ " 4, 15, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n",
89
+ " 1, 1, 1, 1],\n",
90
+ " [16, 5, 11, 11, 12, 12, 5, 13, 17, 12, 12, 18, 4, 8, 13, 16, 11, 15,\n",
91
+ " 9, 15, 9, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n",
92
+ " 1, 1, 1, 1]])"
93
+ ]
94
+ },
95
+ "execution_count": 4,
96
+ "metadata": {},
97
+ "output_type": "execute_result"
98
+ }
99
+ ],
100
+ "source": [
101
+ "seqs"
102
+ ]
103
+ },
104
+ {
105
+ "cell_type": "code",
106
+ "execution_count": 5,
107
+ "metadata": {},
108
+ "outputs": [
109
+ {
110
+ "name": "stderr",
111
+ "output_type": "stream",
112
+ "text": [
113
+ "Some weights of the model checkpoint at ./DeepPD/BERT were not used when initializing BertModel: ['cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']\n",
114
+ "- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
115
+ "- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n"
116
+ ]
117
+ }
118
+ ],
119
+ "source": [
120
+ "from DeepPD.predictor import predict\n",
121
+ "import torch"
122
+ ]
123
+ },
124
+ {
125
+ "cell_type": "code",
126
+ "execution_count": 6,
127
+ "metadata": {},
128
+ "outputs": [],
129
+ "source": [
130
+ "device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n",
131
+ "\n",
132
+ "def homo_classifier(file,threshold):\n",
133
+ " data = []\n",
134
+ " for record in SeqIO.parse(file, 'fasta'):\n",
135
+ " data.append((record.id, str(record.seq)))\n",
136
+ " seqs,ids = Data2EqlTensor(data,40)\n",
137
+ " homo_peptide_pred = predict(seqs,data, './weight-Homo/4.pth', threshold, device)\n",
138
+ " return homo_peptide_pred"
139
+ ]
140
+ },
141
+ {
142
+ "cell_type": "code",
143
+ "execution_count": 7,
144
+ "metadata": {},
145
+ "outputs": [
146
+ {
147
+ "name": "stdout",
148
+ "output_type": "stream",
149
+ "text": [
150
+ "default_padding_value: 1\n",
151
+ "length>40: 0\n"
152
+ ]
153
+ },
154
+ {
155
+ "data": {
156
+ "text/plain": [
157
+ "[['peptide_1', 'LLSEVEELNMSLTALREK', '0.296', 'Non-Peptide'],\n",
158
+ " ['peptide_2', 'TAHYGSLPQKSHGR', '0.013', 'Non-Peptide'],\n",
159
+ " ['peptide_3', 'VNFHFILFNNVDGHLYELDGR', '0.809', 'Peptide'],\n",
160
+ " ['peptide_4', 'NQWQLSADDLKK', '0.827', 'Peptide'],\n",
161
+ " ['peptide_5', 'VLVALYEEPEKPNSALDFLK', '0.868', 'Peptide'],\n",
162
+ " ['peptide_6', 'QATTIIADNIIFLSDQTKEKE', '0.043', 'Non-Peptide']]"
163
+ ]
164
+ },
165
+ "execution_count": 7,
166
+ "metadata": {},
167
+ "output_type": "execute_result"
168
+ }
169
+ ],
170
+ "source": [
171
+ "out = homo_classifier(file_path,0.5)\n",
172
+ "out"
173
+ ]
174
+ },
175
+ {
176
+ "cell_type": "code",
177
+ "execution_count": null,
178
+ "metadata": {},
179
+ "outputs": [],
180
+ "source": []
181
+ }
182
+ ],
183
+ "metadata": {
184
+ "kernelspec": {
185
+ "display_name": "env3.8",
186
+ "language": "python",
187
+ "name": "python3"
188
+ },
189
+ "language_info": {
190
+ "codemirror_mode": {
191
+ "name": "ipython",
192
+ "version": 3
193
+ },
194
+ "file_extension": ".py",
195
+ "mimetype": "text/x-python",
196
+ "name": "python",
197
+ "nbconvert_exporter": "python",
198
+ "pygments_lexer": "ipython3",
199
+ "version": "3.8.0"
200
+ },
201
+ "orig_nbformat": 4
202
+ },
203
+ "nbformat": 4,
204
+ "nbformat_minor": 2
205
+ }
app.py ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from DeepPD.predictor import predict
3
+ from DeepPD.data_helper import Data2EqlTensor
4
+ import gradio as gr
5
+ from Bio import SeqIO
6
+ device = "cuda" if torch.cuda.is_available() else "cpu"
7
+
8
def mus_classifier(file, threshold):
    """Run the Mus musculus detectability model on an uploaded FASTA file.

    file: uploaded file object (gradio gr.File); threshold: probability cutoff.
    Returns the prediction rows produced by predict().
    """
    # Parse the FASTA upload into (id, sequence) pairs.
    records = [(record.id, str(record.seq)) for record in SeqIO.parse(file.name, 'fasta')]
    # Encode to equal-length tensors (padded/truncated to 40 residues).
    seqs, _ = Data2EqlTensor(records, 40)
    mus_peptide_pred = predict(seqs, records, './weight-Mus/4.pth', threshold, device)
    return mus_peptide_pred
15
+
16
def homo_classifier(file, threshold):
    """Run the Homo sapiens detectability model on an uploaded FASTA file.

    file: uploaded file object (gradio gr.File); threshold: probability cutoff.
    Returns the prediction rows produced by predict().
    """
    # Parse the FASTA upload into (id, sequence) pairs.
    records = [(record.id, str(record.seq)) for record in SeqIO.parse(file.name, 'fasta')]
    # Encode to equal-length tensors (padded/truncated to 40 residues).
    seqs, _ = Data2EqlTensor(records, 40)
    homo_peptide_pred = predict(seqs, records, './weight-Homo/4.pth', threshold, device)
    return homo_peptide_pred
23
+ # {peptide_id:[Type:int(1->peptide,0->non-peptide)]}
24
+
25
# Gradio UI: two tabs (Homo sapiens / Mus musculus), each wiring a FASTA
# upload plus a probability-threshold slider to its species-specific
# classifier and rendering the predictions in a DataFrame.
# NOTE(review): nesting reconstructed from a whitespace-mangled diff —
# confirm the output Column sits where intended relative to the input Row.
with gr.Blocks() as demo:
    gr.Markdown(" ## DeepPD")
    gr.Markdown("In this study, we developed a peptide detectability prediction model. The model was used to predict the probability that an amino acid sequence is a peptide.")

    with gr.Tab("Prediction Model(Homo sapiens)"):
        with gr.Row():
            with gr.Column(scale=2):
                input_fasta_homo = gr.File()
            with gr.Column(scale=2):
                homo_cutoff = gr.Slider(0, 1, step=0.1, value=0.5, interactive=True, label="Threshold")
                gr.Markdown("### Note")
                gr.Markdown("- Limit the number of input sequences to less than 128.")
                gr.Markdown("- The file should be the Fasta format.")
                gr.Markdown("- We used only the first 20 amino acids of each N-terminal and C-terminal of the sequence for prediction.")
                image_button_homo = gr.Button("Submit")
            with gr.Column():
                # gr.Markdown(" ### Flip text or image files using this demo.")
                gr.Markdown("Note: the output scores indicates the probability of the input sequence to be predicted as a Peptide or a Non-Peptide.")
                frame_homo_output = gr.DataFrame(
                    headers=["Sequence Id", "Sequence", "Probability of peptides", "Peptide"],
                    datatype=["str", "str", "str", 'str'],)

        # Submit -> run the Homo sapiens classifier on (file, threshold).
        image_button_homo.click(homo_classifier, inputs=[input_fasta_homo, homo_cutoff], outputs=frame_homo_output)

    with gr.Tab("Prediction Model(Mus musculus)"):
        # cutoff = gr.Slider(0, 1, step=0.1, value=0.5, interactive=True)
        with gr.Row():
            with gr.Column(scale=2):
                input_fasta_mus = gr.File()
                # cutoff = gr.Slider(0, 1, step=0.1, value=0.5, interactive=True, label="threshold")
                # image_button = gr.Button("Submit")
            with gr.Column(scale=2):
                mus_cutoff = gr.Slider(0, 1, step=0.1, value=0.5, interactive=True, label="Threshold")
                gr.Markdown("### Note")
                gr.Markdown("- Limit the number of input sequences to less than 128.")
                gr.Markdown("- The file should be the Fasta format.")
                gr.Markdown("- We used only the first 20 amino acids of each N-terminal and C-terminal of the sequence for prediction.")
                image_button_mus = gr.Button("Submit")
            with gr.Column():
                # gr.Markdown(" ### Flip text or image files using this demo.")
                gr.Markdown("Note: the output scores indicates the probability of the input sequence to be predicted as a Peptide or a Non-Peptide.")
                frame_mus_output = gr.DataFrame(
                    headers=["Sequence Id", "Sequence", "Probability of peptides", "Peptide"],
                    datatype=["str", "str", "str", 'str'],)

        # Submit -> run the Mus musculus classifier on (file, threshold).
        image_button_mus.click(mus_classifier, inputs=[input_fasta_mus, mus_cutoff], outputs=frame_mus_output)

    with gr.Accordion("Citation"):
        gr.Markdown("- GitHub: https://github.com/leonern/DeepPD")

    with gr.Accordion("License"):
        gr.Markdown("- Released under the [MIT license](https://github.com/leonern/DeepPD/blob/main/LICENSE). ")

    with gr.Accordion("Contact"):
        gr.Markdown("- If you have any questions, please file a Github issue or contact me at 107552103310@stu.xju.edu.cn")


# Queue up to 4 concurrent requests, then start the app.
demo.queue(4)
demo.launch() #share=True
homo_test.fa ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ >peptide_1
2
+ LLSEVEELNMSLTALREK
3
+ >peptide_2
4
+ TAHYGSLPQKSHGR
5
+ >peptide_3
6
+ VNFHFILFNNVDGHLYELDGR
7
+ >peptide_4
8
+ NQWQLSADDLKK
9
+ >peptide_5
10
+ VLVALYEEPEKPNSALDFLK
11
+ >peptide_6
12
+ QATTIIADNIIFLSDQTKEKE
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ biopython==1.81
2
+ fair_esm==2.0.0
3
+ numpy==1.22.3
4
+ torch
5
+ transformers==4.25.1
6
+ gradio==3.30.0
7
+ Bio==1.5.9
weight-Homo/4.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:220e8f5094004e171951d84665f1728fe4a206a7447427bbd4db08bb4df3ca18
3
+ size 239141411
weight-Mus/4.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:22a162acab7dd8fa9b2e496edf833fb641fad0de22c97d9f6008fd7865a6a2b6
3
+ size 239141411