gravelcompbio committed
Commit 1c586cc · verified · 1 Parent(s): bfbf372

Upload 12 files
__pycache__/configuration_esm.cpython-39.pyc ADDED
Binary file (12.6 kB).
 
__pycache__/modeling_esm.cpython-39.pyc ADDED
Binary file (33.4 kB).
 
claspp_forward.py ADDED
@@ -0,0 +1,412 @@
+ import sys
+ 
+ import numpy as np
+ import torch
+ import torch.nn as nn
+ 
+ from transformers import EsmTokenizer
+ 
+ from modeling_esm import EsmForSequenceClassificationCustomWidehead
+ 
+ 
+ tokenizer = EsmTokenizer.from_pretrained("finalCheckpoint_25_05_11/")
+ model = EsmForSequenceClassificationCustomWidehead.from_pretrained("finalCheckpoint_25_05_11/", num_labels=54).cuda()
+ 
+ 
+ ###############################################################################
+ # helper code to make the model run smoothly
+ ###############################################################################
+ # The 54 model outputs, in order ("ncX_totY" = cluster X out of Y for that PTM):
+ # labs=['ST-Phosphorylation_nc0_tot5',
+ #       'ST-Phosphorylation_nc1_tot5',
+ #       'ST-Phosphorylation_nc2_tot5',
+ #       'ST-Phosphorylation_nc3_tot5',
+ #       'ST-Phosphorylation_nc4_tot5',
+ #       'K-Ubiquitination_nc0_tot20',
+ #       'K-Ubiquitination_nc1_tot20',
+ #       'K-Ubiquitination_nc2_tot20',
+ #       'K-Ubiquitination_nc3_tot20',
+ #       'K-Ubiquitination_nc4_tot20',
+ #       'K-Ubiquitination_nc5_tot20',
+ #       'K-Ubiquitination_nc6_tot20',
+ #       'K-Ubiquitination_nc7_tot20',
+ #       'K-Ubiquitination_nc8_tot20',
+ #       'K-Ubiquitination_nc9_tot20',
+ #       'K-Ubiquitination_nc10_tot20',
+ #       'K-Ubiquitination_nc11_tot20',
+ #       'K-Ubiquitination_nc12_tot20',
+ #       'K-Ubiquitination_nc13_tot20',
+ #       'K-Ubiquitination_nc14_tot20',
+ #       'K-Ubiquitination_nc15_tot20',
+ #       'K-Ubiquitination_nc16_tot20',
+ #       'K-Ubiquitination_nc17_tot20',
+ #       'K-Ubiquitination_nc18_tot20',
+ #       'K-Ubiquitination_nc19_tot20',
+ #       'Y-Phosphorylation_nc0_tot1',
+ #       'K-Acetylation_nc0_tot10',
+ #       'K-Acetylation_nc1_tot10',
+ #       'K-Acetylation_nc2_tot10',
+ #       'K-Acetylation_nc3_tot10',
+ #       'K-Acetylation_nc4_tot10',
+ #       'K-Acetylation_nc5_tot10',
+ #       'K-Acetylation_nc6_tot10',
+ #       'K-Acetylation_nc7_tot10',
+ #       'K-Acetylation_nc8_tot10',
+ #       'K-Acetylation_nc9_tot10',
+ #       'N-N-linked-Glycosylation_nc0_tot1',
+ #       'ST-O-linked-Glycosylation_nc0_tot5',
+ #       'ST-O-linked-Glycosylation_nc1_tot5',
+ #       'ST-O-linked-Glycosylation_nc2_tot5',
+ #       'ST-O-linked-Glycosylation_nc3_tot5',
+ #       'ST-O-linked-Glycosylation_nc4_tot5',
+ #       'RK-Methylation_nc0_tot4',
+ #       'RK-Methylation_nc1_tot4',
+ #       'RK-Methylation_nc2_tot4',
+ #       'RK-Methylation_nc3_tot4',
+ #       'K-Sumoylation_nc0_tot1',
+ #       'K-Malonylation_nc0_tot1',
+ #       'M-Sulfoxidation_nc0_tot1',
+ #       'AM-Acetylation_nc0_tot1',
+ #       'C-Glutathionylation_nc0_tot1',
+ #       'C-S-palmitoylation_nc0_tot1',
+ #       'PK-Hydroxylation_nc0_tot1',
+ #       'NegLab']
+ 
+ # map each "residue_PTM" label of interest to its column in the condensed
+ # 20-wide output written by write_output() (column 19 is the negative label)
+ lab2map = {
+     "S_Phosphorylation": 0,
+     "T_Phosphorylation": 1,
+     "K_Ubiquitination": 2,
+     "Y_Phosphorylation": 3,
+     "K_Acetylation": 4,
+     "N_N-linked-Glycosylation": 5,
+     "S_O-linked-Glycosylation": 6,
+     "T_O-linked-Glycosylation": 7,
+     "R_Methylation": 8,
+     "K_Methylation": 9,
+     "K_Sumoylation": 10,
+     "K_Malonylation": 11,
+     "M_Sulfoxidation": 12,
+     "A_Acetylation": 13,
+     "M_Acetylation": 14,
+     "C_Glutathionylation": 15,
+     "C_S-palmitoylation": 16,
+     "P_Hydroxylation": 17,
+     "K_Hydroxylation": 18,
+ }
+ labsoi = set(lab2map.keys())
+ pos2lab = {pos: lab for lab, pos in lab2map.items()}
+ # labsoi.add("K-Succinylation")
+ # lab2map["K-Succinylation"] = 14
+ 
+ 
+ def preprocess_function(examples):
+     # '.' marks the (masked) modification site and '-' pads peptides that
+     # fall near a sequence terminus
+     toks = {'input_ids': [], 'attention_mask': []}
+     for info in examples["pep"]:
+         info = info.replace(".", "<mask>")
+         t = tokenizer(info.replace("-", "<pad>"))
+         toks['input_ids'].append(t['input_ids'])
+         toks['attention_mask'].append(t['attention_mask'])
+     return toks
+ 
+ 
+ def getlab(elab, res):
+     # Condense the 54 per-cluster scores into 20 residue-aware PTM scores:
+     # take the max over each PTM's clusters (slice layout follows the
+     # commented label list above); for residue-ambiguous PTM types
+     # (ST-, RK-, AM-, PK-) only the column matching the center residue
+     # `res` is filled in.
+     output = np.zeros(20)
+     # elab[0:5]   ST-Phosphorylation -> columns 0 (S) / 1 (T)
+     if res == 'S':
+         output[0] = max(elab[:5])
+     elif res == 'T':
+         output[1] = max(elab[:5])
+     # elab[5:25]  K-Ubiquitination -> column 2
+     output[2] = max(elab[5:25])
+     # elab[25:26] Y-Phosphorylation -> column 3
+     output[3] = max(elab[25:26])
+     # elab[26:36] K-Acetylation -> column 4
+     output[4] = max(elab[26:36])
+     # elab[36:37] N-N-linked-Glycosylation -> column 5
+     output[5] = max(elab[36:37])
+     # elab[37:42] ST-O-linked-Glycosylation -> columns 6 (S) / 7 (T)
+     if res == 'S':
+         output[6] = max(elab[37:42])
+     elif res == 'T':
+         output[7] = max(elab[37:42])
+     # elab[42:46] RK-Methylation -> columns 8 (R) / 9 (K)
+     if res == 'R':
+         output[8] = max(elab[42:46])
+     elif res == 'K':
+         output[9] = max(elab[42:46])
+     # elab[46:47] K-Sumoylation -> column 10
+     output[10] = max(elab[46:47])
+     # elab[47:48] K-Malonylation -> column 11
+     output[11] = max(elab[47:48])
+     # elab[48:49] M-Sulfoxidation -> column 12
+     output[12] = max(elab[48:49])
+     # elab[49:50] AM-Acetylation -> columns 13 (A) / 14 (M)
+     if res == 'A':
+         output[13] = max(elab[49:50])
+     elif res == 'M':
+         output[14] = max(elab[49:50])
+     # elab[50:51] C-Glutathionylation -> column 15
+     output[15] = max(elab[50:51])
+     # elab[51:52] C-S-palmitoylation -> column 16
+     output[16] = max(elab[51:52])
+     # elab[52:53] PK-Hydroxylation -> columns 17 (P) / 18 (K)
+     if res == 'P':
+         output[17] = max(elab[52:53])
+     elif res == 'K':
+         output[18] = max(elab[52:53])
+     # elab[53:54] NegLab (negative label) -> column 19
+     output[19] = max(elab[53:54])
+     return output
+ 
+ 
+ ###############################################################################
+ # prediction code
+ ###############################################################################
+ 
+ 
+ def predict(input_batches):
+     sig = nn.Sigmoid()
+     outputpreds = []
+     for i, batch in enumerate(input_batches):
+         print(f"{i} / {len(input_batches)} batches done", end='\r')
+         # every peptide is a 21-mer, so the tokenized batch is rectangular
+         toks = tokenizer(batch)
+         input_ids = torch.tensor(toks['input_ids']).cuda()
+         attention_mask = torch.tensor(toks['attention_mask']).cuda()
+         with torch.no_grad():
+             logits = model(input_ids, attention_mask)["logits"]
+         # sigmoid gives independent per-label probabilities (multi-label)
+         for p in sig(logits).tolist():
+             outputpreds.append(p)
+     return outputpreds
+ 
+ 
+ def write_output(pred, listofpeps, file_output="output_predictions.csv"):
+     hf = open(file_output, 'w+')
+     # 19 named residue/PTM columns plus the negative label
+     header = "pep," + ",".join(pos2lab[i] for i in range(len(pos2lab))) + ",NegLab"
+     hf.write(header + "\n")
+     for p, ip in zip(pred, listofpeps):
+         r = ip[10]  # center residue of the 21-mer
+         easyreadlab = getlab(p, r)
+         hf.write(ip + "," + ",".join(str(sp) for sp in easyreadlab) + "\n")
+     hf.close()
+ 
+ 
+ DOC_HELP = '''
+ Usage: python3 claspp_forward.py [OPTION]... --input INPUT [FASTA_FILE or TXT_FILE]...
+ Predict PTM events on peptides or full sequences.
+ 
+ Example 1: python3 claspp_forward.py -B 100 -S 0 -i random.txt
+ Example 2: python3 claspp_forward.py -B 50 -S 1 -i random.fasta
+ 
+ FASTA_FILE should contain protein sequences in proper fasta or a2m format.
+ TXT_FILE should contain protein peptides 21 residues in length, with the
+ center residue being the PTM modification site.
+ 
+ 
+ Pattern selection and interpretation:
+   -B, --batch_size    (int) how many peptides are predicted at a time on
+                       the GPU (reduce this if you run out of GPU memory)
+ 
+   -S, --scrape_fasta  (int) should be a 1 or a 0
+                       1 = read a fasta and scrape every possible 21-mer
+                           peptide that can be modified by a PTM
+                       0 = read a txt file that already has the 21-mers,
+                           one peptide per '\\n'-separated line (can be
+                           faster than the fasta option)
+ 
+   -h, --help          you're reading it right now
+ 
+   -i, --input         location of the input fasta or txt
+ 
+   -o, --output        location of the output csv
+ 
+ 
+ Report bugs to:
+ 
+ 
+ '''
+ WARNING_MESSAGE = """
+ #################################
+ PLEASE READ THE HELP MESSAGE TO
+ ENSURE YOU KNOW HOW TO FORMAT/USE
+ THE MODEL
+ #################################
+ """
+ 
+ 
+ def main():
+     batch_size = 50
+     scrape = 0
+     file_output = "output_predictions.csv"
+     input_file = "N/A"
+     if any(a in ('-h', '--h', '-help', '--help') for a in sys.argv[1:]):
+         print(DOC_HELP)
+         return
+     for i in range(1, len(sys.argv) - 1):
+         if sys.argv[i] in ('--scrape_fasta', '-S'):
+             scrape = int(sys.argv[i + 1])
+         if sys.argv[i] in ('--batch_size', '-B'):
+             batch_size = int(sys.argv[i + 1])
+         if sys.argv[i] in ('--input', '-i'):
+             input_file = sys.argv[i + 1]
+         if sys.argv[i] in ('--output', '-o'):
+             file_output = sys.argv[i + 1]
+     if input_file == 'N/A':
+         print(WARNING_MESSAGE)
+         print(DOC_HELP)
+         return
+ 
+     if scrape == 0:
+         # one 21-mer peptide per line
+         listofpeps = []
+         with open(input_file, "r") as rf:
+             for line in rf:
+                 pep = line.strip()
+                 if pep:
+                     listofpeps.append(pep)
+     else:
+         # scrape one 21-mer window per residue from the fasta sequences
+         listofpeps = []
+         acc2seq = {}
+         with open(input_file, "r") as rf:
+             lines = rf.readlines()
+         seq = ""
+         acc = ""
+         for line in lines:
+             if line[0] == '>':
+                 if seq != '':
+                     acc2seq[acc] = seq
+                     seq = ""
+                 acc = line[1:].strip()
+             else:
+                 seq += line.strip()
+         acc2seq[acc] = seq
+         for acc in acc2seq:
+             seq = acc2seq[acc]
+             # pad with '-' so every residue sits at the center of a 21-mer
+             paddedseq = '-' * 10 + seq + '-' * 10
+             for i in range(len(seq)):
+                 listofpeps.append(paddedseq[i:i + 21])
+         # de-duplicate (note: this loses the original ordering)
+         listofpeps = list(set(listofpeps))
+ 
+     # chunk the peptides into GPU-sized batches
+     input_batches = [listofpeps[i:i + batch_size] for i in range(0, len(listofpeps), batch_size)]
+ 
+     pred = predict(input_batches=input_batches)
+     write_output(pred, listofpeps, file_output)
+ 
+ 
+ if __name__ == "__main__":
+     main()
+     # df = pd.read_csv("output_predictions.csv")
+     # print(df)
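For orientation, the loop in predict() boils down to the following single-peptide sketch (a minimal example, assuming the finalCheckpoint_25_05_11/ checkpoint is available locally and a CUDA device is present; the 21-mer below is a made-up peptide with its candidate site at index 10):

import torch
import torch.nn as nn

# importing claspp_forward also loads the tokenizer and model at import time
from claspp_forward import getlab, model, tokenizer

pep = "MKVLAAGLLFSAGLLQPLDAK"  # hypothetical 21-mer; pep[10] == 'S' is the site
toks = tokenizer([pep])
with torch.no_grad():
    logits = model(
        torch.tensor(toks["input_ids"]).cuda(),
        torch.tensor(toks["attention_mask"]).cuda(),
    )["logits"]
scores = nn.Sigmoid()(logits).tolist()[0]  # 54 per-cluster probabilities
print(getlab(scores, pep[10]))             # condensed 20-column PTM scores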
configuration_esm.py ADDED
@@ -0,0 +1,370 @@
+ # coding=utf-8
+ # Copyright 2022 Meta and The HuggingFace Inc. team. All rights reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ """ESM model configuration"""
+ 
+ from dataclasses import asdict, dataclass
+ from typing import Optional
+ 
+ from transformers.configuration_utils import PretrainedConfig
+ from transformers.utils import logging
+ 
+ 
+ logger = logging.get_logger(__name__)
+ 
+ # TODO Update this
+ ESM_PRETRAINED_CONFIG_ARCHIVE_MAP = {
+     "facebook/esm-1b": "https://huggingface.co/facebook/esm-1b/resolve/main/config.json",
+     "facebook/esm2_t6_8M_UR50D": "https://huggingface.co/facebook/esm2_t6_8M_UR50D/blob/main/config.json",
+     # See all ESM models at https://huggingface.co/models?filter=esm
+ }
+ 
+ 
+ class EsmConfig(PretrainedConfig):
+     r"""
+     This is the configuration class to store the configuration of an [`ESMModel`]. It is used to instantiate an ESM
+     model according to the specified arguments, defining the model architecture. Instantiating a configuration with
+     the defaults will yield a configuration similar to that of the ESM
+     [facebook/esm-1b](https://huggingface.co/facebook/esm-1b) architecture.
+ 
+     Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
+     documentation from [`PretrainedConfig`] for more information.
+ 
+     Args:
+         vocab_size (`int`, *optional*):
+             Vocabulary size of the ESM model. Defines the number of different tokens that can be represented by the
+             `inputs_ids` passed when calling [`ESMModel`].
+         mask_token_id (`int`, *optional*):
+             The index of the mask token in the vocabulary. This must be included in the config because of the
+             "mask-dropout" scaling trick, which will scale the inputs depending on the number of masked tokens.
+         pad_token_id (`int`, *optional*):
+             The index of the padding token in the vocabulary. This must be included in the config because certain
+             parts of the ESM code use this instead of the attention mask.
+         hidden_size (`int`, *optional*, defaults to 768):
+             Dimensionality of the encoder layers and the pooler layer.
+         num_hidden_layers (`int`, *optional*, defaults to 12):
+             Number of hidden layers in the Transformer encoder.
+         num_attention_heads (`int`, *optional*, defaults to 12):
+             Number of attention heads for each attention layer in the Transformer encoder.
+         intermediate_size (`int`, *optional*, defaults to 3072):
+             Dimensionality of the "intermediate" (often named feed-forward) layer in the Transformer encoder.
+         hidden_act (`str` or `Callable`, *optional*, defaults to `"gelu"`):
+             The non-linear activation function (function or string) in the encoder and pooler. If string, `"gelu"`,
+             `"relu"`, `"silu"` and `"gelu_new"` are supported.
+         hidden_dropout_prob (`float`, *optional*, defaults to 0.1):
+             The dropout probability for all fully connected layers in the embeddings, encoder, and pooler.
+         attention_probs_dropout_prob (`float`, *optional*, defaults to 0.1):
+             The dropout ratio for the attention probabilities.
+         max_position_embeddings (`int`, *optional*, defaults to 1026):
+             The maximum sequence length that this model might ever be used with. Typically set this to something
+             large just in case (e.g., 512 or 1024 or 2048).
+         initializer_range (`float`, *optional*, defaults to 0.02):
+             The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
+         layer_norm_eps (`float`, *optional*, defaults to 1e-12):
+             The epsilon used by the layer normalization layers.
+         position_embedding_type (`str`, *optional*, defaults to `"absolute"`):
+             Type of position embedding. Choose one of `"absolute"`, `"relative_key"`, `"relative_key_query"`,
+             `"rotary"`. For positional embeddings use `"absolute"`. For more information on `"relative_key"`, please
+             refer to [Self-Attention with Relative Position Representations (Shaw et al.)](https://arxiv.org/abs/1803.02155).
+             For more information on `"relative_key_query"`, please refer to *Method 4* in [Improve Transformer Models
+             with Better Relative Position Embeddings (Huang et al.)](https://arxiv.org/abs/2009.13658).
+         use_cache (`bool`, *optional*, defaults to `True`):
+             Whether or not the model should return the last key/values attentions (not used by all models). Only
+             relevant if `config.is_decoder=True`.
+         classifier_dropout (`float`, *optional*):
+             The dropout ratio for the classification head.
+         emb_layer_norm_before (`bool`, *optional*):
+             Whether to apply layer normalization after embeddings but before the main stem of the network.
+         token_dropout (`bool`, defaults to `False`):
+             When this is enabled, masked tokens are treated as if they had been dropped out by input dropout.
+ 
+     Examples:
+ 
+     ```python
+     >>> from transformers import EsmModel, EsmConfig
+ 
+     >>> # Initializing an ESM facebook/esm-1b style configuration
+     >>> configuration = EsmConfig()
+ 
+     >>> # Initializing a model from the configuration
+     >>> model = EsmModel(configuration)
+ 
+     >>> # Accessing the model configuration
+     >>> configuration = model.config
+     ```"""
+ 
+     model_type = "esm"
+ 
+     def __init__(
+         self,
+         vocab_size=None,
+         mask_token_id=None,
+         pad_token_id=None,
+         hidden_size=768,
+         num_hidden_layers=12,
+         num_attention_heads=12,
+         intermediate_size=3072,
+         hidden_act="gelu",
+         hidden_dropout_prob=0.1,
+         attention_probs_dropout_prob=0.1,
+         max_position_embeddings=1026,
+         initializer_range=0.02,
+         layer_norm_eps=1e-12,
+         position_embedding_type="absolute",
+         use_cache=True,
+         classifier_dropout=None,
+         emb_layer_norm_before=None,
+         token_dropout=False,
+         is_folding_model=False,
+         esmfold_config=None,
+         vocab_list=None,
+         **kwargs,
+     ):
+         super().__init__(pad_token_id=pad_token_id, mask_token_id=mask_token_id, **kwargs)
+ 
+         self.vocab_size = vocab_size
+         self.hidden_size = hidden_size
+         self.num_hidden_layers = num_hidden_layers
+         self.num_attention_heads = num_attention_heads
+         self.hidden_act = hidden_act
+         self.intermediate_size = intermediate_size
+         self.hidden_dropout_prob = hidden_dropout_prob
+         self.attention_probs_dropout_prob = attention_probs_dropout_prob
+         self.max_position_embeddings = max_position_embeddings
+         self.initializer_range = initializer_range
+         self.layer_norm_eps = layer_norm_eps
+         self.position_embedding_type = position_embedding_type
+         self.use_cache = use_cache
+         self.classifier_dropout = classifier_dropout
+         self.emb_layer_norm_before = emb_layer_norm_before
+         self.token_dropout = token_dropout
+         self.is_folding_model = is_folding_model
+         if is_folding_model:
+             if esmfold_config is None:
+                 logger.info("No esmfold_config supplied for folding model, using default values.")
+                 esmfold_config = EsmFoldConfig()
+             elif isinstance(esmfold_config, dict):
+                 esmfold_config = EsmFoldConfig(**esmfold_config)
+             self.esmfold_config = esmfold_config
+             if vocab_list is None:
+                 logger.warning("No vocab_list supplied for folding model, assuming the ESM-2 vocabulary!")
+                 self.vocab_list = get_default_vocab_list()
+             else:
+                 self.vocab_list = vocab_list
+         else:
+             self.esmfold_config = None
+             self.vocab_list = None
+         if self.esmfold_config is not None and getattr(self.esmfold_config, "use_esm_attn_map", False):
+             raise ValueError("The HuggingFace port of ESMFold does not support use_esm_attn_map at this time!")
+ 
+     def to_dict(self):
+         """
+         Serializes this instance to a Python dictionary. Overrides the default [`~PretrainedConfig.to_dict`].
+ 
+         Returns:
+             `Dict[str, any]`: Dictionary of all the attributes that make up this configuration instance.
+         """
+         output = super().to_dict()
+         if isinstance(self.esmfold_config, EsmFoldConfig):
+             output["esmfold_config"] = self.esmfold_config.to_dict()
+         return output
+ 
+ 
+ @dataclass
+ class EsmFoldConfig:
+     esm_type: str = None
+     fp16_esm: bool = True
+     use_esm_attn_map: bool = False
+     esm_ablate_pairwise: bool = False
+     esm_ablate_sequence: bool = False
+     esm_input_dropout: float = 0
+ 
+     embed_aa: bool = True
+     bypass_lm: bool = False
+ 
+     lddt_head_hid_dim: int = 128
+     trunk: "TrunkConfig" = None
+ 
+     def __post_init__(self):
+         if self.trunk is None:
+             self.trunk = TrunkConfig()
+         elif isinstance(self.trunk, dict):
+             self.trunk = TrunkConfig(**self.trunk)
+ 
+     def to_dict(self):
+         """
+         Serializes this instance to a Python dictionary. Overrides the default [`~PretrainedConfig.to_dict`].
+ 
+         Returns:
+             `Dict[str, any]`: Dictionary of all the attributes that make up this configuration instance.
+         """
+         output = asdict(self)
+         output["trunk"] = self.trunk.to_dict()
+         return output
+ 
+ 
+ @dataclass
+ class TrunkConfig:
+     num_blocks: int = 48
+     sequence_state_dim: int = 1024
+     pairwise_state_dim: int = 128
+     sequence_head_width: int = 32
+     pairwise_head_width: int = 32
+     position_bins: int = 32
+     dropout: float = 0
+     layer_drop: float = 0
+     cpu_grad_checkpoint: bool = False
+     max_recycles: int = 4
+     chunk_size: Optional[int] = 128
+     structure_module: "StructureModuleConfig" = None
+ 
+     def __post_init__(self):
+         if self.structure_module is None:
+             self.structure_module = StructureModuleConfig()
+         elif isinstance(self.structure_module, dict):
+             self.structure_module = StructureModuleConfig(**self.structure_module)
+ 
+         if self.max_recycles <= 0:
+             raise ValueError(f"`max_recycles` should be positive, got {self.max_recycles}.")
+         if self.sequence_state_dim % self.sequence_head_width != 0:
+             raise ValueError(
+                 "`sequence_state_dim` should be a round multiple of `sequence_head_width`, got"
+                 f" {self.sequence_state_dim} and {self.sequence_head_width}."
+             )
+         if self.pairwise_state_dim % self.pairwise_head_width != 0:
+             raise ValueError(
+                 "`pairwise_state_dim` should be a round multiple of `pairwise_head_width`, got"
+                 f" {self.pairwise_state_dim} and {self.pairwise_head_width}."
+             )
+ 
+         sequence_num_heads = self.sequence_state_dim // self.sequence_head_width
+         pairwise_num_heads = self.pairwise_state_dim // self.pairwise_head_width
+ 
+         if self.sequence_state_dim != sequence_num_heads * self.sequence_head_width:
+             raise ValueError(
+                 "`sequence_state_dim` should be equal to `sequence_num_heads * sequence_head_width`, got"
+                 f" {self.sequence_state_dim} != {sequence_num_heads} * {self.sequence_head_width}."
+             )
+         if self.pairwise_state_dim != pairwise_num_heads * self.pairwise_head_width:
+             raise ValueError(
+                 "`pairwise_state_dim` should be equal to `pairwise_num_heads * pairwise_head_width`, got"
+                 f" {self.pairwise_state_dim} != {pairwise_num_heads} * {self.pairwise_head_width}."
+             )
+         if self.pairwise_state_dim % 2 != 0:
+             raise ValueError(f"`pairwise_state_dim` should be even, got {self.pairwise_state_dim}.")
+ 
+         if self.dropout >= 0.4:
+             raise ValueError(f"`dropout` should be lower than 0.4, got {self.dropout}.")
+ 
+     def to_dict(self):
+         """
+         Serializes this instance to a Python dictionary. Overrides the default [`~PretrainedConfig.to_dict`].
+ 
+         Returns:
+             `Dict[str, any]`: Dictionary of all the attributes that make up this configuration instance.
+         """
+         output = asdict(self)
+         output["structure_module"] = self.structure_module.to_dict()
+         return output
+ 
+ 
+ @dataclass
+ class StructureModuleConfig:
+     """
+     Args:
+         sequence_dim:
+             Single representation channel dimension
+         pairwise_dim:
+             Pair representation channel dimension
+         ipa_dim:
+             IPA hidden channel dimension
+         resnet_dim:
+             Angle resnet (Alg. 23 lines 11-14) hidden channel dimension
+         num_heads_ipa:
+             Number of IPA heads
+         num_qk_points:
+             Number of query/key points to generate during IPA
+         num_v_points:
+             Number of value points to generate during IPA
+         dropout_rate:
+             Dropout rate used throughout the layer
+         num_blocks:
+             Number of structure module blocks
+         num_transition_layers:
+             Number of layers in the single representation transition (Alg. 23 lines 8-9)
+         num_resnet_blocks:
+             Number of blocks in the angle resnet
+         num_angles:
+             Number of angles to generate in the angle resnet
+         trans_scale_factor:
+             Scale of single representation transition hidden dimension
+         epsilon:
+             Small number used in angle resnet normalization
+         inf:
+             Large number used for attention masking
+     """
+ 
+     sequence_dim: int = 384
+     pairwise_dim: int = 128
+     ipa_dim: int = 16
+     resnet_dim: int = 128
+     num_heads_ipa: int = 12
+     num_qk_points: int = 4
+     num_v_points: int = 8
+     dropout_rate: float = 0.1
+     num_blocks: int = 8
+     num_transition_layers: int = 1
+     num_resnet_blocks: int = 2
+     num_angles: int = 7
+     trans_scale_factor: int = 10
+     epsilon: float = 1e-8
+     inf: float = 1e5
+ 
+     def to_dict(self):
+         return asdict(self)
+ 
+ 
+ def get_default_vocab_list():
+     return (
+         "<cls>",
+         "<pad>",
+         "<eos>",
+         "<unk>",
+         "L",
+         "A",
+         "G",
+         "V",
+         "S",
+         "E",
+         "R",
+         "T",
+         "I",
+         "D",
+         "P",
+         "K",
+         "Q",
+         "N",
+         "F",
+         "Y",
+         "M",
+         "H",
+         "W",
+         "C",
+         "X",
+         "B",
+         "U",
+         "Z",
+         "O",
+         ".",
+         "-",
+         "<null_1>",
+         "<mask>",
+     )
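As a quick usage sketch (not part of the original file): instantiating EsmConfig with the hyperparameters used by the checkpoint in this commit; the values below mirror finalCheckpoint_25_05_11/config.json.

from configuration_esm import EsmConfig

config = EsmConfig(
    vocab_size=33,
    mask_token_id=32,
    pad_token_id=1,
    hidden_size=640,
    num_hidden_layers=30,
    num_attention_heads=20,
    intermediate_size=2560,
    max_position_embeddings=1026,
    position_embedding_type="rotary",
    token_dropout=True,
)
print(config.to_dict()["position_embedding_type"])  # rotary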
finalCheckpoint_25_05_11/config.json ADDED
@@ -0,0 +1,143 @@
+ {
+   "_name_or_path": "contrastive_checkpoint/checkpoint-7800",
+   "architectures": [
+     "EsmForSequenceClassificationCustomWidehead"
+   ],
+   "attention_probs_dropout_prob": 0.0,
+   "classifier_dropout": null,
+   "emb_layer_norm_before": false,
+   "esmfold_config": null,
+   "hidden_act": "gelu",
+   "hidden_dropout_prob": 0.0,
+   "hidden_size": 640,
+   "id2label": {
+     "0": "LABEL_0",
+     "1": "LABEL_1",
+     "2": "LABEL_2",
+     "3": "LABEL_3",
+     "4": "LABEL_4",
+     "5": "LABEL_5",
+     "6": "LABEL_6",
+     "7": "LABEL_7",
+     "8": "LABEL_8",
+     "9": "LABEL_9",
+     "10": "LABEL_10",
+     "11": "LABEL_11",
+     "12": "LABEL_12",
+     "13": "LABEL_13",
+     "14": "LABEL_14",
+     "15": "LABEL_15",
+     "16": "LABEL_16",
+     "17": "LABEL_17",
+     "18": "LABEL_18",
+     "19": "LABEL_19",
+     "20": "LABEL_20",
+     "21": "LABEL_21",
+     "22": "LABEL_22",
+     "23": "LABEL_23",
+     "24": "LABEL_24",
+     "25": "LABEL_25",
+     "26": "LABEL_26",
+     "27": "LABEL_27",
+     "28": "LABEL_28",
+     "29": "LABEL_29",
+     "30": "LABEL_30",
+     "31": "LABEL_31",
+     "32": "LABEL_32",
+     "33": "LABEL_33",
+     "34": "LABEL_34",
+     "35": "LABEL_35",
+     "36": "LABEL_36",
+     "37": "LABEL_37",
+     "38": "LABEL_38",
+     "39": "LABEL_39",
+     "40": "LABEL_40",
+     "41": "LABEL_41",
+     "42": "LABEL_42",
+     "43": "LABEL_43",
+     "44": "LABEL_44",
+     "45": "LABEL_45",
+     "46": "LABEL_46",
+     "47": "LABEL_47",
+     "48": "LABEL_48",
+     "49": "LABEL_49",
+     "50": "LABEL_50",
+     "51": "LABEL_51",
+     "52": "LABEL_52",
+     "53": "LABEL_53"
+   },
+   "initializer_range": 0.02,
+   "intermediate_size": 2560,
+   "is_folding_model": false,
+   "label2id": {
+     "LABEL_0": 0,
+     "LABEL_1": 1,
+     "LABEL_10": 10,
+     "LABEL_11": 11,
+     "LABEL_12": 12,
+     "LABEL_13": 13,
+     "LABEL_14": 14,
+     "LABEL_15": 15,
+     "LABEL_16": 16,
+     "LABEL_17": 17,
+     "LABEL_18": 18,
+     "LABEL_19": 19,
+     "LABEL_2": 2,
+     "LABEL_20": 20,
+     "LABEL_21": 21,
+     "LABEL_22": 22,
+     "LABEL_23": 23,
+     "LABEL_24": 24,
+     "LABEL_25": 25,
+     "LABEL_26": 26,
+     "LABEL_27": 27,
+     "LABEL_28": 28,
+     "LABEL_29": 29,
+     "LABEL_3": 3,
+     "LABEL_30": 30,
+     "LABEL_31": 31,
+     "LABEL_32": 32,
+     "LABEL_33": 33,
+     "LABEL_34": 34,
+     "LABEL_35": 35,
+     "LABEL_36": 36,
+     "LABEL_37": 37,
+     "LABEL_38": 38,
+     "LABEL_39": 39,
+     "LABEL_4": 4,
+     "LABEL_40": 40,
+     "LABEL_41": 41,
+     "LABEL_42": 42,
+     "LABEL_43": 43,
+     "LABEL_44": 44,
+     "LABEL_45": 45,
+     "LABEL_46": 46,
+     "LABEL_47": 47,
+     "LABEL_48": 48,
+     "LABEL_49": 49,
+     "LABEL_5": 5,
+     "LABEL_50": 50,
+     "LABEL_51": 51,
+     "LABEL_52": 52,
+     "LABEL_53": 53,
+     "LABEL_6": 6,
+     "LABEL_7": 7,
+     "LABEL_8": 8,
+     "LABEL_9": 9
+   },
+   "layer_norm_eps": 1e-05,
+   "mask_token_id": 32,
+   "max_position_embeddings": 1026,
+   "model_type": "esm",
+   "num_attention_heads": 20,
+   "num_hidden_layers": 30,
+   "pad_token_id": 1,
+   "position_embedding_type": "rotary",
+   "problem_type": "multi_label_classification",
+   "token_dropout": true,
+   "torch_dtype": "float32",
+   "transformers_version": "4.45.2",
+   "use_cache": true,
+   "vocab_list": null,
+   "vocab_size": 33
+ }
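The generic LABEL_0 ... LABEL_53 names correspond one-to-one to the 54-entry cluster-label list commented at the top of claspp_forward.py. A small sketch to print a readable mapping (only the first three labs entries from that list are shown here):

import json

labs = ["ST-Phosphorylation_nc0_tot5",
        "ST-Phosphorylation_nc1_tot5",
        "ST-Phosphorylation_nc2_tot5"]  # continue with the full 54-entry list

with open("finalCheckpoint_25_05_11/config.json") as fh:
    cfg = json.load(fh)

for i, name in enumerate(labs):
    print(cfg["id2label"][str(i)], "->", name)  # e.g. LABEL_0 -> ST-Phosphorylation_nc0_tot5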
finalCheckpoint_25_05_11/model-00001-of-00002.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8500bf15856824253050c35105b187cf7b6b099759093573f52e8d3795a8c43a
+ size 593608456
finalCheckpoint_25_05_11/model-00002-of-00002.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0457e64925706ef97ee05e0a2a17f1974ac4c02ab4539e4c6853af37ed02ba73
+ size 4842128200
finalCheckpoint_25_05_11/model.safetensors.index.json ADDED
@@ -0,0 +1,528 @@
+ {
+   "metadata": {
+     "total_size": 5435676264
+   },
+   "weight_map": {
+     "classifier.dense1.bias": "model-00002-of-00002.safetensors",
+     "classifier.dense1.weight": "model-00002-of-00002.safetensors",
+     "classifier.layer_norm.bias": "model-00002-of-00002.safetensors",
+     "classifier.layer_norm.weight": "model-00002-of-00002.safetensors",
+     "classifier.out_proj.bias": "model-00002-of-00002.safetensors",
+     "classifier.out_proj.weight": "model-00002-of-00002.safetensors",
+     "esm.embeddings.position_embeddings.weight": "model-00001-of-00002.safetensors",
+     "esm.embeddings.position_ids": "model-00001-of-00002.safetensors",
+     "esm.embeddings.word_embeddings.weight": "model-00001-of-00002.safetensors",
+     "esm.encoder.emb_layer_norm_after.bias": "model-00001-of-00002.safetensors",
+     "esm.encoder.emb_layer_norm_after.weight": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.0.LayerNorm.bias": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.0.LayerNorm.weight": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.0.attention.LayerNorm.bias": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.0.attention.LayerNorm.weight": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.0.attention.output.dense.bias": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.0.attention.output.dense.weight": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.0.attention.self.key.bias": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.0.attention.self.key.weight": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.0.attention.self.query.bias": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.0.attention.self.query.weight": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.0.attention.self.rotary_embeddings.inv_freq": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.0.attention.self.value.bias": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.0.attention.self.value.weight": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.0.intermediate.dense.bias": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.0.intermediate.dense.weight": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.0.output.dense.bias": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.0.output.dense.weight": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.1.LayerNorm.bias": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.1.LayerNorm.weight": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.1.attention.LayerNorm.bias": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.1.attention.LayerNorm.weight": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.1.attention.output.dense.bias": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.1.attention.output.dense.weight": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.1.attention.self.key.bias": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.1.attention.self.key.weight": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.1.attention.self.query.bias": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.1.attention.self.query.weight": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.1.attention.self.rotary_embeddings.inv_freq": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.1.attention.self.value.bias": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.1.attention.self.value.weight": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.1.intermediate.dense.bias": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.1.intermediate.dense.weight": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.1.output.dense.bias": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.1.output.dense.weight": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.10.LayerNorm.bias": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.10.LayerNorm.weight": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.10.attention.LayerNorm.bias": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.10.attention.LayerNorm.weight": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.10.attention.output.dense.bias": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.10.attention.output.dense.weight": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.10.attention.self.key.bias": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.10.attention.self.key.weight": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.10.attention.self.query.bias": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.10.attention.self.query.weight": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.10.attention.self.rotary_embeddings.inv_freq": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.10.attention.self.value.bias": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.10.attention.self.value.weight": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.10.intermediate.dense.bias": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.10.intermediate.dense.weight": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.10.output.dense.bias": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.10.output.dense.weight": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.11.LayerNorm.bias": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.11.LayerNorm.weight": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.11.attention.LayerNorm.bias": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.11.attention.LayerNorm.weight": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.11.attention.output.dense.bias": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.11.attention.output.dense.weight": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.11.attention.self.key.bias": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.11.attention.self.key.weight": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.11.attention.self.query.bias": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.11.attention.self.query.weight": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.11.attention.self.rotary_embeddings.inv_freq": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.11.attention.self.value.bias": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.11.attention.self.value.weight": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.11.intermediate.dense.bias": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.11.intermediate.dense.weight": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.11.output.dense.bias": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.11.output.dense.weight": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.12.LayerNorm.bias": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.12.LayerNorm.weight": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.12.attention.LayerNorm.bias": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.12.attention.LayerNorm.weight": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.12.attention.output.dense.bias": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.12.attention.output.dense.weight": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.12.attention.self.key.bias": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.12.attention.self.key.weight": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.12.attention.self.query.bias": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.12.attention.self.query.weight": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.12.attention.self.rotary_embeddings.inv_freq": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.12.attention.self.value.bias": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.12.attention.self.value.weight": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.12.intermediate.dense.bias": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.12.intermediate.dense.weight": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.12.output.dense.bias": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.12.output.dense.weight": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.13.LayerNorm.bias": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.13.LayerNorm.weight": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.13.attention.LayerNorm.bias": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.13.attention.LayerNorm.weight": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.13.attention.output.dense.bias": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.13.attention.output.dense.weight": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.13.attention.self.key.bias": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.13.attention.self.key.weight": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.13.attention.self.query.bias": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.13.attention.self.query.weight": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.13.attention.self.rotary_embeddings.inv_freq": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.13.attention.self.value.bias": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.13.attention.self.value.weight": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.13.intermediate.dense.bias": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.13.intermediate.dense.weight": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.13.output.dense.bias": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.13.output.dense.weight": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.14.LayerNorm.bias": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.14.LayerNorm.weight": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.14.attention.LayerNorm.bias": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.14.attention.LayerNorm.weight": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.14.attention.output.dense.bias": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.14.attention.output.dense.weight": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.14.attention.self.key.bias": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.14.attention.self.key.weight": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.14.attention.self.query.bias": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.14.attention.self.query.weight": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.14.attention.self.rotary_embeddings.inv_freq": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.14.attention.self.value.bias": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.14.attention.self.value.weight": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.14.intermediate.dense.bias": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.14.intermediate.dense.weight": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.14.output.dense.bias": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.14.output.dense.weight": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.15.LayerNorm.bias": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.15.LayerNorm.weight": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.15.attention.LayerNorm.bias": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.15.attention.LayerNorm.weight": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.15.attention.output.dense.bias": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.15.attention.output.dense.weight": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.15.attention.self.key.bias": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.15.attention.self.key.weight": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.15.attention.self.query.bias": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.15.attention.self.query.weight": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.15.attention.self.rotary_embeddings.inv_freq": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.15.attention.self.value.bias": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.15.attention.self.value.weight": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.15.intermediate.dense.bias": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.15.intermediate.dense.weight": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.15.output.dense.bias": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.15.output.dense.weight": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.16.LayerNorm.bias": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.16.LayerNorm.weight": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.16.attention.LayerNorm.bias": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.16.attention.LayerNorm.weight": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.16.attention.output.dense.bias": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.16.attention.output.dense.weight": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.16.attention.self.key.bias": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.16.attention.self.key.weight": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.16.attention.self.query.bias": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.16.attention.self.query.weight": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.16.attention.self.rotary_embeddings.inv_freq": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.16.attention.self.value.bias": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.16.attention.self.value.weight": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.16.intermediate.dense.bias": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.16.intermediate.dense.weight": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.16.output.dense.bias": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.16.output.dense.weight": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.17.LayerNorm.bias": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.17.LayerNorm.weight": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.17.attention.LayerNorm.bias": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.17.attention.LayerNorm.weight": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.17.attention.output.dense.bias": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.17.attention.output.dense.weight": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.17.attention.self.key.bias": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.17.attention.self.key.weight": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.17.attention.self.query.bias": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.17.attention.self.query.weight": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.17.attention.self.rotary_embeddings.inv_freq": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.17.attention.self.value.bias": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.17.attention.self.value.weight": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.17.intermediate.dense.bias": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.17.intermediate.dense.weight": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.17.output.dense.bias": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.17.output.dense.weight": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.18.LayerNorm.bias": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.18.LayerNorm.weight": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.18.attention.LayerNorm.bias": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.18.attention.LayerNorm.weight": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.18.attention.output.dense.bias": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.18.attention.output.dense.weight": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.18.attention.self.key.bias": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.18.attention.self.key.weight": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.18.attention.self.query.bias": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.18.attention.self.query.weight": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.18.attention.self.rotary_embeddings.inv_freq": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.18.attention.self.value.bias": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.18.attention.self.value.weight": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.18.intermediate.dense.bias": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.18.intermediate.dense.weight": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.18.output.dense.bias": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.18.output.dense.weight": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.19.LayerNorm.bias": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.19.LayerNorm.weight": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.19.attention.LayerNorm.bias": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.19.attention.LayerNorm.weight": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.19.attention.output.dense.bias": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.19.attention.output.dense.weight": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.19.attention.self.key.bias": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.19.attention.self.key.weight": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.19.attention.self.query.bias": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.19.attention.self.query.weight": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.19.attention.self.rotary_embeddings.inv_freq": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.19.attention.self.value.bias": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.19.attention.self.value.weight": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.19.intermediate.dense.bias": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.19.intermediate.dense.weight": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.19.output.dense.bias": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.19.output.dense.weight": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.2.LayerNorm.bias": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.2.LayerNorm.weight": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.2.attention.LayerNorm.bias": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.2.attention.LayerNorm.weight": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.2.attention.output.dense.bias": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.2.attention.output.dense.weight": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.2.attention.self.key.bias": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.2.attention.self.key.weight": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.2.attention.self.query.bias": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.2.attention.self.query.weight": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.2.attention.self.rotary_embeddings.inv_freq": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.2.attention.self.value.bias": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.2.attention.self.value.weight": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.2.intermediate.dense.bias": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.2.intermediate.dense.weight": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.2.output.dense.bias": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.2.output.dense.weight": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.20.LayerNorm.bias": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.20.LayerNorm.weight": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.20.attention.LayerNorm.bias": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.20.attention.LayerNorm.weight": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.20.attention.output.dense.bias": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.20.attention.output.dense.weight": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.20.attention.self.key.bias": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.20.attention.self.key.weight": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.20.attention.self.query.bias": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.20.attention.self.query.weight": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.20.attention.self.rotary_embeddings.inv_freq": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.20.attention.self.value.bias": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.20.attention.self.value.weight": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.20.intermediate.dense.bias": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.20.intermediate.dense.weight": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.20.output.dense.bias": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.20.output.dense.weight": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.21.LayerNorm.bias": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.21.LayerNorm.weight": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.21.attention.LayerNorm.bias": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.21.attention.LayerNorm.weight": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.21.attention.output.dense.bias": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.21.attention.output.dense.weight": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.21.attention.self.key.bias": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.21.attention.self.key.weight": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.21.attention.self.query.bias": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.21.attention.self.query.weight": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.21.attention.self.rotary_embeddings.inv_freq": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.21.attention.self.value.bias": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.21.attention.self.value.weight": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.21.intermediate.dense.bias": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.21.intermediate.dense.weight": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.21.output.dense.bias": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.21.output.dense.weight": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.22.LayerNorm.bias": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.22.LayerNorm.weight": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.22.attention.LayerNorm.bias": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.22.attention.LayerNorm.weight": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.22.attention.output.dense.bias": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.22.attention.output.dense.weight": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.22.attention.self.key.bias": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.22.attention.self.key.weight": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.22.attention.self.query.bias": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.22.attention.self.query.weight": "model-00001-of-00002.safetensors",
+     "esm.encoder.layer.22.attention.self.rotary_embeddings.inv_freq": "model-00001-of-00002.safetensors",
283
+ "esm.encoder.layer.22.attention.self.value.bias": "model-00001-of-00002.safetensors",
284
+ "esm.encoder.layer.22.attention.self.value.weight": "model-00001-of-00002.safetensors",
285
+ "esm.encoder.layer.22.intermediate.dense.bias": "model-00001-of-00002.safetensors",
286
+ "esm.encoder.layer.22.intermediate.dense.weight": "model-00001-of-00002.safetensors",
287
+ "esm.encoder.layer.22.output.dense.bias": "model-00001-of-00002.safetensors",
288
+ "esm.encoder.layer.22.output.dense.weight": "model-00001-of-00002.safetensors",
289
+ "esm.encoder.layer.23.LayerNorm.bias": "model-00001-of-00002.safetensors",
290
+ "esm.encoder.layer.23.LayerNorm.weight": "model-00001-of-00002.safetensors",
291
+ "esm.encoder.layer.23.attention.LayerNorm.bias": "model-00001-of-00002.safetensors",
292
+ "esm.encoder.layer.23.attention.LayerNorm.weight": "model-00001-of-00002.safetensors",
293
+ "esm.encoder.layer.23.attention.output.dense.bias": "model-00001-of-00002.safetensors",
294
+ "esm.encoder.layer.23.attention.output.dense.weight": "model-00001-of-00002.safetensors",
295
+ "esm.encoder.layer.23.attention.self.key.bias": "model-00001-of-00002.safetensors",
296
+ "esm.encoder.layer.23.attention.self.key.weight": "model-00001-of-00002.safetensors",
297
+ "esm.encoder.layer.23.attention.self.query.bias": "model-00001-of-00002.safetensors",
298
+ "esm.encoder.layer.23.attention.self.query.weight": "model-00001-of-00002.safetensors",
299
+ "esm.encoder.layer.23.attention.self.rotary_embeddings.inv_freq": "model-00001-of-00002.safetensors",
300
+ "esm.encoder.layer.23.attention.self.value.bias": "model-00001-of-00002.safetensors",
301
+ "esm.encoder.layer.23.attention.self.value.weight": "model-00001-of-00002.safetensors",
302
+ "esm.encoder.layer.23.intermediate.dense.bias": "model-00001-of-00002.safetensors",
303
+ "esm.encoder.layer.23.intermediate.dense.weight": "model-00001-of-00002.safetensors",
304
+ "esm.encoder.layer.23.output.dense.bias": "model-00001-of-00002.safetensors",
305
+ "esm.encoder.layer.23.output.dense.weight": "model-00001-of-00002.safetensors",
306
+ "esm.encoder.layer.24.LayerNorm.bias": "model-00001-of-00002.safetensors",
307
+ "esm.encoder.layer.24.LayerNorm.weight": "model-00001-of-00002.safetensors",
308
+ "esm.encoder.layer.24.attention.LayerNorm.bias": "model-00001-of-00002.safetensors",
309
+ "esm.encoder.layer.24.attention.LayerNorm.weight": "model-00001-of-00002.safetensors",
310
+ "esm.encoder.layer.24.attention.output.dense.bias": "model-00001-of-00002.safetensors",
311
+ "esm.encoder.layer.24.attention.output.dense.weight": "model-00001-of-00002.safetensors",
312
+ "esm.encoder.layer.24.attention.self.key.bias": "model-00001-of-00002.safetensors",
313
+ "esm.encoder.layer.24.attention.self.key.weight": "model-00001-of-00002.safetensors",
314
+ "esm.encoder.layer.24.attention.self.query.bias": "model-00001-of-00002.safetensors",
315
+ "esm.encoder.layer.24.attention.self.query.weight": "model-00001-of-00002.safetensors",
316
+ "esm.encoder.layer.24.attention.self.rotary_embeddings.inv_freq": "model-00001-of-00002.safetensors",
317
+ "esm.encoder.layer.24.attention.self.value.bias": "model-00001-of-00002.safetensors",
318
+ "esm.encoder.layer.24.attention.self.value.weight": "model-00001-of-00002.safetensors",
319
+ "esm.encoder.layer.24.intermediate.dense.bias": "model-00001-of-00002.safetensors",
320
+ "esm.encoder.layer.24.intermediate.dense.weight": "model-00001-of-00002.safetensors",
321
+ "esm.encoder.layer.24.output.dense.bias": "model-00001-of-00002.safetensors",
322
+ "esm.encoder.layer.24.output.dense.weight": "model-00001-of-00002.safetensors",
323
+ "esm.encoder.layer.25.LayerNorm.bias": "model-00001-of-00002.safetensors",
324
+ "esm.encoder.layer.25.LayerNorm.weight": "model-00001-of-00002.safetensors",
325
+ "esm.encoder.layer.25.attention.LayerNorm.bias": "model-00001-of-00002.safetensors",
326
+ "esm.encoder.layer.25.attention.LayerNorm.weight": "model-00001-of-00002.safetensors",
327
+ "esm.encoder.layer.25.attention.output.dense.bias": "model-00001-of-00002.safetensors",
328
+ "esm.encoder.layer.25.attention.output.dense.weight": "model-00001-of-00002.safetensors",
329
+ "esm.encoder.layer.25.attention.self.key.bias": "model-00001-of-00002.safetensors",
330
+ "esm.encoder.layer.25.attention.self.key.weight": "model-00001-of-00002.safetensors",
331
+ "esm.encoder.layer.25.attention.self.query.bias": "model-00001-of-00002.safetensors",
332
+ "esm.encoder.layer.25.attention.self.query.weight": "model-00001-of-00002.safetensors",
333
+ "esm.encoder.layer.25.attention.self.rotary_embeddings.inv_freq": "model-00001-of-00002.safetensors",
334
+ "esm.encoder.layer.25.attention.self.value.bias": "model-00001-of-00002.safetensors",
335
+ "esm.encoder.layer.25.attention.self.value.weight": "model-00001-of-00002.safetensors",
336
+ "esm.encoder.layer.25.intermediate.dense.bias": "model-00001-of-00002.safetensors",
337
+ "esm.encoder.layer.25.intermediate.dense.weight": "model-00001-of-00002.safetensors",
338
+ "esm.encoder.layer.25.output.dense.bias": "model-00001-of-00002.safetensors",
339
+ "esm.encoder.layer.25.output.dense.weight": "model-00001-of-00002.safetensors",
340
+ "esm.encoder.layer.26.LayerNorm.bias": "model-00001-of-00002.safetensors",
341
+ "esm.encoder.layer.26.LayerNorm.weight": "model-00001-of-00002.safetensors",
342
+ "esm.encoder.layer.26.attention.LayerNorm.bias": "model-00001-of-00002.safetensors",
343
+ "esm.encoder.layer.26.attention.LayerNorm.weight": "model-00001-of-00002.safetensors",
344
+ "esm.encoder.layer.26.attention.output.dense.bias": "model-00001-of-00002.safetensors",
345
+ "esm.encoder.layer.26.attention.output.dense.weight": "model-00001-of-00002.safetensors",
346
+ "esm.encoder.layer.26.attention.self.key.bias": "model-00001-of-00002.safetensors",
347
+ "esm.encoder.layer.26.attention.self.key.weight": "model-00001-of-00002.safetensors",
348
+ "esm.encoder.layer.26.attention.self.query.bias": "model-00001-of-00002.safetensors",
349
+ "esm.encoder.layer.26.attention.self.query.weight": "model-00001-of-00002.safetensors",
350
+ "esm.encoder.layer.26.attention.self.rotary_embeddings.inv_freq": "model-00001-of-00002.safetensors",
351
+ "esm.encoder.layer.26.attention.self.value.bias": "model-00001-of-00002.safetensors",
352
+ "esm.encoder.layer.26.attention.self.value.weight": "model-00001-of-00002.safetensors",
353
+ "esm.encoder.layer.26.intermediate.dense.bias": "model-00001-of-00002.safetensors",
354
+ "esm.encoder.layer.26.intermediate.dense.weight": "model-00001-of-00002.safetensors",
355
+ "esm.encoder.layer.26.output.dense.bias": "model-00001-of-00002.safetensors",
356
+ "esm.encoder.layer.26.output.dense.weight": "model-00001-of-00002.safetensors",
357
+ "esm.encoder.layer.27.LayerNorm.bias": "model-00001-of-00002.safetensors",
358
+ "esm.encoder.layer.27.LayerNorm.weight": "model-00001-of-00002.safetensors",
359
+ "esm.encoder.layer.27.attention.LayerNorm.bias": "model-00001-of-00002.safetensors",
360
+ "esm.encoder.layer.27.attention.LayerNorm.weight": "model-00001-of-00002.safetensors",
361
+ "esm.encoder.layer.27.attention.output.dense.bias": "model-00001-of-00002.safetensors",
362
+ "esm.encoder.layer.27.attention.output.dense.weight": "model-00001-of-00002.safetensors",
363
+ "esm.encoder.layer.27.attention.self.key.bias": "model-00001-of-00002.safetensors",
364
+ "esm.encoder.layer.27.attention.self.key.weight": "model-00001-of-00002.safetensors",
365
+ "esm.encoder.layer.27.attention.self.query.bias": "model-00001-of-00002.safetensors",
366
+ "esm.encoder.layer.27.attention.self.query.weight": "model-00001-of-00002.safetensors",
367
+ "esm.encoder.layer.27.attention.self.rotary_embeddings.inv_freq": "model-00001-of-00002.safetensors",
368
+ "esm.encoder.layer.27.attention.self.value.bias": "model-00001-of-00002.safetensors",
369
+ "esm.encoder.layer.27.attention.self.value.weight": "model-00001-of-00002.safetensors",
370
+ "esm.encoder.layer.27.intermediate.dense.bias": "model-00001-of-00002.safetensors",
371
+ "esm.encoder.layer.27.intermediate.dense.weight": "model-00001-of-00002.safetensors",
372
+ "esm.encoder.layer.27.output.dense.bias": "model-00001-of-00002.safetensors",
373
+ "esm.encoder.layer.27.output.dense.weight": "model-00001-of-00002.safetensors",
374
+ "esm.encoder.layer.28.LayerNorm.bias": "model-00001-of-00002.safetensors",
375
+ "esm.encoder.layer.28.LayerNorm.weight": "model-00001-of-00002.safetensors",
376
+ "esm.encoder.layer.28.attention.LayerNorm.bias": "model-00001-of-00002.safetensors",
377
+ "esm.encoder.layer.28.attention.LayerNorm.weight": "model-00001-of-00002.safetensors",
378
+ "esm.encoder.layer.28.attention.output.dense.bias": "model-00001-of-00002.safetensors",
379
+ "esm.encoder.layer.28.attention.output.dense.weight": "model-00001-of-00002.safetensors",
380
+ "esm.encoder.layer.28.attention.self.key.bias": "model-00001-of-00002.safetensors",
381
+ "esm.encoder.layer.28.attention.self.key.weight": "model-00001-of-00002.safetensors",
382
+ "esm.encoder.layer.28.attention.self.query.bias": "model-00001-of-00002.safetensors",
383
+ "esm.encoder.layer.28.attention.self.query.weight": "model-00001-of-00002.safetensors",
384
+ "esm.encoder.layer.28.attention.self.rotary_embeddings.inv_freq": "model-00001-of-00002.safetensors",
385
+ "esm.encoder.layer.28.attention.self.value.bias": "model-00001-of-00002.safetensors",
386
+ "esm.encoder.layer.28.attention.self.value.weight": "model-00001-of-00002.safetensors",
387
+ "esm.encoder.layer.28.intermediate.dense.bias": "model-00001-of-00002.safetensors",
388
+ "esm.encoder.layer.28.intermediate.dense.weight": "model-00001-of-00002.safetensors",
389
+ "esm.encoder.layer.28.output.dense.bias": "model-00001-of-00002.safetensors",
390
+ "esm.encoder.layer.28.output.dense.weight": "model-00001-of-00002.safetensors",
391
+ "esm.encoder.layer.29.LayerNorm.bias": "model-00001-of-00002.safetensors",
392
+ "esm.encoder.layer.29.LayerNorm.weight": "model-00001-of-00002.safetensors",
393
+ "esm.encoder.layer.29.attention.LayerNorm.bias": "model-00001-of-00002.safetensors",
394
+ "esm.encoder.layer.29.attention.LayerNorm.weight": "model-00001-of-00002.safetensors",
395
+ "esm.encoder.layer.29.attention.output.dense.bias": "model-00001-of-00002.safetensors",
396
+ "esm.encoder.layer.29.attention.output.dense.weight": "model-00001-of-00002.safetensors",
397
+ "esm.encoder.layer.29.attention.self.key.bias": "model-00001-of-00002.safetensors",
398
+ "esm.encoder.layer.29.attention.self.key.weight": "model-00001-of-00002.safetensors",
399
+ "esm.encoder.layer.29.attention.self.query.bias": "model-00001-of-00002.safetensors",
400
+ "esm.encoder.layer.29.attention.self.query.weight": "model-00001-of-00002.safetensors",
401
+ "esm.encoder.layer.29.attention.self.rotary_embeddings.inv_freq": "model-00001-of-00002.safetensors",
402
+ "esm.encoder.layer.29.attention.self.value.bias": "model-00001-of-00002.safetensors",
403
+ "esm.encoder.layer.29.attention.self.value.weight": "model-00001-of-00002.safetensors",
404
+ "esm.encoder.layer.29.intermediate.dense.bias": "model-00001-of-00002.safetensors",
405
+ "esm.encoder.layer.29.intermediate.dense.weight": "model-00001-of-00002.safetensors",
406
+ "esm.encoder.layer.29.output.dense.bias": "model-00001-of-00002.safetensors",
407
+ "esm.encoder.layer.29.output.dense.weight": "model-00001-of-00002.safetensors",
408
+ "esm.encoder.layer.3.LayerNorm.bias": "model-00001-of-00002.safetensors",
409
+ "esm.encoder.layer.3.LayerNorm.weight": "model-00001-of-00002.safetensors",
410
+ "esm.encoder.layer.3.attention.LayerNorm.bias": "model-00001-of-00002.safetensors",
411
+ "esm.encoder.layer.3.attention.LayerNorm.weight": "model-00001-of-00002.safetensors",
412
+ "esm.encoder.layer.3.attention.output.dense.bias": "model-00001-of-00002.safetensors",
413
+ "esm.encoder.layer.3.attention.output.dense.weight": "model-00001-of-00002.safetensors",
414
+ "esm.encoder.layer.3.attention.self.key.bias": "model-00001-of-00002.safetensors",
415
+ "esm.encoder.layer.3.attention.self.key.weight": "model-00001-of-00002.safetensors",
416
+ "esm.encoder.layer.3.attention.self.query.bias": "model-00001-of-00002.safetensors",
417
+ "esm.encoder.layer.3.attention.self.query.weight": "model-00001-of-00002.safetensors",
418
+ "esm.encoder.layer.3.attention.self.rotary_embeddings.inv_freq": "model-00001-of-00002.safetensors",
419
+ "esm.encoder.layer.3.attention.self.value.bias": "model-00001-of-00002.safetensors",
420
+ "esm.encoder.layer.3.attention.self.value.weight": "model-00001-of-00002.safetensors",
421
+ "esm.encoder.layer.3.intermediate.dense.bias": "model-00001-of-00002.safetensors",
422
+ "esm.encoder.layer.3.intermediate.dense.weight": "model-00001-of-00002.safetensors",
423
+ "esm.encoder.layer.3.output.dense.bias": "model-00001-of-00002.safetensors",
424
+ "esm.encoder.layer.3.output.dense.weight": "model-00001-of-00002.safetensors",
425
+ "esm.encoder.layer.4.LayerNorm.bias": "model-00001-of-00002.safetensors",
426
+ "esm.encoder.layer.4.LayerNorm.weight": "model-00001-of-00002.safetensors",
427
+ "esm.encoder.layer.4.attention.LayerNorm.bias": "model-00001-of-00002.safetensors",
428
+ "esm.encoder.layer.4.attention.LayerNorm.weight": "model-00001-of-00002.safetensors",
429
+ "esm.encoder.layer.4.attention.output.dense.bias": "model-00001-of-00002.safetensors",
430
+ "esm.encoder.layer.4.attention.output.dense.weight": "model-00001-of-00002.safetensors",
431
+ "esm.encoder.layer.4.attention.self.key.bias": "model-00001-of-00002.safetensors",
432
+ "esm.encoder.layer.4.attention.self.key.weight": "model-00001-of-00002.safetensors",
433
+ "esm.encoder.layer.4.attention.self.query.bias": "model-00001-of-00002.safetensors",
434
+ "esm.encoder.layer.4.attention.self.query.weight": "model-00001-of-00002.safetensors",
435
+ "esm.encoder.layer.4.attention.self.rotary_embeddings.inv_freq": "model-00001-of-00002.safetensors",
436
+ "esm.encoder.layer.4.attention.self.value.bias": "model-00001-of-00002.safetensors",
437
+ "esm.encoder.layer.4.attention.self.value.weight": "model-00001-of-00002.safetensors",
438
+ "esm.encoder.layer.4.intermediate.dense.bias": "model-00001-of-00002.safetensors",
439
+ "esm.encoder.layer.4.intermediate.dense.weight": "model-00001-of-00002.safetensors",
440
+ "esm.encoder.layer.4.output.dense.bias": "model-00001-of-00002.safetensors",
441
+ "esm.encoder.layer.4.output.dense.weight": "model-00001-of-00002.safetensors",
442
+ "esm.encoder.layer.5.LayerNorm.bias": "model-00001-of-00002.safetensors",
443
+ "esm.encoder.layer.5.LayerNorm.weight": "model-00001-of-00002.safetensors",
444
+ "esm.encoder.layer.5.attention.LayerNorm.bias": "model-00001-of-00002.safetensors",
445
+ "esm.encoder.layer.5.attention.LayerNorm.weight": "model-00001-of-00002.safetensors",
446
+ "esm.encoder.layer.5.attention.output.dense.bias": "model-00001-of-00002.safetensors",
447
+ "esm.encoder.layer.5.attention.output.dense.weight": "model-00001-of-00002.safetensors",
448
+ "esm.encoder.layer.5.attention.self.key.bias": "model-00001-of-00002.safetensors",
449
+ "esm.encoder.layer.5.attention.self.key.weight": "model-00001-of-00002.safetensors",
450
+ "esm.encoder.layer.5.attention.self.query.bias": "model-00001-of-00002.safetensors",
451
+ "esm.encoder.layer.5.attention.self.query.weight": "model-00001-of-00002.safetensors",
452
+ "esm.encoder.layer.5.attention.self.rotary_embeddings.inv_freq": "model-00001-of-00002.safetensors",
453
+ "esm.encoder.layer.5.attention.self.value.bias": "model-00001-of-00002.safetensors",
454
+ "esm.encoder.layer.5.attention.self.value.weight": "model-00001-of-00002.safetensors",
455
+ "esm.encoder.layer.5.intermediate.dense.bias": "model-00001-of-00002.safetensors",
456
+ "esm.encoder.layer.5.intermediate.dense.weight": "model-00001-of-00002.safetensors",
457
+ "esm.encoder.layer.5.output.dense.bias": "model-00001-of-00002.safetensors",
458
+ "esm.encoder.layer.5.output.dense.weight": "model-00001-of-00002.safetensors",
459
+ "esm.encoder.layer.6.LayerNorm.bias": "model-00001-of-00002.safetensors",
460
+ "esm.encoder.layer.6.LayerNorm.weight": "model-00001-of-00002.safetensors",
461
+ "esm.encoder.layer.6.attention.LayerNorm.bias": "model-00001-of-00002.safetensors",
462
+ "esm.encoder.layer.6.attention.LayerNorm.weight": "model-00001-of-00002.safetensors",
463
+ "esm.encoder.layer.6.attention.output.dense.bias": "model-00001-of-00002.safetensors",
464
+ "esm.encoder.layer.6.attention.output.dense.weight": "model-00001-of-00002.safetensors",
465
+ "esm.encoder.layer.6.attention.self.key.bias": "model-00001-of-00002.safetensors",
466
+ "esm.encoder.layer.6.attention.self.key.weight": "model-00001-of-00002.safetensors",
467
+ "esm.encoder.layer.6.attention.self.query.bias": "model-00001-of-00002.safetensors",
468
+ "esm.encoder.layer.6.attention.self.query.weight": "model-00001-of-00002.safetensors",
469
+ "esm.encoder.layer.6.attention.self.rotary_embeddings.inv_freq": "model-00001-of-00002.safetensors",
470
+ "esm.encoder.layer.6.attention.self.value.bias": "model-00001-of-00002.safetensors",
471
+ "esm.encoder.layer.6.attention.self.value.weight": "model-00001-of-00002.safetensors",
472
+ "esm.encoder.layer.6.intermediate.dense.bias": "model-00001-of-00002.safetensors",
473
+ "esm.encoder.layer.6.intermediate.dense.weight": "model-00001-of-00002.safetensors",
474
+ "esm.encoder.layer.6.output.dense.bias": "model-00001-of-00002.safetensors",
475
+ "esm.encoder.layer.6.output.dense.weight": "model-00001-of-00002.safetensors",
476
+ "esm.encoder.layer.7.LayerNorm.bias": "model-00001-of-00002.safetensors",
477
+ "esm.encoder.layer.7.LayerNorm.weight": "model-00001-of-00002.safetensors",
478
+ "esm.encoder.layer.7.attention.LayerNorm.bias": "model-00001-of-00002.safetensors",
479
+ "esm.encoder.layer.7.attention.LayerNorm.weight": "model-00001-of-00002.safetensors",
480
+ "esm.encoder.layer.7.attention.output.dense.bias": "model-00001-of-00002.safetensors",
481
+ "esm.encoder.layer.7.attention.output.dense.weight": "model-00001-of-00002.safetensors",
482
+ "esm.encoder.layer.7.attention.self.key.bias": "model-00001-of-00002.safetensors",
483
+ "esm.encoder.layer.7.attention.self.key.weight": "model-00001-of-00002.safetensors",
484
+ "esm.encoder.layer.7.attention.self.query.bias": "model-00001-of-00002.safetensors",
485
+ "esm.encoder.layer.7.attention.self.query.weight": "model-00001-of-00002.safetensors",
486
+ "esm.encoder.layer.7.attention.self.rotary_embeddings.inv_freq": "model-00001-of-00002.safetensors",
487
+ "esm.encoder.layer.7.attention.self.value.bias": "model-00001-of-00002.safetensors",
488
+ "esm.encoder.layer.7.attention.self.value.weight": "model-00001-of-00002.safetensors",
489
+ "esm.encoder.layer.7.intermediate.dense.bias": "model-00001-of-00002.safetensors",
490
+ "esm.encoder.layer.7.intermediate.dense.weight": "model-00001-of-00002.safetensors",
491
+ "esm.encoder.layer.7.output.dense.bias": "model-00001-of-00002.safetensors",
492
+ "esm.encoder.layer.7.output.dense.weight": "model-00001-of-00002.safetensors",
493
+ "esm.encoder.layer.8.LayerNorm.bias": "model-00001-of-00002.safetensors",
494
+ "esm.encoder.layer.8.LayerNorm.weight": "model-00001-of-00002.safetensors",
495
+ "esm.encoder.layer.8.attention.LayerNorm.bias": "model-00001-of-00002.safetensors",
496
+ "esm.encoder.layer.8.attention.LayerNorm.weight": "model-00001-of-00002.safetensors",
497
+ "esm.encoder.layer.8.attention.output.dense.bias": "model-00001-of-00002.safetensors",
498
+ "esm.encoder.layer.8.attention.output.dense.weight": "model-00001-of-00002.safetensors",
499
+ "esm.encoder.layer.8.attention.self.key.bias": "model-00001-of-00002.safetensors",
500
+ "esm.encoder.layer.8.attention.self.key.weight": "model-00001-of-00002.safetensors",
501
+ "esm.encoder.layer.8.attention.self.query.bias": "model-00001-of-00002.safetensors",
502
+ "esm.encoder.layer.8.attention.self.query.weight": "model-00001-of-00002.safetensors",
503
+ "esm.encoder.layer.8.attention.self.rotary_embeddings.inv_freq": "model-00001-of-00002.safetensors",
504
+ "esm.encoder.layer.8.attention.self.value.bias": "model-00001-of-00002.safetensors",
505
+ "esm.encoder.layer.8.attention.self.value.weight": "model-00001-of-00002.safetensors",
506
+ "esm.encoder.layer.8.intermediate.dense.bias": "model-00001-of-00002.safetensors",
507
+ "esm.encoder.layer.8.intermediate.dense.weight": "model-00001-of-00002.safetensors",
508
+ "esm.encoder.layer.8.output.dense.bias": "model-00001-of-00002.safetensors",
509
+ "esm.encoder.layer.8.output.dense.weight": "model-00001-of-00002.safetensors",
510
+ "esm.encoder.layer.9.LayerNorm.bias": "model-00001-of-00002.safetensors",
511
+ "esm.encoder.layer.9.LayerNorm.weight": "model-00001-of-00002.safetensors",
512
+ "esm.encoder.layer.9.attention.LayerNorm.bias": "model-00001-of-00002.safetensors",
513
+ "esm.encoder.layer.9.attention.LayerNorm.weight": "model-00001-of-00002.safetensors",
514
+ "esm.encoder.layer.9.attention.output.dense.bias": "model-00001-of-00002.safetensors",
515
+ "esm.encoder.layer.9.attention.output.dense.weight": "model-00001-of-00002.safetensors",
516
+ "esm.encoder.layer.9.attention.self.key.bias": "model-00001-of-00002.safetensors",
517
+ "esm.encoder.layer.9.attention.self.key.weight": "model-00001-of-00002.safetensors",
518
+ "esm.encoder.layer.9.attention.self.query.bias": "model-00001-of-00002.safetensors",
519
+ "esm.encoder.layer.9.attention.self.query.weight": "model-00001-of-00002.safetensors",
520
+ "esm.encoder.layer.9.attention.self.rotary_embeddings.inv_freq": "model-00001-of-00002.safetensors",
521
+ "esm.encoder.layer.9.attention.self.value.bias": "model-00001-of-00002.safetensors",
522
+ "esm.encoder.layer.9.attention.self.value.weight": "model-00001-of-00002.safetensors",
523
+ "esm.encoder.layer.9.intermediate.dense.bias": "model-00001-of-00002.safetensors",
524
+ "esm.encoder.layer.9.intermediate.dense.weight": "model-00001-of-00002.safetensors",
525
+ "esm.encoder.layer.9.output.dense.bias": "model-00001-of-00002.safetensors",
526
+ "esm.encoder.layer.9.output.dense.weight": "model-00001-of-00002.safetensors"
527
+ }
528
+ }
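The map above follows the Hugging Face sharded-checkpoint convention: each parameter name points at the shard file that stores it, so a loader can open only the shard it needs. A minimal sketch of how such an index can be consumed, assuming the standard index filename `model.safetensors.index.json` inside the checkpoint directory (the shard name comes straight from the entries above):

```python
import json

from safetensors import safe_open

ckpt = "finalCheckpoint_25_05_11"

# The index maps every parameter name to the shard that stores it.
with open(f"{ckpt}/model.safetensors.index.json") as fh:
    weight_map = json.load(fh)["weight_map"]

name = "esm.encoder.layer.19.attention.self.query.weight"
shard = weight_map[name]  # "model-00001-of-00002.safetensors"

# Load just that one tensor without materializing the whole shard.
with safe_open(f"{ckpt}/{shard}", framework="pt") as sf:
    tensor = sf.get_tensor(name)
print(name, tuple(tensor.shape))
```

In practice `from_pretrained` resolves this index automatically; the sketch is only for inspecting individual tensors.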
finalCheckpoint_25_05_11/special_tokens_map.json ADDED
@@ -0,0 +1,37 @@
+ {
+ "cls_token": {
+ "content": "<cls>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "eos_token": {
+ "content": "<eos>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "mask_token": {
+ "content": "<mask>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "pad_token": {
+ "content": "<pad>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "unk_token": {
+ "content": "<unk>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ }
+ }
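A quick, hedged check (assuming the checkpoint directory above) that these special-token strings surface as attributes on the loaded tokenizer:

```python
from transformers import EsmTokenizer

# The tokenizer_class declared in tokenizer_config.json below is EsmTokenizer.
tok = EsmTokenizer.from_pretrained("finalCheckpoint_25_05_11/")
print(tok.cls_token, tok.pad_token, tok.eos_token, tok.unk_token, tok.mask_token)
# expected output: <cls> <pad> <eos> <unk> <mask>
```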
finalCheckpoint_25_05_11/tokenizer_config.json ADDED
@@ -0,0 +1,52 @@
+ {
+ "added_tokens_decoder": {
+ "0": {
+ "content": "<cls>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "1": {
+ "content": "<pad>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "2": {
+ "content": "<eos>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "3": {
+ "content": "<unk>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "32": {
+ "content": "<mask>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ }
+ },
+ "clean_up_tokenization_spaces": false,
+ "cls_token": "<cls>",
+ "eos_token": "<eos>",
+ "mask_token": "<mask>",
+ "model_max_length": 1000000000000000019884624838656,
+ "pad_token": "<pad>",
+ "tokenizer_class": "EsmTokenizer",
+ "unk_token": "<unk>"
+ }
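`added_tokens_decoder` pins the special tokens to fixed IDs (`<cls>`=0, `<pad>`=1, `<eos>`=2, `<unk>`=3, `<mask>`=32), matching the standard ESM vocabulary layout, and the huge `model_max_length` is the usual "no explicit limit" sentinel (about 1e30). A minimal sanity check against the loaded tokenizer:

```python
from transformers import EsmTokenizer

tok = EsmTokenizer.from_pretrained("finalCheckpoint_25_05_11/")

# IDs taken from the added_tokens_decoder block above.
for token, expected_id in [("<cls>", 0), ("<pad>", 1), ("<eos>", 2),
                           ("<unk>", 3), ("<mask>", 32)]:
    assert tok.convert_tokens_to_ids(token) == expected_id, token
print("special-token IDs match tokenizer_config.json")
```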
finalCheckpoint_25_05_11/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:be1edc20238f883c88163b261b9326bcb206ea88ea8b1303463ddc3d6684549e
+ size 5496
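The three lines above are a Git LFS pointer, not the binary itself; the real `training_args.bin` is fetched with `git lfs pull`. An illustrative integrity check against the oid recorded in the pointer:

```python
import hashlib

# After `git lfs pull`, the downloaded file should hash to the pointer's oid.
with open("finalCheckpoint_25_05_11/training_args.bin", "rb") as fh:
    digest = hashlib.sha256(fh.read()).hexdigest()

expected = "be1edc20238f883c88163b261b9326bcb206ea88ea8b1303463ddc3d6684549e"
print("training_args.bin intact:", digest == expected)
```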
finalCheckpoint_25_05_11/vocab.txt ADDED
@@ -0,0 +1,33 @@
+ <cls>
+ <pad>
+ <eos>
+ <unk>
+ L
+ A
+ G
+ V
+ S
+ E
+ R
+ T
+ I
+ D
+ P
+ K
+ Q
+ N
+ F
+ Y
+ M
+ H
+ W
+ C
+ X
+ B
+ U
+ Z
+ O
+ .
+ -
+ <null_1>
+ <mask>
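vocab.txt assigns token IDs by line order (first line → ID 0), which is consistent with the IDs pinned in tokenizer_config.json above (`<cls>`=0, `<mask>`=32). A small sketch rebuilding that mapping directly from the file:

```python
# One token per line; ID is the zero-based line index.
with open("finalCheckpoint_25_05_11/vocab.txt") as fh:
    vocab = {line.strip(): i for i, line in enumerate(fh) if line.strip()}

assert vocab["<cls>"] == 0 and vocab["<pad>"] == 1 and vocab["<mask>"] == 32
print(len(vocab), "tokens")  # 33
```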