Upload 14 files
#3
by
Oliverepfl - opened
- multimolecule-ernierna-finetuned/eval_metrics.csv +19 -0
- multimolecule-ernierna-finetuned/final_model/config.json +53 -0
- multimolecule-ernierna-finetuned/final_model/model.safetensors +3 -0
- multimolecule-ernierna-finetuned/final_model/special_tokens_map.json +54 -0
- multimolecule-ernierna-finetuned/final_model/tokenizer_config.json +68 -0
- multimolecule-ernierna-finetuned/final_model/training_args.bin +3 -0
- multimolecule-ernierna-finetuned/final_model/vocab.txt +26 -0
- multimolecule-rnabert-finetuned/eval_metrics.csv +28 -0
- multimolecule-rnabert-finetuned/final_model/config.json +53 -0
- multimolecule-rnabert-finetuned/final_model/model.safetensors +3 -0
- multimolecule-rnabert-finetuned/final_model/special_tokens_map.json +54 -0
- multimolecule-rnabert-finetuned/final_model/tokenizer_config.json +68 -0
- multimolecule-rnabert-finetuned/final_model/training_args.bin +3 -0
- multimolecule-rnabert-finetuned/final_model/vocab.txt +26 -0
multimolecule-ernierna-finetuned/eval_metrics.csv
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
,step,eval_loss,train_loss,eval_seq_accuracy,eval_F1,eval_ideal_threshold
|
| 2 |
+
0,1000,0.001893,0,0.6778,0,0
|
| 3 |
+
1,2000,0.00167,0,0.7191,0,0
|
| 4 |
+
2,3000,0.001627,0,0.7318,0,0
|
| 5 |
+
3,4000,0.001515,0,0.7348,0,0
|
| 6 |
+
4,5000,0.001436,0,0.7256,0,0
|
| 7 |
+
5,6000,0.001438,0,0.7439,0,0
|
| 8 |
+
6,7000,0.001367,0,0.764,0,0
|
| 9 |
+
7,8000,0.001349,0,0.7563,0,0
|
| 10 |
+
8,9000,0.001335,0,0.7683,0,0
|
| 11 |
+
0,10222,0.0013569026486948133,0.0014,0.7723430788416239,0.799498572323978,0.4814424216747284
|
| 12 |
+
1,11222,0.0013802237808704376,0.0014,0.77268948316475,0.8008426287253514,0.4628483057022095
|
| 13 |
+
2,12222,0.0014453979674726725,0.0013,0.7753221560205071,0.7969279106301275,0.48278334736824036
|
| 14 |
+
3,13222,0.0013328788336366415,0.0012,0.7778855480116392,0.8091337292647962,0.3816543221473694
|
| 15 |
+
4,14222,0.0013075487222522497,0.0012,0.7762228072606346,0.8057846960963656,0.4586166441440582
|
| 16 |
+
5,15222,0.0013317714910954237,0.0011,0.7892476098101704,0.8116461208864538,0.5130550265312195
|
| 17 |
+
6,16222,0.0013339577708393335,0.0011,0.7945129555216849,0.8127478950664533,0.4638926684856415
|
| 18 |
+
7,17222,0.0012995643774047494,0.0011,0.7934737425523071,0.816063356344395,0.4715520143508911
|
| 19 |
+
8,18222,0.0012820976553484797,0.0011,0.7947207981155605,0.817995476392423,0.506455659866333
|
multimolecule-ernierna-finetuned/final_model/config.json
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_name_or_path": "multimolecule-ernierna-finetuned-secondary-structure/checkpoint-9223",
|
| 3 |
+
"architectures": [
|
| 4 |
+
"ErnieRnaForTokenPrediction"
|
| 5 |
+
],
|
| 6 |
+
"attention_dropout": 0.1,
|
| 7 |
+
"bos_token_id": 1,
|
| 8 |
+
"eos_token_id": 2,
|
| 9 |
+
"head": {
|
| 10 |
+
"act": null,
|
| 11 |
+
"bias": true,
|
| 12 |
+
"dropout": 0.0,
|
| 13 |
+
"hidden_size": 768,
|
| 14 |
+
"layer_norm_eps": 1e-12,
|
| 15 |
+
"num_labels": 2,
|
| 16 |
+
"output_name": null,
|
| 17 |
+
"problem_type": "single_label_classification",
|
| 18 |
+
"transform": null,
|
| 19 |
+
"transform_act": "gelu"
|
| 20 |
+
},
|
| 21 |
+
"hidden_act": "gelu",
|
| 22 |
+
"hidden_dropout": 0.1,
|
| 23 |
+
"hidden_size": 768,
|
| 24 |
+
"initializer_range": 0.02,
|
| 25 |
+
"intermediate_size": 3072,
|
| 26 |
+
"layer_norm_eps": 1e-12,
|
| 27 |
+
"lm_head": {
|
| 28 |
+
"act": null,
|
| 29 |
+
"bias": true,
|
| 30 |
+
"dropout": 0.0,
|
| 31 |
+
"hidden_size": null,
|
| 32 |
+
"layer_norm_eps": 1e-12,
|
| 33 |
+
"output_name": null,
|
| 34 |
+
"transform": "nonlinear",
|
| 35 |
+
"transform_act": "gelu"
|
| 36 |
+
},
|
| 37 |
+
"mask_token_id": 4,
|
| 38 |
+
"max_position_embeddings": 1026,
|
| 39 |
+
"model_type": "ernierna",
|
| 40 |
+
"null_token_id": 5,
|
| 41 |
+
"num_attention_heads": 12,
|
| 42 |
+
"num_hidden_layers": 12,
|
| 43 |
+
"pad_token_id": 0,
|
| 44 |
+
"pairwise_alpha": 0.8,
|
| 45 |
+
"position_embedding_type": "sinusoidal",
|
| 46 |
+
"problem_type": "single_label_classification",
|
| 47 |
+
"torch_dtype": "float32",
|
| 48 |
+
"transformers_version": "4.46.3",
|
| 49 |
+
"type_vocab_size": 2,
|
| 50 |
+
"unk_token_id": 3,
|
| 51 |
+
"use_cache": true,
|
| 52 |
+
"vocab_size": 26
|
| 53 |
+
}
|
multimolecule-ernierna-finetuned/final_model/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6315136bd4dc321daa6b1207030ffcfef731b6844945d3509205273fee45ac65
|
| 3 |
+
size 342703472
|
multimolecule-ernierna-finetuned/final_model/special_tokens_map.json
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"additional_special_tokens": [
|
| 3 |
+
"<null>"
|
| 4 |
+
],
|
| 5 |
+
"bos_token": {
|
| 6 |
+
"content": "<cls>",
|
| 7 |
+
"lstrip": false,
|
| 8 |
+
"normalized": false,
|
| 9 |
+
"rstrip": false,
|
| 10 |
+
"single_word": false
|
| 11 |
+
},
|
| 12 |
+
"cls_token": {
|
| 13 |
+
"content": "<cls>",
|
| 14 |
+
"lstrip": false,
|
| 15 |
+
"normalized": false,
|
| 16 |
+
"rstrip": false,
|
| 17 |
+
"single_word": false
|
| 18 |
+
},
|
| 19 |
+
"eos_token": {
|
| 20 |
+
"content": "<eos>",
|
| 21 |
+
"lstrip": false,
|
| 22 |
+
"normalized": false,
|
| 23 |
+
"rstrip": false,
|
| 24 |
+
"single_word": false
|
| 25 |
+
},
|
| 26 |
+
"mask_token": {
|
| 27 |
+
"content": "<mask>",
|
| 28 |
+
"lstrip": false,
|
| 29 |
+
"normalized": false,
|
| 30 |
+
"rstrip": false,
|
| 31 |
+
"single_word": false
|
| 32 |
+
},
|
| 33 |
+
"pad_token": {
|
| 34 |
+
"content": "<pad>",
|
| 35 |
+
"lstrip": false,
|
| 36 |
+
"normalized": false,
|
| 37 |
+
"rstrip": false,
|
| 38 |
+
"single_word": false
|
| 39 |
+
},
|
| 40 |
+
"sep_token": {
|
| 41 |
+
"content": "<eos>",
|
| 42 |
+
"lstrip": false,
|
| 43 |
+
"normalized": false,
|
| 44 |
+
"rstrip": false,
|
| 45 |
+
"single_word": false
|
| 46 |
+
},
|
| 47 |
+
"unk_token": {
|
| 48 |
+
"content": "<unk>",
|
| 49 |
+
"lstrip": false,
|
| 50 |
+
"normalized": false,
|
| 51 |
+
"rstrip": false,
|
| 52 |
+
"single_word": false
|
| 53 |
+
}
|
| 54 |
+
}
|
multimolecule-ernierna-finetuned/final_model/tokenizer_config.json
ADDED
|
@@ -0,0 +1,68 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"added_tokens_decoder": {
|
| 3 |
+
"0": {
|
| 4 |
+
"content": "<pad>",
|
| 5 |
+
"lstrip": false,
|
| 6 |
+
"normalized": false,
|
| 7 |
+
"rstrip": false,
|
| 8 |
+
"single_word": false,
|
| 9 |
+
"special": true
|
| 10 |
+
},
|
| 11 |
+
"1": {
|
| 12 |
+
"content": "<cls>",
|
| 13 |
+
"lstrip": false,
|
| 14 |
+
"normalized": false,
|
| 15 |
+
"rstrip": false,
|
| 16 |
+
"single_word": false,
|
| 17 |
+
"special": true
|
| 18 |
+
},
|
| 19 |
+
"2": {
|
| 20 |
+
"content": "<eos>",
|
| 21 |
+
"lstrip": false,
|
| 22 |
+
"normalized": false,
|
| 23 |
+
"rstrip": false,
|
| 24 |
+
"single_word": false,
|
| 25 |
+
"special": true
|
| 26 |
+
},
|
| 27 |
+
"3": {
|
| 28 |
+
"content": "<unk>",
|
| 29 |
+
"lstrip": false,
|
| 30 |
+
"normalized": false,
|
| 31 |
+
"rstrip": false,
|
| 32 |
+
"single_word": false,
|
| 33 |
+
"special": true
|
| 34 |
+
},
|
| 35 |
+
"4": {
|
| 36 |
+
"content": "<mask>",
|
| 37 |
+
"lstrip": false,
|
| 38 |
+
"normalized": false,
|
| 39 |
+
"rstrip": false,
|
| 40 |
+
"single_word": false,
|
| 41 |
+
"special": true
|
| 42 |
+
},
|
| 43 |
+
"5": {
|
| 44 |
+
"content": "<null>",
|
| 45 |
+
"lstrip": false,
|
| 46 |
+
"normalized": false,
|
| 47 |
+
"rstrip": false,
|
| 48 |
+
"single_word": false,
|
| 49 |
+
"special": true
|
| 50 |
+
}
|
| 51 |
+
},
|
| 52 |
+
"additional_special_tokens": [
|
| 53 |
+
"<null>"
|
| 54 |
+
],
|
| 55 |
+
"bos_token": "<cls>",
|
| 56 |
+
"clean_up_tokenization_spaces": true,
|
| 57 |
+
"cls_token": "<cls>",
|
| 58 |
+
"codon": false,
|
| 59 |
+
"eos_token": "<eos>",
|
| 60 |
+
"mask_token": "<mask>",
|
| 61 |
+
"model_max_length": 1000000000000000019884624838656,
|
| 62 |
+
"nmers": 1,
|
| 63 |
+
"pad_token": "<pad>",
|
| 64 |
+
"replace_T_with_U": true,
|
| 65 |
+
"sep_token": "<eos>",
|
| 66 |
+
"tokenizer_class": "RnaTokenizer",
|
| 67 |
+
"unk_token": "<unk>"
|
| 68 |
+
}
|
multimolecule-ernierna-finetuned/final_model/training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:50cc58eed759712c80de480486f3a3e6cbf18929ef20c9db71e5d42a32f66331
|
| 3 |
+
size 5368
|
multimolecule-ernierna-finetuned/final_model/vocab.txt
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<pad>
|
| 2 |
+
<cls>
|
| 3 |
+
<eos>
|
| 4 |
+
<unk>
|
| 5 |
+
<mask>
|
| 6 |
+
<null>
|
| 7 |
+
A
|
| 8 |
+
C
|
| 9 |
+
G
|
| 10 |
+
U
|
| 11 |
+
N
|
| 12 |
+
R
|
| 13 |
+
Y
|
| 14 |
+
S
|
| 15 |
+
W
|
| 16 |
+
K
|
| 17 |
+
M
|
| 18 |
+
B
|
| 19 |
+
D
|
| 20 |
+
H
|
| 21 |
+
V
|
| 22 |
+
.
|
| 23 |
+
X
|
| 24 |
+
*
|
| 25 |
+
-
|
| 26 |
+
I
|
multimolecule-rnabert-finetuned/eval_metrics.csv
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
,step,eval_loss,train_loss,eval_seq_accuracy,eval_F1,eval_ideal_threshold
|
| 2 |
+
0,1000,0.017599036917090416,0.0619,0.0028405154496328115,0.06760251675909366,0.002752443542703986
|
| 3 |
+
1,2000,0.010641932487487793,0.0136,0.0008313703755022863,0.17336715689422644,0.06894613802433014
|
| 4 |
+
2,3000,0.010108882561326027,0.0109,0.005888873493141195,0.18778440922915093,0.1004333645105362
|
| 5 |
+
3,4000,0.009159804321825504,0.0101,0.1630178744630733,0.3005389916081053,0.20941995084285736
|
| 6 |
+
4,5000,0.008511984720826149,0.0094,0.29943189691007344,0.3760882182240279,0.176517054438591
|
| 7 |
+
5,6000,0.008205186575651169,0.009,0.2932658999584315,0.40500653838968803,0.2101859599351883
|
| 8 |
+
6,7000,0.007991783320903778,0.0087,0.3256893446030206,0.41617984175109074,0.16983383893966675
|
| 9 |
+
7,8000,0.007753537502139807,0.0084,0.32693640016627407,0.43854245830166794,0.22341981530189514
|
| 10 |
+
8,9000,0.007683382835239172,0.0083,0.3367742829430511,0.44404401334202354,0.22184684872627258
|
| 11 |
+
9,10000,0.00736592523753643,0.008,0.3589441596231121,0.47782095179471396,0.2355053871870041
|
| 12 |
+
10,11000,0.007255251985043287,0.0078,0.35575723985035335,0.4768779433335907,0.24868690967559814
|
| 13 |
+
11,12000,0.007080541457980871,0.0077,0.38069835111542194,0.501257768570583,0.20804649591445923
|
| 14 |
+
12,13000,0.006860875058919191,0.0076,0.3925453789663295,0.5207151519893588,0.28079092502593994
|
| 15 |
+
13,14000,0.006875431630760431,0.0075,0.39157544686157686,0.5180910436994571,0.2605779469013214
|
| 16 |
+
14,15000,0.006711602210998535,0.0072,0.40238326174310657,0.5312057551754487,0.26566949486732483
|
| 17 |
+
15,16000,0.0065771667286753654,0.0071,0.394900928363586,0.5377417342482844,0.24032439291477203
|
| 18 |
+
16,17000,0.006533265113830566,0.007,0.40896494388249965,0.5494887131005209,0.3130424916744232
|
| 19 |
+
17,18000,0.006406453438103199,0.0069,0.41617015380351946,0.555965097036257,0.31994837522506714
|
| 20 |
+
18,19000,0.006370759103447199,0.0069,0.4168629624497714,0.5573931430297538,0.306517094373703
|
| 21 |
+
19,20000,0.006276487372815609,0.0068,0.426562283497298,0.5612076247842216,0.2519727647304535
|
| 22 |
+
20,21000,0.006201908458024263,0.0067,0.42531522793404464,0.5668179156908665,0.25417062640190125
|
| 23 |
+
21,22000,0.006176957860589027,0.0066,0.4246224192877927,0.5686274509803921,0.26505014300346375
|
| 24 |
+
22,23000,0.006125412415713072,0.0066,0.41700152417902175,0.5691474194468207,0.22473326325416565
|
| 25 |
+
23,24000,0.006025252863764763,0.0065,0.44055701815158654,0.5787080530607643,0.30939093232154846
|
| 26 |
+
24,25000,0.0060025970451533794,0.0064,0.4467230151032285,0.5797747713702668,0.30625563859939575
|
| 27 |
+
25,26000,0.0059701185673475266,0.0064,0.4386864348067064,0.5838601710673113,0.27428191900253296
|
| 28 |
+
26,27000,0.0059480220079422,0.0065,0.441457669391714,0.5848109735697558,0.293146550655365
|
multimolecule-rnabert-finetuned/final_model/config.json
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_name_or_path": "multimolecule/rnabert",
|
| 3 |
+
"architectures": [
|
| 4 |
+
"RnaBertForTokenPrediction"
|
| 5 |
+
],
|
| 6 |
+
"attention_dropout": 0.0,
|
| 7 |
+
"bos_token_id": 1,
|
| 8 |
+
"eos_token_id": 2,
|
| 9 |
+
"head": {
|
| 10 |
+
"act": null,
|
| 11 |
+
"bias": true,
|
| 12 |
+
"dropout": 0.0,
|
| 13 |
+
"hidden_size": 120,
|
| 14 |
+
"layer_norm_eps": 1e-12,
|
| 15 |
+
"num_labels": 2,
|
| 16 |
+
"output_name": null,
|
| 17 |
+
"problem_type": "single_label_classification",
|
| 18 |
+
"transform": null,
|
| 19 |
+
"transform_act": "gelu"
|
| 20 |
+
},
|
| 21 |
+
"hidden_act": "gelu",
|
| 22 |
+
"hidden_dropout": 0.0,
|
| 23 |
+
"hidden_size": 120,
|
| 24 |
+
"initializer_range": 0.02,
|
| 25 |
+
"intermediate_size": 40,
|
| 26 |
+
"layer_norm_eps": 1e-12,
|
| 27 |
+
"lm_head": {
|
| 28 |
+
"act": null,
|
| 29 |
+
"bias": true,
|
| 30 |
+
"dropout": 0.0,
|
| 31 |
+
"hidden_size": null,
|
| 32 |
+
"layer_norm_eps": 1e-12,
|
| 33 |
+
"output_name": null,
|
| 34 |
+
"transform": "nonlinear",
|
| 35 |
+
"transform_act": "gelu"
|
| 36 |
+
},
|
| 37 |
+
"mask_token_id": 4,
|
| 38 |
+
"max_position_embeddings": 440,
|
| 39 |
+
"model_type": "rnabert",
|
| 40 |
+
"null_token_id": 5,
|
| 41 |
+
"num_attention_heads": 12,
|
| 42 |
+
"num_hidden_layers": 6,
|
| 43 |
+
"pad_token_id": 0,
|
| 44 |
+
"position_embedding_type": "absolute",
|
| 45 |
+
"problem_type": "single_label_classification",
|
| 46 |
+
"ss_vocab_size": 8,
|
| 47 |
+
"torch_dtype": "float32",
|
| 48 |
+
"transformers_version": "4.46.3",
|
| 49 |
+
"type_vocab_size": 2,
|
| 50 |
+
"unk_token_id": 3,
|
| 51 |
+
"use_cache": true,
|
| 52 |
+
"vocab_size": 26
|
| 53 |
+
}
|
multimolecule-rnabert-finetuned/final_model/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:54480f20e84d8a04ebe0f9739be8a0b9ccc1e31e13ddbd728f7d181ee17271e0
|
| 3 |
+
size 1936360
|
multimolecule-rnabert-finetuned/final_model/special_tokens_map.json
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"additional_special_tokens": [
|
| 3 |
+
"<null>"
|
| 4 |
+
],
|
| 5 |
+
"bos_token": {
|
| 6 |
+
"content": "<cls>",
|
| 7 |
+
"lstrip": false,
|
| 8 |
+
"normalized": false,
|
| 9 |
+
"rstrip": false,
|
| 10 |
+
"single_word": false
|
| 11 |
+
},
|
| 12 |
+
"cls_token": {
|
| 13 |
+
"content": "<cls>",
|
| 14 |
+
"lstrip": false,
|
| 15 |
+
"normalized": false,
|
| 16 |
+
"rstrip": false,
|
| 17 |
+
"single_word": false
|
| 18 |
+
},
|
| 19 |
+
"eos_token": {
|
| 20 |
+
"content": "<eos>",
|
| 21 |
+
"lstrip": false,
|
| 22 |
+
"normalized": false,
|
| 23 |
+
"rstrip": false,
|
| 24 |
+
"single_word": false
|
| 25 |
+
},
|
| 26 |
+
"mask_token": {
|
| 27 |
+
"content": "<mask>",
|
| 28 |
+
"lstrip": false,
|
| 29 |
+
"normalized": false,
|
| 30 |
+
"rstrip": false,
|
| 31 |
+
"single_word": false
|
| 32 |
+
},
|
| 33 |
+
"pad_token": {
|
| 34 |
+
"content": "<pad>",
|
| 35 |
+
"lstrip": false,
|
| 36 |
+
"normalized": false,
|
| 37 |
+
"rstrip": false,
|
| 38 |
+
"single_word": false
|
| 39 |
+
},
|
| 40 |
+
"sep_token": {
|
| 41 |
+
"content": "<eos>",
|
| 42 |
+
"lstrip": false,
|
| 43 |
+
"normalized": false,
|
| 44 |
+
"rstrip": false,
|
| 45 |
+
"single_word": false
|
| 46 |
+
},
|
| 47 |
+
"unk_token": {
|
| 48 |
+
"content": "<unk>",
|
| 49 |
+
"lstrip": false,
|
| 50 |
+
"normalized": false,
|
| 51 |
+
"rstrip": false,
|
| 52 |
+
"single_word": false
|
| 53 |
+
}
|
| 54 |
+
}
|
multimolecule-rnabert-finetuned/final_model/tokenizer_config.json
ADDED
|
@@ -0,0 +1,68 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"added_tokens_decoder": {
|
| 3 |
+
"0": {
|
| 4 |
+
"content": "<pad>",
|
| 5 |
+
"lstrip": false,
|
| 6 |
+
"normalized": false,
|
| 7 |
+
"rstrip": false,
|
| 8 |
+
"single_word": false,
|
| 9 |
+
"special": true
|
| 10 |
+
},
|
| 11 |
+
"1": {
|
| 12 |
+
"content": "<cls>",
|
| 13 |
+
"lstrip": false,
|
| 14 |
+
"normalized": false,
|
| 15 |
+
"rstrip": false,
|
| 16 |
+
"single_word": false,
|
| 17 |
+
"special": true
|
| 18 |
+
},
|
| 19 |
+
"2": {
|
| 20 |
+
"content": "<eos>",
|
| 21 |
+
"lstrip": false,
|
| 22 |
+
"normalized": false,
|
| 23 |
+
"rstrip": false,
|
| 24 |
+
"single_word": false,
|
| 25 |
+
"special": true
|
| 26 |
+
},
|
| 27 |
+
"3": {
|
| 28 |
+
"content": "<unk>",
|
| 29 |
+
"lstrip": false,
|
| 30 |
+
"normalized": false,
|
| 31 |
+
"rstrip": false,
|
| 32 |
+
"single_word": false,
|
| 33 |
+
"special": true
|
| 34 |
+
},
|
| 35 |
+
"4": {
|
| 36 |
+
"content": "<mask>",
|
| 37 |
+
"lstrip": false,
|
| 38 |
+
"normalized": false,
|
| 39 |
+
"rstrip": false,
|
| 40 |
+
"single_word": false,
|
| 41 |
+
"special": true
|
| 42 |
+
},
|
| 43 |
+
"5": {
|
| 44 |
+
"content": "<null>",
|
| 45 |
+
"lstrip": false,
|
| 46 |
+
"normalized": false,
|
| 47 |
+
"rstrip": false,
|
| 48 |
+
"single_word": false,
|
| 49 |
+
"special": true
|
| 50 |
+
}
|
| 51 |
+
},
|
| 52 |
+
"additional_special_tokens": [
|
| 53 |
+
"<null>"
|
| 54 |
+
],
|
| 55 |
+
"bos_token": "<cls>",
|
| 56 |
+
"clean_up_tokenization_spaces": true,
|
| 57 |
+
"cls_token": "<cls>",
|
| 58 |
+
"codon": false,
|
| 59 |
+
"eos_token": "<eos>",
|
| 60 |
+
"mask_token": "<mask>",
|
| 61 |
+
"model_max_length": 440,
|
| 62 |
+
"nmers": 1,
|
| 63 |
+
"pad_token": "<pad>",
|
| 64 |
+
"replace_T_with_U": true,
|
| 65 |
+
"sep_token": "<eos>",
|
| 66 |
+
"tokenizer_class": "RnaTokenizer",
|
| 67 |
+
"unk_token": "<unk>"
|
| 68 |
+
}
|
multimolecule-rnabert-finetuned/final_model/training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ea20c1d47784442dab5daebaa45ac338a4bf55b8ecd37a6d37d62534f283c19c
|
| 3 |
+
size 5368
|
multimolecule-rnabert-finetuned/final_model/vocab.txt
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<pad>
|
| 2 |
+
<cls>
|
| 3 |
+
<eos>
|
| 4 |
+
<unk>
|
| 5 |
+
<mask>
|
| 6 |
+
<null>
|
| 7 |
+
A
|
| 8 |
+
C
|
| 9 |
+
G
|
| 10 |
+
U
|
| 11 |
+
N
|
| 12 |
+
R
|
| 13 |
+
Y
|
| 14 |
+
S
|
| 15 |
+
W
|
| 16 |
+
K
|
| 17 |
+
M
|
| 18 |
+
B
|
| 19 |
+
D
|
| 20 |
+
H
|
| 21 |
+
V
|
| 22 |
+
.
|
| 23 |
+
X
|
| 24 |
+
*
|
| 25 |
+
-
|
| 26 |
+
I
|