multimolecule-ernierna-finetuned/eval_metrics.csv ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ,step,eval_loss,train_loss,eval_seq_accuracy,eval_F1,eval_ideal_threshold
2
+ 0,1000,0.001893,0,0.6778,0,0
3
+ 1,2000,0.00167,0,0.7191,0,0
4
+ 2,3000,0.001627,0,0.7318,0,0
5
+ 3,4000,0.001515,0,0.7348,0,0
6
+ 4,5000,0.001436,0,0.7256,0,0
7
+ 5,6000,0.001438,0,0.7439,0,0
8
+ 6,7000,0.001367,0,0.764,0,0
9
+ 7,8000,0.001349,0,0.7563,0,0
10
+ 8,9000,0.001335,0,0.7683,0,0
11
+ 0,10222,0.0013569026486948133,0.0014,0.7723430788416239,0.799498572323978,0.4814424216747284
12
+ 1,11222,0.0013802237808704376,0.0014,0.77268948316475,0.8008426287253514,0.4628483057022095
13
+ 2,12222,0.0014453979674726725,0.0013,0.7753221560205071,0.7969279106301275,0.48278334736824036
14
+ 3,13222,0.0013328788336366415,0.0012,0.7778855480116392,0.8091337292647962,0.3816543221473694
15
+ 4,14222,0.0013075487222522497,0.0012,0.7762228072606346,0.8057846960963656,0.4586166441440582
16
+ 5,15222,0.0013317714910954237,0.0011,0.7892476098101704,0.8116461208864538,0.5130550265312195
17
+ 6,16222,0.0013339577708393335,0.0011,0.7945129555216849,0.8127478950664533,0.4638926684856415
18
+ 7,17222,0.0012995643774047494,0.0011,0.7934737425523071,0.816063356344395,0.4715520143508911
19
+ 8,18222,0.0012820976553484797,0.0011,0.7947207981155605,0.817995476392423,0.506455659866333
multimolecule-ernierna-finetuned/final_model/config.json ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "multimolecule-ernierna-finetuned-secondary-structure/checkpoint-9223",
3
+ "architectures": [
4
+ "ErnieRnaForTokenPrediction"
5
+ ],
6
+ "attention_dropout": 0.1,
7
+ "bos_token_id": 1,
8
+ "eos_token_id": 2,
9
+ "head": {
10
+ "act": null,
11
+ "bias": true,
12
+ "dropout": 0.0,
13
+ "hidden_size": 768,
14
+ "layer_norm_eps": 1e-12,
15
+ "num_labels": 2,
16
+ "output_name": null,
17
+ "problem_type": "single_label_classification",
18
+ "transform": null,
19
+ "transform_act": "gelu"
20
+ },
21
+ "hidden_act": "gelu",
22
+ "hidden_dropout": 0.1,
23
+ "hidden_size": 768,
24
+ "initializer_range": 0.02,
25
+ "intermediate_size": 3072,
26
+ "layer_norm_eps": 1e-12,
27
+ "lm_head": {
28
+ "act": null,
29
+ "bias": true,
30
+ "dropout": 0.0,
31
+ "hidden_size": null,
32
+ "layer_norm_eps": 1e-12,
33
+ "output_name": null,
34
+ "transform": "nonlinear",
35
+ "transform_act": "gelu"
36
+ },
37
+ "mask_token_id": 4,
38
+ "max_position_embeddings": 1026,
39
+ "model_type": "ernierna",
40
+ "null_token_id": 5,
41
+ "num_attention_heads": 12,
42
+ "num_hidden_layers": 12,
43
+ "pad_token_id": 0,
44
+ "pairwise_alpha": 0.8,
45
+ "position_embedding_type": "sinusoidal",
46
+ "problem_type": "single_label_classification",
47
+ "torch_dtype": "float32",
48
+ "transformers_version": "4.46.3",
49
+ "type_vocab_size": 2,
50
+ "unk_token_id": 3,
51
+ "use_cache": true,
52
+ "vocab_size": 26
53
+ }
multimolecule-ernierna-finetuned/final_model/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6315136bd4dc321daa6b1207030ffcfef731b6844945d3509205273fee45ac65
3
+ size 342703472
multimolecule-ernierna-finetuned/final_model/special_tokens_map.json ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<null>"
4
+ ],
5
+ "bos_token": {
6
+ "content": "<cls>",
7
+ "lstrip": false,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false
11
+ },
12
+ "cls_token": {
13
+ "content": "<cls>",
14
+ "lstrip": false,
15
+ "normalized": false,
16
+ "rstrip": false,
17
+ "single_word": false
18
+ },
19
+ "eos_token": {
20
+ "content": "<eos>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false
25
+ },
26
+ "mask_token": {
27
+ "content": "<mask>",
28
+ "lstrip": false,
29
+ "normalized": false,
30
+ "rstrip": false,
31
+ "single_word": false
32
+ },
33
+ "pad_token": {
34
+ "content": "<pad>",
35
+ "lstrip": false,
36
+ "normalized": false,
37
+ "rstrip": false,
38
+ "single_word": false
39
+ },
40
+ "sep_token": {
41
+ "content": "<eos>",
42
+ "lstrip": false,
43
+ "normalized": false,
44
+ "rstrip": false,
45
+ "single_word": false
46
+ },
47
+ "unk_token": {
48
+ "content": "<unk>",
49
+ "lstrip": false,
50
+ "normalized": false,
51
+ "rstrip": false,
52
+ "single_word": false
53
+ }
54
+ }
multimolecule-ernierna-finetuned/final_model/tokenizer_config.json ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "<pad>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "<cls>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "<eos>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "3": {
28
+ "content": "<unk>",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "4": {
36
+ "content": "<mask>",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ },
43
+ "5": {
44
+ "content": "<null>",
45
+ "lstrip": false,
46
+ "normalized": false,
47
+ "rstrip": false,
48
+ "single_word": false,
49
+ "special": true
50
+ }
51
+ },
52
+ "additional_special_tokens": [
53
+ "<null>"
54
+ ],
55
+ "bos_token": "<cls>",
56
+ "clean_up_tokenization_spaces": true,
57
+ "cls_token": "<cls>",
58
+ "codon": false,
59
+ "eos_token": "<eos>",
60
+ "mask_token": "<mask>",
61
+ "model_max_length": 1000000000000000019884624838656,
62
+ "nmers": 1,
63
+ "pad_token": "<pad>",
64
+ "replace_T_with_U": true,
65
+ "sep_token": "<eos>",
66
+ "tokenizer_class": "RnaTokenizer",
67
+ "unk_token": "<unk>"
68
+ }
multimolecule-ernierna-finetuned/final_model/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:50cc58eed759712c80de480486f3a3e6cbf18929ef20c9db71e5d42a32f66331
3
+ size 5368
multimolecule-ernierna-finetuned/final_model/vocab.txt ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <pad>
2
+ <cls>
3
+ <eos>
4
+ <unk>
5
+ <mask>
6
+ <null>
7
+ A
8
+ C
9
+ G
10
+ U
11
+ N
12
+ R
13
+ Y
14
+ S
15
+ W
16
+ K
17
+ M
18
+ B
19
+ D
20
+ H
21
+ V
22
+ .
23
+ X
24
+ *
25
+ -
26
+ I
multimolecule-rnabert-finetuned/eval_metrics.csv ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ,step,eval_loss,train_loss,eval_seq_accuracy,eval_F1,eval_ideal_threshold
2
+ 0,1000,0.017599036917090416,0.0619,0.0028405154496328115,0.06760251675909366,0.002752443542703986
3
+ 1,2000,0.010641932487487793,0.0136,0.0008313703755022863,0.17336715689422644,0.06894613802433014
4
+ 2,3000,0.010108882561326027,0.0109,0.005888873493141195,0.18778440922915093,0.1004333645105362
5
+ 3,4000,0.009159804321825504,0.0101,0.1630178744630733,0.3005389916081053,0.20941995084285736
6
+ 4,5000,0.008511984720826149,0.0094,0.29943189691007344,0.3760882182240279,0.176517054438591
7
+ 5,6000,0.008205186575651169,0.009,0.2932658999584315,0.40500653838968803,0.2101859599351883
8
+ 6,7000,0.007991783320903778,0.0087,0.3256893446030206,0.41617984175109074,0.16983383893966675
9
+ 7,8000,0.007753537502139807,0.0084,0.32693640016627407,0.43854245830166794,0.22341981530189514
10
+ 8,9000,0.007683382835239172,0.0083,0.3367742829430511,0.44404401334202354,0.22184684872627258
11
+ 9,10000,0.00736592523753643,0.008,0.3589441596231121,0.47782095179471396,0.2355053871870041
12
+ 10,11000,0.007255251985043287,0.0078,0.35575723985035335,0.4768779433335907,0.24868690967559814
13
+ 11,12000,0.007080541457980871,0.0077,0.38069835111542194,0.501257768570583,0.20804649591445923
14
+ 12,13000,0.006860875058919191,0.0076,0.3925453789663295,0.5207151519893588,0.28079092502593994
15
+ 13,14000,0.006875431630760431,0.0075,0.39157544686157686,0.5180910436994571,0.2605779469013214
16
+ 14,15000,0.006711602210998535,0.0072,0.40238326174310657,0.5312057551754487,0.26566949486732483
17
+ 15,16000,0.0065771667286753654,0.0071,0.394900928363586,0.5377417342482844,0.24032439291477203
18
+ 16,17000,0.006533265113830566,0.007,0.40896494388249965,0.5494887131005209,0.3130424916744232
19
+ 17,18000,0.006406453438103199,0.0069,0.41617015380351946,0.555965097036257,0.31994837522506714
20
+ 18,19000,0.006370759103447199,0.0069,0.4168629624497714,0.5573931430297538,0.306517094373703
21
+ 19,20000,0.006276487372815609,0.0068,0.426562283497298,0.5612076247842216,0.2519727647304535
22
+ 20,21000,0.006201908458024263,0.0067,0.42531522793404464,0.5668179156908665,0.25417062640190125
23
+ 21,22000,0.006176957860589027,0.0066,0.4246224192877927,0.5686274509803921,0.26505014300346375
24
+ 22,23000,0.006125412415713072,0.0066,0.41700152417902175,0.5691474194468207,0.22473326325416565
25
+ 23,24000,0.006025252863764763,0.0065,0.44055701815158654,0.5787080530607643,0.30939093232154846
26
+ 24,25000,0.0060025970451533794,0.0064,0.4467230151032285,0.5797747713702668,0.30625563859939575
27
+ 25,26000,0.0059701185673475266,0.0064,0.4386864348067064,0.5838601710673113,0.27428191900253296
28
+ 26,27000,0.0059480220079422,0.0065,0.441457669391714,0.5848109735697558,0.293146550655365
multimolecule-rnabert-finetuned/final_model/config.json ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "multimolecule/rnabert",
3
+ "architectures": [
4
+ "RnaBertForTokenPrediction"
5
+ ],
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 1,
8
+ "eos_token_id": 2,
9
+ "head": {
10
+ "act": null,
11
+ "bias": true,
12
+ "dropout": 0.0,
13
+ "hidden_size": 120,
14
+ "layer_norm_eps": 1e-12,
15
+ "num_labels": 2,
16
+ "output_name": null,
17
+ "problem_type": "single_label_classification",
18
+ "transform": null,
19
+ "transform_act": "gelu"
20
+ },
21
+ "hidden_act": "gelu",
22
+ "hidden_dropout": 0.0,
23
+ "hidden_size": 120,
24
+ "initializer_range": 0.02,
25
+ "intermediate_size": 40,
26
+ "layer_norm_eps": 1e-12,
27
+ "lm_head": {
28
+ "act": null,
29
+ "bias": true,
30
+ "dropout": 0.0,
31
+ "hidden_size": null,
32
+ "layer_norm_eps": 1e-12,
33
+ "output_name": null,
34
+ "transform": "nonlinear",
35
+ "transform_act": "gelu"
36
+ },
37
+ "mask_token_id": 4,
38
+ "max_position_embeddings": 440,
39
+ "model_type": "rnabert",
40
+ "null_token_id": 5,
41
+ "num_attention_heads": 12,
42
+ "num_hidden_layers": 6,
43
+ "pad_token_id": 0,
44
+ "position_embedding_type": "absolute",
45
+ "problem_type": "single_label_classification",
46
+ "ss_vocab_size": 8,
47
+ "torch_dtype": "float32",
48
+ "transformers_version": "4.46.3",
49
+ "type_vocab_size": 2,
50
+ "unk_token_id": 3,
51
+ "use_cache": true,
52
+ "vocab_size": 26
53
+ }
multimolecule-rnabert-finetuned/final_model/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:54480f20e84d8a04ebe0f9739be8a0b9ccc1e31e13ddbd728f7d181ee17271e0
3
+ size 1936360
multimolecule-rnabert-finetuned/final_model/special_tokens_map.json ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<null>"
4
+ ],
5
+ "bos_token": {
6
+ "content": "<cls>",
7
+ "lstrip": false,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false
11
+ },
12
+ "cls_token": {
13
+ "content": "<cls>",
14
+ "lstrip": false,
15
+ "normalized": false,
16
+ "rstrip": false,
17
+ "single_word": false
18
+ },
19
+ "eos_token": {
20
+ "content": "<eos>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false
25
+ },
26
+ "mask_token": {
27
+ "content": "<mask>",
28
+ "lstrip": false,
29
+ "normalized": false,
30
+ "rstrip": false,
31
+ "single_word": false
32
+ },
33
+ "pad_token": {
34
+ "content": "<pad>",
35
+ "lstrip": false,
36
+ "normalized": false,
37
+ "rstrip": false,
38
+ "single_word": false
39
+ },
40
+ "sep_token": {
41
+ "content": "<eos>",
42
+ "lstrip": false,
43
+ "normalized": false,
44
+ "rstrip": false,
45
+ "single_word": false
46
+ },
47
+ "unk_token": {
48
+ "content": "<unk>",
49
+ "lstrip": false,
50
+ "normalized": false,
51
+ "rstrip": false,
52
+ "single_word": false
53
+ }
54
+ }
multimolecule-rnabert-finetuned/final_model/tokenizer_config.json ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "<pad>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "<cls>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "<eos>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "3": {
28
+ "content": "<unk>",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "4": {
36
+ "content": "<mask>",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ },
43
+ "5": {
44
+ "content": "<null>",
45
+ "lstrip": false,
46
+ "normalized": false,
47
+ "rstrip": false,
48
+ "single_word": false,
49
+ "special": true
50
+ }
51
+ },
52
+ "additional_special_tokens": [
53
+ "<null>"
54
+ ],
55
+ "bos_token": "<cls>",
56
+ "clean_up_tokenization_spaces": true,
57
+ "cls_token": "<cls>",
58
+ "codon": false,
59
+ "eos_token": "<eos>",
60
+ "mask_token": "<mask>",
61
+ "model_max_length": 440,
62
+ "nmers": 1,
63
+ "pad_token": "<pad>",
64
+ "replace_T_with_U": true,
65
+ "sep_token": "<eos>",
66
+ "tokenizer_class": "RnaTokenizer",
67
+ "unk_token": "<unk>"
68
+ }
multimolecule-rnabert-finetuned/final_model/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ea20c1d47784442dab5daebaa45ac338a4bf55b8ecd37a6d37d62534f283c19c
3
+ size 5368
multimolecule-rnabert-finetuned/final_model/vocab.txt ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <pad>
2
+ <cls>
3
+ <eos>
4
+ <unk>
5
+ <mask>
6
+ <null>
7
+ A
8
+ C
9
+ G
10
+ U
11
+ N
12
+ R
13
+ Y
14
+ S
15
+ W
16
+ K
17
+ M
18
+ B
19
+ D
20
+ H
21
+ V
22
+ .
23
+ X
24
+ *
25
+ -
26
+ I