timothytran committed on
Commit
27cc1d8
·
verified ·
1 Parent(s): b4f620e

Upload 14 files

Browse files

Added fine-tuned RNA-MSM and UTR-LM models

multimolecule-rnamsm-finetuned/eval_metrics_rnamsm.csv ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ,step,eval_loss,eval_seq_accuracy,eval_F1,eval_ideal_threshold
2
+ 0,1000,0.05519681051373482,0.26125814050159346,0.4350057273768614,0.37631821632385254
3
+ 1,2000,0.03893322870135307,0.3647637522516281,0.5378662244333886,0.30903738737106323
4
+ 2,3000,0.028525250032544136,0.3755715671331578,0.5549429512213799,0.2456846535205841
5
+ 3,4000,0.02174016274511814,0.3655951226271304,0.5509549274255157,0.23982320725917816
6
+ 4,5000,0.016855748370289803,0.3618539559373701,0.5402715630303966,0.28385332226753235
7
+ 5,6000,0.013312540017068386,0.38908133573507,0.5628882897667331,0.2723926901817322
8
+ 6,7000,0.010887965559959412,0.39961202715809896,0.5627312880506148,0.2942980229854584
9
+ 7,8000,0.008999842219054699,0.3880421227656921,0.5705256667180515,0.2603302597999573
10
+ 8,9000,0.007666711695492268,0.3941388388527089,0.5710518150791188,0.2760649621486664
11
+ 9,10000,0.006694463547319174,0.41914923098240264,0.5723599484867997,0.30005908012390137
12
+ 10,11000,0.005955410189926624,0.40924206734100044,0.5766063901036127,0.257188618183136
13
+ 11,12000,0.005375804379582405,0.4158237494803935,0.5808003153952297,0.2415660172700882
14
+ 12,13000,0.004951318260282278,0.39815712900096994,0.5821285758228237,0.21721985936164856
15
+ 13,14000,0.004618560895323753,0.43536095330469726,0.5880176919983916,0.29595890641212463
16
+ 14,15000,0.004383981693536043,0.42337536372453927,0.5909629512599731,0.27418968081474304
17
+ 15,16000,0.004173209425061941,0.4216433421089095,0.5938036522553457,0.2998376786708832
18
+ 16,17000,0.004032590426504612,0.4490092836358598,0.5962818639389511,0.3073800504207611
19
+ 17,18000,0.003937114961445332,0.44852431758348343,0.5997752448226039,0.2759152352809906
20
+ 18,19000,0.0038168877363204956,0.4537203824303727,0.6064870986006633,0.3203374147415161
21
+ 19,20000,0.003732781857252121,0.4609255923513925,0.6107774711897419,0.3254184126853943
22
+ 20,21000,0.00369548168964684,0.4604406262990162,0.6098836502209429,0.3310704231262207
23
+ 21,22000,0.0036285948008298874,0.4467922959678537,0.6158394764851973,0.2818971574306488
24
+ 22,23000,0.0035786391235888004,0.44914784536511015,0.6145894977709394,0.2523304224014282
25
+ 23,24000,0.0035338157322257757,0.457807953443259,0.6218440618663273,0.2906527817249298
26
+ 24,25000,0.003507734276354313,0.4684079257309131,0.6231796361289281,0.31092220544815063
27
+ 25,26000,0.0034806893672794104,0.4759595399750589,0.6255520888204547,0.32073238492012024
28
+ 26,27000,0.0034628279972821474,0.475266731328807,0.6271426834704381,0.32266730070114136
multimolecule-rnamsm-finetuned/final_model/config.json ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "multimolecule/rnamsm",
3
+ "architectures": [
4
+ "RnaMsmForTokenPrediction"
5
+ ],
6
+ "attention_bias": true,
7
+ "attention_dropout": 0.1,
8
+ "attention_type": "standard",
9
+ "bos_token_id": 1,
10
+ "embed_positions_msa": true,
11
+ "eos_token_id": 2,
12
+ "head": {
13
+ "act": null,
14
+ "bias": true,
15
+ "dropout": 0.0,
16
+ "hidden_size": 768,
17
+ "layer_norm_eps": 1e-12,
18
+ "num_labels": 2,
19
+ "output_name": null,
20
+ "problem_type": "single_label_classification",
21
+ "transform": null,
22
+ "transform_act": "gelu"
23
+ },
24
+ "hidden_act": "gelu",
25
+ "hidden_dropout": 0.1,
26
+ "hidden_size": 768,
27
+ "initializer_range": 0.02,
28
+ "intermediate_size": 3072,
29
+ "layer_norm_eps": 1e-12,
30
+ "layer_type": "standard",
31
+ "lm_head": {
32
+ "act": null,
33
+ "bias": true,
34
+ "dropout": 0.0,
35
+ "hidden_size": null,
36
+ "layer_norm_eps": 1e-12,
37
+ "output_name": null,
38
+ "transform": "nonlinear",
39
+ "transform_act": "gelu"
40
+ },
41
+ "mask_token_id": 4,
42
+ "max_position_embeddings": 1024,
43
+ "max_tokens_per_msa": 16384,
44
+ "model_type": "rnamsm",
45
+ "null_token_id": 5,
46
+ "num_attention_heads": 12,
47
+ "num_hidden_layers": 10,
48
+ "pad_token_id": 0,
49
+ "position_embedding_type": "absolute",
50
+ "problem_type": "single_label_classification",
51
+ "torch_dtype": "float32",
52
+ "transformers_version": "4.46.3",
53
+ "unk_token_id": 3,
54
+ "use_cache": true,
55
+ "vocab_size": 26
56
+ }
multimolecule-rnamsm-finetuned/final_model/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e6de68264d9e151361883f5987ea8b852a333daa7b5688c2a14a1e29ef0537c2
3
+ size 383723048
multimolecule-rnamsm-finetuned/final_model/special_tokens_map.json ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<null>"
4
+ ],
5
+ "bos_token": {
6
+ "content": "<cls>",
7
+ "lstrip": false,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false
11
+ },
12
+ "cls_token": {
13
+ "content": "<cls>",
14
+ "lstrip": false,
15
+ "normalized": false,
16
+ "rstrip": false,
17
+ "single_word": false
18
+ },
19
+ "eos_token": {
20
+ "content": "<eos>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false
25
+ },
26
+ "mask_token": {
27
+ "content": "<mask>",
28
+ "lstrip": false,
29
+ "normalized": false,
30
+ "rstrip": false,
31
+ "single_word": false
32
+ },
33
+ "pad_token": {
34
+ "content": "<pad>",
35
+ "lstrip": false,
36
+ "normalized": false,
37
+ "rstrip": false,
38
+ "single_word": false
39
+ },
40
+ "sep_token": {
41
+ "content": "<eos>",
42
+ "lstrip": false,
43
+ "normalized": false,
44
+ "rstrip": false,
45
+ "single_word": false
46
+ },
47
+ "unk_token": {
48
+ "content": "<unk>",
49
+ "lstrip": false,
50
+ "normalized": false,
51
+ "rstrip": false,
52
+ "single_word": false
53
+ }
54
+ }
multimolecule-rnamsm-finetuned/final_model/tokenizer_config.json ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "<pad>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "<cls>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "<eos>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "3": {
28
+ "content": "<unk>",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "4": {
36
+ "content": "<mask>",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ },
43
+ "5": {
44
+ "content": "<null>",
45
+ "lstrip": false,
46
+ "normalized": false,
47
+ "rstrip": false,
48
+ "single_word": false,
49
+ "special": true
50
+ }
51
+ },
52
+ "additional_special_tokens": [
53
+ "<null>"
54
+ ],
55
+ "bos_token": "<cls>",
56
+ "clean_up_tokenization_spaces": true,
57
+ "cls_token": "<cls>",
58
+ "codon": false,
59
+ "eos_token": "<eos>",
60
+ "mask_token": "<mask>",
61
+ "model_max_length": 1024,
62
+ "nmers": 1,
63
+ "pad_token": "<pad>",
64
+ "replace_T_with_U": true,
65
+ "sep_token": "<eos>",
66
+ "tokenizer_class": "RnaTokenizer",
67
+ "unk_token": "<unk>"
68
+ }
multimolecule-rnamsm-finetuned/final_model/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:39e3155af0bbdf8ab54bf23ec057cf5d560d2f5535e7182f14a0b454f8e911f2
3
+ size 5368
multimolecule-rnamsm-finetuned/final_model/vocab.txt ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <pad>
2
+ <cls>
3
+ <eos>
4
+ <unk>
5
+ <mask>
6
+ <null>
7
+ A
8
+ C
9
+ G
10
+ U
11
+ N
12
+ R
13
+ Y
14
+ S
15
+ W
16
+ K
17
+ M
18
+ B
19
+ D
20
+ H
21
+ V
22
+ .
23
+ X
24
+ *
25
+ -
26
+ I
multimolecule-utrlm-te_el-finetuned/eval_metrics_utrlm.csv ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ,step,eval_loss,eval_seq_accuracy,eval_F1,eval_ideal_threshold
2
+ 0,1000,0.0561181865632534,0.3783428017181654,0.5486725663716814,0.3837876319885254
3
+ 1,2000,0.03955318406224251,0.4335596508244423,0.5831632040350666,0.2904277443885803
4
+ 2,3000,0.028913654386997223,0.43196619093806293,0.5898072602577746,0.24150322377681732
5
+ 3,4000,0.02200213074684143,0.4532354163779964,0.591035623512325,0.29686254262924194
6
+ 4,5000,0.016823219135403633,0.4544824719412498,0.6033627676303375,0.3609362840652466
7
+ 5,6000,0.013310940936207771,0.4539975058888735,0.6002348463376118,0.3392348885536194
8
+ 6,7000,0.010862333700060844,0.4561452126922544,0.6046418084550098,0.35353171825408936
9
+ 7,8000,0.008927548304200172,0.447623666343356,0.6070160035119927,0.34765511751174927
10
+ 8,9000,0.007522304076701403,0.4575308299847582,0.6139172594682669,0.32793378829956055
11
+ 9,10000,0.006470794323831797,0.4678536788139116,0.6153034300791557,0.3684046268463135
12
+ 10,11000,0.005790261551737785,0.4670915893030345,0.6154158050521024,0.3683505356311798
13
+ 11,12000,0.005166235379874706,0.4625883331023971,0.6170426669883782,0.35821619629859924
14
+ 12,13000,0.004909387789666653,0.45794651517250934,0.6159547338221231,0.3005988299846649
15
+ 13,14000,0.004459976684302092,0.4526118885963697,0.6179464919895825,0.32433372735977173
16
+ 14,15000,0.0041742813773453236,0.46300401829014826,0.6182023559786993,0.35960569977760315
17
+ 15,16000,0.003995656967163086,0.46251905223777195,0.6188691283196427,0.3561147153377533
18
+ 16,17000,0.003880647011101246,0.46799224054316196,0.62408,0.3329227566719055
19
+ 17,18000,0.0037868961226195097,0.47485104614105583,0.6216785482825664,0.3749973177909851
20
+ 18,19000,0.003646288299933076,0.460371345434391,0.6252417985867119,0.3197007477283478
21
+ 19,20000,0.003600230673328042,0.47540529305805734,0.6271309687323261,0.3553476631641388
22
+ 20,21000,0.0035520156379789114,0.4833033116253291,0.6279894072112446,0.38413992524147034
23
+ 21,22000,0.0035468924324959517,0.47540529305805734,0.6282247046532187,0.34830567240715027
24
+ 22,23000,0.003507823683321476,0.47492032700568104,0.6284495936921716,0.3478001654148102
25
+ 23,24000,0.0035892806481570005,0.47658306775668563,0.6291899722345178,0.3500838279724121
26
+ 24,25000,0.003446375485509634,0.4720798115560482,0.6297732816186173,0.34543856978416443
27
+ 25,26000,0.0035068897996097803,0.4738811140363032,0.6302181308785271,0.34250420331954956
28
+ 26,27000,0.0034666049759835005,0.46833864486628796,0.6296649198030808,0.32472842931747437
multimolecule-utrlm-te_el-finetuned/final_model/config.json ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "multimolecule/utrlm-te_el",
3
+ "architectures": [
4
+ "UtrLmForTokenPrediction"
5
+ ],
6
+ "attention_dropout": 0.1,
7
+ "bos_token_id": 1,
8
+ "emb_layer_norm_before": false,
9
+ "eos_token_id": 2,
10
+ "head": {
11
+ "act": null,
12
+ "bias": true,
13
+ "dropout": 0.0,
14
+ "hidden_size": 128,
15
+ "layer_norm_eps": 1e-12,
16
+ "num_labels": 2,
17
+ "output_name": null,
18
+ "problem_type": "single_label_classification",
19
+ "transform": null,
20
+ "transform_act": "gelu"
21
+ },
22
+ "hidden_act": "gelu",
23
+ "hidden_dropout": 0.1,
24
+ "hidden_size": 128,
25
+ "initializer_range": 0.02,
26
+ "intermediate_size": 512,
27
+ "layer_norm_eps": 1e-12,
28
+ "lm_head": {
29
+ "act": null,
30
+ "bias": true,
31
+ "dropout": 0.0,
32
+ "hidden_size": null,
33
+ "layer_norm_eps": 1e-12,
34
+ "output_name": null,
35
+ "transform": "nonlinear",
36
+ "transform_act": "gelu"
37
+ },
38
+ "mask_token_id": 4,
39
+ "max_position_embeddings": 1026,
40
+ "mfe_head": null,
41
+ "model_type": "utrlm",
42
+ "null_token_id": 5,
43
+ "num_attention_heads": 16,
44
+ "num_hidden_layers": 6,
45
+ "pad_token_id": 0,
46
+ "position_embedding_type": "rotary",
47
+ "problem_type": "single_label_classification",
48
+ "ss_head": null,
49
+ "token_dropout": false,
50
+ "torch_dtype": "float32",
51
+ "transformers_version": "4.46.3",
52
+ "unk_token_id": 3,
53
+ "use_cache": true,
54
+ "vocab_size": 26
55
+ }
multimolecule-utrlm-te_el-finetuned/final_model/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7bd6c8ec2dd174580c271d89b87d4e2a67f0973a1759a72fa7076565e3109ffb
3
+ size 4852280
multimolecule-utrlm-te_el-finetuned/final_model/special_tokens_map.json ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<null>"
4
+ ],
5
+ "bos_token": {
6
+ "content": "<cls>",
7
+ "lstrip": false,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false
11
+ },
12
+ "cls_token": {
13
+ "content": "<cls>",
14
+ "lstrip": false,
15
+ "normalized": false,
16
+ "rstrip": false,
17
+ "single_word": false
18
+ },
19
+ "eos_token": {
20
+ "content": "<eos>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false
25
+ },
26
+ "mask_token": {
27
+ "content": "<mask>",
28
+ "lstrip": false,
29
+ "normalized": false,
30
+ "rstrip": false,
31
+ "single_word": false
32
+ },
33
+ "pad_token": {
34
+ "content": "<pad>",
35
+ "lstrip": false,
36
+ "normalized": false,
37
+ "rstrip": false,
38
+ "single_word": false
39
+ },
40
+ "sep_token": {
41
+ "content": "<eos>",
42
+ "lstrip": false,
43
+ "normalized": false,
44
+ "rstrip": false,
45
+ "single_word": false
46
+ },
47
+ "unk_token": {
48
+ "content": "<unk>",
49
+ "lstrip": false,
50
+ "normalized": false,
51
+ "rstrip": false,
52
+ "single_word": false
53
+ }
54
+ }
multimolecule-utrlm-te_el-finetuned/final_model/tokenizer_config.json ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "<pad>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "<cls>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "<eos>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "3": {
28
+ "content": "<unk>",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "4": {
36
+ "content": "<mask>",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ },
43
+ "5": {
44
+ "content": "<null>",
45
+ "lstrip": false,
46
+ "normalized": false,
47
+ "rstrip": false,
48
+ "single_word": false,
49
+ "special": true
50
+ }
51
+ },
52
+ "additional_special_tokens": [
53
+ "<null>"
54
+ ],
55
+ "bos_token": "<cls>",
56
+ "clean_up_tokenization_spaces": true,
57
+ "cls_token": "<cls>",
58
+ "codon": false,
59
+ "eos_token": "<eos>",
60
+ "mask_token": "<mask>",
61
+ "model_max_length": 1000000000000000019884624838656,
62
+ "nmers": 1,
63
+ "pad_token": "<pad>",
64
+ "replace_T_with_U": true,
65
+ "sep_token": "<eos>",
66
+ "tokenizer_class": "RnaTokenizer",
67
+ "unk_token": "<unk>"
68
+ }
multimolecule-utrlm-te_el-finetuned/final_model/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2ca768245670e4e0088ed896f94c5e11f380a58fa8c478011dff853949d6e2fd
3
+ size 5368
multimolecule-utrlm-te_el-finetuned/final_model/vocab.txt ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <pad>
2
+ <cls>
3
+ <eos>
4
+ <unk>
5
+ <mask>
6
+ <null>
7
+ A
8
+ C
9
+ G
10
+ U
11
+ N
12
+ R
13
+ Y
14
+ S
15
+ W
16
+ K
17
+ M
18
+ B
19
+ D
20
+ H
21
+ V
22
+ .
23
+ X
24
+ *
25
+ -
26
+ I