Upload 14 files
Browse filesAdded fine-tuned RNA-MSM and UTR-LM models
- multimolecule-rnamsm-finetuned/eval_metrics_rnamsm.csv +28 -0
- multimolecule-rnamsm-finetuned/final_model/config.json +56 -0
- multimolecule-rnamsm-finetuned/final_model/model.safetensors +3 -0
- multimolecule-rnamsm-finetuned/final_model/special_tokens_map.json +54 -0
- multimolecule-rnamsm-finetuned/final_model/tokenizer_config.json +68 -0
- multimolecule-rnamsm-finetuned/final_model/training_args.bin +3 -0
- multimolecule-rnamsm-finetuned/final_model/vocab.txt +26 -0
- multimolecule-utrlm-te_el-finetuned/eval_metrics_utrlm.csv +28 -0
- multimolecule-utrlm-te_el-finetuned/final_model/config.json +55 -0
- multimolecule-utrlm-te_el-finetuned/final_model/model.safetensors +3 -0
- multimolecule-utrlm-te_el-finetuned/final_model/special_tokens_map.json +54 -0
- multimolecule-utrlm-te_el-finetuned/final_model/tokenizer_config.json +68 -0
- multimolecule-utrlm-te_el-finetuned/final_model/training_args.bin +3 -0
- multimolecule-utrlm-te_el-finetuned/final_model/vocab.txt +26 -0
multimolecule-rnamsm-finetuned/eval_metrics_rnamsm.csv
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
,step,eval_loss,eval_seq_accuracy,eval_F1,eval_ideal_threshold
|
| 2 |
+
0,1000,0.05519681051373482,0.26125814050159346,0.4350057273768614,0.37631821632385254
|
| 3 |
+
1,2000,0.03893322870135307,0.3647637522516281,0.5378662244333886,0.30903738737106323
|
| 4 |
+
2,3000,0.028525250032544136,0.3755715671331578,0.5549429512213799,0.2456846535205841
|
| 5 |
+
3,4000,0.02174016274511814,0.3655951226271304,0.5509549274255157,0.23982320725917816
|
| 6 |
+
4,5000,0.016855748370289803,0.3618539559373701,0.5402715630303966,0.28385332226753235
|
| 7 |
+
5,6000,0.013312540017068386,0.38908133573507,0.5628882897667331,0.2723926901817322
|
| 8 |
+
6,7000,0.010887965559959412,0.39961202715809896,0.5627312880506148,0.2942980229854584
|
| 9 |
+
7,8000,0.008999842219054699,0.3880421227656921,0.5705256667180515,0.2603302597999573
|
| 10 |
+
8,9000,0.007666711695492268,0.3941388388527089,0.5710518150791188,0.2760649621486664
|
| 11 |
+
9,10000,0.006694463547319174,0.41914923098240264,0.5723599484867997,0.30005908012390137
|
| 12 |
+
10,11000,0.005955410189926624,0.40924206734100044,0.5766063901036127,0.257188618183136
|
| 13 |
+
11,12000,0.005375804379582405,0.4158237494803935,0.5808003153952297,0.2415660172700882
|
| 14 |
+
12,13000,0.004951318260282278,0.39815712900096994,0.5821285758228237,0.21721985936164856
|
| 15 |
+
13,14000,0.004618560895323753,0.43536095330469726,0.5880176919983916,0.29595890641212463
|
| 16 |
+
14,15000,0.004383981693536043,0.42337536372453927,0.5909629512599731,0.27418968081474304
|
| 17 |
+
15,16000,0.004173209425061941,0.4216433421089095,0.5938036522553457,0.2998376786708832
|
| 18 |
+
16,17000,0.004032590426504612,0.4490092836358598,0.5962818639389511,0.3073800504207611
|
| 19 |
+
17,18000,0.003937114961445332,0.44852431758348343,0.5997752448226039,0.2759152352809906
|
| 20 |
+
18,19000,0.0038168877363204956,0.4537203824303727,0.6064870986006633,0.3203374147415161
|
| 21 |
+
19,20000,0.003732781857252121,0.4609255923513925,0.6107774711897419,0.3254184126853943
|
| 22 |
+
20,21000,0.00369548168964684,0.4604406262990162,0.6098836502209429,0.3310704231262207
|
| 23 |
+
21,22000,0.0036285948008298874,0.4467922959678537,0.6158394764851973,0.2818971574306488
|
| 24 |
+
22,23000,0.0035786391235888004,0.44914784536511015,0.6145894977709394,0.2523304224014282
|
| 25 |
+
23,24000,0.0035338157322257757,0.457807953443259,0.6218440618663273,0.2906527817249298
|
| 26 |
+
24,25000,0.003507734276354313,0.4684079257309131,0.6231796361289281,0.31092220544815063
|
| 27 |
+
25,26000,0.0034806893672794104,0.4759595399750589,0.6255520888204547,0.32073238492012024
|
| 28 |
+
26,27000,0.0034628279972821474,0.475266731328807,0.6271426834704381,0.32266730070114136
|
multimolecule-rnamsm-finetuned/final_model/config.json
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_name_or_path": "multimolecule/rnamsm",
|
| 3 |
+
"architectures": [
|
| 4 |
+
"RnaMsmForTokenPrediction"
|
| 5 |
+
],
|
| 6 |
+
"attention_bias": true,
|
| 7 |
+
"attention_dropout": 0.1,
|
| 8 |
+
"attention_type": "standard",
|
| 9 |
+
"bos_token_id": 1,
|
| 10 |
+
"embed_positions_msa": true,
|
| 11 |
+
"eos_token_id": 2,
|
| 12 |
+
"head": {
|
| 13 |
+
"act": null,
|
| 14 |
+
"bias": true,
|
| 15 |
+
"dropout": 0.0,
|
| 16 |
+
"hidden_size": 768,
|
| 17 |
+
"layer_norm_eps": 1e-12,
|
| 18 |
+
"num_labels": 2,
|
| 19 |
+
"output_name": null,
|
| 20 |
+
"problem_type": "single_label_classification",
|
| 21 |
+
"transform": null,
|
| 22 |
+
"transform_act": "gelu"
|
| 23 |
+
},
|
| 24 |
+
"hidden_act": "gelu",
|
| 25 |
+
"hidden_dropout": 0.1,
|
| 26 |
+
"hidden_size": 768,
|
| 27 |
+
"initializer_range": 0.02,
|
| 28 |
+
"intermediate_size": 3072,
|
| 29 |
+
"layer_norm_eps": 1e-12,
|
| 30 |
+
"layer_type": "standard",
|
| 31 |
+
"lm_head": {
|
| 32 |
+
"act": null,
|
| 33 |
+
"bias": true,
|
| 34 |
+
"dropout": 0.0,
|
| 35 |
+
"hidden_size": null,
|
| 36 |
+
"layer_norm_eps": 1e-12,
|
| 37 |
+
"output_name": null,
|
| 38 |
+
"transform": "nonlinear",
|
| 39 |
+
"transform_act": "gelu"
|
| 40 |
+
},
|
| 41 |
+
"mask_token_id": 4,
|
| 42 |
+
"max_position_embeddings": 1024,
|
| 43 |
+
"max_tokens_per_msa": 16384,
|
| 44 |
+
"model_type": "rnamsm",
|
| 45 |
+
"null_token_id": 5,
|
| 46 |
+
"num_attention_heads": 12,
|
| 47 |
+
"num_hidden_layers": 10,
|
| 48 |
+
"pad_token_id": 0,
|
| 49 |
+
"position_embedding_type": "absolute",
|
| 50 |
+
"problem_type": "single_label_classification",
|
| 51 |
+
"torch_dtype": "float32",
|
| 52 |
+
"transformers_version": "4.46.3",
|
| 53 |
+
"unk_token_id": 3,
|
| 54 |
+
"use_cache": true,
|
| 55 |
+
"vocab_size": 26
|
| 56 |
+
}
|
multimolecule-rnamsm-finetuned/final_model/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e6de68264d9e151361883f5987ea8b852a333daa7b5688c2a14a1e29ef0537c2
|
| 3 |
+
size 383723048
|
multimolecule-rnamsm-finetuned/final_model/special_tokens_map.json
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"additional_special_tokens": [
|
| 3 |
+
"<null>"
|
| 4 |
+
],
|
| 5 |
+
"bos_token": {
|
| 6 |
+
"content": "<cls>",
|
| 7 |
+
"lstrip": false,
|
| 8 |
+
"normalized": false,
|
| 9 |
+
"rstrip": false,
|
| 10 |
+
"single_word": false
|
| 11 |
+
},
|
| 12 |
+
"cls_token": {
|
| 13 |
+
"content": "<cls>",
|
| 14 |
+
"lstrip": false,
|
| 15 |
+
"normalized": false,
|
| 16 |
+
"rstrip": false,
|
| 17 |
+
"single_word": false
|
| 18 |
+
},
|
| 19 |
+
"eos_token": {
|
| 20 |
+
"content": "<eos>",
|
| 21 |
+
"lstrip": false,
|
| 22 |
+
"normalized": false,
|
| 23 |
+
"rstrip": false,
|
| 24 |
+
"single_word": false
|
| 25 |
+
},
|
| 26 |
+
"mask_token": {
|
| 27 |
+
"content": "<mask>",
|
| 28 |
+
"lstrip": false,
|
| 29 |
+
"normalized": false,
|
| 30 |
+
"rstrip": false,
|
| 31 |
+
"single_word": false
|
| 32 |
+
},
|
| 33 |
+
"pad_token": {
|
| 34 |
+
"content": "<pad>",
|
| 35 |
+
"lstrip": false,
|
| 36 |
+
"normalized": false,
|
| 37 |
+
"rstrip": false,
|
| 38 |
+
"single_word": false
|
| 39 |
+
},
|
| 40 |
+
"sep_token": {
|
| 41 |
+
"content": "<eos>",
|
| 42 |
+
"lstrip": false,
|
| 43 |
+
"normalized": false,
|
| 44 |
+
"rstrip": false,
|
| 45 |
+
"single_word": false
|
| 46 |
+
},
|
| 47 |
+
"unk_token": {
|
| 48 |
+
"content": "<unk>",
|
| 49 |
+
"lstrip": false,
|
| 50 |
+
"normalized": false,
|
| 51 |
+
"rstrip": false,
|
| 52 |
+
"single_word": false
|
| 53 |
+
}
|
| 54 |
+
}
|
multimolecule-rnamsm-finetuned/final_model/tokenizer_config.json
ADDED
|
@@ -0,0 +1,68 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"added_tokens_decoder": {
|
| 3 |
+
"0": {
|
| 4 |
+
"content": "<pad>",
|
| 5 |
+
"lstrip": false,
|
| 6 |
+
"normalized": false,
|
| 7 |
+
"rstrip": false,
|
| 8 |
+
"single_word": false,
|
| 9 |
+
"special": true
|
| 10 |
+
},
|
| 11 |
+
"1": {
|
| 12 |
+
"content": "<cls>",
|
| 13 |
+
"lstrip": false,
|
| 14 |
+
"normalized": false,
|
| 15 |
+
"rstrip": false,
|
| 16 |
+
"single_word": false,
|
| 17 |
+
"special": true
|
| 18 |
+
},
|
| 19 |
+
"2": {
|
| 20 |
+
"content": "<eos>",
|
| 21 |
+
"lstrip": false,
|
| 22 |
+
"normalized": false,
|
| 23 |
+
"rstrip": false,
|
| 24 |
+
"single_word": false,
|
| 25 |
+
"special": true
|
| 26 |
+
},
|
| 27 |
+
"3": {
|
| 28 |
+
"content": "<unk>",
|
| 29 |
+
"lstrip": false,
|
| 30 |
+
"normalized": false,
|
| 31 |
+
"rstrip": false,
|
| 32 |
+
"single_word": false,
|
| 33 |
+
"special": true
|
| 34 |
+
},
|
| 35 |
+
"4": {
|
| 36 |
+
"content": "<mask>",
|
| 37 |
+
"lstrip": false,
|
| 38 |
+
"normalized": false,
|
| 39 |
+
"rstrip": false,
|
| 40 |
+
"single_word": false,
|
| 41 |
+
"special": true
|
| 42 |
+
},
|
| 43 |
+
"5": {
|
| 44 |
+
"content": "<null>",
|
| 45 |
+
"lstrip": false,
|
| 46 |
+
"normalized": false,
|
| 47 |
+
"rstrip": false,
|
| 48 |
+
"single_word": false,
|
| 49 |
+
"special": true
|
| 50 |
+
}
|
| 51 |
+
},
|
| 52 |
+
"additional_special_tokens": [
|
| 53 |
+
"<null>"
|
| 54 |
+
],
|
| 55 |
+
"bos_token": "<cls>",
|
| 56 |
+
"clean_up_tokenization_spaces": true,
|
| 57 |
+
"cls_token": "<cls>",
|
| 58 |
+
"codon": false,
|
| 59 |
+
"eos_token": "<eos>",
|
| 60 |
+
"mask_token": "<mask>",
|
| 61 |
+
"model_max_length": 1024,
|
| 62 |
+
"nmers": 1,
|
| 63 |
+
"pad_token": "<pad>",
|
| 64 |
+
"replace_T_with_U": true,
|
| 65 |
+
"sep_token": "<eos>",
|
| 66 |
+
"tokenizer_class": "RnaTokenizer",
|
| 67 |
+
"unk_token": "<unk>"
|
| 68 |
+
}
|
multimolecule-rnamsm-finetuned/final_model/training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:39e3155af0bbdf8ab54bf23ec057cf5d560d2f5535e7182f14a0b454f8e911f2
|
| 3 |
+
size 5368
|
multimolecule-rnamsm-finetuned/final_model/vocab.txt
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<pad>
|
| 2 |
+
<cls>
|
| 3 |
+
<eos>
|
| 4 |
+
<unk>
|
| 5 |
+
<mask>
|
| 6 |
+
<null>
|
| 7 |
+
A
|
| 8 |
+
C
|
| 9 |
+
G
|
| 10 |
+
U
|
| 11 |
+
N
|
| 12 |
+
R
|
| 13 |
+
Y
|
| 14 |
+
S
|
| 15 |
+
W
|
| 16 |
+
K
|
| 17 |
+
M
|
| 18 |
+
B
|
| 19 |
+
D
|
| 20 |
+
H
|
| 21 |
+
V
|
| 22 |
+
.
|
| 23 |
+
X
|
| 24 |
+
*
|
| 25 |
+
-
|
| 26 |
+
I
|
multimolecule-utrlm-te_el-finetuned/eval_metrics_utrlm.csv
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
,step,eval_loss,eval_seq_accuracy,eval_F1,eval_ideal_threshold
|
| 2 |
+
0,1000,0.0561181865632534,0.3783428017181654,0.5486725663716814,0.3837876319885254
|
| 3 |
+
1,2000,0.03955318406224251,0.4335596508244423,0.5831632040350666,0.2904277443885803
|
| 4 |
+
2,3000,0.028913654386997223,0.43196619093806293,0.5898072602577746,0.24150322377681732
|
| 5 |
+
3,4000,0.02200213074684143,0.4532354163779964,0.591035623512325,0.29686254262924194
|
| 6 |
+
4,5000,0.016823219135403633,0.4544824719412498,0.6033627676303375,0.3609362840652466
|
| 7 |
+
5,6000,0.013310940936207771,0.4539975058888735,0.6002348463376118,0.3392348885536194
|
| 8 |
+
6,7000,0.010862333700060844,0.4561452126922544,0.6046418084550098,0.35353171825408936
|
| 9 |
+
7,8000,0.008927548304200172,0.447623666343356,0.6070160035119927,0.34765511751174927
|
| 10 |
+
8,9000,0.007522304076701403,0.4575308299847582,0.6139172594682669,0.32793378829956055
|
| 11 |
+
9,10000,0.006470794323831797,0.4678536788139116,0.6153034300791557,0.3684046268463135
|
| 12 |
+
10,11000,0.005790261551737785,0.4670915893030345,0.6154158050521024,0.3683505356311798
|
| 13 |
+
11,12000,0.005166235379874706,0.4625883331023971,0.6170426669883782,0.35821619629859924
|
| 14 |
+
12,13000,0.004909387789666653,0.45794651517250934,0.6159547338221231,0.3005988299846649
|
| 15 |
+
13,14000,0.004459976684302092,0.4526118885963697,0.6179464919895825,0.32433372735977173
|
| 16 |
+
14,15000,0.0041742813773453236,0.46300401829014826,0.6182023559786993,0.35960569977760315
|
| 17 |
+
15,16000,0.003995656967163086,0.46251905223777195,0.6188691283196427,0.3561147153377533
|
| 18 |
+
16,17000,0.003880647011101246,0.46799224054316196,0.62408,0.3329227566719055
|
| 19 |
+
17,18000,0.0037868961226195097,0.47485104614105583,0.6216785482825664,0.3749973177909851
|
| 20 |
+
18,19000,0.003646288299933076,0.460371345434391,0.6252417985867119,0.3197007477283478
|
| 21 |
+
19,20000,0.003600230673328042,0.47540529305805734,0.6271309687323261,0.3553476631641388
|
| 22 |
+
20,21000,0.0035520156379789114,0.4833033116253291,0.6279894072112446,0.38413992524147034
|
| 23 |
+
21,22000,0.0035468924324959517,0.47540529305805734,0.6282247046532187,0.34830567240715027
|
| 24 |
+
22,23000,0.003507823683321476,0.47492032700568104,0.6284495936921716,0.3478001654148102
|
| 25 |
+
23,24000,0.0035892806481570005,0.47658306775668563,0.6291899722345178,0.3500838279724121
|
| 26 |
+
24,25000,0.003446375485509634,0.4720798115560482,0.6297732816186173,0.34543856978416443
|
| 27 |
+
25,26000,0.0035068897996097803,0.4738811140363032,0.6302181308785271,0.34250420331954956
|
| 28 |
+
26,27000,0.0034666049759835005,0.46833864486628796,0.6296649198030808,0.32472842931747437
|
multimolecule-utrlm-te_el-finetuned/final_model/config.json
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_name_or_path": "multimolecule/utrlm-te_el",
|
| 3 |
+
"architectures": [
|
| 4 |
+
"UtrLmForTokenPrediction"
|
| 5 |
+
],
|
| 6 |
+
"attention_dropout": 0.1,
|
| 7 |
+
"bos_token_id": 1,
|
| 8 |
+
"emb_layer_norm_before": false,
|
| 9 |
+
"eos_token_id": 2,
|
| 10 |
+
"head": {
|
| 11 |
+
"act": null,
|
| 12 |
+
"bias": true,
|
| 13 |
+
"dropout": 0.0,
|
| 14 |
+
"hidden_size": 128,
|
| 15 |
+
"layer_norm_eps": 1e-12,
|
| 16 |
+
"num_labels": 2,
|
| 17 |
+
"output_name": null,
|
| 18 |
+
"problem_type": "single_label_classification",
|
| 19 |
+
"transform": null,
|
| 20 |
+
"transform_act": "gelu"
|
| 21 |
+
},
|
| 22 |
+
"hidden_act": "gelu",
|
| 23 |
+
"hidden_dropout": 0.1,
|
| 24 |
+
"hidden_size": 128,
|
| 25 |
+
"initializer_range": 0.02,
|
| 26 |
+
"intermediate_size": 512,
|
| 27 |
+
"layer_norm_eps": 1e-12,
|
| 28 |
+
"lm_head": {
|
| 29 |
+
"act": null,
|
| 30 |
+
"bias": true,
|
| 31 |
+
"dropout": 0.0,
|
| 32 |
+
"hidden_size": null,
|
| 33 |
+
"layer_norm_eps": 1e-12,
|
| 34 |
+
"output_name": null,
|
| 35 |
+
"transform": "nonlinear",
|
| 36 |
+
"transform_act": "gelu"
|
| 37 |
+
},
|
| 38 |
+
"mask_token_id": 4,
|
| 39 |
+
"max_position_embeddings": 1026,
|
| 40 |
+
"mfe_head": null,
|
| 41 |
+
"model_type": "utrlm",
|
| 42 |
+
"null_token_id": 5,
|
| 43 |
+
"num_attention_heads": 16,
|
| 44 |
+
"num_hidden_layers": 6,
|
| 45 |
+
"pad_token_id": 0,
|
| 46 |
+
"position_embedding_type": "rotary",
|
| 47 |
+
"problem_type": "single_label_classification",
|
| 48 |
+
"ss_head": null,
|
| 49 |
+
"token_dropout": false,
|
| 50 |
+
"torch_dtype": "float32",
|
| 51 |
+
"transformers_version": "4.46.3",
|
| 52 |
+
"unk_token_id": 3,
|
| 53 |
+
"use_cache": true,
|
| 54 |
+
"vocab_size": 26
|
| 55 |
+
}
|
multimolecule-utrlm-te_el-finetuned/final_model/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7bd6c8ec2dd174580c271d89b87d4e2a67f0973a1759a72fa7076565e3109ffb
|
| 3 |
+
size 4852280
|
multimolecule-utrlm-te_el-finetuned/final_model/special_tokens_map.json
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"additional_special_tokens": [
|
| 3 |
+
"<null>"
|
| 4 |
+
],
|
| 5 |
+
"bos_token": {
|
| 6 |
+
"content": "<cls>",
|
| 7 |
+
"lstrip": false,
|
| 8 |
+
"normalized": false,
|
| 9 |
+
"rstrip": false,
|
| 10 |
+
"single_word": false
|
| 11 |
+
},
|
| 12 |
+
"cls_token": {
|
| 13 |
+
"content": "<cls>",
|
| 14 |
+
"lstrip": false,
|
| 15 |
+
"normalized": false,
|
| 16 |
+
"rstrip": false,
|
| 17 |
+
"single_word": false
|
| 18 |
+
},
|
| 19 |
+
"eos_token": {
|
| 20 |
+
"content": "<eos>",
|
| 21 |
+
"lstrip": false,
|
| 22 |
+
"normalized": false,
|
| 23 |
+
"rstrip": false,
|
| 24 |
+
"single_word": false
|
| 25 |
+
},
|
| 26 |
+
"mask_token": {
|
| 27 |
+
"content": "<mask>",
|
| 28 |
+
"lstrip": false,
|
| 29 |
+
"normalized": false,
|
| 30 |
+
"rstrip": false,
|
| 31 |
+
"single_word": false
|
| 32 |
+
},
|
| 33 |
+
"pad_token": {
|
| 34 |
+
"content": "<pad>",
|
| 35 |
+
"lstrip": false,
|
| 36 |
+
"normalized": false,
|
| 37 |
+
"rstrip": false,
|
| 38 |
+
"single_word": false
|
| 39 |
+
},
|
| 40 |
+
"sep_token": {
|
| 41 |
+
"content": "<eos>",
|
| 42 |
+
"lstrip": false,
|
| 43 |
+
"normalized": false,
|
| 44 |
+
"rstrip": false,
|
| 45 |
+
"single_word": false
|
| 46 |
+
},
|
| 47 |
+
"unk_token": {
|
| 48 |
+
"content": "<unk>",
|
| 49 |
+
"lstrip": false,
|
| 50 |
+
"normalized": false,
|
| 51 |
+
"rstrip": false,
|
| 52 |
+
"single_word": false
|
| 53 |
+
}
|
| 54 |
+
}
|
multimolecule-utrlm-te_el-finetuned/final_model/tokenizer_config.json
ADDED
|
@@ -0,0 +1,68 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"added_tokens_decoder": {
|
| 3 |
+
"0": {
|
| 4 |
+
"content": "<pad>",
|
| 5 |
+
"lstrip": false,
|
| 6 |
+
"normalized": false,
|
| 7 |
+
"rstrip": false,
|
| 8 |
+
"single_word": false,
|
| 9 |
+
"special": true
|
| 10 |
+
},
|
| 11 |
+
"1": {
|
| 12 |
+
"content": "<cls>",
|
| 13 |
+
"lstrip": false,
|
| 14 |
+
"normalized": false,
|
| 15 |
+
"rstrip": false,
|
| 16 |
+
"single_word": false,
|
| 17 |
+
"special": true
|
| 18 |
+
},
|
| 19 |
+
"2": {
|
| 20 |
+
"content": "<eos>",
|
| 21 |
+
"lstrip": false,
|
| 22 |
+
"normalized": false,
|
| 23 |
+
"rstrip": false,
|
| 24 |
+
"single_word": false,
|
| 25 |
+
"special": true
|
| 26 |
+
},
|
| 27 |
+
"3": {
|
| 28 |
+
"content": "<unk>",
|
| 29 |
+
"lstrip": false,
|
| 30 |
+
"normalized": false,
|
| 31 |
+
"rstrip": false,
|
| 32 |
+
"single_word": false,
|
| 33 |
+
"special": true
|
| 34 |
+
},
|
| 35 |
+
"4": {
|
| 36 |
+
"content": "<mask>",
|
| 37 |
+
"lstrip": false,
|
| 38 |
+
"normalized": false,
|
| 39 |
+
"rstrip": false,
|
| 40 |
+
"single_word": false,
|
| 41 |
+
"special": true
|
| 42 |
+
},
|
| 43 |
+
"5": {
|
| 44 |
+
"content": "<null>",
|
| 45 |
+
"lstrip": false,
|
| 46 |
+
"normalized": false,
|
| 47 |
+
"rstrip": false,
|
| 48 |
+
"single_word": false,
|
| 49 |
+
"special": true
|
| 50 |
+
}
|
| 51 |
+
},
|
| 52 |
+
"additional_special_tokens": [
|
| 53 |
+
"<null>"
|
| 54 |
+
],
|
| 55 |
+
"bos_token": "<cls>",
|
| 56 |
+
"clean_up_tokenization_spaces": true,
|
| 57 |
+
"cls_token": "<cls>",
|
| 58 |
+
"codon": false,
|
| 59 |
+
"eos_token": "<eos>",
|
| 60 |
+
"mask_token": "<mask>",
|
| 61 |
+
"model_max_length": 1000000000000000019884624838656,
|
| 62 |
+
"nmers": 1,
|
| 63 |
+
"pad_token": "<pad>",
|
| 64 |
+
"replace_T_with_U": true,
|
| 65 |
+
"sep_token": "<eos>",
|
| 66 |
+
"tokenizer_class": "RnaTokenizer",
|
| 67 |
+
"unk_token": "<unk>"
|
| 68 |
+
}
|
multimolecule-utrlm-te_el-finetuned/final_model/training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2ca768245670e4e0088ed896f94c5e11f380a58fa8c478011dff853949d6e2fd
|
| 3 |
+
size 5368
|
multimolecule-utrlm-te_el-finetuned/final_model/vocab.txt
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<pad>
|
| 2 |
+
<cls>
|
| 3 |
+
<eos>
|
| 4 |
+
<unk>
|
| 5 |
+
<mask>
|
| 6 |
+
<null>
|
| 7 |
+
A
|
| 8 |
+
C
|
| 9 |
+
G
|
| 10 |
+
U
|
| 11 |
+
N
|
| 12 |
+
R
|
| 13 |
+
Y
|
| 14 |
+
S
|
| 15 |
+
W
|
| 16 |
+
K
|
| 17 |
+
M
|
| 18 |
+
B
|
| 19 |
+
D
|
| 20 |
+
H
|
| 21 |
+
V
|
| 22 |
+
.
|
| 23 |
+
X
|
| 24 |
+
*
|
| 25 |
+
-
|
| 26 |
+
I
|