Upload RNAFM (#1)
Browse files- Upload RNAFM (e0927cab4c4c4f0b8d1470fc823c87b2d74b6ddf)
Co-authored-by: Rodenas <qrodenas@users.noreply.huggingface.co>
- multimolecule-rnafm-finetuned/eval_metrics_rnafm.csv +37 -0
- multimolecule-rnafm-finetuned/final_model/config.json +54 -0
- multimolecule-rnafm-finetuned/final_model/model.safetensors +3 -0
- multimolecule-rnafm-finetuned/final_model/special_tokens_map.json +54 -0
- multimolecule-rnafm-finetuned/final_model/tokenizer_config.json +68 -0
- multimolecule-rnafm-finetuned/final_model/training_args.bin +3 -0
- multimolecule-rnafm-finetuned/final_model/vocab.txt +26 -0
multimolecule-rnafm-finetuned/eval_metrics_rnafm.csv
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
,step,eval_loss,eval_seq_accuracy,eval_F1,eval_ideal_threshold
|
| 2 |
+
0,1000,0.05654533579945564,0.26735485658861025,0.41148063781321187,0.377992182970047
|
| 3 |
+
1,2000,0.03999479115009308,0.40667867534986835,0.5678379892739934,0.28405293822288513
|
| 4 |
+
2,3000,0.029324576258659363,0.4106969654981294,0.5714511727247558,0.309300035238266
|
| 5 |
+
3,4000,0.022111676633358,0.42275183594291255,0.5634552566842763,0.2923034131526947
|
| 6 |
+
4,5000,0.01701202243566513,0.4226825550782874,0.5901236525047558,0.25730085372924805
|
| 7 |
+
5,6000,0.013379547744989395,0.43619232368019956,0.5925659472422062,0.32337579131126404
|
| 8 |
+
6,7000,0.010823683813214302,0.41776361368989884,0.5878646441073512,0.2581407427787781
|
| 9 |
+
7,8000,0.008882307447493076,0.4336982125536927,0.5927243969948597,0.3128620982170105
|
| 10 |
+
8,9000,0.007649441249668598,0.4209505334626576,0.5834815956984146,0.2866809666156769
|
| 11 |
+
9,10000,0.006518819835036993,0.4351531107108217,0.597099845904619,0.3071558177471161
|
| 12 |
+
10,11000,0.005697860382497311,0.44104198420396284,0.608356326369728,0.24161586165428162
|
| 13 |
+
11,12000,0.005122509319335222,0.44034917555771097,0.6052124690435945,0.26517346501350403
|
| 14 |
+
12,13000,0.0048288097605109215,0.4484550367188582,0.6015997414559263,0.3307763636112213
|
| 15 |
+
13,14000,0.004520080517977476,0.4085492586947485,0.5873728650930723,0.2596055269241333
|
| 16 |
+
14,15000,0.0042026108130812645,0.46529028682277956,0.6141560798548095,0.30816274881362915
|
| 17 |
+
15,16000,0.004035507794469595,0.4568380213385063,0.6124875324157192,0.28661900758743286
|
| 18 |
+
16,17000,0.0039116437546908855,0.4631425800193986,0.6133354859541492,0.33768612146377563
|
| 19 |
+
17,18000,0.003864646190777421,0.44360537619509494,0.6070835639180963,0.34542742371559143
|
| 20 |
+
18,19000,0.0037449176888912916,0.4678536788139116,0.6191800701414923,0.2799220681190491
|
| 21 |
+
19,20000,0.0037235149648040533,0.46050990716364143,0.6152490789684446,0.3276229500770569
|
| 22 |
+
20,21000,0.0036446144804358482,0.4675765553554108,0.6257604867114953,0.30810418725013733
|
| 23 |
+
21,22000,0.003612295025959611,0.4717334072329223,0.6290419281346177,0.3131464123725891
|
| 24 |
+
22,23000,0.003577238880097866,0.4677843979492864,0.6298631766805473,0.29628777503967285
|
| 25 |
+
23,24000,0.003594768000766635,0.4914091727864764,0.6268041237113402,0.36331790685653687
|
| 26 |
+
24,25000,0.0035471622832119465,0.47692947207981157,0.6357695066549773,0.28095003962516785
|
| 27 |
+
25,26000,0.0035547115840017796,0.48510461410558403,0.628321478211103,0.3544747233390808
|
| 28 |
+
26,27000,0.0035257022827863693,0.4830954690314535,0.6359147296158015,0.29656633734703064
|
| 29 |
+
27,28000,0.0035129161551594734,0.49293335180823056,0.6351279644067105,0.36126482486724854
|
| 30 |
+
28,29000,0.003488227492198348,0.49369544131910764,0.6377565535460272,0.3647649884223938
|
| 31 |
+
29,30000,0.003478726837784052,0.4920327005681031,0.6421196967252782,0.31777191162109375
|
| 32 |
+
30,31000,0.0034667213913053274,0.501108493834003,0.6408713098308971,0.34830623865127563
|
| 33 |
+
31,32000,0.0034414229448884726,0.49653595676874046,0.6438972944469233,0.33935099840164185
|
| 34 |
+
32,33000,0.0034377938136458397,0.4927947900789802,0.643843892222804,0.3039156198501587
|
| 35 |
+
33,34000,0.0034172709565609694,0.5103921296937786,0.6450729927007299,0.3888712227344513
|
| 36 |
+
34,35000,0.003412203397601843,0.49521962034086187,0.6462359505134633,0.33549195528030396
|
| 37 |
+
35,36000,0.003401830093935132,0.49009283635859774,0.6467669654289373,0.3117576837539673
|
multimolecule-rnafm-finetuned/final_model/config.json
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_name_or_path": "multimolecule/rnafm",
|
| 3 |
+
"architectures": [
|
| 4 |
+
"RnaFmForTokenPrediction"
|
| 5 |
+
],
|
| 6 |
+
"attention_dropout": 0.1,
|
| 7 |
+
"bos_token_id": 1,
|
| 8 |
+
"codon": false,
|
| 9 |
+
"emb_layer_norm_before": true,
|
| 10 |
+
"eos_token_id": 2,
|
| 11 |
+
"head": {
|
| 12 |
+
"act": null,
|
| 13 |
+
"bias": true,
|
| 14 |
+
"dropout": 0.0,
|
| 15 |
+
"hidden_size": 640,
|
| 16 |
+
"layer_norm_eps": 1e-12,
|
| 17 |
+
"num_labels": 2,
|
| 18 |
+
"output_name": null,
|
| 19 |
+
"problem_type": "single_label_classification",
|
| 20 |
+
"transform": null,
|
| 21 |
+
"transform_act": "gelu"
|
| 22 |
+
},
|
| 23 |
+
"hidden_act": "gelu",
|
| 24 |
+
"hidden_dropout": 0.1,
|
| 25 |
+
"hidden_size": 640,
|
| 26 |
+
"initializer_range": 0.02,
|
| 27 |
+
"intermediate_size": 5120,
|
| 28 |
+
"layer_norm_eps": 1e-12,
|
| 29 |
+
"lm_head": {
|
| 30 |
+
"act": null,
|
| 31 |
+
"bias": true,
|
| 32 |
+
"dropout": 0.0,
|
| 33 |
+
"hidden_size": null,
|
| 34 |
+
"layer_norm_eps": 1e-12,
|
| 35 |
+
"output_name": null,
|
| 36 |
+
"transform": "nonlinear",
|
| 37 |
+
"transform_act": "gelu"
|
| 38 |
+
},
|
| 39 |
+
"mask_token_id": 4,
|
| 40 |
+
"max_position_embeddings": 1026,
|
| 41 |
+
"model_type": "rnafm",
|
| 42 |
+
"null_token_id": 5,
|
| 43 |
+
"num_attention_heads": 20,
|
| 44 |
+
"num_hidden_layers": 12,
|
| 45 |
+
"pad_token_id": 0,
|
| 46 |
+
"position_embedding_type": "absolute",
|
| 47 |
+
"problem_type": "single_label_classification",
|
| 48 |
+
"token_dropout": false,
|
| 49 |
+
"torch_dtype": "float32",
|
| 50 |
+
"transformers_version": "4.46.3",
|
| 51 |
+
"unk_token_id": 3,
|
| 52 |
+
"use_cache": true,
|
| 53 |
+
"vocab_size": 26
|
| 54 |
+
}
|
multimolecule-rnafm-finetuned/final_model/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cad389b97d924dfd7ec0d694e02bb3750248c8d5419f12629ec4d7622e22528c
|
| 3 |
+
size 398111136
|
multimolecule-rnafm-finetuned/final_model/special_tokens_map.json
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"additional_special_tokens": [
|
| 3 |
+
"<null>"
|
| 4 |
+
],
|
| 5 |
+
"bos_token": {
|
| 6 |
+
"content": "<cls>",
|
| 7 |
+
"lstrip": false,
|
| 8 |
+
"normalized": false,
|
| 9 |
+
"rstrip": false,
|
| 10 |
+
"single_word": false
|
| 11 |
+
},
|
| 12 |
+
"cls_token": {
|
| 13 |
+
"content": "<cls>",
|
| 14 |
+
"lstrip": false,
|
| 15 |
+
"normalized": false,
|
| 16 |
+
"rstrip": false,
|
| 17 |
+
"single_word": false
|
| 18 |
+
},
|
| 19 |
+
"eos_token": {
|
| 20 |
+
"content": "<eos>",
|
| 21 |
+
"lstrip": false,
|
| 22 |
+
"normalized": false,
|
| 23 |
+
"rstrip": false,
|
| 24 |
+
"single_word": false
|
| 25 |
+
},
|
| 26 |
+
"mask_token": {
|
| 27 |
+
"content": "<mask>",
|
| 28 |
+
"lstrip": false,
|
| 29 |
+
"normalized": false,
|
| 30 |
+
"rstrip": false,
|
| 31 |
+
"single_word": false
|
| 32 |
+
},
|
| 33 |
+
"pad_token": {
|
| 34 |
+
"content": "<pad>",
|
| 35 |
+
"lstrip": false,
|
| 36 |
+
"normalized": false,
|
| 37 |
+
"rstrip": false,
|
| 38 |
+
"single_word": false
|
| 39 |
+
},
|
| 40 |
+
"sep_token": {
|
| 41 |
+
"content": "<eos>",
|
| 42 |
+
"lstrip": false,
|
| 43 |
+
"normalized": false,
|
| 44 |
+
"rstrip": false,
|
| 45 |
+
"single_word": false
|
| 46 |
+
},
|
| 47 |
+
"unk_token": {
|
| 48 |
+
"content": "<unk>",
|
| 49 |
+
"lstrip": false,
|
| 50 |
+
"normalized": false,
|
| 51 |
+
"rstrip": false,
|
| 52 |
+
"single_word": false
|
| 53 |
+
}
|
| 54 |
+
}
|
multimolecule-rnafm-finetuned/final_model/tokenizer_config.json
ADDED
|
@@ -0,0 +1,68 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"added_tokens_decoder": {
|
| 3 |
+
"0": {
|
| 4 |
+
"content": "<pad>",
|
| 5 |
+
"lstrip": false,
|
| 6 |
+
"normalized": false,
|
| 7 |
+
"rstrip": false,
|
| 8 |
+
"single_word": false,
|
| 9 |
+
"special": true
|
| 10 |
+
},
|
| 11 |
+
"1": {
|
| 12 |
+
"content": "<cls>",
|
| 13 |
+
"lstrip": false,
|
| 14 |
+
"normalized": false,
|
| 15 |
+
"rstrip": false,
|
| 16 |
+
"single_word": false,
|
| 17 |
+
"special": true
|
| 18 |
+
},
|
| 19 |
+
"2": {
|
| 20 |
+
"content": "<eos>",
|
| 21 |
+
"lstrip": false,
|
| 22 |
+
"normalized": false,
|
| 23 |
+
"rstrip": false,
|
| 24 |
+
"single_word": false,
|
| 25 |
+
"special": true
|
| 26 |
+
},
|
| 27 |
+
"3": {
|
| 28 |
+
"content": "<unk>",
|
| 29 |
+
"lstrip": false,
|
| 30 |
+
"normalized": false,
|
| 31 |
+
"rstrip": false,
|
| 32 |
+
"single_word": false,
|
| 33 |
+
"special": true
|
| 34 |
+
},
|
| 35 |
+
"4": {
|
| 36 |
+
"content": "<mask>",
|
| 37 |
+
"lstrip": false,
|
| 38 |
+
"normalized": false,
|
| 39 |
+
"rstrip": false,
|
| 40 |
+
"single_word": false,
|
| 41 |
+
"special": true
|
| 42 |
+
},
|
| 43 |
+
"5": {
|
| 44 |
+
"content": "<null>",
|
| 45 |
+
"lstrip": false,
|
| 46 |
+
"normalized": false,
|
| 47 |
+
"rstrip": false,
|
| 48 |
+
"single_word": false,
|
| 49 |
+
"special": true
|
| 50 |
+
}
|
| 51 |
+
},
|
| 52 |
+
"additional_special_tokens": [
|
| 53 |
+
"<null>"
|
| 54 |
+
],
|
| 55 |
+
"bos_token": "<cls>",
|
| 56 |
+
"clean_up_tokenization_spaces": true,
|
| 57 |
+
"cls_token": "<cls>",
|
| 58 |
+
"codon": false,
|
| 59 |
+
"eos_token": "<eos>",
|
| 60 |
+
"mask_token": "<mask>",
|
| 61 |
+
"model_max_length": 1024,
|
| 62 |
+
"nmers": 1,
|
| 63 |
+
"pad_token": "<pad>",
|
| 64 |
+
"replace_T_with_U": true,
|
| 65 |
+
"sep_token": "<eos>",
|
| 66 |
+
"tokenizer_class": "RnaTokenizer",
|
| 67 |
+
"unk_token": "<unk>"
|
| 68 |
+
}
|
multimolecule-rnafm-finetuned/final_model/training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:94b874b2919304f950adc67dfe7b8ef0aeccc265daf436f4e203b7dc4acf0470
|
| 3 |
+
size 5368
|
multimolecule-rnafm-finetuned/final_model/vocab.txt
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<pad>
|
| 2 |
+
<cls>
|
| 3 |
+
<eos>
|
| 4 |
+
<unk>
|
| 5 |
+
<mask>
|
| 6 |
+
<null>
|
| 7 |
+
A
|
| 8 |
+
C
|
| 9 |
+
G
|
| 10 |
+
U
|
| 11 |
+
N
|
| 12 |
+
R
|
| 13 |
+
Y
|
| 14 |
+
S
|
| 15 |
+
W
|
| 16 |
+
K
|
| 17 |
+
M
|
| 18 |
+
B
|
| 19 |
+
D
|
| 20 |
+
H
|
| 21 |
+
V
|
| 22 |
+
.
|
| 23 |
+
X
|
| 24 |
+
*
|
| 25 |
+
-
|
| 26 |
+
I
|