nancyH commited on
Commit
a754136
·
verified ·
1 Parent(s): f8f3a0b

Delete human_ocr_ensembl

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. human_ocr_ensembl/base_3072/checkpoint-2800/config.json +0 -27
  2. human_ocr_ensembl/base_3072/checkpoint-2800/model.safetensors +0 -3
  3. human_ocr_ensembl/base_3072/checkpoint-2800/optimizer.pt +0 -3
  4. human_ocr_ensembl/base_3072/checkpoint-2800/rng_state.pth +0 -3
  5. human_ocr_ensembl/base_3072/checkpoint-2800/scheduler.pt +0 -3
  6. human_ocr_ensembl/base_3072/checkpoint-2800/special_tokens_map.json +0 -7
  7. human_ocr_ensembl/base_3072/checkpoint-2800/tokenizer.json +0 -0
  8. human_ocr_ensembl/base_3072/checkpoint-2800/tokenizer_config.json +0 -56
  9. human_ocr_ensembl/base_3072/checkpoint-2800/trainer_state.json +0 -201
  10. human_ocr_ensembl/base_3072/checkpoint-2800/training_args.bin +0 -3
  11. human_ocr_ensembl/base_3072/checkpoint-3000/config.json +0 -27
  12. human_ocr_ensembl/base_3072/checkpoint-3000/model.safetensors +0 -3
  13. human_ocr_ensembl/base_3072/checkpoint-3000/optimizer.pt +0 -3
  14. human_ocr_ensembl/base_3072/checkpoint-3000/rng_state.pth +0 -3
  15. human_ocr_ensembl/base_3072/checkpoint-3000/scheduler.pt +0 -3
  16. human_ocr_ensembl/base_3072/checkpoint-3000/special_tokens_map.json +0 -7
  17. human_ocr_ensembl/base_3072/checkpoint-3000/tokenizer.json +0 -0
  18. human_ocr_ensembl/base_3072/checkpoint-3000/tokenizer_config.json +0 -56
  19. human_ocr_ensembl/base_3072/checkpoint-3000/trainer_state.json +0 -214
  20. human_ocr_ensembl/base_3072/checkpoint-3000/training_args.bin +0 -3
  21. human_ocr_ensembl/base_3072/checkpoint-3200/config.json +0 -27
  22. human_ocr_ensembl/base_3072/checkpoint-3200/model.safetensors +0 -3
  23. human_ocr_ensembl/base_3072/checkpoint-3200/optimizer.pt +0 -3
  24. human_ocr_ensembl/base_3072/checkpoint-3200/rng_state.pth +0 -3
  25. human_ocr_ensembl/base_3072/checkpoint-3200/scheduler.pt +0 -3
  26. human_ocr_ensembl/base_3072/checkpoint-3200/special_tokens_map.json +0 -7
  27. human_ocr_ensembl/base_3072/checkpoint-3200/tokenizer.json +0 -0
  28. human_ocr_ensembl/base_3072/checkpoint-3200/tokenizer_config.json +0 -56
  29. human_ocr_ensembl/base_3072/checkpoint-3200/trainer_state.json +0 -227
  30. human_ocr_ensembl/base_3072/checkpoint-3200/training_args.bin +0 -3
  31. human_ocr_ensembl/base_3072/results/base_3072_hg38_BPE_3e-5_human_ocr_ensembl_seed42/eval_results.json +0 -1
  32. human_ocr_ensembl/base_4096/checkpoint-2800/config.json +0 -27
  33. human_ocr_ensembl/base_4096/checkpoint-2800/model.safetensors +0 -3
  34. human_ocr_ensembl/base_4096/checkpoint-2800/optimizer.pt +0 -3
  35. human_ocr_ensembl/base_4096/checkpoint-2800/rng_state.pth +0 -3
  36. human_ocr_ensembl/base_4096/checkpoint-2800/scheduler.pt +0 -3
  37. human_ocr_ensembl/base_4096/checkpoint-2800/special_tokens_map.json +0 -7
  38. human_ocr_ensembl/base_4096/checkpoint-2800/tokenizer.json +0 -0
  39. human_ocr_ensembl/base_4096/checkpoint-2800/tokenizer_config.json +0 -56
  40. human_ocr_ensembl/base_4096/checkpoint-2800/trainer_state.json +0 -201
  41. human_ocr_ensembl/base_4096/checkpoint-2800/training_args.bin +0 -3
  42. human_ocr_ensembl/base_4096/checkpoint-3000/config.json +0 -27
  43. human_ocr_ensembl/base_4096/checkpoint-3000/model.safetensors +0 -3
  44. human_ocr_ensembl/base_4096/checkpoint-3000/optimizer.pt +0 -3
  45. human_ocr_ensembl/base_4096/checkpoint-3000/rng_state.pth +0 -3
  46. human_ocr_ensembl/base_4096/checkpoint-3000/scheduler.pt +0 -3
  47. human_ocr_ensembl/base_4096/checkpoint-3000/special_tokens_map.json +0 -7
  48. human_ocr_ensembl/base_4096/checkpoint-3000/tokenizer.json +0 -0
  49. human_ocr_ensembl/base_4096/checkpoint-3000/tokenizer_config.json +0 -56
  50. human_ocr_ensembl/base_4096/checkpoint-3000/trainer_state.json +0 -214
human_ocr_ensembl/base_3072/checkpoint-2800/config.json DELETED
@@ -1,27 +0,0 @@
1
- {
2
- "_name_or_path": "/root/NaN/dna-tokenizer/pretrain/models/base_3072/checkpoint-100000",
3
- "architectures": [
4
- "BertForSequenceClassification"
5
- ],
6
- "attention_probs_dropout_prob": 0.1,
7
- "classifier_dropout": null,
8
- "hidden_act": "gelu",
9
- "hidden_dropout_prob": 0.1,
10
- "hidden_size": 768,
11
- "initializer_range": 0.02,
12
- "intermediate_size": 3072,
13
- "layer_norm_eps": 1e-12,
14
- "max_length": 512,
15
- "max_position_embeddings": 512,
16
- "model_type": "bert",
17
- "num_attention_heads": 12,
18
- "num_hidden_layers": 12,
19
- "pad_token_id": 0,
20
- "position_embedding_type": "absolute",
21
- "problem_type": "single_label_classification",
22
- "torch_dtype": "float32",
23
- "transformers_version": "4.35.2",
24
- "type_vocab_size": 2,
25
- "use_cache": true,
26
- "vocab_size": 3072
27
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
human_ocr_ensembl/base_3072/checkpoint-2800/model.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:d03574f1a1cbc4c5f1d09bf4e6310e9496e420a3c922f4186b08ddc5b294d997
3
- size 353632152
 
 
 
 
human_ocr_ensembl/base_3072/checkpoint-2800/optimizer.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:7d145281badf1a6e12a06492a3af79a751464e2e2705dbc953ab34078876731a
3
- size 707385995
 
 
 
 
human_ocr_ensembl/base_3072/checkpoint-2800/rng_state.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:5eb4f754a0414db80041f9f91e8cbf397684de0d3ca59ef63abd1e17e5e76c1f
3
- size 14709
 
 
 
 
human_ocr_ensembl/base_3072/checkpoint-2800/scheduler.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:f48bffb25980b1ddc9943acd0f93ae0e40f503ed6b7b3e2f666408c6a8e8fc33
3
- size 1465
 
 
 
 
human_ocr_ensembl/base_3072/checkpoint-2800/special_tokens_map.json DELETED
@@ -1,7 +0,0 @@
1
- {
2
- "cls_token": "[CLS]",
3
- "mask_token": "[MASK]",
4
- "pad_token": "[PAD]",
5
- "sep_token": "[SEP]",
6
- "unk_token": "[UNK]"
7
- }
 
 
 
 
 
 
 
 
human_ocr_ensembl/base_3072/checkpoint-2800/tokenizer.json DELETED
The diff for this file is too large to render. See raw diff
 
human_ocr_ensembl/base_3072/checkpoint-2800/tokenizer_config.json DELETED
@@ -1,56 +0,0 @@
1
- {
2
- "added_tokens_decoder": {
3
- "0": {
4
- "content": "[PAD]",
5
- "lstrip": false,
6
- "normalized": false,
7
- "rstrip": false,
8
- "single_word": false,
9
- "special": true
10
- },
11
- "1": {
12
- "content": "[UNK]",
13
- "lstrip": false,
14
- "normalized": false,
15
- "rstrip": false,
16
- "single_word": false,
17
- "special": true
18
- },
19
- "2": {
20
- "content": "[CLS]",
21
- "lstrip": false,
22
- "normalized": false,
23
- "rstrip": false,
24
- "single_word": false,
25
- "special": true
26
- },
27
- "3": {
28
- "content": "[SEP]",
29
- "lstrip": false,
30
- "normalized": false,
31
- "rstrip": false,
32
- "single_word": false,
33
- "special": true
34
- },
35
- "4": {
36
- "content": "[MASK]",
37
- "lstrip": false,
38
- "normalized": false,
39
- "rstrip": false,
40
- "single_word": false,
41
- "special": true
42
- }
43
- },
44
- "cache_dir": null,
45
- "clean_up_tokenization_spaces": true,
46
- "cls_token": "[CLS]",
47
- "mask_token": "[MASK]",
48
- "model_max_length": 100,
49
- "pad_token": "[PAD]",
50
- "padding_side": "right",
51
- "sep_token": "[SEP]",
52
- "tokenizer_class": "PreTrainedTokenizerFast",
53
- "trust_remote_code": true,
54
- "unk_token": "[UNK]",
55
- "use_fast": true
56
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
human_ocr_ensembl/base_3072/checkpoint-2800/trainer_state.json DELETED
@@ -1,201 +0,0 @@
1
- {
2
- "best_metric": null,
3
- "best_model_checkpoint": null,
4
- "epoch": 2.5617566331198534,
5
- "eval_steps": 200,
6
- "global_step": 2800,
7
- "is_hyper_param_search": false,
8
- "is_local_process_zero": true,
9
- "is_world_process_zero": true,
10
- "log_history": [
11
- {
12
- "epoch": 0.18,
13
- "eval_accuracy": 0.6992446784161135,
14
- "eval_f1": 0.6967343333524518,
15
- "eval_loss": 0.5663131475448608,
16
- "eval_matthews_correlation": 0.4032838364382199,
17
- "eval_precision": 0.7047615707046442,
18
- "eval_recall": 0.6985697953144293,
19
- "eval_runtime": 2.0971,
20
- "eval_samples_per_second": 8333.515,
21
- "eval_steps_per_second": 65.329,
22
- "step": 200
23
- },
24
- {
25
- "epoch": 0.37,
26
- "eval_accuracy": 0.6932364385442893,
27
- "eval_f1": 0.6882830158676496,
28
- "eval_loss": 0.5725054144859314,
29
- "eval_matthews_correlation": 0.4023780258477855,
30
- "eval_precision": 0.7083489182429268,
31
- "eval_recall": 0.6942751575705117,
32
- "eval_runtime": 2.0863,
33
- "eval_samples_per_second": 8376.436,
34
- "eval_steps_per_second": 65.666,
35
- "step": 400
36
- },
37
- {
38
- "epoch": 0.55,
39
- "eval_accuracy": 0.7249942778667887,
40
- "eval_f1": 0.7236563504089994,
41
- "eval_loss": 0.5338560342788696,
42
- "eval_matthews_correlation": 0.45288472081496567,
43
- "eval_precision": 0.7284116969707025,
44
- "eval_recall": 0.7244900032133177,
45
- "eval_runtime": 2.0847,
46
- "eval_samples_per_second": 8382.839,
47
- "eval_steps_per_second": 65.716,
48
- "step": 600
49
- },
50
- {
51
- "epoch": 0.73,
52
- "eval_accuracy": 0.727454795147631,
53
- "eval_f1": 0.7271190497286429,
54
- "eval_loss": 0.5313814282417297,
55
- "eval_matthews_correlation": 0.45531538250312886,
56
- "eval_precision": 0.7280930565651094,
57
- "eval_recall": 0.7272231569276124,
58
- "eval_runtime": 2.0843,
59
- "eval_samples_per_second": 8384.453,
60
- "eval_steps_per_second": 65.728,
61
- "step": 800
62
- },
63
- {
64
- "epoch": 0.91,
65
- "eval_accuracy": 0.7303158617532616,
66
- "eval_f1": 0.7275747475999237,
67
- "eval_loss": 0.5319721102714539,
68
- "eval_matthews_correlation": 0.46789560676374625,
69
- "eval_precision": 0.7384098581673617,
70
- "eval_recall": 0.7295692599623227,
71
- "eval_runtime": 2.085,
72
- "eval_samples_per_second": 8381.713,
73
- "eval_steps_per_second": 65.707,
74
- "step": 1000
75
- },
76
- {
77
- "epoch": 1.1,
78
- "eval_accuracy": 0.7380407415884642,
79
- "eval_f1": 0.7380382404292021,
80
- "eval_loss": 0.5272664427757263,
81
- "eval_matthews_correlation": 0.4762546721569884,
82
- "eval_precision": 0.7381435065052877,
83
- "eval_recall": 0.7381111667497053,
84
- "eval_runtime": 2.0861,
85
- "eval_samples_per_second": 8377.25,
86
- "eval_steps_per_second": 65.672,
87
- "step": 1200
88
- },
89
- {
90
- "epoch": 1.28,
91
- "eval_accuracy": 0.7364385442893111,
92
- "eval_f1": 0.736433021138519,
93
- "eval_loss": 0.5285834074020386,
94
- "eval_matthews_correlation": 0.47309527398952067,
95
- "eval_precision": 0.736574658685002,
96
- "eval_recall": 0.7365206183909514,
97
- "eval_runtime": 2.0972,
98
- "eval_samples_per_second": 8332.96,
99
- "eval_steps_per_second": 65.325,
100
- "step": 1400
101
- },
102
- {
103
- "epoch": 1.46,
104
- "eval_accuracy": 0.7356946669718472,
105
- "eval_f1": 0.7355568209971208,
106
- "eval_loss": 0.524845540523529,
107
- "eval_matthews_correlation": 0.47250517415343696,
108
- "eval_precision": 0.7365847965721319,
109
- "eval_recall": 0.73592084406584,
110
- "eval_runtime": 2.0891,
111
- "eval_samples_per_second": 8365.186,
112
- "eval_steps_per_second": 65.577,
113
- "step": 1600
114
- },
115
- {
116
- "epoch": 1.65,
117
- "eval_accuracy": 0.7402723735408561,
118
- "eval_f1": 0.738616418688818,
119
- "eval_loss": 0.5155823230743408,
120
- "eval_matthews_correlation": 0.4849472607211828,
121
- "eval_precision": 0.7452896161327611,
122
- "eval_recall": 0.7396899728051398,
123
- "eval_runtime": 2.0895,
124
- "eval_samples_per_second": 8363.752,
125
- "eval_steps_per_second": 65.566,
126
- "step": 1800
127
- },
128
- {
129
- "epoch": 1.83,
130
- "eval_accuracy": 0.742790112153811,
131
- "eval_f1": 0.7427467082709358,
132
- "eval_loss": 0.5115640163421631,
133
- "eval_matthews_correlation": 0.4855319554250339,
134
- "eval_precision": 0.7427981041740035,
135
- "eval_recall": 0.7427338555019267,
136
- "eval_runtime": 2.0887,
137
- "eval_samples_per_second": 8366.987,
138
- "eval_steps_per_second": 65.592,
139
- "step": 2000
140
- },
141
- {
142
- "epoch": 2.01,
143
- "eval_accuracy": 0.7434195468070497,
144
- "eval_f1": 0.7432712646174982,
145
- "eval_loss": 0.517207145690918,
146
- "eval_matthews_correlation": 0.48806105010385226,
147
- "eval_precision": 0.7444059316502798,
148
- "eval_recall": 0.7436556950767057,
149
- "eval_runtime": 2.087,
150
- "eval_samples_per_second": 8373.626,
151
- "eval_steps_per_second": 65.644,
152
- "step": 2200
153
- },
154
- {
155
- "epoch": 2.2,
156
- "eval_accuracy": 0.7436484321355001,
157
- "eval_f1": 0.7434945812464957,
158
- "eval_loss": 0.5187065601348877,
159
- "eval_matthews_correlation": 0.4873991502151912,
160
- "eval_precision": 0.7438979101569678,
161
- "eval_recall": 0.7435014013420647,
162
- "eval_runtime": 2.0875,
163
- "eval_samples_per_second": 8371.903,
164
- "eval_steps_per_second": 65.63,
165
- "step": 2400
166
- },
167
- {
168
- "epoch": 2.38,
169
- "eval_accuracy": 0.7444495307850767,
170
- "eval_f1": 0.7444090259561158,
171
- "eval_loss": 0.5209320187568665,
172
- "eval_matthews_correlation": 0.48944325343888784,
173
- "eval_precision": 0.7448479511164695,
174
- "eval_recall": 0.7445953674969612,
175
- "eval_runtime": 2.0891,
176
- "eval_samples_per_second": 8365.525,
177
- "eval_steps_per_second": 65.58,
178
- "step": 2600
179
- },
180
- {
181
- "epoch": 2.56,
182
- "eval_accuracy": 0.7459372854200046,
183
- "eval_f1": 0.7451259594798789,
184
- "eval_loss": 0.5175760984420776,
185
- "eval_matthews_correlation": 0.49378781361272983,
186
- "eval_precision": 0.748257049515006,
187
- "eval_recall": 0.7455382489125468,
188
- "eval_runtime": 2.0886,
189
- "eval_samples_per_second": 8367.432,
190
- "eval_steps_per_second": 65.595,
191
- "step": 2800
192
- }
193
- ],
194
- "logging_steps": 100000,
195
- "max_steps": 3279,
196
- "num_train_epochs": 3,
197
- "save_steps": 200,
198
- "total_flos": 1.8407496733425664e+16,
199
- "trial_name": null,
200
- "trial_params": null
201
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
human_ocr_ensembl/base_3072/checkpoint-2800/training_args.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:074c5d4b16f394462a2ed140114255544a90f5962dd86dcd667bda481a83bcaa
3
- size 5265
 
 
 
 
human_ocr_ensembl/base_3072/checkpoint-3000/config.json DELETED
@@ -1,27 +0,0 @@
1
- {
2
- "_name_or_path": "/root/NaN/dna-tokenizer/pretrain/models/base_3072/checkpoint-100000",
3
- "architectures": [
4
- "BertForSequenceClassification"
5
- ],
6
- "attention_probs_dropout_prob": 0.1,
7
- "classifier_dropout": null,
8
- "hidden_act": "gelu",
9
- "hidden_dropout_prob": 0.1,
10
- "hidden_size": 768,
11
- "initializer_range": 0.02,
12
- "intermediate_size": 3072,
13
- "layer_norm_eps": 1e-12,
14
- "max_length": 512,
15
- "max_position_embeddings": 512,
16
- "model_type": "bert",
17
- "num_attention_heads": 12,
18
- "num_hidden_layers": 12,
19
- "pad_token_id": 0,
20
- "position_embedding_type": "absolute",
21
- "problem_type": "single_label_classification",
22
- "torch_dtype": "float32",
23
- "transformers_version": "4.35.2",
24
- "type_vocab_size": 2,
25
- "use_cache": true,
26
- "vocab_size": 3072
27
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
human_ocr_ensembl/base_3072/checkpoint-3000/model.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:da059090f80aacd25942238bff7d321bf7f033bf80fcbb171de88c32a36ae386
3
- size 353632152
 
 
 
 
human_ocr_ensembl/base_3072/checkpoint-3000/optimizer.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:eb12062fbf81225bfabc5daa9ddca02dfddccb226fc1e7d45af7072adb971920
3
- size 707385995
 
 
 
 
human_ocr_ensembl/base_3072/checkpoint-3000/rng_state.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:45efb8dc53020d0c80c0b48b34097b0336f1926c50380ac1b67ecece13fd0e52
3
- size 14709
 
 
 
 
human_ocr_ensembl/base_3072/checkpoint-3000/scheduler.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:e80689286bea2a0662cb87fde9669f458e4a8e334c380b87fbb028f5a928a685
3
- size 1465
 
 
 
 
human_ocr_ensembl/base_3072/checkpoint-3000/special_tokens_map.json DELETED
@@ -1,7 +0,0 @@
1
- {
2
- "cls_token": "[CLS]",
3
- "mask_token": "[MASK]",
4
- "pad_token": "[PAD]",
5
- "sep_token": "[SEP]",
6
- "unk_token": "[UNK]"
7
- }
 
 
 
 
 
 
 
 
human_ocr_ensembl/base_3072/checkpoint-3000/tokenizer.json DELETED
The diff for this file is too large to render. See raw diff
 
human_ocr_ensembl/base_3072/checkpoint-3000/tokenizer_config.json DELETED
@@ -1,56 +0,0 @@
1
- {
2
- "added_tokens_decoder": {
3
- "0": {
4
- "content": "[PAD]",
5
- "lstrip": false,
6
- "normalized": false,
7
- "rstrip": false,
8
- "single_word": false,
9
- "special": true
10
- },
11
- "1": {
12
- "content": "[UNK]",
13
- "lstrip": false,
14
- "normalized": false,
15
- "rstrip": false,
16
- "single_word": false,
17
- "special": true
18
- },
19
- "2": {
20
- "content": "[CLS]",
21
- "lstrip": false,
22
- "normalized": false,
23
- "rstrip": false,
24
- "single_word": false,
25
- "special": true
26
- },
27
- "3": {
28
- "content": "[SEP]",
29
- "lstrip": false,
30
- "normalized": false,
31
- "rstrip": false,
32
- "single_word": false,
33
- "special": true
34
- },
35
- "4": {
36
- "content": "[MASK]",
37
- "lstrip": false,
38
- "normalized": false,
39
- "rstrip": false,
40
- "single_word": false,
41
- "special": true
42
- }
43
- },
44
- "cache_dir": null,
45
- "clean_up_tokenization_spaces": true,
46
- "cls_token": "[CLS]",
47
- "mask_token": "[MASK]",
48
- "model_max_length": 100,
49
- "pad_token": "[PAD]",
50
- "padding_side": "right",
51
- "sep_token": "[SEP]",
52
- "tokenizer_class": "PreTrainedTokenizerFast",
53
- "trust_remote_code": true,
54
- "unk_token": "[UNK]",
55
- "use_fast": true
56
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
human_ocr_ensembl/base_3072/checkpoint-3000/trainer_state.json DELETED
@@ -1,214 +0,0 @@
1
- {
2
- "best_metric": null,
3
- "best_model_checkpoint": null,
4
- "epoch": 2.7447392497712717,
5
- "eval_steps": 200,
6
- "global_step": 3000,
7
- "is_hyper_param_search": false,
8
- "is_local_process_zero": true,
9
- "is_world_process_zero": true,
10
- "log_history": [
11
- {
12
- "epoch": 0.18,
13
- "eval_accuracy": 0.6992446784161135,
14
- "eval_f1": 0.6967343333524518,
15
- "eval_loss": 0.5663131475448608,
16
- "eval_matthews_correlation": 0.4032838364382199,
17
- "eval_precision": 0.7047615707046442,
18
- "eval_recall": 0.6985697953144293,
19
- "eval_runtime": 2.0971,
20
- "eval_samples_per_second": 8333.515,
21
- "eval_steps_per_second": 65.329,
22
- "step": 200
23
- },
24
- {
25
- "epoch": 0.37,
26
- "eval_accuracy": 0.6932364385442893,
27
- "eval_f1": 0.6882830158676496,
28
- "eval_loss": 0.5725054144859314,
29
- "eval_matthews_correlation": 0.4023780258477855,
30
- "eval_precision": 0.7083489182429268,
31
- "eval_recall": 0.6942751575705117,
32
- "eval_runtime": 2.0863,
33
- "eval_samples_per_second": 8376.436,
34
- "eval_steps_per_second": 65.666,
35
- "step": 400
36
- },
37
- {
38
- "epoch": 0.55,
39
- "eval_accuracy": 0.7249942778667887,
40
- "eval_f1": 0.7236563504089994,
41
- "eval_loss": 0.5338560342788696,
42
- "eval_matthews_correlation": 0.45288472081496567,
43
- "eval_precision": 0.7284116969707025,
44
- "eval_recall": 0.7244900032133177,
45
- "eval_runtime": 2.0847,
46
- "eval_samples_per_second": 8382.839,
47
- "eval_steps_per_second": 65.716,
48
- "step": 600
49
- },
50
- {
51
- "epoch": 0.73,
52
- "eval_accuracy": 0.727454795147631,
53
- "eval_f1": 0.7271190497286429,
54
- "eval_loss": 0.5313814282417297,
55
- "eval_matthews_correlation": 0.45531538250312886,
56
- "eval_precision": 0.7280930565651094,
57
- "eval_recall": 0.7272231569276124,
58
- "eval_runtime": 2.0843,
59
- "eval_samples_per_second": 8384.453,
60
- "eval_steps_per_second": 65.728,
61
- "step": 800
62
- },
63
- {
64
- "epoch": 0.91,
65
- "eval_accuracy": 0.7303158617532616,
66
- "eval_f1": 0.7275747475999237,
67
- "eval_loss": 0.5319721102714539,
68
- "eval_matthews_correlation": 0.46789560676374625,
69
- "eval_precision": 0.7384098581673617,
70
- "eval_recall": 0.7295692599623227,
71
- "eval_runtime": 2.085,
72
- "eval_samples_per_second": 8381.713,
73
- "eval_steps_per_second": 65.707,
74
- "step": 1000
75
- },
76
- {
77
- "epoch": 1.1,
78
- "eval_accuracy": 0.7380407415884642,
79
- "eval_f1": 0.7380382404292021,
80
- "eval_loss": 0.5272664427757263,
81
- "eval_matthews_correlation": 0.4762546721569884,
82
- "eval_precision": 0.7381435065052877,
83
- "eval_recall": 0.7381111667497053,
84
- "eval_runtime": 2.0861,
85
- "eval_samples_per_second": 8377.25,
86
- "eval_steps_per_second": 65.672,
87
- "step": 1200
88
- },
89
- {
90
- "epoch": 1.28,
91
- "eval_accuracy": 0.7364385442893111,
92
- "eval_f1": 0.736433021138519,
93
- "eval_loss": 0.5285834074020386,
94
- "eval_matthews_correlation": 0.47309527398952067,
95
- "eval_precision": 0.736574658685002,
96
- "eval_recall": 0.7365206183909514,
97
- "eval_runtime": 2.0972,
98
- "eval_samples_per_second": 8332.96,
99
- "eval_steps_per_second": 65.325,
100
- "step": 1400
101
- },
102
- {
103
- "epoch": 1.46,
104
- "eval_accuracy": 0.7356946669718472,
105
- "eval_f1": 0.7355568209971208,
106
- "eval_loss": 0.524845540523529,
107
- "eval_matthews_correlation": 0.47250517415343696,
108
- "eval_precision": 0.7365847965721319,
109
- "eval_recall": 0.73592084406584,
110
- "eval_runtime": 2.0891,
111
- "eval_samples_per_second": 8365.186,
112
- "eval_steps_per_second": 65.577,
113
- "step": 1600
114
- },
115
- {
116
- "epoch": 1.65,
117
- "eval_accuracy": 0.7402723735408561,
118
- "eval_f1": 0.738616418688818,
119
- "eval_loss": 0.5155823230743408,
120
- "eval_matthews_correlation": 0.4849472607211828,
121
- "eval_precision": 0.7452896161327611,
122
- "eval_recall": 0.7396899728051398,
123
- "eval_runtime": 2.0895,
124
- "eval_samples_per_second": 8363.752,
125
- "eval_steps_per_second": 65.566,
126
- "step": 1800
127
- },
128
- {
129
- "epoch": 1.83,
130
- "eval_accuracy": 0.742790112153811,
131
- "eval_f1": 0.7427467082709358,
132
- "eval_loss": 0.5115640163421631,
133
- "eval_matthews_correlation": 0.4855319554250339,
134
- "eval_precision": 0.7427981041740035,
135
- "eval_recall": 0.7427338555019267,
136
- "eval_runtime": 2.0887,
137
- "eval_samples_per_second": 8366.987,
138
- "eval_steps_per_second": 65.592,
139
- "step": 2000
140
- },
141
- {
142
- "epoch": 2.01,
143
- "eval_accuracy": 0.7434195468070497,
144
- "eval_f1": 0.7432712646174982,
145
- "eval_loss": 0.517207145690918,
146
- "eval_matthews_correlation": 0.48806105010385226,
147
- "eval_precision": 0.7444059316502798,
148
- "eval_recall": 0.7436556950767057,
149
- "eval_runtime": 2.087,
150
- "eval_samples_per_second": 8373.626,
151
- "eval_steps_per_second": 65.644,
152
- "step": 2200
153
- },
154
- {
155
- "epoch": 2.2,
156
- "eval_accuracy": 0.7436484321355001,
157
- "eval_f1": 0.7434945812464957,
158
- "eval_loss": 0.5187065601348877,
159
- "eval_matthews_correlation": 0.4873991502151912,
160
- "eval_precision": 0.7438979101569678,
161
- "eval_recall": 0.7435014013420647,
162
- "eval_runtime": 2.0875,
163
- "eval_samples_per_second": 8371.903,
164
- "eval_steps_per_second": 65.63,
165
- "step": 2400
166
- },
167
- {
168
- "epoch": 2.38,
169
- "eval_accuracy": 0.7444495307850767,
170
- "eval_f1": 0.7444090259561158,
171
- "eval_loss": 0.5209320187568665,
172
- "eval_matthews_correlation": 0.48944325343888784,
173
- "eval_precision": 0.7448479511164695,
174
- "eval_recall": 0.7445953674969612,
175
- "eval_runtime": 2.0891,
176
- "eval_samples_per_second": 8365.525,
177
- "eval_steps_per_second": 65.58,
178
- "step": 2600
179
- },
180
- {
181
- "epoch": 2.56,
182
- "eval_accuracy": 0.7459372854200046,
183
- "eval_f1": 0.7451259594798789,
184
- "eval_loss": 0.5175760984420776,
185
- "eval_matthews_correlation": 0.49378781361272983,
186
- "eval_precision": 0.748257049515006,
187
- "eval_recall": 0.7455382489125468,
188
- "eval_runtime": 2.0886,
189
- "eval_samples_per_second": 8367.432,
190
- "eval_steps_per_second": 65.595,
191
- "step": 2800
192
- },
193
- {
194
- "epoch": 2.74,
195
- "eval_accuracy": 0.7489127946898604,
196
- "eval_f1": 0.7482889784661584,
197
- "eval_loss": 0.515264630317688,
198
- "eval_matthews_correlation": 0.4992191497597552,
199
- "eval_precision": 0.7506571663528661,
200
- "eval_recall": 0.7485663616894263,
201
- "eval_runtime": 2.0902,
202
- "eval_samples_per_second": 8360.847,
203
- "eval_steps_per_second": 65.543,
204
- "step": 3000
205
- }
206
- ],
207
- "logging_steps": 100000,
208
- "max_steps": 3279,
209
- "num_train_epochs": 3,
210
- "save_steps": 200,
211
- "total_flos": 1.972305206129459e+16,
212
- "trial_name": null,
213
- "trial_params": null
214
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
human_ocr_ensembl/base_3072/checkpoint-3000/training_args.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:074c5d4b16f394462a2ed140114255544a90f5962dd86dcd667bda481a83bcaa
3
- size 5265
 
 
 
 
human_ocr_ensembl/base_3072/checkpoint-3200/config.json DELETED
@@ -1,27 +0,0 @@
1
- {
2
- "_name_or_path": "/root/NaN/dna-tokenizer/pretrain/models/base_3072/checkpoint-100000",
3
- "architectures": [
4
- "BertForSequenceClassification"
5
- ],
6
- "attention_probs_dropout_prob": 0.1,
7
- "classifier_dropout": null,
8
- "hidden_act": "gelu",
9
- "hidden_dropout_prob": 0.1,
10
- "hidden_size": 768,
11
- "initializer_range": 0.02,
12
- "intermediate_size": 3072,
13
- "layer_norm_eps": 1e-12,
14
- "max_length": 512,
15
- "max_position_embeddings": 512,
16
- "model_type": "bert",
17
- "num_attention_heads": 12,
18
- "num_hidden_layers": 12,
19
- "pad_token_id": 0,
20
- "position_embedding_type": "absolute",
21
- "problem_type": "single_label_classification",
22
- "torch_dtype": "float32",
23
- "transformers_version": "4.35.2",
24
- "type_vocab_size": 2,
25
- "use_cache": true,
26
- "vocab_size": 3072
27
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
human_ocr_ensembl/base_3072/checkpoint-3200/model.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:5dd1b9888b6ea9691bdbc2ba75b04ecf0eaa0c020d226e35298b84b55dc7bcf2
3
- size 353632152
 
 
 
 
human_ocr_ensembl/base_3072/checkpoint-3200/optimizer.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:dbcaf8c00572e9144154e120f659af2878f586fd9c6ba8b4802fd7ea32bd3285
3
- size 707385995
 
 
 
 
human_ocr_ensembl/base_3072/checkpoint-3200/rng_state.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:d1afaa0a5ee0f0e30642206abaabfa11e26562c46c22249e5acac85deb5d1059
3
- size 14709
 
 
 
 
human_ocr_ensembl/base_3072/checkpoint-3200/scheduler.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:755d46cfc5215a19bb48117a6d65bfe2d6991b60203c226b2795038ee66cbfc4
3
- size 1465
 
 
 
 
human_ocr_ensembl/base_3072/checkpoint-3200/special_tokens_map.json DELETED
@@ -1,7 +0,0 @@
1
- {
2
- "cls_token": "[CLS]",
3
- "mask_token": "[MASK]",
4
- "pad_token": "[PAD]",
5
- "sep_token": "[SEP]",
6
- "unk_token": "[UNK]"
7
- }
 
 
 
 
 
 
 
 
human_ocr_ensembl/base_3072/checkpoint-3200/tokenizer.json DELETED
The diff for this file is too large to render. See raw diff
 
human_ocr_ensembl/base_3072/checkpoint-3200/tokenizer_config.json DELETED
@@ -1,56 +0,0 @@
1
- {
2
- "added_tokens_decoder": {
3
- "0": {
4
- "content": "[PAD]",
5
- "lstrip": false,
6
- "normalized": false,
7
- "rstrip": false,
8
- "single_word": false,
9
- "special": true
10
- },
11
- "1": {
12
- "content": "[UNK]",
13
- "lstrip": false,
14
- "normalized": false,
15
- "rstrip": false,
16
- "single_word": false,
17
- "special": true
18
- },
19
- "2": {
20
- "content": "[CLS]",
21
- "lstrip": false,
22
- "normalized": false,
23
- "rstrip": false,
24
- "single_word": false,
25
- "special": true
26
- },
27
- "3": {
28
- "content": "[SEP]",
29
- "lstrip": false,
30
- "normalized": false,
31
- "rstrip": false,
32
- "single_word": false,
33
- "special": true
34
- },
35
- "4": {
36
- "content": "[MASK]",
37
- "lstrip": false,
38
- "normalized": false,
39
- "rstrip": false,
40
- "single_word": false,
41
- "special": true
42
- }
43
- },
44
- "cache_dir": null,
45
- "clean_up_tokenization_spaces": true,
46
- "cls_token": "[CLS]",
47
- "mask_token": "[MASK]",
48
- "model_max_length": 100,
49
- "pad_token": "[PAD]",
50
- "padding_side": "right",
51
- "sep_token": "[SEP]",
52
- "tokenizer_class": "PreTrainedTokenizerFast",
53
- "trust_remote_code": true,
54
- "unk_token": "[UNK]",
55
- "use_fast": true
56
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
human_ocr_ensembl/base_3072/checkpoint-3200/trainer_state.json DELETED
@@ -1,227 +0,0 @@
1
- {
2
- "best_metric": null,
3
- "best_model_checkpoint": null,
4
- "epoch": 2.92772186642269,
5
- "eval_steps": 200,
6
- "global_step": 3200,
7
- "is_hyper_param_search": false,
8
- "is_local_process_zero": true,
9
- "is_world_process_zero": true,
10
- "log_history": [
11
- {
12
- "epoch": 0.18,
13
- "eval_accuracy": 0.6992446784161135,
14
- "eval_f1": 0.6967343333524518,
15
- "eval_loss": 0.5663131475448608,
16
- "eval_matthews_correlation": 0.4032838364382199,
17
- "eval_precision": 0.7047615707046442,
18
- "eval_recall": 0.6985697953144293,
19
- "eval_runtime": 2.0971,
20
- "eval_samples_per_second": 8333.515,
21
- "eval_steps_per_second": 65.329,
22
- "step": 200
23
- },
24
- {
25
- "epoch": 0.37,
26
- "eval_accuracy": 0.6932364385442893,
27
- "eval_f1": 0.6882830158676496,
28
- "eval_loss": 0.5725054144859314,
29
- "eval_matthews_correlation": 0.4023780258477855,
30
- "eval_precision": 0.7083489182429268,
31
- "eval_recall": 0.6942751575705117,
32
- "eval_runtime": 2.0863,
33
- "eval_samples_per_second": 8376.436,
34
- "eval_steps_per_second": 65.666,
35
- "step": 400
36
- },
37
- {
38
- "epoch": 0.55,
39
- "eval_accuracy": 0.7249942778667887,
40
- "eval_f1": 0.7236563504089994,
41
- "eval_loss": 0.5338560342788696,
42
- "eval_matthews_correlation": 0.45288472081496567,
43
- "eval_precision": 0.7284116969707025,
44
- "eval_recall": 0.7244900032133177,
45
- "eval_runtime": 2.0847,
46
- "eval_samples_per_second": 8382.839,
47
- "eval_steps_per_second": 65.716,
48
- "step": 600
49
- },
50
- {
51
- "epoch": 0.73,
52
- "eval_accuracy": 0.727454795147631,
53
- "eval_f1": 0.7271190497286429,
54
- "eval_loss": 0.5313814282417297,
55
- "eval_matthews_correlation": 0.45531538250312886,
56
- "eval_precision": 0.7280930565651094,
57
- "eval_recall": 0.7272231569276124,
58
- "eval_runtime": 2.0843,
59
- "eval_samples_per_second": 8384.453,
60
- "eval_steps_per_second": 65.728,
61
- "step": 800
62
- },
63
- {
64
- "epoch": 0.91,
65
- "eval_accuracy": 0.7303158617532616,
66
- "eval_f1": 0.7275747475999237,
67
- "eval_loss": 0.5319721102714539,
68
- "eval_matthews_correlation": 0.46789560676374625,
69
- "eval_precision": 0.7384098581673617,
70
- "eval_recall": 0.7295692599623227,
71
- "eval_runtime": 2.085,
72
- "eval_samples_per_second": 8381.713,
73
- "eval_steps_per_second": 65.707,
74
- "step": 1000
75
- },
76
- {
77
- "epoch": 1.1,
78
- "eval_accuracy": 0.7380407415884642,
79
- "eval_f1": 0.7380382404292021,
80
- "eval_loss": 0.5272664427757263,
81
- "eval_matthews_correlation": 0.4762546721569884,
82
- "eval_precision": 0.7381435065052877,
83
- "eval_recall": 0.7381111667497053,
84
- "eval_runtime": 2.0861,
85
- "eval_samples_per_second": 8377.25,
86
- "eval_steps_per_second": 65.672,
87
- "step": 1200
88
- },
89
- {
90
- "epoch": 1.28,
91
- "eval_accuracy": 0.7364385442893111,
92
- "eval_f1": 0.736433021138519,
93
- "eval_loss": 0.5285834074020386,
94
- "eval_matthews_correlation": 0.47309527398952067,
95
- "eval_precision": 0.736574658685002,
96
- "eval_recall": 0.7365206183909514,
97
- "eval_runtime": 2.0972,
98
- "eval_samples_per_second": 8332.96,
99
- "eval_steps_per_second": 65.325,
100
- "step": 1400
101
- },
102
- {
103
- "epoch": 1.46,
104
- "eval_accuracy": 0.7356946669718472,
105
- "eval_f1": 0.7355568209971208,
106
- "eval_loss": 0.524845540523529,
107
- "eval_matthews_correlation": 0.47250517415343696,
108
- "eval_precision": 0.7365847965721319,
109
- "eval_recall": 0.73592084406584,
110
- "eval_runtime": 2.0891,
111
- "eval_samples_per_second": 8365.186,
112
- "eval_steps_per_second": 65.577,
113
- "step": 1600
114
- },
115
- {
116
- "epoch": 1.65,
117
- "eval_accuracy": 0.7402723735408561,
118
- "eval_f1": 0.738616418688818,
119
- "eval_loss": 0.5155823230743408,
120
- "eval_matthews_correlation": 0.4849472607211828,
121
- "eval_precision": 0.7452896161327611,
122
- "eval_recall": 0.7396899728051398,
123
- "eval_runtime": 2.0895,
124
- "eval_samples_per_second": 8363.752,
125
- "eval_steps_per_second": 65.566,
126
- "step": 1800
127
- },
128
- {
129
- "epoch": 1.83,
130
- "eval_accuracy": 0.742790112153811,
131
- "eval_f1": 0.7427467082709358,
132
- "eval_loss": 0.5115640163421631,
133
- "eval_matthews_correlation": 0.4855319554250339,
134
- "eval_precision": 0.7427981041740035,
135
- "eval_recall": 0.7427338555019267,
136
- "eval_runtime": 2.0887,
137
- "eval_samples_per_second": 8366.987,
138
- "eval_steps_per_second": 65.592,
139
- "step": 2000
140
- },
141
- {
142
- "epoch": 2.01,
143
- "eval_accuracy": 0.7434195468070497,
144
- "eval_f1": 0.7432712646174982,
145
- "eval_loss": 0.517207145690918,
146
- "eval_matthews_correlation": 0.48806105010385226,
147
- "eval_precision": 0.7444059316502798,
148
- "eval_recall": 0.7436556950767057,
149
- "eval_runtime": 2.087,
150
- "eval_samples_per_second": 8373.626,
151
- "eval_steps_per_second": 65.644,
152
- "step": 2200
153
- },
154
- {
155
- "epoch": 2.2,
156
- "eval_accuracy": 0.7436484321355001,
157
- "eval_f1": 0.7434945812464957,
158
- "eval_loss": 0.5187065601348877,
159
- "eval_matthews_correlation": 0.4873991502151912,
160
- "eval_precision": 0.7438979101569678,
161
- "eval_recall": 0.7435014013420647,
162
- "eval_runtime": 2.0875,
163
- "eval_samples_per_second": 8371.903,
164
- "eval_steps_per_second": 65.63,
165
- "step": 2400
166
- },
167
- {
168
- "epoch": 2.38,
169
- "eval_accuracy": 0.7444495307850767,
170
- "eval_f1": 0.7444090259561158,
171
- "eval_loss": 0.5209320187568665,
172
- "eval_matthews_correlation": 0.48944325343888784,
173
- "eval_precision": 0.7448479511164695,
174
- "eval_recall": 0.7445953674969612,
175
- "eval_runtime": 2.0891,
176
- "eval_samples_per_second": 8365.525,
177
- "eval_steps_per_second": 65.58,
178
- "step": 2600
179
- },
180
- {
181
- "epoch": 2.56,
182
- "eval_accuracy": 0.7459372854200046,
183
- "eval_f1": 0.7451259594798789,
184
- "eval_loss": 0.5175760984420776,
185
- "eval_matthews_correlation": 0.49378781361272983,
186
- "eval_precision": 0.748257049515006,
187
- "eval_recall": 0.7455382489125468,
188
- "eval_runtime": 2.0886,
189
- "eval_samples_per_second": 8367.432,
190
- "eval_steps_per_second": 65.595,
191
- "step": 2800
192
- },
193
- {
194
- "epoch": 2.74,
195
- "eval_accuracy": 0.7489127946898604,
196
- "eval_f1": 0.7482889784661584,
197
- "eval_loss": 0.515264630317688,
198
- "eval_matthews_correlation": 0.4992191497597552,
199
- "eval_precision": 0.7506571663528661,
200
- "eval_recall": 0.7485663616894263,
201
- "eval_runtime": 2.0902,
202
- "eval_samples_per_second": 8360.847,
203
- "eval_steps_per_second": 65.543,
204
- "step": 3000
205
- },
206
- {
207
- "epoch": 2.93,
208
- "eval_accuracy": 0.7493133440146487,
209
- "eval_f1": 0.7492244662983006,
210
- "eval_loss": 0.5168305039405823,
211
- "eval_matthews_correlation": 0.4986280479962233,
212
- "eval_precision": 0.7494166877586124,
213
- "eval_recall": 0.7492114024956003,
214
- "eval_runtime": 2.0916,
215
- "eval_samples_per_second": 8355.395,
216
- "eval_steps_per_second": 65.501,
217
- "step": 3200
218
- }
219
- ],
220
- "logging_steps": 100000,
221
- "max_steps": 3279,
222
- "num_train_epochs": 3,
223
- "save_steps": 200,
224
- "total_flos": 2.103860738916352e+16,
225
- "trial_name": null,
226
- "trial_params": null
227
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
human_ocr_ensembl/base_3072/checkpoint-3200/training_args.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:074c5d4b16f394462a2ed140114255544a90f5962dd86dcd667bda481a83bcaa
3
- size 5265
 
 
 
 
human_ocr_ensembl/base_3072/results/base_3072_hg38_BPE_3e-5_human_ocr_ensembl_seed42/eval_results.json DELETED
@@ -1 +0,0 @@
1
- {"eval_loss": 0.5223153829574585, "eval_accuracy": 0.7482833600366217, "eval_f1": 0.7481930284444026, "eval_matthews_correlation": 0.49715136303222757, "eval_precision": 0.7487887865364967, "eval_recall": 0.7483627590351063, "eval_runtime": 2.084, "eval_samples_per_second": 8385.667, "eval_steps_per_second": 65.738, "epoch": 3.0}
 
 
human_ocr_ensembl/base_4096/checkpoint-2800/config.json DELETED
@@ -1,27 +0,0 @@
1
- {
2
- "_name_or_path": "/root/NaN/dna-tokenizer/pretrain/models/base_4096/checkpoint-100000",
3
- "architectures": [
4
- "BertForSequenceClassification"
5
- ],
6
- "attention_probs_dropout_prob": 0.1,
7
- "classifier_dropout": null,
8
- "hidden_act": "gelu",
9
- "hidden_dropout_prob": 0.1,
10
- "hidden_size": 768,
11
- "initializer_range": 0.02,
12
- "intermediate_size": 3072,
13
- "layer_norm_eps": 1e-12,
14
- "max_length": 512,
15
- "max_position_embeddings": 512,
16
- "model_type": "bert",
17
- "num_attention_heads": 12,
18
- "num_hidden_layers": 12,
19
- "pad_token_id": 0,
20
- "position_embedding_type": "absolute",
21
- "problem_type": "single_label_classification",
22
- "torch_dtype": "float32",
23
- "transformers_version": "4.35.2",
24
- "type_vocab_size": 2,
25
- "use_cache": true,
26
- "vocab_size": 4096
27
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
human_ocr_ensembl/base_4096/checkpoint-2800/model.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:e890440bca514d6ea88df2adca78949197902e2646df8c9c24ae2c7c7360c791
3
- size 356777880
 
 
 
 
human_ocr_ensembl/base_4096/checkpoint-2800/optimizer.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:f4609e64e050219e62593afb2f9fd4b1090006128d6ca4931df68ea714de8845
3
- size 713677451
 
 
 
 
human_ocr_ensembl/base_4096/checkpoint-2800/rng_state.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:5eb4f754a0414db80041f9f91e8cbf397684de0d3ca59ef63abd1e17e5e76c1f
3
- size 14709
 
 
 
 
human_ocr_ensembl/base_4096/checkpoint-2800/scheduler.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:f48bffb25980b1ddc9943acd0f93ae0e40f503ed6b7b3e2f666408c6a8e8fc33
3
- size 1465
 
 
 
 
human_ocr_ensembl/base_4096/checkpoint-2800/special_tokens_map.json DELETED
@@ -1,7 +0,0 @@
1
- {
2
- "cls_token": "[CLS]",
3
- "mask_token": "[MASK]",
4
- "pad_token": "[PAD]",
5
- "sep_token": "[SEP]",
6
- "unk_token": "[UNK]"
7
- }
 
 
 
 
 
 
 
 
human_ocr_ensembl/base_4096/checkpoint-2800/tokenizer.json DELETED
The diff for this file is too large to render. See raw diff
 
human_ocr_ensembl/base_4096/checkpoint-2800/tokenizer_config.json DELETED
@@ -1,56 +0,0 @@
1
- {
2
- "added_tokens_decoder": {
3
- "0": {
4
- "content": "[PAD]",
5
- "lstrip": false,
6
- "normalized": false,
7
- "rstrip": false,
8
- "single_word": false,
9
- "special": true
10
- },
11
- "1": {
12
- "content": "[UNK]",
13
- "lstrip": false,
14
- "normalized": false,
15
- "rstrip": false,
16
- "single_word": false,
17
- "special": true
18
- },
19
- "2": {
20
- "content": "[CLS]",
21
- "lstrip": false,
22
- "normalized": false,
23
- "rstrip": false,
24
- "single_word": false,
25
- "special": true
26
- },
27
- "3": {
28
- "content": "[SEP]",
29
- "lstrip": false,
30
- "normalized": false,
31
- "rstrip": false,
32
- "single_word": false,
33
- "special": true
34
- },
35
- "4": {
36
- "content": "[MASK]",
37
- "lstrip": false,
38
- "normalized": false,
39
- "rstrip": false,
40
- "single_word": false,
41
- "special": true
42
- }
43
- },
44
- "cache_dir": null,
45
- "clean_up_tokenization_spaces": true,
46
- "cls_token": "[CLS]",
47
- "mask_token": "[MASK]",
48
- "model_max_length": 100,
49
- "pad_token": "[PAD]",
50
- "padding_side": "right",
51
- "sep_token": "[SEP]",
52
- "tokenizer_class": "PreTrainedTokenizerFast",
53
- "trust_remote_code": true,
54
- "unk_token": "[UNK]",
55
- "use_fast": true
56
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
human_ocr_ensembl/base_4096/checkpoint-2800/trainer_state.json DELETED
@@ -1,201 +0,0 @@
1
- {
2
- "best_metric": null,
3
- "best_model_checkpoint": null,
4
- "epoch": 2.5617566331198534,
5
- "eval_steps": 200,
6
- "global_step": 2800,
7
- "is_hyper_param_search": false,
8
- "is_local_process_zero": true,
9
- "is_world_process_zero": true,
10
- "log_history": [
11
- {
12
- "epoch": 0.18,
13
- "eval_accuracy": 0.6910048065918974,
14
- "eval_f1": 0.6867953797121131,
15
- "eval_loss": 0.5721011161804199,
16
- "eval_matthews_correlation": 0.39016412760658553,
17
- "eval_precision": 0.7001605908873203,
18
- "eval_recall": 0.6901323904423126,
19
- "eval_runtime": 2.1037,
20
- "eval_samples_per_second": 8307.24,
21
- "eval_steps_per_second": 65.123,
22
- "step": 200
23
- },
24
- {
25
- "epoch": 0.37,
26
- "eval_accuracy": 0.696955825131609,
27
- "eval_f1": 0.6937934283608221,
28
- "eval_loss": 0.5714722275733948,
29
- "eval_matthews_correlation": 0.40471588399833497,
30
- "eval_precision": 0.7070190047052158,
31
- "eval_recall": 0.6978018237912373,
32
- "eval_runtime": 2.1035,
33
- "eval_samples_per_second": 8308.029,
34
- "eval_steps_per_second": 65.129,
35
- "step": 400
36
- },
37
- {
38
- "epoch": 0.55,
39
- "eval_accuracy": 0.7172121766994736,
40
- "eval_f1": 0.7141046145807004,
41
- "eval_loss": 0.5357879400253296,
42
- "eval_matthews_correlation": 0.4418363941437564,
43
- "eval_precision": 0.7254957397099481,
44
- "eval_recall": 0.7164335781255389,
45
- "eval_runtime": 2.0923,
46
- "eval_samples_per_second": 8352.684,
47
- "eval_steps_per_second": 65.479,
48
- "step": 600
49
- },
50
- {
51
- "epoch": 0.73,
52
- "eval_accuracy": 0.7287708857862211,
53
- "eval_f1": 0.7276482683515697,
54
- "eval_loss": 0.5328035950660706,
55
- "eval_matthews_correlation": 0.45995641554970856,
56
- "eval_precision": 0.7316592914223471,
57
- "eval_recall": 0.7283093232591662,
58
- "eval_runtime": 2.093,
59
- "eval_samples_per_second": 8349.865,
60
- "eval_steps_per_second": 65.457,
61
- "step": 800
62
- },
63
- {
64
- "epoch": 0.91,
65
- "eval_accuracy": 0.7316319523918516,
66
- "eval_f1": 0.7297143679036964,
67
- "eval_loss": 0.5285016298294067,
68
- "eval_matthews_correlation": 0.4680835263721031,
69
- "eval_precision": 0.7371108407581257,
70
- "eval_recall": 0.7310124106519102,
71
- "eval_runtime": 2.0907,
72
- "eval_samples_per_second": 8358.782,
73
- "eval_steps_per_second": 65.527,
74
- "step": 1000
75
- },
76
- {
77
- "epoch": 1.1,
78
- "eval_accuracy": 0.7360952162966354,
79
- "eval_f1": 0.7360730935237626,
80
- "eval_loss": 0.5258046388626099,
81
- "eval_matthews_correlation": 0.47257442532896854,
82
- "eval_precision": 0.7363610290787824,
83
- "eval_recall": 0.7362134193033224,
84
- "eval_runtime": 2.0907,
85
- "eval_samples_per_second": 8359.004,
86
- "eval_steps_per_second": 65.529,
87
- "step": 1200
88
- },
89
- {
90
- "epoch": 1.28,
91
- "eval_accuracy": 0.7324902723735408,
92
- "eval_f1": 0.7320882177333161,
93
- "eval_loss": 0.5314179062843323,
94
- "eval_matthews_correlation": 0.4673756340020734,
95
- "eval_precision": 0.7345368354436258,
96
- "eval_recall": 0.7328418719874656,
97
- "eval_runtime": 2.0874,
98
- "eval_samples_per_second": 8372.02,
99
- "eval_steps_per_second": 65.631,
100
- "step": 1400
101
- },
102
- {
103
- "epoch": 1.46,
104
- "eval_accuracy": 0.7369535362783246,
105
- "eval_f1": 0.7369259368540753,
106
- "eval_loss": 0.5230308175086975,
107
- "eval_matthews_correlation": 0.47433705732533343,
108
- "eval_precision": 0.7372567115919358,
109
- "eval_recall": 0.7370803785089889,
110
- "eval_runtime": 2.0869,
111
- "eval_samples_per_second": 8373.996,
112
- "eval_steps_per_second": 65.646,
113
- "step": 1600
114
- },
115
- {
116
- "epoch": 1.65,
117
- "eval_accuracy": 0.7384985122453651,
118
- "eval_f1": 0.7364573406181625,
119
- "eval_loss": 0.5120736956596375,
120
- "eval_matthews_correlation": 0.4825524030233992,
121
- "eval_precision": 0.7447521947091545,
122
- "eval_recall": 0.7378495746895823,
123
- "eval_runtime": 2.087,
124
- "eval_samples_per_second": 8373.798,
125
- "eval_steps_per_second": 65.645,
126
- "step": 1800
127
- },
128
- {
129
- "epoch": 1.83,
130
- "eval_accuracy": 0.7433051041428245,
131
- "eval_f1": 0.7430220511355246,
132
- "eval_loss": 0.5135318636894226,
133
- "eval_matthews_correlation": 0.48699703656933685,
134
- "eval_precision": 0.7439083588851283,
135
- "eval_recall": 0.7430893663416968,
136
- "eval_runtime": 2.0857,
137
- "eval_samples_per_second": 8379.027,
138
- "eval_steps_per_second": 65.686,
139
- "step": 2000
140
- },
141
- {
142
- "epoch": 2.01,
143
- "eval_accuracy": 0.7444495307850767,
144
- "eval_f1": 0.7440317856809506,
145
- "eval_loss": 0.5141501426696777,
146
- "eval_matthews_correlation": 0.4915518994120194,
147
- "eval_precision": 0.74674069083228,
148
- "eval_recall": 0.7448149806589922,
149
- "eval_runtime": 2.0881,
150
- "eval_samples_per_second": 8369.321,
151
- "eval_steps_per_second": 65.61,
152
- "step": 2200
153
- },
154
- {
155
- "epoch": 2.2,
156
- "eval_accuracy": 0.7462233920805676,
157
- "eval_f1": 0.7462187179801155,
158
- "eval_loss": 0.5196935534477234,
159
- "eval_matthews_correlation": 0.49265917852699986,
160
- "eval_precision": 0.7463553642599894,
161
- "eval_recall": 0.7463038169637264,
162
- "eval_runtime": 2.0917,
163
- "eval_samples_per_second": 8354.739,
164
- "eval_steps_per_second": 65.495,
165
- "step": 2400
166
- },
167
- {
168
- "epoch": 2.38,
169
- "eval_accuracy": 0.7490272373540856,
170
- "eval_f1": 0.7490262874013512,
171
- "eval_loss": 0.5187999606132507,
172
- "eval_matthews_correlation": 0.4981056299811567,
173
- "eval_precision": 0.7490470478354829,
174
- "eval_recall": 0.7490585822792231,
175
- "eval_runtime": 2.092,
176
- "eval_samples_per_second": 8353.748,
177
- "eval_steps_per_second": 65.488,
178
- "step": 2600
179
- },
180
- {
181
- "epoch": 2.56,
182
- "eval_accuracy": 0.7502288853284504,
183
- "eval_f1": 0.7500893125061421,
184
- "eval_loss": 0.514468789100647,
185
- "eval_matthews_correlation": 0.5005505338315976,
186
- "eval_precision": 0.7504616294280523,
187
- "eval_recall": 0.7500890430714365,
188
- "eval_runtime": 2.0929,
189
- "eval_samples_per_second": 8350.285,
190
- "eval_steps_per_second": 65.461,
191
- "step": 2800
192
- }
193
- ],
194
- "logging_steps": 100000,
195
- "max_steps": 3279,
196
- "num_train_epochs": 3,
197
- "save_steps": 200,
198
- "total_flos": 1.8407496733425664e+16,
199
- "trial_name": null,
200
- "trial_params": null
201
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
human_ocr_ensembl/base_4096/checkpoint-2800/training_args.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:0ea21f800f5b90d2324bd49cc200761eb2aaa74c2eb1b302723e6327f540d7b3
3
- size 5265
 
 
 
 
human_ocr_ensembl/base_4096/checkpoint-3000/config.json DELETED
@@ -1,27 +0,0 @@
1
- {
2
- "_name_or_path": "/root/NaN/dna-tokenizer/pretrain/models/base_4096/checkpoint-100000",
3
- "architectures": [
4
- "BertForSequenceClassification"
5
- ],
6
- "attention_probs_dropout_prob": 0.1,
7
- "classifier_dropout": null,
8
- "hidden_act": "gelu",
9
- "hidden_dropout_prob": 0.1,
10
- "hidden_size": 768,
11
- "initializer_range": 0.02,
12
- "intermediate_size": 3072,
13
- "layer_norm_eps": 1e-12,
14
- "max_length": 512,
15
- "max_position_embeddings": 512,
16
- "model_type": "bert",
17
- "num_attention_heads": 12,
18
- "num_hidden_layers": 12,
19
- "pad_token_id": 0,
20
- "position_embedding_type": "absolute",
21
- "problem_type": "single_label_classification",
22
- "torch_dtype": "float32",
23
- "transformers_version": "4.35.2",
24
- "type_vocab_size": 2,
25
- "use_cache": true,
26
- "vocab_size": 4096
27
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
human_ocr_ensembl/base_4096/checkpoint-3000/model.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:a17ad1ae66605310be0b64109c296d64e804953467c9a7a5fb81334ce4e9d131
3
- size 356777880
 
 
 
 
human_ocr_ensembl/base_4096/checkpoint-3000/optimizer.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:1c4dc4c177139592facc29e9d4c1059ebc3e2731d9f7898c71201a17b26bbc23
3
- size 713677451
 
 
 
 
human_ocr_ensembl/base_4096/checkpoint-3000/rng_state.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:45efb8dc53020d0c80c0b48b34097b0336f1926c50380ac1b67ecece13fd0e52
3
- size 14709
 
 
 
 
human_ocr_ensembl/base_4096/checkpoint-3000/scheduler.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:e80689286bea2a0662cb87fde9669f458e4a8e334c380b87fbb028f5a928a685
3
- size 1465
 
 
 
 
human_ocr_ensembl/base_4096/checkpoint-3000/special_tokens_map.json DELETED
@@ -1,7 +0,0 @@
1
- {
2
- "cls_token": "[CLS]",
3
- "mask_token": "[MASK]",
4
- "pad_token": "[PAD]",
5
- "sep_token": "[SEP]",
6
- "unk_token": "[UNK]"
7
- }
 
 
 
 
 
 
 
 
human_ocr_ensembl/base_4096/checkpoint-3000/tokenizer.json DELETED
The diff for this file is too large to render. See raw diff
 
human_ocr_ensembl/base_4096/checkpoint-3000/tokenizer_config.json DELETED
@@ -1,56 +0,0 @@
1
- {
2
- "added_tokens_decoder": {
3
- "0": {
4
- "content": "[PAD]",
5
- "lstrip": false,
6
- "normalized": false,
7
- "rstrip": false,
8
- "single_word": false,
9
- "special": true
10
- },
11
- "1": {
12
- "content": "[UNK]",
13
- "lstrip": false,
14
- "normalized": false,
15
- "rstrip": false,
16
- "single_word": false,
17
- "special": true
18
- },
19
- "2": {
20
- "content": "[CLS]",
21
- "lstrip": false,
22
- "normalized": false,
23
- "rstrip": false,
24
- "single_word": false,
25
- "special": true
26
- },
27
- "3": {
28
- "content": "[SEP]",
29
- "lstrip": false,
30
- "normalized": false,
31
- "rstrip": false,
32
- "single_word": false,
33
- "special": true
34
- },
35
- "4": {
36
- "content": "[MASK]",
37
- "lstrip": false,
38
- "normalized": false,
39
- "rstrip": false,
40
- "single_word": false,
41
- "special": true
42
- }
43
- },
44
- "cache_dir": null,
45
- "clean_up_tokenization_spaces": true,
46
- "cls_token": "[CLS]",
47
- "mask_token": "[MASK]",
48
- "model_max_length": 100,
49
- "pad_token": "[PAD]",
50
- "padding_side": "right",
51
- "sep_token": "[SEP]",
52
- "tokenizer_class": "PreTrainedTokenizerFast",
53
- "trust_remote_code": true,
54
- "unk_token": "[UNK]",
55
- "use_fast": true
56
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
human_ocr_ensembl/base_4096/checkpoint-3000/trainer_state.json DELETED
@@ -1,214 +0,0 @@
1
- {
2
- "best_metric": null,
3
- "best_model_checkpoint": null,
4
- "epoch": 2.7447392497712717,
5
- "eval_steps": 200,
6
- "global_step": 3000,
7
- "is_hyper_param_search": false,
8
- "is_local_process_zero": true,
9
- "is_world_process_zero": true,
10
- "log_history": [
11
- {
12
- "epoch": 0.18,
13
- "eval_accuracy": 0.6910048065918974,
14
- "eval_f1": 0.6867953797121131,
15
- "eval_loss": 0.5721011161804199,
16
- "eval_matthews_correlation": 0.39016412760658553,
17
- "eval_precision": 0.7001605908873203,
18
- "eval_recall": 0.6901323904423126,
19
- "eval_runtime": 2.1037,
20
- "eval_samples_per_second": 8307.24,
21
- "eval_steps_per_second": 65.123,
22
- "step": 200
23
- },
24
- {
25
- "epoch": 0.37,
26
- "eval_accuracy": 0.696955825131609,
27
- "eval_f1": 0.6937934283608221,
28
- "eval_loss": 0.5714722275733948,
29
- "eval_matthews_correlation": 0.40471588399833497,
30
- "eval_precision": 0.7070190047052158,
31
- "eval_recall": 0.6978018237912373,
32
- "eval_runtime": 2.1035,
33
- "eval_samples_per_second": 8308.029,
34
- "eval_steps_per_second": 65.129,
35
- "step": 400
36
- },
37
- {
38
- "epoch": 0.55,
39
- "eval_accuracy": 0.7172121766994736,
40
- "eval_f1": 0.7141046145807004,
41
- "eval_loss": 0.5357879400253296,
42
- "eval_matthews_correlation": 0.4418363941437564,
43
- "eval_precision": 0.7254957397099481,
44
- "eval_recall": 0.7164335781255389,
45
- "eval_runtime": 2.0923,
46
- "eval_samples_per_second": 8352.684,
47
- "eval_steps_per_second": 65.479,
48
- "step": 600
49
- },
50
- {
51
- "epoch": 0.73,
52
- "eval_accuracy": 0.7287708857862211,
53
- "eval_f1": 0.7276482683515697,
54
- "eval_loss": 0.5328035950660706,
55
- "eval_matthews_correlation": 0.45995641554970856,
56
- "eval_precision": 0.7316592914223471,
57
- "eval_recall": 0.7283093232591662,
58
- "eval_runtime": 2.093,
59
- "eval_samples_per_second": 8349.865,
60
- "eval_steps_per_second": 65.457,
61
- "step": 800
62
- },
63
- {
64
- "epoch": 0.91,
65
- "eval_accuracy": 0.7316319523918516,
66
- "eval_f1": 0.7297143679036964,
67
- "eval_loss": 0.5285016298294067,
68
- "eval_matthews_correlation": 0.4680835263721031,
69
- "eval_precision": 0.7371108407581257,
70
- "eval_recall": 0.7310124106519102,
71
- "eval_runtime": 2.0907,
72
- "eval_samples_per_second": 8358.782,
73
- "eval_steps_per_second": 65.527,
74
- "step": 1000
75
- },
76
- {
77
- "epoch": 1.1,
78
- "eval_accuracy": 0.7360952162966354,
79
- "eval_f1": 0.7360730935237626,
80
- "eval_loss": 0.5258046388626099,
81
- "eval_matthews_correlation": 0.47257442532896854,
82
- "eval_precision": 0.7363610290787824,
83
- "eval_recall": 0.7362134193033224,
84
- "eval_runtime": 2.0907,
85
- "eval_samples_per_second": 8359.004,
86
- "eval_steps_per_second": 65.529,
87
- "step": 1200
88
- },
89
- {
90
- "epoch": 1.28,
91
- "eval_accuracy": 0.7324902723735408,
92
- "eval_f1": 0.7320882177333161,
93
- "eval_loss": 0.5314179062843323,
94
- "eval_matthews_correlation": 0.4673756340020734,
95
- "eval_precision": 0.7345368354436258,
96
- "eval_recall": 0.7328418719874656,
97
- "eval_runtime": 2.0874,
98
- "eval_samples_per_second": 8372.02,
99
- "eval_steps_per_second": 65.631,
100
- "step": 1400
101
- },
102
- {
103
- "epoch": 1.46,
104
- "eval_accuracy": 0.7369535362783246,
105
- "eval_f1": 0.7369259368540753,
106
- "eval_loss": 0.5230308175086975,
107
- "eval_matthews_correlation": 0.47433705732533343,
108
- "eval_precision": 0.7372567115919358,
109
- "eval_recall": 0.7370803785089889,
110
- "eval_runtime": 2.0869,
111
- "eval_samples_per_second": 8373.996,
112
- "eval_steps_per_second": 65.646,
113
- "step": 1600
114
- },
115
- {
116
- "epoch": 1.65,
117
- "eval_accuracy": 0.7384985122453651,
118
- "eval_f1": 0.7364573406181625,
119
- "eval_loss": 0.5120736956596375,
120
- "eval_matthews_correlation": 0.4825524030233992,
121
- "eval_precision": 0.7447521947091545,
122
- "eval_recall": 0.7378495746895823,
123
- "eval_runtime": 2.087,
124
- "eval_samples_per_second": 8373.798,
125
- "eval_steps_per_second": 65.645,
126
- "step": 1800
127
- },
128
- {
129
- "epoch": 1.83,
130
- "eval_accuracy": 0.7433051041428245,
131
- "eval_f1": 0.7430220511355246,
132
- "eval_loss": 0.5135318636894226,
133
- "eval_matthews_correlation": 0.48699703656933685,
134
- "eval_precision": 0.7439083588851283,
135
- "eval_recall": 0.7430893663416968,
136
- "eval_runtime": 2.0857,
137
- "eval_samples_per_second": 8379.027,
138
- "eval_steps_per_second": 65.686,
139
- "step": 2000
140
- },
141
- {
142
- "epoch": 2.01,
143
- "eval_accuracy": 0.7444495307850767,
144
- "eval_f1": 0.7440317856809506,
145
- "eval_loss": 0.5141501426696777,
146
- "eval_matthews_correlation": 0.4915518994120194,
147
- "eval_precision": 0.74674069083228,
148
- "eval_recall": 0.7448149806589922,
149
- "eval_runtime": 2.0881,
150
- "eval_samples_per_second": 8369.321,
151
- "eval_steps_per_second": 65.61,
152
- "step": 2200
153
- },
154
- {
155
- "epoch": 2.2,
156
- "eval_accuracy": 0.7462233920805676,
157
- "eval_f1": 0.7462187179801155,
158
- "eval_loss": 0.5196935534477234,
159
- "eval_matthews_correlation": 0.49265917852699986,
160
- "eval_precision": 0.7463553642599894,
161
- "eval_recall": 0.7463038169637264,
162
- "eval_runtime": 2.0917,
163
- "eval_samples_per_second": 8354.739,
164
- "eval_steps_per_second": 65.495,
165
- "step": 2400
166
- },
167
- {
168
- "epoch": 2.38,
169
- "eval_accuracy": 0.7490272373540856,
170
- "eval_f1": 0.7490262874013512,
171
- "eval_loss": 0.5187999606132507,
172
- "eval_matthews_correlation": 0.4981056299811567,
173
- "eval_precision": 0.7490470478354829,
174
- "eval_recall": 0.7490585822792231,
175
- "eval_runtime": 2.092,
176
- "eval_samples_per_second": 8353.748,
177
- "eval_steps_per_second": 65.488,
178
- "step": 2600
179
- },
180
- {
181
- "epoch": 2.56,
182
- "eval_accuracy": 0.7502288853284504,
183
- "eval_f1": 0.7500893125061421,
184
- "eval_loss": 0.514468789100647,
185
- "eval_matthews_correlation": 0.5005505338315976,
186
- "eval_precision": 0.7504616294280523,
187
- "eval_recall": 0.7500890430714365,
188
- "eval_runtime": 2.0929,
189
- "eval_samples_per_second": 8350.285,
190
- "eval_steps_per_second": 65.461,
191
- "step": 2800
192
- },
193
- {
194
- "epoch": 2.74,
195
- "eval_accuracy": 0.7515449759670405,
196
- "eval_f1": 0.7511901042005492,
197
- "eval_loss": 0.5151657462120056,
198
- "eval_matthews_correlation": 0.5037250185775379,
199
- "eval_precision": 0.7524327048528103,
200
- "eval_recall": 0.7512936016837559,
201
- "eval_runtime": 2.0902,
202
- "eval_samples_per_second": 8360.747,
203
- "eval_steps_per_second": 65.543,
204
- "step": 3000
205
- }
206
- ],
207
- "logging_steps": 100000,
208
- "max_steps": 3279,
209
- "num_train_epochs": 3,
210
- "save_steps": 200,
211
- "total_flos": 1.972305206129459e+16,
212
- "trial_name": null,
213
- "trial_params": null
214
- }