samtuckervegan commited on
Commit
cd4e30a
·
verified ·
1 Parent(s): 76cd93d

Upload folder using huggingface_hub

Browse files
README.md ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ ---
3
+ tags:
4
+ - autotrain
5
+ - text-regression
6
+ base_model: allenai/longformer-base-4096
7
+ widget:
8
+ - text: "I love AutoTrain"
9
+ datasets:
10
+ - samtuckervegan/text_performance
11
+ ---
12
+
13
+ # Model Trained Using AutoTrain
14
+
15
+ - Problem type: Text Regression
16
+
17
+ ## Validation Metrics
18
+ loss: 0.03396870195865631
19
+
20
+ mse: 0.03396843746304512
21
+
22
+ mae: 0.14112502336502075
23
+
24
+ r2: 0.2725181579589844
25
+
26
+ rmse: 0.18430528332916862
27
+
28
+ explained_variance: 0.273209810256958
checkpoint-7374/config.json ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "allenai/longformer-base-4096",
3
+ "_num_labels": 1,
4
+ "architectures": [
5
+ "LongformerForSequenceClassification"
6
+ ],
7
+ "attention_mode": "longformer",
8
+ "attention_probs_dropout_prob": 0.1,
9
+ "attention_window": [
10
+ 512,
11
+ 512,
12
+ 512,
13
+ 512,
14
+ 512,
15
+ 512,
16
+ 512,
17
+ 512,
18
+ 512,
19
+ 512,
20
+ 512,
21
+ 512
22
+ ],
23
+ "bos_token_id": 0,
24
+ "eos_token_id": 2,
25
+ "gradient_checkpointing": false,
26
+ "hidden_act": "gelu",
27
+ "hidden_dropout_prob": 0.1,
28
+ "hidden_size": 768,
29
+ "id2label": {
30
+ "0": "target"
31
+ },
32
+ "ignore_attention_mask": false,
33
+ "initializer_range": 0.02,
34
+ "intermediate_size": 3072,
35
+ "label2id": {
36
+ "target": 0
37
+ },
38
+ "layer_norm_eps": 1e-05,
39
+ "max_position_embeddings": 4098,
40
+ "model_type": "longformer",
41
+ "num_attention_heads": 12,
42
+ "num_hidden_layers": 12,
43
+ "onnx_export": false,
44
+ "pad_token_id": 1,
45
+ "problem_type": "regression",
46
+ "sep_token_id": 2,
47
+ "torch_dtype": "float32",
48
+ "transformers_version": "4.48.0",
49
+ "type_vocab_size": 1,
50
+ "vocab_size": 50265
51
+ }
checkpoint-7374/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9242fc9cfb1524a85761f4f00c1b4ff8956fb34fb58246044b0f95b84eb92733
3
+ size 594675108
checkpoint-7374/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7aad989632e7911c595b9e247281c744cbc55dc21b17d47ccb9938d793f4ccb9
3
+ size 1189510202
checkpoint-7374/rng_state_0.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1b34b08ccbb3da22551dde002057c59106d0ab0be2e59f3c5fc70f8d7c2bc8ca
3
+ size 15024
checkpoint-7374/rng_state_1.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:17d49d11bbb7b0dae6eaa111a9d082af395d3eebdb2f514f692df6e982aa7bf3
3
+ size 15024
checkpoint-7374/rng_state_2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9722302a3709aaaafc5512910e8e745ab2112262187e1023f792897b6426dfe5
3
+ size 15024
checkpoint-7374/rng_state_3.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5ef783c4497f7e80f3002a7e0e0113ff3604fff25889ca37045e7e49c7c867c1
3
+ size 15024
checkpoint-7374/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:882b9e52894f599097d083730fb17de039c02dd7e0c5be59a4f3eebc3827e542
3
+ size 1064
checkpoint-7374/trainer_state.json ADDED
@@ -0,0 +1,2139 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.03396870195865631,
3
+ "best_model_checkpoint": "text-performance-longformer/checkpoint-7374",
4
+ "epoch": 3.0,
5
+ "eval_steps": 500,
6
+ "global_step": 7374,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.01017087062652563,
13
+ "grad_norm": 13.638067245483398,
14
+ "learning_rate": 1.6260162601626018e-06,
15
+ "loss": 0.3817,
16
+ "step": 25
17
+ },
18
+ {
19
+ "epoch": 0.02034174125305126,
20
+ "grad_norm": 6.143240928649902,
21
+ "learning_rate": 3.3197831978319785e-06,
22
+ "loss": 0.2287,
23
+ "step": 50
24
+ },
25
+ {
26
+ "epoch": 0.030512611879576892,
27
+ "grad_norm": 1.5986428260803223,
28
+ "learning_rate": 5.013550135501355e-06,
29
+ "loss": 0.0691,
30
+ "step": 75
31
+ },
32
+ {
33
+ "epoch": 0.04068348250610252,
34
+ "grad_norm": 0.9884291887283325,
35
+ "learning_rate": 6.707317073170733e-06,
36
+ "loss": 0.0543,
37
+ "step": 100
38
+ },
39
+ {
40
+ "epoch": 0.050854353132628156,
41
+ "grad_norm": 3.724142074584961,
42
+ "learning_rate": 8.401084010840109e-06,
43
+ "loss": 0.0619,
44
+ "step": 125
45
+ },
46
+ {
47
+ "epoch": 0.061025223759153785,
48
+ "grad_norm": 2.291118860244751,
49
+ "learning_rate": 1.0094850948509485e-05,
50
+ "loss": 0.0523,
51
+ "step": 150
52
+ },
53
+ {
54
+ "epoch": 0.07119609438567942,
55
+ "grad_norm": 2.3093833923339844,
56
+ "learning_rate": 1.1788617886178862e-05,
57
+ "loss": 0.0541,
58
+ "step": 175
59
+ },
60
+ {
61
+ "epoch": 0.08136696501220504,
62
+ "grad_norm": 3.3519299030303955,
63
+ "learning_rate": 1.348238482384824e-05,
64
+ "loss": 0.0515,
65
+ "step": 200
66
+ },
67
+ {
68
+ "epoch": 0.09153783563873068,
69
+ "grad_norm": 2.0906357765197754,
70
+ "learning_rate": 1.5176151761517615e-05,
71
+ "loss": 0.0529,
72
+ "step": 225
73
+ },
74
+ {
75
+ "epoch": 0.10170870626525631,
76
+ "grad_norm": 1.1683790683746338,
77
+ "learning_rate": 1.6802168021680217e-05,
78
+ "loss": 0.0573,
79
+ "step": 250
80
+ },
81
+ {
82
+ "epoch": 0.11187957689178193,
83
+ "grad_norm": 1.7971270084381104,
84
+ "learning_rate": 1.8495934959349594e-05,
85
+ "loss": 0.0495,
86
+ "step": 275
87
+ },
88
+ {
89
+ "epoch": 0.12205044751830757,
90
+ "grad_norm": 3.8092596530914307,
91
+ "learning_rate": 2.018970189701897e-05,
92
+ "loss": 0.0462,
93
+ "step": 300
94
+ },
95
+ {
96
+ "epoch": 0.1322213181448332,
97
+ "grad_norm": 2.3589837551116943,
98
+ "learning_rate": 2.1883468834688347e-05,
99
+ "loss": 0.044,
100
+ "step": 325
101
+ },
102
+ {
103
+ "epoch": 0.14239218877135884,
104
+ "grad_norm": 1.4069643020629883,
105
+ "learning_rate": 2.3577235772357724e-05,
106
+ "loss": 0.0458,
107
+ "step": 350
108
+ },
109
+ {
110
+ "epoch": 0.15256305939788445,
111
+ "grad_norm": 0.8279024362564087,
112
+ "learning_rate": 2.5271002710027104e-05,
113
+ "loss": 0.0457,
114
+ "step": 375
115
+ },
116
+ {
117
+ "epoch": 0.16273393002441008,
118
+ "grad_norm": 0.7554256916046143,
119
+ "learning_rate": 2.696476964769648e-05,
120
+ "loss": 0.0431,
121
+ "step": 400
122
+ },
123
+ {
124
+ "epoch": 0.17290480065093572,
125
+ "grad_norm": 0.7221837043762207,
126
+ "learning_rate": 2.8658536585365854e-05,
127
+ "loss": 0.0455,
128
+ "step": 425
129
+ },
130
+ {
131
+ "epoch": 0.18307567127746135,
132
+ "grad_norm": 1.551680088043213,
133
+ "learning_rate": 3.035230352303523e-05,
134
+ "loss": 0.0443,
135
+ "step": 450
136
+ },
137
+ {
138
+ "epoch": 0.193246541903987,
139
+ "grad_norm": 1.2256766557693481,
140
+ "learning_rate": 3.204607046070461e-05,
141
+ "loss": 0.0423,
142
+ "step": 475
143
+ },
144
+ {
145
+ "epoch": 0.20341741253051263,
146
+ "grad_norm": 1.9404680728912354,
147
+ "learning_rate": 3.373983739837399e-05,
148
+ "loss": 0.0454,
149
+ "step": 500
150
+ },
151
+ {
152
+ "epoch": 0.21358828315703823,
153
+ "grad_norm": 1.481037974357605,
154
+ "learning_rate": 3.5433604336043364e-05,
155
+ "loss": 0.0549,
156
+ "step": 525
157
+ },
158
+ {
159
+ "epoch": 0.22375915378356387,
160
+ "grad_norm": 1.0439728498458862,
161
+ "learning_rate": 3.712737127371274e-05,
162
+ "loss": 0.0474,
163
+ "step": 550
164
+ },
165
+ {
166
+ "epoch": 0.2339300244100895,
167
+ "grad_norm": 2.524623394012451,
168
+ "learning_rate": 3.882113821138211e-05,
169
+ "loss": 0.0442,
170
+ "step": 575
171
+ },
172
+ {
173
+ "epoch": 0.24410089503661514,
174
+ "grad_norm": 0.46617817878723145,
175
+ "learning_rate": 4.051490514905149e-05,
176
+ "loss": 0.0416,
177
+ "step": 600
178
+ },
179
+ {
180
+ "epoch": 0.25427176566314075,
181
+ "grad_norm": 0.9644585251808167,
182
+ "learning_rate": 4.220867208672087e-05,
183
+ "loss": 0.0459,
184
+ "step": 625
185
+ },
186
+ {
187
+ "epoch": 0.2644426362896664,
188
+ "grad_norm": 0.7916778922080994,
189
+ "learning_rate": 4.390243902439025e-05,
190
+ "loss": 0.0453,
191
+ "step": 650
192
+ },
193
+ {
194
+ "epoch": 0.274613506916192,
195
+ "grad_norm": 0.41167861223220825,
196
+ "learning_rate": 4.5596205962059624e-05,
197
+ "loss": 0.0447,
198
+ "step": 675
199
+ },
200
+ {
201
+ "epoch": 0.2847843775427177,
202
+ "grad_norm": 2.670891046524048,
203
+ "learning_rate": 4.7289972899729e-05,
204
+ "loss": 0.0436,
205
+ "step": 700
206
+ },
207
+ {
208
+ "epoch": 0.2949552481692433,
209
+ "grad_norm": 0.5455201268196106,
210
+ "learning_rate": 4.898373983739837e-05,
211
+ "loss": 0.046,
212
+ "step": 725
213
+ },
214
+ {
215
+ "epoch": 0.3051261187957689,
216
+ "grad_norm": 0.477638304233551,
217
+ "learning_rate": 4.9924653405666065e-05,
218
+ "loss": 0.0461,
219
+ "step": 750
220
+ },
221
+ {
222
+ "epoch": 0.31529698942229456,
223
+ "grad_norm": 1.8583427667617798,
224
+ "learning_rate": 4.973628691983123e-05,
225
+ "loss": 0.046,
226
+ "step": 775
227
+ },
228
+ {
229
+ "epoch": 0.32546786004882017,
230
+ "grad_norm": 1.4675369262695312,
231
+ "learning_rate": 4.954792043399639e-05,
232
+ "loss": 0.043,
233
+ "step": 800
234
+ },
235
+ {
236
+ "epoch": 0.33563873067534583,
237
+ "grad_norm": 0.5187409520149231,
238
+ "learning_rate": 4.9359553948161545e-05,
239
+ "loss": 0.0465,
240
+ "step": 825
241
+ },
242
+ {
243
+ "epoch": 0.34580960130187144,
244
+ "grad_norm": 1.1287797689437866,
245
+ "learning_rate": 4.917118746232671e-05,
246
+ "loss": 0.0395,
247
+ "step": 850
248
+ },
249
+ {
250
+ "epoch": 0.35598047192839705,
251
+ "grad_norm": 1.6059693098068237,
252
+ "learning_rate": 4.8982820976491866e-05,
253
+ "loss": 0.0463,
254
+ "step": 875
255
+ },
256
+ {
257
+ "epoch": 0.3661513425549227,
258
+ "grad_norm": 0.5075823664665222,
259
+ "learning_rate": 4.8794454490657024e-05,
260
+ "loss": 0.0431,
261
+ "step": 900
262
+ },
263
+ {
264
+ "epoch": 0.3763222131814483,
265
+ "grad_norm": 0.4540039896965027,
266
+ "learning_rate": 4.860608800482219e-05,
267
+ "loss": 0.0444,
268
+ "step": 925
269
+ },
270
+ {
271
+ "epoch": 0.386493083807974,
272
+ "grad_norm": 0.3790999948978424,
273
+ "learning_rate": 4.8417721518987346e-05,
274
+ "loss": 0.0408,
275
+ "step": 950
276
+ },
277
+ {
278
+ "epoch": 0.3966639544344996,
279
+ "grad_norm": 0.35743555426597595,
280
+ "learning_rate": 4.82293550331525e-05,
281
+ "loss": 0.0412,
282
+ "step": 975
283
+ },
284
+ {
285
+ "epoch": 0.40683482506102525,
286
+ "grad_norm": 0.9319044351577759,
287
+ "learning_rate": 4.804098854731766e-05,
288
+ "loss": 0.0383,
289
+ "step": 1000
290
+ },
291
+ {
292
+ "epoch": 0.41700569568755086,
293
+ "grad_norm": 0.9398106336593628,
294
+ "learning_rate": 4.785262206148282e-05,
295
+ "loss": 0.0439,
296
+ "step": 1025
297
+ },
298
+ {
299
+ "epoch": 0.42717656631407647,
300
+ "grad_norm": 0.5996136665344238,
301
+ "learning_rate": 4.766425557564798e-05,
302
+ "loss": 0.0455,
303
+ "step": 1050
304
+ },
305
+ {
306
+ "epoch": 0.43734743694060213,
307
+ "grad_norm": 2.0928783416748047,
308
+ "learning_rate": 4.747588908981314e-05,
309
+ "loss": 0.0451,
310
+ "step": 1075
311
+ },
312
+ {
313
+ "epoch": 0.44751830756712774,
314
+ "grad_norm": 0.42079484462738037,
315
+ "learning_rate": 4.7287522603978304e-05,
316
+ "loss": 0.0476,
317
+ "step": 1100
318
+ },
319
+ {
320
+ "epoch": 0.4576891781936534,
321
+ "grad_norm": 1.7878057956695557,
322
+ "learning_rate": 4.709915611814346e-05,
323
+ "loss": 0.0434,
324
+ "step": 1125
325
+ },
326
+ {
327
+ "epoch": 0.467860048820179,
328
+ "grad_norm": 0.3113216459751129,
329
+ "learning_rate": 4.691078963230862e-05,
330
+ "loss": 0.042,
331
+ "step": 1150
332
+ },
333
+ {
334
+ "epoch": 0.4780309194467046,
335
+ "grad_norm": 0.319442480802536,
336
+ "learning_rate": 4.6722423146473784e-05,
337
+ "loss": 0.0409,
338
+ "step": 1175
339
+ },
340
+ {
341
+ "epoch": 0.4882017900732303,
342
+ "grad_norm": 0.9614000916481018,
343
+ "learning_rate": 4.653405666063894e-05,
344
+ "loss": 0.0391,
345
+ "step": 1200
346
+ },
347
+ {
348
+ "epoch": 0.4983726606997559,
349
+ "grad_norm": 0.6299770474433899,
350
+ "learning_rate": 4.63456901748041e-05,
351
+ "loss": 0.0414,
352
+ "step": 1225
353
+ },
354
+ {
355
+ "epoch": 0.5085435313262815,
356
+ "grad_norm": 0.8669236898422241,
357
+ "learning_rate": 4.615732368896926e-05,
358
+ "loss": 0.0388,
359
+ "step": 1250
360
+ },
361
+ {
362
+ "epoch": 0.5187144019528072,
363
+ "grad_norm": 0.6755848526954651,
364
+ "learning_rate": 4.596895720313442e-05,
365
+ "loss": 0.0421,
366
+ "step": 1275
367
+ },
368
+ {
369
+ "epoch": 0.5288852725793328,
370
+ "grad_norm": 0.5094274282455444,
371
+ "learning_rate": 4.5780590717299585e-05,
372
+ "loss": 0.0408,
373
+ "step": 1300
374
+ },
375
+ {
376
+ "epoch": 0.5390561432058584,
377
+ "grad_norm": 1.5034645795822144,
378
+ "learning_rate": 4.559222423146474e-05,
379
+ "loss": 0.0467,
380
+ "step": 1325
381
+ },
382
+ {
383
+ "epoch": 0.549227013832384,
384
+ "grad_norm": 1.6210927963256836,
385
+ "learning_rate": 4.54038577456299e-05,
386
+ "loss": 0.0443,
387
+ "step": 1350
388
+ },
389
+ {
390
+ "epoch": 0.5593978844589097,
391
+ "grad_norm": 0.4528130292892456,
392
+ "learning_rate": 4.5215491259795064e-05,
393
+ "loss": 0.041,
394
+ "step": 1375
395
+ },
396
+ {
397
+ "epoch": 0.5695687550854354,
398
+ "grad_norm": 0.6066830158233643,
399
+ "learning_rate": 4.5027124773960215e-05,
400
+ "loss": 0.0427,
401
+ "step": 1400
402
+ },
403
+ {
404
+ "epoch": 0.5797396257119609,
405
+ "grad_norm": 0.2950328588485718,
406
+ "learning_rate": 4.483875828812538e-05,
407
+ "loss": 0.0417,
408
+ "step": 1425
409
+ },
410
+ {
411
+ "epoch": 0.5899104963384866,
412
+ "grad_norm": 0.425102561712265,
413
+ "learning_rate": 4.4650391802290537e-05,
414
+ "loss": 0.0391,
415
+ "step": 1450
416
+ },
417
+ {
418
+ "epoch": 0.6000813669650122,
419
+ "grad_norm": 0.5589340329170227,
420
+ "learning_rate": 4.4462025316455694e-05,
421
+ "loss": 0.0406,
422
+ "step": 1475
423
+ },
424
+ {
425
+ "epoch": 0.6102522375915378,
426
+ "grad_norm": 3.0561866760253906,
427
+ "learning_rate": 4.427365883062086e-05,
428
+ "loss": 0.0403,
429
+ "step": 1500
430
+ },
431
+ {
432
+ "epoch": 0.6204231082180635,
433
+ "grad_norm": 0.6851157546043396,
434
+ "learning_rate": 4.4085292344786016e-05,
435
+ "loss": 0.0405,
436
+ "step": 1525
437
+ },
438
+ {
439
+ "epoch": 0.6305939788445891,
440
+ "grad_norm": 0.5816906690597534,
441
+ "learning_rate": 4.389692585895118e-05,
442
+ "loss": 0.0465,
443
+ "step": 1550
444
+ },
445
+ {
446
+ "epoch": 0.6407648494711147,
447
+ "grad_norm": 1.0224462747573853,
448
+ "learning_rate": 4.370855937311634e-05,
449
+ "loss": 0.0419,
450
+ "step": 1575
451
+ },
452
+ {
453
+ "epoch": 0.6509357200976403,
454
+ "grad_norm": 0.4752540588378906,
455
+ "learning_rate": 4.3520192887281495e-05,
456
+ "loss": 0.0405,
457
+ "step": 1600
458
+ },
459
+ {
460
+ "epoch": 0.661106590724166,
461
+ "grad_norm": 1.1999990940093994,
462
+ "learning_rate": 4.333182640144666e-05,
463
+ "loss": 0.041,
464
+ "step": 1625
465
+ },
466
+ {
467
+ "epoch": 0.6712774613506917,
468
+ "grad_norm": 0.40787383913993835,
469
+ "learning_rate": 4.314345991561182e-05,
470
+ "loss": 0.0411,
471
+ "step": 1650
472
+ },
473
+ {
474
+ "epoch": 0.6814483319772172,
475
+ "grad_norm": 0.26120448112487793,
476
+ "learning_rate": 4.2955093429776974e-05,
477
+ "loss": 0.0401,
478
+ "step": 1675
479
+ },
480
+ {
481
+ "epoch": 0.6916192026037429,
482
+ "grad_norm": 0.5405380725860596,
483
+ "learning_rate": 4.276672694394214e-05,
484
+ "loss": 0.0387,
485
+ "step": 1700
486
+ },
487
+ {
488
+ "epoch": 0.7017900732302685,
489
+ "grad_norm": 0.2800443768501282,
490
+ "learning_rate": 4.2578360458107296e-05,
491
+ "loss": 0.0411,
492
+ "step": 1725
493
+ },
494
+ {
495
+ "epoch": 0.7119609438567941,
496
+ "grad_norm": 0.5752384662628174,
497
+ "learning_rate": 4.2389993972272454e-05,
498
+ "loss": 0.0404,
499
+ "step": 1750
500
+ },
501
+ {
502
+ "epoch": 0.7221318144833198,
503
+ "grad_norm": 0.778127133846283,
504
+ "learning_rate": 4.220162748643762e-05,
505
+ "loss": 0.0428,
506
+ "step": 1775
507
+ },
508
+ {
509
+ "epoch": 0.7323026851098454,
510
+ "grad_norm": 1.7695764303207397,
511
+ "learning_rate": 4.2013261000602775e-05,
512
+ "loss": 0.0353,
513
+ "step": 1800
514
+ },
515
+ {
516
+ "epoch": 0.7424735557363711,
517
+ "grad_norm": 0.2690475285053253,
518
+ "learning_rate": 4.182489451476794e-05,
519
+ "loss": 0.0404,
520
+ "step": 1825
521
+ },
522
+ {
523
+ "epoch": 0.7526444263628966,
524
+ "grad_norm": 0.506496012210846,
525
+ "learning_rate": 4.163652802893309e-05,
526
+ "loss": 0.0402,
527
+ "step": 1850
528
+ },
529
+ {
530
+ "epoch": 0.7628152969894223,
531
+ "grad_norm": 0.7220098972320557,
532
+ "learning_rate": 4.1448161543098255e-05,
533
+ "loss": 0.0372,
534
+ "step": 1875
535
+ },
536
+ {
537
+ "epoch": 0.772986167615948,
538
+ "grad_norm": 1.1999934911727905,
539
+ "learning_rate": 4.125979505726341e-05,
540
+ "loss": 0.0429,
541
+ "step": 1900
542
+ },
543
+ {
544
+ "epoch": 0.7831570382424735,
545
+ "grad_norm": 0.5055158734321594,
546
+ "learning_rate": 4.107142857142857e-05,
547
+ "loss": 0.0367,
548
+ "step": 1925
549
+ },
550
+ {
551
+ "epoch": 0.7933279088689992,
552
+ "grad_norm": 0.5015272498130798,
553
+ "learning_rate": 4.0883062085593734e-05,
554
+ "loss": 0.0376,
555
+ "step": 1950
556
+ },
557
+ {
558
+ "epoch": 0.8034987794955248,
559
+ "grad_norm": 0.6382879018783569,
560
+ "learning_rate": 4.069469559975889e-05,
561
+ "loss": 0.0387,
562
+ "step": 1975
563
+ },
564
+ {
565
+ "epoch": 0.8136696501220505,
566
+ "grad_norm": 1.4380210638046265,
567
+ "learning_rate": 4.050632911392405e-05,
568
+ "loss": 0.0433,
569
+ "step": 2000
570
+ },
571
+ {
572
+ "epoch": 0.823840520748576,
573
+ "grad_norm": 0.46868982911109924,
574
+ "learning_rate": 4.031796262808921e-05,
575
+ "loss": 0.0409,
576
+ "step": 2025
577
+ },
578
+ {
579
+ "epoch": 0.8340113913751017,
580
+ "grad_norm": 0.47722935676574707,
581
+ "learning_rate": 4.012959614225437e-05,
582
+ "loss": 0.0367,
583
+ "step": 2050
584
+ },
585
+ {
586
+ "epoch": 0.8441822620016274,
587
+ "grad_norm": 0.66898113489151,
588
+ "learning_rate": 3.9941229656419535e-05,
589
+ "loss": 0.0426,
590
+ "step": 2075
591
+ },
592
+ {
593
+ "epoch": 0.8543531326281529,
594
+ "grad_norm": 0.640957772731781,
595
+ "learning_rate": 3.975286317058469e-05,
596
+ "loss": 0.0374,
597
+ "step": 2100
598
+ },
599
+ {
600
+ "epoch": 0.8645240032546786,
601
+ "grad_norm": 2.0676088333129883,
602
+ "learning_rate": 3.956449668474985e-05,
603
+ "loss": 0.0391,
604
+ "step": 2125
605
+ },
606
+ {
607
+ "epoch": 0.8746948738812043,
608
+ "grad_norm": 0.3028632402420044,
609
+ "learning_rate": 3.9376130198915014e-05,
610
+ "loss": 0.0379,
611
+ "step": 2150
612
+ },
613
+ {
614
+ "epoch": 0.8848657445077298,
615
+ "grad_norm": 3.03582763671875,
616
+ "learning_rate": 3.918776371308017e-05,
617
+ "loss": 0.0396,
618
+ "step": 2175
619
+ },
620
+ {
621
+ "epoch": 0.8950366151342555,
622
+ "grad_norm": 0.5792508721351624,
623
+ "learning_rate": 3.899939722724533e-05,
624
+ "loss": 0.0393,
625
+ "step": 2200
626
+ },
627
+ {
628
+ "epoch": 0.9052074857607811,
629
+ "grad_norm": 0.6225530505180359,
630
+ "learning_rate": 3.8811030741410494e-05,
631
+ "loss": 0.0413,
632
+ "step": 2225
633
+ },
634
+ {
635
+ "epoch": 0.9153783563873068,
636
+ "grad_norm": 1.2196191549301147,
637
+ "learning_rate": 3.862266425557565e-05,
638
+ "loss": 0.0411,
639
+ "step": 2250
640
+ },
641
+ {
642
+ "epoch": 0.9255492270138324,
643
+ "grad_norm": 1.1619917154312134,
644
+ "learning_rate": 3.843429776974081e-05,
645
+ "loss": 0.0372,
646
+ "step": 2275
647
+ },
648
+ {
649
+ "epoch": 0.935720097640358,
650
+ "grad_norm": 0.4266558289527893,
651
+ "learning_rate": 3.8245931283905966e-05,
652
+ "loss": 0.0382,
653
+ "step": 2300
654
+ },
655
+ {
656
+ "epoch": 0.9458909682668837,
657
+ "grad_norm": 0.2716640830039978,
658
+ "learning_rate": 3.8057564798071124e-05,
659
+ "loss": 0.0364,
660
+ "step": 2325
661
+ },
662
+ {
663
+ "epoch": 0.9560618388934092,
664
+ "grad_norm": 0.3579002618789673,
665
+ "learning_rate": 3.787673297166968e-05,
666
+ "loss": 0.0397,
667
+ "step": 2350
668
+ },
669
+ {
670
+ "epoch": 0.9662327095199349,
671
+ "grad_norm": 0.3124788999557495,
672
+ "learning_rate": 3.768836648583484e-05,
673
+ "loss": 0.0367,
674
+ "step": 2375
675
+ },
676
+ {
677
+ "epoch": 0.9764035801464606,
678
+ "grad_norm": 0.7987418174743652,
679
+ "learning_rate": 3.7500000000000003e-05,
680
+ "loss": 0.0425,
681
+ "step": 2400
682
+ },
683
+ {
684
+ "epoch": 0.9865744507729862,
685
+ "grad_norm": 0.623573899269104,
686
+ "learning_rate": 3.731163351416516e-05,
687
+ "loss": 0.0407,
688
+ "step": 2425
689
+ },
690
+ {
691
+ "epoch": 0.9967453213995118,
692
+ "grad_norm": 0.6313973665237427,
693
+ "learning_rate": 3.712326702833032e-05,
694
+ "loss": 0.0359,
695
+ "step": 2450
696
+ },
697
+ {
698
+ "epoch": 1.0,
699
+ "eval_explained_variance": 0.22635483741760254,
700
+ "eval_loss": 0.03618196025490761,
701
+ "eval_mae": 0.1520702838897705,
702
+ "eval_mse": 0.03618059679865837,
703
+ "eval_r2": 0.22514164447784424,
704
+ "eval_rmse": 0.19021197858877967,
705
+ "eval_runtime": 39.05,
706
+ "eval_samples_per_second": 503.405,
707
+ "eval_steps_per_second": 7.887,
708
+ "step": 2458
709
+ },
710
+ {
711
+ "epoch": 1.0069161920260374,
712
+ "grad_norm": 0.5591532588005066,
713
+ "learning_rate": 3.693490054249548e-05,
714
+ "loss": 0.0361,
715
+ "step": 2475
716
+ },
717
+ {
718
+ "epoch": 1.017087062652563,
719
+ "grad_norm": 0.35737472772598267,
720
+ "learning_rate": 3.674653405666064e-05,
721
+ "loss": 0.035,
722
+ "step": 2500
723
+ },
724
+ {
725
+ "epoch": 1.0272579332790888,
726
+ "grad_norm": 1.0595606565475464,
727
+ "learning_rate": 3.6558167570825805e-05,
728
+ "loss": 0.0374,
729
+ "step": 2525
730
+ },
731
+ {
732
+ "epoch": 1.0374288039056143,
733
+ "grad_norm": 0.42890357971191406,
734
+ "learning_rate": 3.636980108499096e-05,
735
+ "loss": 0.0366,
736
+ "step": 2550
737
+ },
738
+ {
739
+ "epoch": 1.0475996745321399,
740
+ "grad_norm": 0.41043972969055176,
741
+ "learning_rate": 3.618143459915612e-05,
742
+ "loss": 0.0377,
743
+ "step": 2575
744
+ },
745
+ {
746
+ "epoch": 1.0577705451586656,
747
+ "grad_norm": 0.5887218117713928,
748
+ "learning_rate": 3.5993068113321284e-05,
749
+ "loss": 0.0391,
750
+ "step": 2600
751
+ },
752
+ {
753
+ "epoch": 1.0679414157851912,
754
+ "grad_norm": 0.245023712515831,
755
+ "learning_rate": 3.5804701627486435e-05,
756
+ "loss": 0.032,
757
+ "step": 2625
758
+ },
759
+ {
760
+ "epoch": 1.0781122864117167,
761
+ "grad_norm": 0.4018308222293854,
762
+ "learning_rate": 3.56163351416516e-05,
763
+ "loss": 0.0401,
764
+ "step": 2650
765
+ },
766
+ {
767
+ "epoch": 1.0882831570382425,
768
+ "grad_norm": 1.287443995475769,
769
+ "learning_rate": 3.5427968655816756e-05,
770
+ "loss": 0.0366,
771
+ "step": 2675
772
+ },
773
+ {
774
+ "epoch": 1.098454027664768,
775
+ "grad_norm": 0.9983925223350525,
776
+ "learning_rate": 3.5239602169981914e-05,
777
+ "loss": 0.0408,
778
+ "step": 2700
779
+ },
780
+ {
781
+ "epoch": 1.1086248982912936,
782
+ "grad_norm": 1.045338749885559,
783
+ "learning_rate": 3.505123568414708e-05,
784
+ "loss": 0.0411,
785
+ "step": 2725
786
+ },
787
+ {
788
+ "epoch": 1.1187957689178194,
789
+ "grad_norm": 0.2734503746032715,
790
+ "learning_rate": 3.4862869198312236e-05,
791
+ "loss": 0.0373,
792
+ "step": 2750
793
+ },
794
+ {
795
+ "epoch": 1.128966639544345,
796
+ "grad_norm": 0.4091496169567108,
797
+ "learning_rate": 3.46745027124774e-05,
798
+ "loss": 0.0362,
799
+ "step": 2775
800
+ },
801
+ {
802
+ "epoch": 1.1391375101708707,
803
+ "grad_norm": 1.026307225227356,
804
+ "learning_rate": 3.448613622664256e-05,
805
+ "loss": 0.036,
806
+ "step": 2800
807
+ },
808
+ {
809
+ "epoch": 1.1493083807973963,
810
+ "grad_norm": 0.5424162149429321,
811
+ "learning_rate": 3.4297769740807715e-05,
812
+ "loss": 0.0362,
813
+ "step": 2825
814
+ },
815
+ {
816
+ "epoch": 1.1594792514239218,
817
+ "grad_norm": 0.4070860743522644,
818
+ "learning_rate": 3.410940325497288e-05,
819
+ "loss": 0.0338,
820
+ "step": 2850
821
+ },
822
+ {
823
+ "epoch": 1.1696501220504476,
824
+ "grad_norm": 0.861303448677063,
825
+ "learning_rate": 3.392103676913804e-05,
826
+ "loss": 0.0371,
827
+ "step": 2875
828
+ },
829
+ {
830
+ "epoch": 1.1798209926769732,
831
+ "grad_norm": 0.6860642433166504,
832
+ "learning_rate": 3.3732670283303194e-05,
833
+ "loss": 0.0369,
834
+ "step": 2900
835
+ },
836
+ {
837
+ "epoch": 1.1899918633034987,
838
+ "grad_norm": 0.6697980761528015,
839
+ "learning_rate": 3.354430379746836e-05,
840
+ "loss": 0.0379,
841
+ "step": 2925
842
+ },
843
+ {
844
+ "epoch": 1.2001627339300245,
845
+ "grad_norm": 0.6728507876396179,
846
+ "learning_rate": 3.3355937311633516e-05,
847
+ "loss": 0.038,
848
+ "step": 2950
849
+ },
850
+ {
851
+ "epoch": 1.21033360455655,
852
+ "grad_norm": 0.4306727349758148,
853
+ "learning_rate": 3.316757082579868e-05,
854
+ "loss": 0.0381,
855
+ "step": 2975
856
+ },
857
+ {
858
+ "epoch": 1.2205044751830756,
859
+ "grad_norm": 0.8999012112617493,
860
+ "learning_rate": 3.297920433996384e-05,
861
+ "loss": 0.0393,
862
+ "step": 3000
863
+ },
864
+ {
865
+ "epoch": 1.2306753458096014,
866
+ "grad_norm": 0.358962744474411,
867
+ "learning_rate": 3.2790837854128995e-05,
868
+ "loss": 0.0374,
869
+ "step": 3025
870
+ },
871
+ {
872
+ "epoch": 1.240846216436127,
873
+ "grad_norm": 0.8197824954986572,
874
+ "learning_rate": 3.260247136829416e-05,
875
+ "loss": 0.0391,
876
+ "step": 3050
877
+ },
878
+ {
879
+ "epoch": 1.2510170870626527,
880
+ "grad_norm": 0.6671149730682373,
881
+ "learning_rate": 3.241410488245931e-05,
882
+ "loss": 0.0357,
883
+ "step": 3075
884
+ },
885
+ {
886
+ "epoch": 1.2611879576891782,
887
+ "grad_norm": 0.8932905197143555,
888
+ "learning_rate": 3.2225738396624475e-05,
889
+ "loss": 0.038,
890
+ "step": 3100
891
+ },
892
+ {
893
+ "epoch": 1.2713588283157038,
894
+ "grad_norm": 0.3033260405063629,
895
+ "learning_rate": 3.203737191078963e-05,
896
+ "loss": 0.0364,
897
+ "step": 3125
898
+ },
899
+ {
900
+ "epoch": 1.2815296989422293,
901
+ "grad_norm": 0.6071414351463318,
902
+ "learning_rate": 3.184900542495479e-05,
903
+ "loss": 0.0325,
904
+ "step": 3150
905
+ },
906
+ {
907
+ "epoch": 1.2917005695687551,
908
+ "grad_norm": 0.28337907791137695,
909
+ "learning_rate": 3.1660638939119954e-05,
910
+ "loss": 0.0373,
911
+ "step": 3175
912
+ },
913
+ {
914
+ "epoch": 1.3018714401952807,
915
+ "grad_norm": 0.5393190979957581,
916
+ "learning_rate": 3.147227245328511e-05,
917
+ "loss": 0.0362,
918
+ "step": 3200
919
+ },
920
+ {
921
+ "epoch": 1.3120423108218064,
922
+ "grad_norm": 1.3214200735092163,
923
+ "learning_rate": 3.128390596745027e-05,
924
+ "loss": 0.0368,
925
+ "step": 3225
926
+ },
927
+ {
928
+ "epoch": 1.322213181448332,
929
+ "grad_norm": 0.5053825974464417,
930
+ "learning_rate": 3.109553948161543e-05,
931
+ "loss": 0.0375,
932
+ "step": 3250
933
+ },
934
+ {
935
+ "epoch": 1.3323840520748575,
936
+ "grad_norm": 0.4198523759841919,
937
+ "learning_rate": 3.090717299578059e-05,
938
+ "loss": 0.0404,
939
+ "step": 3275
940
+ },
941
+ {
942
+ "epoch": 1.342554922701383,
943
+ "grad_norm": 0.9187168478965759,
944
+ "learning_rate": 3.0718806509945755e-05,
945
+ "loss": 0.0374,
946
+ "step": 3300
947
+ },
948
+ {
949
+ "epoch": 1.3527257933279089,
950
+ "grad_norm": 0.3942495882511139,
951
+ "learning_rate": 3.053044002411091e-05,
952
+ "loss": 0.0362,
953
+ "step": 3325
954
+ },
955
+ {
956
+ "epoch": 1.3628966639544344,
957
+ "grad_norm": 0.6447917819023132,
958
+ "learning_rate": 3.0342073538276073e-05,
959
+ "loss": 0.038,
960
+ "step": 3350
961
+ },
962
+ {
963
+ "epoch": 1.3730675345809602,
964
+ "grad_norm": 0.3163827061653137,
965
+ "learning_rate": 3.015370705244123e-05,
966
+ "loss": 0.0369,
967
+ "step": 3375
968
+ },
969
+ {
970
+ "epoch": 1.3832384052074858,
971
+ "grad_norm": 0.3072253465652466,
972
+ "learning_rate": 2.9965340566606392e-05,
973
+ "loss": 0.0385,
974
+ "step": 3400
975
+ },
976
+ {
977
+ "epoch": 1.3934092758340113,
978
+ "grad_norm": 0.4808538556098938,
979
+ "learning_rate": 2.9776974080771553e-05,
980
+ "loss": 0.0378,
981
+ "step": 3425
982
+ },
983
+ {
984
+ "epoch": 1.403580146460537,
985
+ "grad_norm": 0.2448228895664215,
986
+ "learning_rate": 2.9588607594936714e-05,
987
+ "loss": 0.0337,
988
+ "step": 3450
989
+ },
990
+ {
991
+ "epoch": 1.4137510170870626,
992
+ "grad_norm": 0.42077022790908813,
993
+ "learning_rate": 2.940024110910187e-05,
994
+ "loss": 0.0359,
995
+ "step": 3475
996
+ },
997
+ {
998
+ "epoch": 1.4239218877135884,
999
+ "grad_norm": 0.48862871527671814,
1000
+ "learning_rate": 2.921187462326703e-05,
1001
+ "loss": 0.0378,
1002
+ "step": 3500
1003
+ },
1004
+ {
1005
+ "epoch": 1.434092758340114,
1006
+ "grad_norm": 0.5912586450576782,
1007
+ "learning_rate": 2.9023508137432186e-05,
1008
+ "loss": 0.0384,
1009
+ "step": 3525
1010
+ },
1011
+ {
1012
+ "epoch": 1.4442636289666395,
1013
+ "grad_norm": 0.4059402644634247,
1014
+ "learning_rate": 2.8835141651597347e-05,
1015
+ "loss": 0.0381,
1016
+ "step": 3550
1017
+ },
1018
+ {
1019
+ "epoch": 1.454434499593165,
1020
+ "grad_norm": 0.3919837176799774,
1021
+ "learning_rate": 2.8646775165762508e-05,
1022
+ "loss": 0.036,
1023
+ "step": 3575
1024
+ },
1025
+ {
1026
+ "epoch": 1.4646053702196908,
1027
+ "grad_norm": 0.2935680150985718,
1028
+ "learning_rate": 2.845840867992767e-05,
1029
+ "loss": 0.0368,
1030
+ "step": 3600
1031
+ },
1032
+ {
1033
+ "epoch": 1.4747762408462164,
1034
+ "grad_norm": 0.7148743867874146,
1035
+ "learning_rate": 2.8270042194092826e-05,
1036
+ "loss": 0.0386,
1037
+ "step": 3625
1038
+ },
1039
+ {
1040
+ "epoch": 1.4849471114727422,
1041
+ "grad_norm": 1.314514398574829,
1042
+ "learning_rate": 2.8081675708257987e-05,
1043
+ "loss": 0.0375,
1044
+ "step": 3650
1045
+ },
1046
+ {
1047
+ "epoch": 1.4951179820992677,
1048
+ "grad_norm": 1.6261988878250122,
1049
+ "learning_rate": 2.7893309222423148e-05,
1050
+ "loss": 0.0376,
1051
+ "step": 3675
1052
+ },
1053
+ {
1054
+ "epoch": 1.5052888527257933,
1055
+ "grad_norm": 1.105427861213684,
1056
+ "learning_rate": 2.770494273658831e-05,
1057
+ "loss": 0.0354,
1058
+ "step": 3700
1059
+ },
1060
+ {
1061
+ "epoch": 1.5154597233523188,
1062
+ "grad_norm": 0.5577530264854431,
1063
+ "learning_rate": 2.7516576250753466e-05,
1064
+ "loss": 0.0362,
1065
+ "step": 3725
1066
+ },
1067
+ {
1068
+ "epoch": 1.5256305939788446,
1069
+ "grad_norm": 0.49557003378868103,
1070
+ "learning_rate": 2.7328209764918627e-05,
1071
+ "loss": 0.0389,
1072
+ "step": 3750
1073
+ },
1074
+ {
1075
+ "epoch": 1.5358014646053704,
1076
+ "grad_norm": 0.8892014622688293,
1077
+ "learning_rate": 2.7139843279083788e-05,
1078
+ "loss": 0.0379,
1079
+ "step": 3775
1080
+ },
1081
+ {
1082
+ "epoch": 1.545972335231896,
1083
+ "grad_norm": 0.5090736150741577,
1084
+ "learning_rate": 2.695147679324895e-05,
1085
+ "loss": 0.0359,
1086
+ "step": 3800
1087
+ },
1088
+ {
1089
+ "epoch": 1.5561432058584215,
1090
+ "grad_norm": 0.8963241577148438,
1091
+ "learning_rate": 2.6763110307414107e-05,
1092
+ "loss": 0.0421,
1093
+ "step": 3825
1094
+ },
1095
+ {
1096
+ "epoch": 1.566314076484947,
1097
+ "grad_norm": 0.3889683485031128,
1098
+ "learning_rate": 2.6574743821579268e-05,
1099
+ "loss": 0.0362,
1100
+ "step": 3850
1101
+ },
1102
+ {
1103
+ "epoch": 1.5764849471114726,
1104
+ "grad_norm": 0.6879289150238037,
1105
+ "learning_rate": 2.638637733574443e-05,
1106
+ "loss": 0.0367,
1107
+ "step": 3875
1108
+ },
1109
+ {
1110
+ "epoch": 1.5866558177379984,
1111
+ "grad_norm": 1.1574759483337402,
1112
+ "learning_rate": 2.619801084990959e-05,
1113
+ "loss": 0.0387,
1114
+ "step": 3900
1115
+ },
1116
+ {
1117
+ "epoch": 1.5968266883645241,
1118
+ "grad_norm": 1.3032798767089844,
1119
+ "learning_rate": 2.6009644364074747e-05,
1120
+ "loss": 0.0366,
1121
+ "step": 3925
1122
+ },
1123
+ {
1124
+ "epoch": 1.6069975589910497,
1125
+ "grad_norm": 1.0193997621536255,
1126
+ "learning_rate": 2.58212778782399e-05,
1127
+ "loss": 0.0325,
1128
+ "step": 3950
1129
+ },
1130
+ {
1131
+ "epoch": 1.6171684296175752,
1132
+ "grad_norm": 1.767223834991455,
1133
+ "learning_rate": 2.5632911392405062e-05,
1134
+ "loss": 0.0393,
1135
+ "step": 3975
1136
+ },
1137
+ {
1138
+ "epoch": 1.6273393002441008,
1139
+ "grad_norm": 1.016648530960083,
1140
+ "learning_rate": 2.5444544906570223e-05,
1141
+ "loss": 0.0333,
1142
+ "step": 4000
1143
+ },
1144
+ {
1145
+ "epoch": 1.6375101708706266,
1146
+ "grad_norm": 2.0735578536987305,
1147
+ "learning_rate": 2.5256178420735384e-05,
1148
+ "loss": 0.0355,
1149
+ "step": 4025
1150
+ },
1151
+ {
1152
+ "epoch": 1.647681041497152,
1153
+ "grad_norm": 0.8982949256896973,
1154
+ "learning_rate": 2.506781193490054e-05,
1155
+ "loss": 0.0369,
1156
+ "step": 4050
1157
+ },
1158
+ {
1159
+ "epoch": 1.6578519121236779,
1160
+ "grad_norm": 0.324400395154953,
1161
+ "learning_rate": 2.4879445449065702e-05,
1162
+ "loss": 0.0358,
1163
+ "step": 4075
1164
+ },
1165
+ {
1166
+ "epoch": 1.6680227827502034,
1167
+ "grad_norm": 0.32701972126960754,
1168
+ "learning_rate": 2.4691078963230863e-05,
1169
+ "loss": 0.0336,
1170
+ "step": 4100
1171
+ },
1172
+ {
1173
+ "epoch": 1.678193653376729,
1174
+ "grad_norm": 1.151262640953064,
1175
+ "learning_rate": 2.4502712477396024e-05,
1176
+ "loss": 0.0392,
1177
+ "step": 4125
1178
+ },
1179
+ {
1180
+ "epoch": 1.6883645240032545,
1181
+ "grad_norm": 0.5716719627380371,
1182
+ "learning_rate": 2.431434599156118e-05,
1183
+ "loss": 0.0383,
1184
+ "step": 4150
1185
+ },
1186
+ {
1187
+ "epoch": 1.6985353946297803,
1188
+ "grad_norm": 0.8748169541358948,
1189
+ "learning_rate": 2.4125979505726342e-05,
1190
+ "loss": 0.0344,
1191
+ "step": 4175
1192
+ },
1193
+ {
1194
+ "epoch": 1.708706265256306,
1195
+ "grad_norm": 0.25271666049957275,
1196
+ "learning_rate": 2.3937613019891503e-05,
1197
+ "loss": 0.0351,
1198
+ "step": 4200
1199
+ },
1200
+ {
1201
+ "epoch": 1.7188771358828316,
1202
+ "grad_norm": 0.23297059535980225,
1203
+ "learning_rate": 2.3749246534056664e-05,
1204
+ "loss": 0.0337,
1205
+ "step": 4225
1206
+ },
1207
+ {
1208
+ "epoch": 1.7290480065093572,
1209
+ "grad_norm": 0.3409133851528168,
1210
+ "learning_rate": 2.356088004822182e-05,
1211
+ "loss": 0.0346,
1212
+ "step": 4250
1213
+ },
1214
+ {
1215
+ "epoch": 1.7392188771358827,
1216
+ "grad_norm": 0.822523832321167,
1217
+ "learning_rate": 2.337251356238698e-05,
1218
+ "loss": 0.0374,
1219
+ "step": 4275
1220
+ },
1221
+ {
1222
+ "epoch": 1.7493897477624083,
1223
+ "grad_norm": 1.9754129648208618,
1224
+ "learning_rate": 2.318414707655214e-05,
1225
+ "loss": 0.0409,
1226
+ "step": 4300
1227
+ },
1228
+ {
1229
+ "epoch": 1.759560618388934,
1230
+ "grad_norm": 0.49358049035072327,
1231
+ "learning_rate": 2.29957805907173e-05,
1232
+ "loss": 0.0379,
1233
+ "step": 4325
1234
+ },
1235
+ {
1236
+ "epoch": 1.7697314890154598,
1237
+ "grad_norm": 0.6075097918510437,
1238
+ "learning_rate": 2.280741410488246e-05,
1239
+ "loss": 0.0358,
1240
+ "step": 4350
1241
+ },
1242
+ {
1243
+ "epoch": 1.7799023596419854,
1244
+ "grad_norm": 0.5666526556015015,
1245
+ "learning_rate": 2.261904761904762e-05,
1246
+ "loss": 0.0337,
1247
+ "step": 4375
1248
+ },
1249
+ {
1250
+ "epoch": 1.790073230268511,
1251
+ "grad_norm": 0.7485412955284119,
1252
+ "learning_rate": 2.243068113321278e-05,
1253
+ "loss": 0.037,
1254
+ "step": 4400
1255
+ },
1256
+ {
1257
+ "epoch": 1.8002441008950365,
1258
+ "grad_norm": 0.585403323173523,
1259
+ "learning_rate": 2.224231464737794e-05,
1260
+ "loss": 0.0379,
1261
+ "step": 4425
1262
+ },
1263
+ {
1264
+ "epoch": 1.8104149715215623,
1265
+ "grad_norm": 0.7822312712669373,
1266
+ "learning_rate": 2.2053948161543102e-05,
1267
+ "loss": 0.0368,
1268
+ "step": 4450
1269
+ },
1270
+ {
1271
+ "epoch": 1.8205858421480878,
1272
+ "grad_norm": 0.3547162413597107,
1273
+ "learning_rate": 2.186558167570826e-05,
1274
+ "loss": 0.038,
1275
+ "step": 4475
1276
+ },
1277
+ {
1278
+ "epoch": 1.8307567127746136,
1279
+ "grad_norm": 0.5509994626045227,
1280
+ "learning_rate": 2.1677215189873417e-05,
1281
+ "loss": 0.0371,
1282
+ "step": 4500
1283
+ },
1284
+ {
1285
+ "epoch": 1.8409275834011392,
1286
+ "grad_norm": 1.1359673738479614,
1287
+ "learning_rate": 2.1488848704038578e-05,
1288
+ "loss": 0.0321,
1289
+ "step": 4525
1290
+ },
1291
+ {
1292
+ "epoch": 1.8510984540276647,
1293
+ "grad_norm": 0.7981705069541931,
1294
+ "learning_rate": 2.130048221820374e-05,
1295
+ "loss": 0.0369,
1296
+ "step": 4550
1297
+ },
1298
+ {
1299
+ "epoch": 1.8612693246541903,
1300
+ "grad_norm": 0.3582057058811188,
1301
+ "learning_rate": 2.11121157323689e-05,
1302
+ "loss": 0.0412,
1303
+ "step": 4575
1304
+ },
1305
+ {
1306
+ "epoch": 1.871440195280716,
1307
+ "grad_norm": 0.9928992986679077,
1308
+ "learning_rate": 2.0923749246534057e-05,
1309
+ "loss": 0.0352,
1310
+ "step": 4600
1311
+ },
1312
+ {
1313
+ "epoch": 1.8816110659072418,
1314
+ "grad_norm": 0.48575785756111145,
1315
+ "learning_rate": 2.0735382760699218e-05,
1316
+ "loss": 0.035,
1317
+ "step": 4625
1318
+ },
1319
+ {
1320
+ "epoch": 1.8917819365337674,
1321
+ "grad_norm": 0.5365208387374878,
1322
+ "learning_rate": 2.054701627486438e-05,
1323
+ "loss": 0.0379,
1324
+ "step": 4650
1325
+ },
1326
+ {
1327
+ "epoch": 1.901952807160293,
1328
+ "grad_norm": 1.141358494758606,
1329
+ "learning_rate": 2.0358649789029536e-05,
1330
+ "loss": 0.0355,
1331
+ "step": 4675
1332
+ },
1333
+ {
1334
+ "epoch": 1.9121236777868185,
1335
+ "grad_norm": 0.43180742859840393,
1336
+ "learning_rate": 2.0170283303194694e-05,
1337
+ "loss": 0.0354,
1338
+ "step": 4700
1339
+ },
1340
+ {
1341
+ "epoch": 1.922294548413344,
1342
+ "grad_norm": 0.7140740752220154,
1343
+ "learning_rate": 1.9981916817359855e-05,
1344
+ "loss": 0.0355,
1345
+ "step": 4725
1346
+ },
1347
+ {
1348
+ "epoch": 1.9324654190398698,
1349
+ "grad_norm": 0.30647122859954834,
1350
+ "learning_rate": 1.9793550331525016e-05,
1351
+ "loss": 0.0371,
1352
+ "step": 4750
1353
+ },
1354
+ {
1355
+ "epoch": 1.9426362896663956,
1356
+ "grad_norm": 0.42196792364120483,
1357
+ "learning_rate": 1.9605183845690177e-05,
1358
+ "loss": 0.0356,
1359
+ "step": 4775
1360
+ },
1361
+ {
1362
+ "epoch": 1.9528071602929211,
1363
+ "grad_norm": 0.6331903338432312,
1364
+ "learning_rate": 1.9416817359855334e-05,
1365
+ "loss": 0.0352,
1366
+ "step": 4800
1367
+ },
1368
+ {
1369
+ "epoch": 1.9629780309194467,
1370
+ "grad_norm": 0.7057808637619019,
1371
+ "learning_rate": 1.9228450874020495e-05,
1372
+ "loss": 0.0364,
1373
+ "step": 4825
1374
+ },
1375
+ {
1376
+ "epoch": 1.9731489015459722,
1377
+ "grad_norm": 0.49434205889701843,
1378
+ "learning_rate": 1.9040084388185656e-05,
1379
+ "loss": 0.0347,
1380
+ "step": 4850
1381
+ },
1382
+ {
1383
+ "epoch": 1.983319772172498,
1384
+ "grad_norm": 0.3139288127422333,
1385
+ "learning_rate": 1.8851717902350817e-05,
1386
+ "loss": 0.0364,
1387
+ "step": 4875
1388
+ },
1389
+ {
1390
+ "epoch": 1.9934906427990235,
1391
+ "grad_norm": 0.3922992944717407,
1392
+ "learning_rate": 1.8663351416515974e-05,
1393
+ "loss": 0.036,
1394
+ "step": 4900
1395
+ },
1396
+ {
1397
+ "epoch": 2.0,
1398
+ "eval_explained_variance": 0.25461888313293457,
1399
+ "eval_loss": 0.03482421860098839,
1400
+ "eval_mae": 0.14410310983657837,
1401
+ "eval_mse": 0.0348237045109272,
1402
+ "eval_r2": 0.25420135259628296,
1403
+ "eval_rmse": 0.18661110500430353,
1404
+ "eval_runtime": 39.031,
1405
+ "eval_samples_per_second": 503.652,
1406
+ "eval_steps_per_second": 7.891,
1407
+ "step": 4916
1408
+ },
1409
+ {
1410
+ "epoch": 2.0036615134255493,
1411
+ "grad_norm": 0.5134842395782471,
1412
+ "learning_rate": 1.8474984930681132e-05,
1413
+ "loss": 0.0344,
1414
+ "step": 4925
1415
+ },
1416
+ {
1417
+ "epoch": 2.013832384052075,
1418
+ "grad_norm": 0.3377295136451721,
1419
+ "learning_rate": 1.8286618444846293e-05,
1420
+ "loss": 0.0336,
1421
+ "step": 4950
1422
+ },
1423
+ {
1424
+ "epoch": 2.0240032546786004,
1425
+ "grad_norm": 0.3855837285518646,
1426
+ "learning_rate": 1.8098251959011453e-05,
1427
+ "loss": 0.0316,
1428
+ "step": 4975
1429
+ },
1430
+ {
1431
+ "epoch": 2.034174125305126,
1432
+ "grad_norm": 0.4808228313922882,
1433
+ "learning_rate": 1.7909885473176614e-05,
1434
+ "loss": 0.0347,
1435
+ "step": 5000
1436
+ },
1437
+ {
1438
+ "epoch": 2.0443449959316515,
1439
+ "grad_norm": 0.6781342029571533,
1440
+ "learning_rate": 1.7721518987341772e-05,
1441
+ "loss": 0.0366,
1442
+ "step": 5025
1443
+ },
1444
+ {
1445
+ "epoch": 2.0545158665581775,
1446
+ "grad_norm": 0.5457364320755005,
1447
+ "learning_rate": 1.7533152501506933e-05,
1448
+ "loss": 0.0326,
1449
+ "step": 5050
1450
+ },
1451
+ {
1452
+ "epoch": 2.064686737184703,
1453
+ "grad_norm": 1.1539140939712524,
1454
+ "learning_rate": 1.7344786015672094e-05,
1455
+ "loss": 0.032,
1456
+ "step": 5075
1457
+ },
1458
+ {
1459
+ "epoch": 2.0748576078112286,
1460
+ "grad_norm": 0.7635537981987,
1461
+ "learning_rate": 1.7156419529837255e-05,
1462
+ "loss": 0.0318,
1463
+ "step": 5100
1464
+ },
1465
+ {
1466
+ "epoch": 2.085028478437754,
1467
+ "grad_norm": 0.8772742748260498,
1468
+ "learning_rate": 1.6968053044002412e-05,
1469
+ "loss": 0.0337,
1470
+ "step": 5125
1471
+ },
1472
+ {
1473
+ "epoch": 2.0951993490642797,
1474
+ "grad_norm": 0.45236992835998535,
1475
+ "learning_rate": 1.677968655816757e-05,
1476
+ "loss": 0.031,
1477
+ "step": 5150
1478
+ },
1479
+ {
1480
+ "epoch": 2.1053702196908057,
1481
+ "grad_norm": 0.5050310492515564,
1482
+ "learning_rate": 1.659132007233273e-05,
1483
+ "loss": 0.031,
1484
+ "step": 5175
1485
+ },
1486
+ {
1487
+ "epoch": 2.1155410903173313,
1488
+ "grad_norm": 0.442862331867218,
1489
+ "learning_rate": 1.640295358649789e-05,
1490
+ "loss": 0.0342,
1491
+ "step": 5200
1492
+ },
1493
+ {
1494
+ "epoch": 2.125711960943857,
1495
+ "grad_norm": 0.5236470103263855,
1496
+ "learning_rate": 1.6214587100663052e-05,
1497
+ "loss": 0.0372,
1498
+ "step": 5225
1499
+ },
1500
+ {
1501
+ "epoch": 2.1358828315703824,
1502
+ "grad_norm": 0.9813937544822693,
1503
+ "learning_rate": 1.602622061482821e-05,
1504
+ "loss": 0.0326,
1505
+ "step": 5250
1506
+ },
1507
+ {
1508
+ "epoch": 2.146053702196908,
1509
+ "grad_norm": 0.349025696516037,
1510
+ "learning_rate": 1.583785412899337e-05,
1511
+ "loss": 0.0346,
1512
+ "step": 5275
1513
+ },
1514
+ {
1515
+ "epoch": 2.1562245728234335,
1516
+ "grad_norm": 0.35612091422080994,
1517
+ "learning_rate": 1.564948764315853e-05,
1518
+ "loss": 0.0342,
1519
+ "step": 5300
1520
+ },
1521
+ {
1522
+ "epoch": 2.1663954434499595,
1523
+ "grad_norm": 0.5912727117538452,
1524
+ "learning_rate": 1.5461121157323692e-05,
1525
+ "loss": 0.0324,
1526
+ "step": 5325
1527
+ },
1528
+ {
1529
+ "epoch": 2.176566314076485,
1530
+ "grad_norm": 0.2870270609855652,
1531
+ "learning_rate": 1.5272754671488847e-05,
1532
+ "loss": 0.0342,
1533
+ "step": 5350
1534
+ },
1535
+ {
1536
+ "epoch": 2.1867371847030106,
1537
+ "grad_norm": 0.3680706322193146,
1538
+ "learning_rate": 1.5084388185654007e-05,
1539
+ "loss": 0.0329,
1540
+ "step": 5375
1541
+ },
1542
+ {
1543
+ "epoch": 2.196908055329536,
1544
+ "grad_norm": 0.9814783930778503,
1545
+ "learning_rate": 1.4896021699819168e-05,
1546
+ "loss": 0.0293,
1547
+ "step": 5400
1548
+ },
1549
+ {
1550
+ "epoch": 2.2070789259560617,
1551
+ "grad_norm": 0.7239277958869934,
1552
+ "learning_rate": 1.4707655213984328e-05,
1553
+ "loss": 0.0346,
1554
+ "step": 5425
1555
+ },
1556
+ {
1557
+ "epoch": 2.2172497965825873,
1558
+ "grad_norm": 0.44417452812194824,
1559
+ "learning_rate": 1.4519288728149488e-05,
1560
+ "loss": 0.0339,
1561
+ "step": 5450
1562
+ },
1563
+ {
1564
+ "epoch": 2.2274206672091132,
1565
+ "grad_norm": 0.3636336326599121,
1566
+ "learning_rate": 1.4330922242314648e-05,
1567
+ "loss": 0.0327,
1568
+ "step": 5475
1569
+ },
1570
+ {
1571
+ "epoch": 2.237591537835639,
1572
+ "grad_norm": 0.2732349634170532,
1573
+ "learning_rate": 1.4142555756479809e-05,
1574
+ "loss": 0.032,
1575
+ "step": 5500
1576
+ },
1577
+ {
1578
+ "epoch": 2.2477624084621644,
1579
+ "grad_norm": 0.820342481136322,
1580
+ "learning_rate": 1.3954189270644968e-05,
1581
+ "loss": 0.0318,
1582
+ "step": 5525
1583
+ },
1584
+ {
1585
+ "epoch": 2.25793327908869,
1586
+ "grad_norm": 0.31075552105903625,
1587
+ "learning_rate": 1.3765822784810129e-05,
1588
+ "loss": 0.0311,
1589
+ "step": 5550
1590
+ },
1591
+ {
1592
+ "epoch": 2.2681041497152155,
1593
+ "grad_norm": 0.8737571835517883,
1594
+ "learning_rate": 1.3577456298975286e-05,
1595
+ "loss": 0.0356,
1596
+ "step": 5575
1597
+ },
1598
+ {
1599
+ "epoch": 2.2782750203417415,
1600
+ "grad_norm": 0.9981245994567871,
1601
+ "learning_rate": 1.3389089813140445e-05,
1602
+ "loss": 0.0333,
1603
+ "step": 5600
1604
+ },
1605
+ {
1606
+ "epoch": 2.288445890968267,
1607
+ "grad_norm": 0.5384612679481506,
1608
+ "learning_rate": 1.3200723327305606e-05,
1609
+ "loss": 0.0316,
1610
+ "step": 5625
1611
+ },
1612
+ {
1613
+ "epoch": 2.2986167615947926,
1614
+ "grad_norm": 0.6893337965011597,
1615
+ "learning_rate": 1.3012356841470765e-05,
1616
+ "loss": 0.0324,
1617
+ "step": 5650
1618
+ },
1619
+ {
1620
+ "epoch": 2.308787632221318,
1621
+ "grad_norm": 0.449916273355484,
1622
+ "learning_rate": 1.2823990355635926e-05,
1623
+ "loss": 0.0325,
1624
+ "step": 5675
1625
+ },
1626
+ {
1627
+ "epoch": 2.3189585028478437,
1628
+ "grad_norm": 0.38824161887168884,
1629
+ "learning_rate": 1.2635623869801086e-05,
1630
+ "loss": 0.0339,
1631
+ "step": 5700
1632
+ },
1633
+ {
1634
+ "epoch": 2.329129373474369,
1635
+ "grad_norm": 0.7458836436271667,
1636
+ "learning_rate": 1.2447257383966246e-05,
1637
+ "loss": 0.0355,
1638
+ "step": 5725
1639
+ },
1640
+ {
1641
+ "epoch": 2.339300244100895,
1642
+ "grad_norm": 0.47954612970352173,
1643
+ "learning_rate": 1.2258890898131404e-05,
1644
+ "loss": 0.0323,
1645
+ "step": 5750
1646
+ },
1647
+ {
1648
+ "epoch": 2.3494711147274208,
1649
+ "grad_norm": 0.42400848865509033,
1650
+ "learning_rate": 1.2070524412296565e-05,
1651
+ "loss": 0.0302,
1652
+ "step": 5775
1653
+ },
1654
+ {
1655
+ "epoch": 2.3596419853539463,
1656
+ "grad_norm": 1.189965009689331,
1657
+ "learning_rate": 1.1882157926461724e-05,
1658
+ "loss": 0.0338,
1659
+ "step": 5800
1660
+ },
1661
+ {
1662
+ "epoch": 2.369812855980472,
1663
+ "grad_norm": 0.5762277841567993,
1664
+ "learning_rate": 1.1693791440626885e-05,
1665
+ "loss": 0.0332,
1666
+ "step": 5825
1667
+ },
1668
+ {
1669
+ "epoch": 2.3799837266069974,
1670
+ "grad_norm": 0.5994691848754883,
1671
+ "learning_rate": 1.1505424954792044e-05,
1672
+ "loss": 0.0364,
1673
+ "step": 5850
1674
+ },
1675
+ {
1676
+ "epoch": 2.390154597233523,
1677
+ "grad_norm": 0.9533575773239136,
1678
+ "learning_rate": 1.1317058468957203e-05,
1679
+ "loss": 0.0326,
1680
+ "step": 5875
1681
+ },
1682
+ {
1683
+ "epoch": 2.400325467860049,
1684
+ "grad_norm": 0.4238649308681488,
1685
+ "learning_rate": 1.1128691983122364e-05,
1686
+ "loss": 0.034,
1687
+ "step": 5900
1688
+ },
1689
+ {
1690
+ "epoch": 2.4104963384865745,
1691
+ "grad_norm": 0.8726415038108826,
1692
+ "learning_rate": 1.0940325497287523e-05,
1693
+ "loss": 0.0327,
1694
+ "step": 5925
1695
+ },
1696
+ {
1697
+ "epoch": 2.4206672091131,
1698
+ "grad_norm": 0.5922726988792419,
1699
+ "learning_rate": 1.0751959011452683e-05,
1700
+ "loss": 0.0337,
1701
+ "step": 5950
1702
+ },
1703
+ {
1704
+ "epoch": 2.4308380797396256,
1705
+ "grad_norm": 0.3707614839076996,
1706
+ "learning_rate": 1.0563592525617842e-05,
1707
+ "loss": 0.0338,
1708
+ "step": 5975
1709
+ },
1710
+ {
1711
+ "epoch": 2.441008950366151,
1712
+ "grad_norm": 0.4853639602661133,
1713
+ "learning_rate": 1.0375226039783003e-05,
1714
+ "loss": 0.0317,
1715
+ "step": 6000
1716
+ },
1717
+ {
1718
+ "epoch": 2.451179820992677,
1719
+ "grad_norm": 0.8022235631942749,
1720
+ "learning_rate": 1.0186859553948162e-05,
1721
+ "loss": 0.032,
1722
+ "step": 6025
1723
+ },
1724
+ {
1725
+ "epoch": 2.4613506916192027,
1726
+ "grad_norm": 0.8553130030632019,
1727
+ "learning_rate": 9.998493068113323e-06,
1728
+ "loss": 0.0312,
1729
+ "step": 6050
1730
+ },
1731
+ {
1732
+ "epoch": 2.4715215622457283,
1733
+ "grad_norm": 0.4112774431705475,
1734
+ "learning_rate": 9.81012658227848e-06,
1735
+ "loss": 0.0349,
1736
+ "step": 6075
1737
+ },
1738
+ {
1739
+ "epoch": 2.481692432872254,
1740
+ "grad_norm": 0.8546609282493591,
1741
+ "learning_rate": 9.621760096443641e-06,
1742
+ "loss": 0.0332,
1743
+ "step": 6100
1744
+ },
1745
+ {
1746
+ "epoch": 2.4918633034987794,
1747
+ "grad_norm": 0.8445001840591431,
1748
+ "learning_rate": 9.4333936106088e-06,
1749
+ "loss": 0.0324,
1750
+ "step": 6125
1751
+ },
1752
+ {
1753
+ "epoch": 2.5020341741253054,
1754
+ "grad_norm": 0.4580422043800354,
1755
+ "learning_rate": 9.245027124773961e-06,
1756
+ "loss": 0.0346,
1757
+ "step": 6150
1758
+ },
1759
+ {
1760
+ "epoch": 2.5122050447518305,
1761
+ "grad_norm": 0.6121585369110107,
1762
+ "learning_rate": 9.05666063893912e-06,
1763
+ "loss": 0.0345,
1764
+ "step": 6175
1765
+ },
1766
+ {
1767
+ "epoch": 2.5223759153783565,
1768
+ "grad_norm": 0.5637044906616211,
1769
+ "learning_rate": 8.86829415310428e-06,
1770
+ "loss": 0.0315,
1771
+ "step": 6200
1772
+ },
1773
+ {
1774
+ "epoch": 2.532546786004882,
1775
+ "grad_norm": 0.6579483151435852,
1776
+ "learning_rate": 8.67992766726944e-06,
1777
+ "loss": 0.0345,
1778
+ "step": 6225
1779
+ },
1780
+ {
1781
+ "epoch": 2.5427176566314076,
1782
+ "grad_norm": 0.30682843923568726,
1783
+ "learning_rate": 8.499095840867993e-06,
1784
+ "loss": 0.0334,
1785
+ "step": 6250
1786
+ },
1787
+ {
1788
+ "epoch": 2.552888527257933,
1789
+ "grad_norm": 1.7261478900909424,
1790
+ "learning_rate": 8.310729355033153e-06,
1791
+ "loss": 0.0337,
1792
+ "step": 6275
1793
+ },
1794
+ {
1795
+ "epoch": 2.5630593978844587,
1796
+ "grad_norm": 0.7609931826591492,
1797
+ "learning_rate": 8.122362869198312e-06,
1798
+ "loss": 0.0329,
1799
+ "step": 6300
1800
+ },
1801
+ {
1802
+ "epoch": 2.5732302685109847,
1803
+ "grad_norm": 1.1947487592697144,
1804
+ "learning_rate": 7.933996383363473e-06,
1805
+ "loss": 0.0338,
1806
+ "step": 6325
1807
+ },
1808
+ {
1809
+ "epoch": 2.5834011391375102,
1810
+ "grad_norm": 0.5045105814933777,
1811
+ "learning_rate": 7.745629897528632e-06,
1812
+ "loss": 0.0336,
1813
+ "step": 6350
1814
+ },
1815
+ {
1816
+ "epoch": 2.593572009764036,
1817
+ "grad_norm": 0.8998399972915649,
1818
+ "learning_rate": 7.557263411693792e-06,
1819
+ "loss": 0.0334,
1820
+ "step": 6375
1821
+ },
1822
+ {
1823
+ "epoch": 2.6037428803905613,
1824
+ "grad_norm": 0.3800385594367981,
1825
+ "learning_rate": 7.368896925858952e-06,
1826
+ "loss": 0.0306,
1827
+ "step": 6400
1828
+ },
1829
+ {
1830
+ "epoch": 2.613913751017087,
1831
+ "grad_norm": 0.35073891282081604,
1832
+ "learning_rate": 7.180530440024111e-06,
1833
+ "loss": 0.0342,
1834
+ "step": 6425
1835
+ },
1836
+ {
1837
+ "epoch": 2.624084621643613,
1838
+ "grad_norm": 0.35614126920700073,
1839
+ "learning_rate": 6.992163954189271e-06,
1840
+ "loss": 0.0317,
1841
+ "step": 6450
1842
+ },
1843
+ {
1844
+ "epoch": 2.6342554922701384,
1845
+ "grad_norm": 1.0959842205047607,
1846
+ "learning_rate": 6.8037974683544305e-06,
1847
+ "loss": 0.0328,
1848
+ "step": 6475
1849
+ },
1850
+ {
1851
+ "epoch": 2.644426362896664,
1852
+ "grad_norm": 0.9010970592498779,
1853
+ "learning_rate": 6.6154309825195905e-06,
1854
+ "loss": 0.0364,
1855
+ "step": 6500
1856
+ },
1857
+ {
1858
+ "epoch": 2.6545972335231895,
1859
+ "grad_norm": 0.8300909996032715,
1860
+ "learning_rate": 6.42706449668475e-06,
1861
+ "loss": 0.0312,
1862
+ "step": 6525
1863
+ },
1864
+ {
1865
+ "epoch": 2.664768104149715,
1866
+ "grad_norm": 0.7244754433631897,
1867
+ "learning_rate": 6.23869801084991e-06,
1868
+ "loss": 0.0319,
1869
+ "step": 6550
1870
+ },
1871
+ {
1872
+ "epoch": 2.674938974776241,
1873
+ "grad_norm": 1.3230552673339844,
1874
+ "learning_rate": 6.05033152501507e-06,
1875
+ "loss": 0.0328,
1876
+ "step": 6575
1877
+ },
1878
+ {
1879
+ "epoch": 2.685109845402766,
1880
+ "grad_norm": 0.437537282705307,
1881
+ "learning_rate": 5.861965039180229e-06,
1882
+ "loss": 0.0325,
1883
+ "step": 6600
1884
+ },
1885
+ {
1886
+ "epoch": 2.695280716029292,
1887
+ "grad_norm": 0.4210902154445648,
1888
+ "learning_rate": 5.673598553345389e-06,
1889
+ "loss": 0.0362,
1890
+ "step": 6625
1891
+ },
1892
+ {
1893
+ "epoch": 2.7054515866558178,
1894
+ "grad_norm": 0.3914755880832672,
1895
+ "learning_rate": 5.485232067510549e-06,
1896
+ "loss": 0.0329,
1897
+ "step": 6650
1898
+ },
1899
+ {
1900
+ "epoch": 2.7156224572823433,
1901
+ "grad_norm": 0.9759465456008911,
1902
+ "learning_rate": 5.296865581675708e-06,
1903
+ "loss": 0.0343,
1904
+ "step": 6675
1905
+ },
1906
+ {
1907
+ "epoch": 2.725793327908869,
1908
+ "grad_norm": 0.34633737802505493,
1909
+ "learning_rate": 5.108499095840868e-06,
1910
+ "loss": 0.0308,
1911
+ "step": 6700
1912
+ },
1913
+ {
1914
+ "epoch": 2.7359641985353944,
1915
+ "grad_norm": 0.5408746600151062,
1916
+ "learning_rate": 4.9201326100060275e-06,
1917
+ "loss": 0.0285,
1918
+ "step": 6725
1919
+ },
1920
+ {
1921
+ "epoch": 2.7461350691619204,
1922
+ "grad_norm": 0.3921310007572174,
1923
+ "learning_rate": 4.7317661241711876e-06,
1924
+ "loss": 0.0307,
1925
+ "step": 6750
1926
+ },
1927
+ {
1928
+ "epoch": 2.756305939788446,
1929
+ "grad_norm": 0.6094385981559753,
1930
+ "learning_rate": 4.543399638336348e-06,
1931
+ "loss": 0.0303,
1932
+ "step": 6775
1933
+ },
1934
+ {
1935
+ "epoch": 2.7664768104149715,
1936
+ "grad_norm": 0.5900077819824219,
1937
+ "learning_rate": 4.355033152501508e-06,
1938
+ "loss": 0.0355,
1939
+ "step": 6800
1940
+ },
1941
+ {
1942
+ "epoch": 2.776647681041497,
1943
+ "grad_norm": 0.4339945912361145,
1944
+ "learning_rate": 4.166666666666667e-06,
1945
+ "loss": 0.0351,
1946
+ "step": 6825
1947
+ },
1948
+ {
1949
+ "epoch": 2.7868185516680226,
1950
+ "grad_norm": 0.9042001962661743,
1951
+ "learning_rate": 3.978300180831827e-06,
1952
+ "loss": 0.033,
1953
+ "step": 6850
1954
+ },
1955
+ {
1956
+ "epoch": 2.7969894222945486,
1957
+ "grad_norm": 0.5715941190719604,
1958
+ "learning_rate": 3.789933694996986e-06,
1959
+ "loss": 0.0325,
1960
+ "step": 6875
1961
+ },
1962
+ {
1963
+ "epoch": 2.807160292921074,
1964
+ "grad_norm": 0.40120917558670044,
1965
+ "learning_rate": 3.601567209162146e-06,
1966
+ "loss": 0.0324,
1967
+ "step": 6900
1968
+ },
1969
+ {
1970
+ "epoch": 2.8173311635475997,
1971
+ "grad_norm": 0.636159360408783,
1972
+ "learning_rate": 3.413200723327306e-06,
1973
+ "loss": 0.0311,
1974
+ "step": 6925
1975
+ },
1976
+ {
1977
+ "epoch": 2.8275020341741253,
1978
+ "grad_norm": 0.79677414894104,
1979
+ "learning_rate": 3.2248342374924654e-06,
1980
+ "loss": 0.0298,
1981
+ "step": 6950
1982
+ },
1983
+ {
1984
+ "epoch": 2.837672904800651,
1985
+ "grad_norm": 0.6220082640647888,
1986
+ "learning_rate": 3.036467751657625e-06,
1987
+ "loss": 0.0315,
1988
+ "step": 6975
1989
+ },
1990
+ {
1991
+ "epoch": 2.847843775427177,
1992
+ "grad_norm": 0.4538786709308624,
1993
+ "learning_rate": 2.848101265822785e-06,
1994
+ "loss": 0.032,
1995
+ "step": 7000
1996
+ },
1997
+ {
1998
+ "epoch": 2.858014646053702,
1999
+ "grad_norm": 0.44975048303604126,
2000
+ "learning_rate": 2.6597347799879447e-06,
2001
+ "loss": 0.0314,
2002
+ "step": 7025
2003
+ },
2004
+ {
2005
+ "epoch": 2.868185516680228,
2006
+ "grad_norm": 0.2438650280237198,
2007
+ "learning_rate": 2.4713682941531043e-06,
2008
+ "loss": 0.0322,
2009
+ "step": 7050
2010
+ },
2011
+ {
2012
+ "epoch": 2.8783563873067535,
2013
+ "grad_norm": 0.9189873337745667,
2014
+ "learning_rate": 2.2830018083182644e-06,
2015
+ "loss": 0.0315,
2016
+ "step": 7075
2017
+ },
2018
+ {
2019
+ "epoch": 2.888527257933279,
2020
+ "grad_norm": 0.31788191199302673,
2021
+ "learning_rate": 2.094635322483424e-06,
2022
+ "loss": 0.0287,
2023
+ "step": 7100
2024
+ },
2025
+ {
2026
+ "epoch": 2.8986981285598046,
2027
+ "grad_norm": 0.7033805847167969,
2028
+ "learning_rate": 1.9062688366485836e-06,
2029
+ "loss": 0.0331,
2030
+ "step": 7125
2031
+ },
2032
+ {
2033
+ "epoch": 2.90886899918633,
2034
+ "grad_norm": 0.3187176287174225,
2035
+ "learning_rate": 1.7179023508137434e-06,
2036
+ "loss": 0.0349,
2037
+ "step": 7150
2038
+ },
2039
+ {
2040
+ "epoch": 2.919039869812856,
2041
+ "grad_norm": 0.3502849042415619,
2042
+ "learning_rate": 1.529535864978903e-06,
2043
+ "loss": 0.0314,
2044
+ "step": 7175
2045
+ },
2046
+ {
2047
+ "epoch": 2.9292107404393817,
2048
+ "grad_norm": 0.38132113218307495,
2049
+ "learning_rate": 1.3411693791440627e-06,
2050
+ "loss": 0.0307,
2051
+ "step": 7200
2052
+ },
2053
+ {
2054
+ "epoch": 2.9393816110659072,
2055
+ "grad_norm": 0.335792601108551,
2056
+ "learning_rate": 1.1528028933092225e-06,
2057
+ "loss": 0.0329,
2058
+ "step": 7225
2059
+ },
2060
+ {
2061
+ "epoch": 2.949552481692433,
2062
+ "grad_norm": 0.43150436878204346,
2063
+ "learning_rate": 9.644364074743821e-07,
2064
+ "loss": 0.0279,
2065
+ "step": 7250
2066
+ },
2067
+ {
2068
+ "epoch": 2.9597233523189583,
2069
+ "grad_norm": 0.43568554520606995,
2070
+ "learning_rate": 7.76069921639542e-07,
2071
+ "loss": 0.0297,
2072
+ "step": 7275
2073
+ },
2074
+ {
2075
+ "epoch": 2.9698942229454843,
2076
+ "grad_norm": 0.2997362017631531,
2077
+ "learning_rate": 5.877034358047017e-07,
2078
+ "loss": 0.0315,
2079
+ "step": 7300
2080
+ },
2081
+ {
2082
+ "epoch": 2.98006509357201,
2083
+ "grad_norm": 0.555476725101471,
2084
+ "learning_rate": 3.993369499698613e-07,
2085
+ "loss": 0.0296,
2086
+ "step": 7325
2087
+ },
2088
+ {
2089
+ "epoch": 2.9902359641985354,
2090
+ "grad_norm": 0.31480032205581665,
2091
+ "learning_rate": 2.1097046413502108e-07,
2092
+ "loss": 0.0334,
2093
+ "step": 7350
2094
+ },
2095
+ {
2096
+ "epoch": 3.0,
2097
+ "eval_explained_variance": 0.273209810256958,
2098
+ "eval_loss": 0.03396870195865631,
2099
+ "eval_mae": 0.14112502336502075,
2100
+ "eval_mse": 0.03396843746304512,
2101
+ "eval_r2": 0.2725181579589844,
2102
+ "eval_rmse": 0.18430528332916862,
2103
+ "eval_runtime": 39.0131,
2104
+ "eval_samples_per_second": 503.882,
2105
+ "eval_steps_per_second": 7.895,
2106
+ "step": 7374
2107
+ }
2108
+ ],
2109
+ "logging_steps": 25,
2110
+ "max_steps": 7374,
2111
+ "num_input_tokens_seen": 0,
2112
+ "num_train_epochs": 3,
2113
+ "save_steps": 500,
2114
+ "stateful_callbacks": {
2115
+ "EarlyStoppingCallback": {
2116
+ "args": {
2117
+ "early_stopping_patience": 5,
2118
+ "early_stopping_threshold": 0.01
2119
+ },
2120
+ "attributes": {
2121
+ "early_stopping_patience_counter": 2
2122
+ }
2123
+ },
2124
+ "TrainerControl": {
2125
+ "args": {
2126
+ "should_epoch_stop": false,
2127
+ "should_evaluate": false,
2128
+ "should_log": false,
2129
+ "should_save": true,
2130
+ "should_training_stop": true
2131
+ },
2132
+ "attributes": {}
2133
+ }
2134
+ },
2135
+ "total_flos": 1.937435559513293e+16,
2136
+ "train_batch_size": 8,
2137
+ "trial_name": null,
2138
+ "trial_params": null
2139
+ }
checkpoint-7374/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:48650044b1efcc5cbfbdc6ba568b695fdd8577f68a6f2cd3a98c3c5b1b5be2c4
3
+ size 5368
config.json ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "allenai/longformer-base-4096",
3
+ "_num_labels": 1,
4
+ "architectures": [
5
+ "LongformerForSequenceClassification"
6
+ ],
7
+ "attention_mode": "longformer",
8
+ "attention_probs_dropout_prob": 0.1,
9
+ "attention_window": [
10
+ 512,
11
+ 512,
12
+ 512,
13
+ 512,
14
+ 512,
15
+ 512,
16
+ 512,
17
+ 512,
18
+ 512,
19
+ 512,
20
+ 512,
21
+ 512
22
+ ],
23
+ "bos_token_id": 0,
24
+ "eos_token_id": 2,
25
+ "gradient_checkpointing": false,
26
+ "hidden_act": "gelu",
27
+ "hidden_dropout_prob": 0.1,
28
+ "hidden_size": 768,
29
+ "id2label": {
30
+ "0": "target"
31
+ },
32
+ "ignore_attention_mask": false,
33
+ "initializer_range": 0.02,
34
+ "intermediate_size": 3072,
35
+ "label2id": {
36
+ "target": 0
37
+ },
38
+ "layer_norm_eps": 1e-05,
39
+ "max_position_embeddings": 4098,
40
+ "model_type": "longformer",
41
+ "num_attention_heads": 12,
42
+ "num_hidden_layers": 12,
43
+ "onnx_export": false,
44
+ "pad_token_id": 1,
45
+ "problem_type": "regression",
46
+ "sep_token_id": 2,
47
+ "torch_dtype": "float32",
48
+ "transformers_version": "4.48.0",
49
+ "type_vocab_size": 1,
50
+ "vocab_size": 50265
51
+ }
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9242fc9cfb1524a85761f4f00c1b4ff8956fb34fb58246044b0f95b84eb92733
3
+ size 594675108
runs/Apr04_07-57-07_r-samtuckervegan-autotrain-advanced-3lepu8o1-e749d-y1ybo/events.out.tfevents.1743753431.r-samtuckervegan-autotrain-advanced-3lepu8o1-e749d-y1ybo.216.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4fc5f0e4a2dc0975a4ef0900d090a8393a9d009389d6edf1d5f4d125cfde19a4
3
- size 63364
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:250f9914259741f9fa3fcc5fdde59155eb9579c874d609546a17e8c87b325d55
3
+ size 69303
runs/Apr04_07-57-07_r-samtuckervegan-autotrain-advanced-3lepu8o1-e749d-y1ybo/events.out.tfevents.1743756139.r-samtuckervegan-autotrain-advanced-3lepu8o1-e749d-y1ybo.216.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f2b5cf7b48ec8903d8ed75029ec22b5e1cc4c561e160df0503fdabf63c47543b
3
+ size 609
special_tokens_map.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<s>",
3
+ "cls_token": "<s>",
4
+ "eos_token": "</s>",
5
+ "mask_token": {
6
+ "content": "<mask>",
7
+ "lstrip": true,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false
11
+ },
12
+ "pad_token": "<pad>",
13
+ "sep_token": "</s>",
14
+ "unk_token": "<unk>"
15
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "added_tokens_decoder": {
4
+ "0": {
5
+ "content": "<s>",
6
+ "lstrip": false,
7
+ "normalized": true,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ },
12
+ "1": {
13
+ "content": "<pad>",
14
+ "lstrip": false,
15
+ "normalized": true,
16
+ "rstrip": false,
17
+ "single_word": false,
18
+ "special": true
19
+ },
20
+ "2": {
21
+ "content": "</s>",
22
+ "lstrip": false,
23
+ "normalized": true,
24
+ "rstrip": false,
25
+ "single_word": false,
26
+ "special": true
27
+ },
28
+ "3": {
29
+ "content": "<unk>",
30
+ "lstrip": false,
31
+ "normalized": true,
32
+ "rstrip": false,
33
+ "single_word": false,
34
+ "special": true
35
+ },
36
+ "50264": {
37
+ "content": "<mask>",
38
+ "lstrip": true,
39
+ "normalized": false,
40
+ "rstrip": false,
41
+ "single_word": false,
42
+ "special": true
43
+ }
44
+ },
45
+ "bos_token": "<s>",
46
+ "clean_up_tokenization_spaces": false,
47
+ "cls_token": "<s>",
48
+ "eos_token": "</s>",
49
+ "errors": "replace",
50
+ "extra_special_tokens": {},
51
+ "mask_token": "<mask>",
52
+ "model_max_length": 1000000000000000019884624838656,
53
+ "pad_token": "<pad>",
54
+ "sep_token": "</s>",
55
+ "tokenizer_class": "LongformerTokenizer",
56
+ "trim_offsets": true,
57
+ "unk_token": "<unk>"
58
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:48650044b1efcc5cbfbdc6ba568b695fdd8577f68a6f2cd3a98c3c5b1b5be2c4
3
+ size 5368
training_params.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "data_path": "samtuckervegan/text_performance",
3
+ "model": "allenai/longformer-base-4096",
4
+ "lr": 5e-05,
5
+ "epochs": 3,
6
+ "max_seq_length": 128,
7
+ "batch_size": 8,
8
+ "warmup_ratio": 0.1,
9
+ "gradient_accumulation": 1,
10
+ "optimizer": "adamw_torch",
11
+ "scheduler": "linear",
12
+ "weight_decay": 0.0,
13
+ "max_grad_norm": 1.0,
14
+ "seed": 42,
15
+ "train_split": "train",
16
+ "valid_split": "test",
17
+ "text_column": "text",
18
+ "target_column": "target",
19
+ "logging_steps": -1,
20
+ "project_name": "text-performance-longformer",
21
+ "auto_find_batch_size": false,
22
+ "mixed_precision": "fp16",
23
+ "save_total_limit": 1,
24
+ "push_to_hub": true,
25
+ "eval_strategy": "epoch",
26
+ "username": "samtuckervegan",
27
+ "log": "tensorboard",
28
+ "early_stopping_patience": 5,
29
+ "early_stopping_threshold": 0.01
30
+ }
vocab.json ADDED
The diff for this file is too large to render. See raw diff