mtzig committed on
Commit
b79a0ee
·
verified ·
1 Parent(s): 58790f0

End of training: push final checkpoint

Browse files
Files changed (4) hide show
  1. README.md +1 -1
  2. all_results.json +15 -0
  3. train_results.json +15 -0
  4. trainer_state.json +203 -0
README.md CHANGED
@@ -14,7 +14,7 @@ should probably proofread and complete it, then remove this comment. -->
14
 
15
  This model is a fine-tuned version of [](https://huggingface.co/) on the None dataset.
16
  It achieves the following results on the evaluation set:
17
- - Loss: 11.9038
18
 
19
  ## Model description
20
 
 
14
 
15
  This model is a fine-tuned version of [](https://huggingface.co/) on the None dataset.
16
  It achieves the following results on the evaluation set:
17
+ - Loss: 11.9037
18
 
19
  ## Model description
20
 
all_results.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 0.9922480620155039,
3
+ "eval_epoch": 0.9922480620155039,
4
+ "eval_eval_loss": 11.903702735900879,
5
+ "eval_eval_runtime": 0.5035,
6
+ "eval_eval_samples_per_second": 9.931,
7
+ "eval_eval_steps_per_second": 3.972,
8
+ "eval_perplexity": 147812.92543935214,
9
+ "total_flos": 26286684635136.0,
10
+ "train_loss": 95.25646018981934,
11
+ "train_runtime": 18.4927,
12
+ "train_samples": 515,
13
+ "train_samples_per_second": 27.849,
14
+ "train_steps_per_second": 0.865
15
+ }
train_results.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 0.9922480620155039,
3
+ "eval_epoch": 0.9922480620155039,
4
+ "eval_eval_loss": 11.903702735900879,
5
+ "eval_eval_runtime": 0.5035,
6
+ "eval_eval_samples_per_second": 9.931,
7
+ "eval_eval_steps_per_second": 3.972,
8
+ "eval_perplexity": 147812.92543935214,
9
+ "total_flos": 26286684635136.0,
10
+ "train_loss": 95.25646018981934,
11
+ "train_runtime": 18.4927,
12
+ "train_samples": 515,
13
+ "train_samples_per_second": 27.849,
14
+ "train_steps_per_second": 0.865
15
+ }
trainer_state.json ADDED
@@ -0,0 +1,203 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": null,
3
+ "best_metric": null,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 0.9922480620155039,
6
+ "eval_steps": 3,
7
+ "global_step": 16,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.06201550387596899,
14
+ "grad_norm": 1.324246883392334,
15
+ "learning_rate": 0.0,
16
+ "loss": 95.471,
17
+ "step": 1
18
+ },
19
+ {
20
+ "epoch": 0.12403100775193798,
21
+ "grad_norm": 1.289596676826477,
22
+ "learning_rate": 0.0002,
23
+ "loss": 95.437,
24
+ "step": 2
25
+ },
26
+ {
27
+ "epoch": 0.18604651162790697,
28
+ "grad_norm": 1.3081743717193604,
29
+ "learning_rate": 0.00019781476007338058,
30
+ "loss": 95.3922,
31
+ "step": 3
32
+ },
33
+ {
34
+ "epoch": 0.18604651162790697,
35
+ "eval_loss": 11.929765701293945,
36
+ "eval_runtime": 0.1016,
37
+ "eval_samples_per_second": 49.2,
38
+ "eval_steps_per_second": 19.68,
39
+ "step": 3
40
+ },
41
+ {
42
+ "epoch": 0.24806201550387597,
43
+ "grad_norm": 1.3165432214736938,
44
+ "learning_rate": 0.0001913545457642601,
45
+ "loss": 95.3637,
46
+ "step": 4
47
+ },
48
+ {
49
+ "epoch": 0.31007751937984496,
50
+ "grad_norm": 1.312530755996704,
51
+ "learning_rate": 0.00018090169943749476,
52
+ "loss": 95.3947,
53
+ "step": 5
54
+ },
55
+ {
56
+ "epoch": 0.37209302325581395,
57
+ "grad_norm": 1.3151068687438965,
58
+ "learning_rate": 0.00016691306063588583,
59
+ "loss": 95.3294,
60
+ "step": 6
61
+ },
62
+ {
63
+ "epoch": 0.37209302325581395,
64
+ "eval_loss": 11.918438911437988,
65
+ "eval_runtime": 0.0905,
66
+ "eval_samples_per_second": 55.233,
67
+ "eval_steps_per_second": 22.093,
68
+ "step": 6
69
+ },
70
+ {
71
+ "epoch": 0.43410852713178294,
72
+ "grad_norm": 1.3396779298782349,
73
+ "learning_rate": 0.00015000000000000001,
74
+ "loss": 95.2012,
75
+ "step": 7
76
+ },
77
+ {
78
+ "epoch": 0.49612403100775193,
79
+ "grad_norm": 1.3890634775161743,
80
+ "learning_rate": 0.00013090169943749476,
81
+ "loss": 95.2384,
82
+ "step": 8
83
+ },
84
+ {
85
+ "epoch": 0.5581395348837209,
86
+ "grad_norm": 1.3167331218719482,
87
+ "learning_rate": 0.00011045284632676536,
88
+ "loss": 95.1952,
89
+ "step": 9
90
+ },
91
+ {
92
+ "epoch": 0.5581395348837209,
93
+ "eval_loss": 11.909274101257324,
94
+ "eval_runtime": 0.083,
95
+ "eval_samples_per_second": 60.261,
96
+ "eval_steps_per_second": 24.104,
97
+ "step": 9
98
+ },
99
+ {
100
+ "epoch": 0.6201550387596899,
101
+ "grad_norm": 1.3647342920303345,
102
+ "learning_rate": 8.954715367323468e-05,
103
+ "loss": 95.1965,
104
+ "step": 10
105
+ },
106
+ {
107
+ "epoch": 0.6821705426356589,
108
+ "grad_norm": 1.311335563659668,
109
+ "learning_rate": 6.909830056250527e-05,
110
+ "loss": 95.1483,
111
+ "step": 11
112
+ },
113
+ {
114
+ "epoch": 0.7441860465116279,
115
+ "grad_norm": 1.2832609415054321,
116
+ "learning_rate": 5.000000000000002e-05,
117
+ "loss": 95.1862,
118
+ "step": 12
119
+ },
120
+ {
121
+ "epoch": 0.7441860465116279,
122
+ "eval_loss": 11.905265808105469,
123
+ "eval_runtime": 0.0675,
124
+ "eval_samples_per_second": 74.071,
125
+ "eval_steps_per_second": 29.628,
126
+ "step": 12
127
+ },
128
+ {
129
+ "epoch": 0.8062015503875969,
130
+ "grad_norm": 1.3671646118164062,
131
+ "learning_rate": 3.308693936411421e-05,
132
+ "loss": 95.1522,
133
+ "step": 13
134
+ },
135
+ {
136
+ "epoch": 0.8682170542635659,
137
+ "grad_norm": 1.2626255750656128,
138
+ "learning_rate": 1.9098300562505266e-05,
139
+ "loss": 95.1792,
140
+ "step": 14
141
+ },
142
+ {
143
+ "epoch": 0.9302325581395349,
144
+ "grad_norm": 1.3233616352081299,
145
+ "learning_rate": 8.645454235739903e-06,
146
+ "loss": 95.0949,
147
+ "step": 15
148
+ },
149
+ {
150
+ "epoch": 0.9302325581395349,
151
+ "eval_loss": 11.903751373291016,
152
+ "eval_runtime": 0.0733,
153
+ "eval_samples_per_second": 68.257,
154
+ "eval_steps_per_second": 27.303,
155
+ "step": 15
156
+ },
157
+ {
158
+ "epoch": 0.9922480620155039,
159
+ "grad_norm": 1.3294514417648315,
160
+ "learning_rate": 2.1852399266194314e-06,
161
+ "loss": 95.1233,
162
+ "step": 16
163
+ },
164
+ {
165
+ "epoch": 0.9922480620155039,
166
+ "step": 16,
167
+ "total_flos": 26286684635136.0,
168
+ "train_loss": 95.25646018981934,
169
+ "train_runtime": 18.4927,
170
+ "train_samples_per_second": 27.849,
171
+ "train_steps_per_second": 0.865
172
+ },
173
+ {
174
+ "epoch": 0.9922480620155039,
175
+ "eval_loss": 11.903702735900879,
176
+ "eval_runtime": 0.5035,
177
+ "eval_samples_per_second": 9.931,
178
+ "eval_steps_per_second": 3.972,
179
+ "step": 16
180
+ }
181
+ ],
182
+ "logging_steps": 1,
183
+ "max_steps": 16,
184
+ "num_input_tokens_seen": 0,
185
+ "num_train_epochs": 1,
186
+ "save_steps": 2000,
187
+ "stateful_callbacks": {
188
+ "TrainerControl": {
189
+ "args": {
190
+ "should_epoch_stop": false,
191
+ "should_evaluate": false,
192
+ "should_log": false,
193
+ "should_save": true,
194
+ "should_training_stop": true
195
+ },
196
+ "attributes": {}
197
+ }
198
+ },
199
+ "total_flos": 26286684635136.0,
200
+ "train_batch_size": 4,
201
+ "trial_name": null,
202
+ "trial_params": null
203
+ }