MrMoeeee commited on
Commit
dd6ab4a
·
verified ·
1 Parent(s): c1c0a99

Upload logs/checkpoint-250/trainer_state.json with huggingface_hub

Browse files
logs/checkpoint-250/trainer_state.json ADDED
@@ -0,0 +1,258 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 250,
3
+ "best_metric": 0.03154641017317772,
4
+ "best_model_checkpoint": "/workspace/lampAI/finetuning/outputs/lamp-qwen-1.5b-full/checkpoint-250",
5
+ "epoch": 1.7619047619047619,
6
+ "eval_steps": 50,
7
+ "global_step": 250,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.07054673721340388,
14
+ "grad_norm": 4.5,
15
+ "learning_rate": 1.267605633802817e-05,
16
+ "loss": 1.1558,
17
+ "step": 10
18
+ },
19
+ {
20
+ "epoch": 0.14109347442680775,
21
+ "grad_norm": 1.1953125,
22
+ "learning_rate": 2.676056338028169e-05,
23
+ "loss": 0.4274,
24
+ "step": 20
25
+ },
26
+ {
27
+ "epoch": 0.21164021164021163,
28
+ "grad_norm": 0.302734375,
29
+ "learning_rate": 4.0845070422535214e-05,
30
+ "loss": 0.0549,
31
+ "step": 30
32
+ },
33
+ {
34
+ "epoch": 0.2821869488536155,
35
+ "grad_norm": 1.21875,
36
+ "learning_rate": 5.492957746478874e-05,
37
+ "loss": 0.0471,
38
+ "step": 40
39
+ },
40
+ {
41
+ "epoch": 0.3527336860670194,
42
+ "grad_norm": 0.2119140625,
43
+ "learning_rate": 6.901408450704226e-05,
44
+ "loss": 0.0402,
45
+ "step": 50
46
+ },
47
+ {
48
+ "epoch": 0.3527336860670194,
49
+ "eval_loss": 0.03969533368945122,
50
+ "eval_runtime": 6.3012,
51
+ "eval_samples_per_second": 40.151,
52
+ "eval_steps_per_second": 10.157,
53
+ "step": 50
54
+ },
55
+ {
56
+ "epoch": 0.42328042328042326,
57
+ "grad_norm": 0.193359375,
58
+ "learning_rate": 8.309859154929578e-05,
59
+ "loss": 0.0377,
60
+ "step": 60
61
+ },
62
+ {
63
+ "epoch": 0.49382716049382713,
64
+ "grad_norm": 0.1787109375,
65
+ "learning_rate": 9.718309859154931e-05,
66
+ "loss": 0.0343,
67
+ "step": 70
68
+ },
69
+ {
70
+ "epoch": 0.564373897707231,
71
+ "grad_norm": 0.19921875,
72
+ "learning_rate": 0.00011126760563380282,
73
+ "loss": 0.0356,
74
+ "step": 80
75
+ },
76
+ {
77
+ "epoch": 0.6349206349206349,
78
+ "grad_norm": 3.1875,
79
+ "learning_rate": 0.00012535211267605635,
80
+ "loss": 0.0855,
81
+ "step": 90
82
+ },
83
+ {
84
+ "epoch": 0.7054673721340388,
85
+ "grad_norm": 0.66796875,
86
+ "learning_rate": 0.00013943661971830987,
87
+ "loss": 0.0868,
88
+ "step": 100
89
+ },
90
+ {
91
+ "epoch": 0.7054673721340388,
92
+ "eval_loss": 0.05627487599849701,
93
+ "eval_runtime": 6.311,
94
+ "eval_samples_per_second": 40.089,
95
+ "eval_steps_per_second": 10.141,
96
+ "step": 100
97
+ },
98
+ {
99
+ "epoch": 0.7760141093474426,
100
+ "grad_norm": 0.8046875,
101
+ "learning_rate": 0.00015352112676056339,
102
+ "loss": 0.0538,
103
+ "step": 110
104
+ },
105
+ {
106
+ "epoch": 0.8465608465608465,
107
+ "grad_norm": 0.5234375,
108
+ "learning_rate": 0.0001676056338028169,
109
+ "loss": 0.0561,
110
+ "step": 120
111
+ },
112
+ {
113
+ "epoch": 0.9171075837742504,
114
+ "grad_norm": 1.8828125,
115
+ "learning_rate": 0.00018169014084507045,
116
+ "loss": 0.0641,
117
+ "step": 130
118
+ },
119
+ {
120
+ "epoch": 0.9876543209876543,
121
+ "grad_norm": 37.0,
122
+ "learning_rate": 0.00019577464788732396,
123
+ "loss": 0.0863,
124
+ "step": 140
125
+ },
126
+ {
127
+ "epoch": 1.056437389770723,
128
+ "grad_norm": 1.0546875,
129
+ "learning_rate": 0.00019999667815369528,
130
+ "loss": 0.3147,
131
+ "step": 150
132
+ },
133
+ {
134
+ "epoch": 1.056437389770723,
135
+ "eval_loss": 0.10409189015626907,
136
+ "eval_runtime": 6.0488,
137
+ "eval_samples_per_second": 41.826,
138
+ "eval_steps_per_second": 10.581,
139
+ "step": 150
140
+ },
141
+ {
142
+ "epoch": 1.126984126984127,
143
+ "grad_norm": 0.8671875,
144
+ "learning_rate": 0.00019998040841735952,
145
+ "loss": 0.0868,
146
+ "step": 160
147
+ },
148
+ {
149
+ "epoch": 1.1975308641975309,
150
+ "grad_norm": 0.2109375,
151
+ "learning_rate": 0.00019995058285912832,
152
+ "loss": 0.0527,
153
+ "step": 170
154
+ },
155
+ {
156
+ "epoch": 1.2680776014109347,
157
+ "grad_norm": 0.1435546875,
158
+ "learning_rate": 0.00019990720552289347,
159
+ "loss": 0.0418,
160
+ "step": 180
161
+ },
162
+ {
163
+ "epoch": 1.3386243386243386,
164
+ "grad_norm": 0.09912109375,
165
+ "learning_rate": 0.00019985028228996173,
166
+ "loss": 0.0328,
167
+ "step": 190
168
+ },
169
+ {
170
+ "epoch": 1.4091710758377425,
171
+ "grad_norm": 0.12890625,
172
+ "learning_rate": 0.00019977982087825713,
173
+ "loss": 0.0389,
174
+ "step": 200
175
+ },
176
+ {
177
+ "epoch": 1.4091710758377425,
178
+ "eval_loss": 0.03491974622011185,
179
+ "eval_runtime": 6.042,
180
+ "eval_samples_per_second": 41.874,
181
+ "eval_steps_per_second": 10.593,
182
+ "step": 200
183
+ },
184
+ {
185
+ "epoch": 1.4797178130511464,
186
+ "grad_norm": 0.10498046875,
187
+ "learning_rate": 0.00019969583084127485,
188
+ "loss": 0.0338,
189
+ "step": 210
190
+ },
191
+ {
192
+ "epoch": 1.5502645502645502,
193
+ "grad_norm": 0.0859375,
194
+ "learning_rate": 0.00019959832356678583,
195
+ "loss": 0.033,
196
+ "step": 220
197
+ },
198
+ {
199
+ "epoch": 1.620811287477954,
200
+ "grad_norm": 0.12255859375,
201
+ "learning_rate": 0.00019948731227529258,
202
+ "loss": 0.0313,
203
+ "step": 230
204
+ },
205
+ {
206
+ "epoch": 1.691358024691358,
207
+ "grad_norm": 0.10400390625,
208
+ "learning_rate": 0.00019936281201823688,
209
+ "loss": 0.0288,
210
+ "step": 240
211
+ },
212
+ {
213
+ "epoch": 1.7619047619047619,
214
+ "grad_norm": 0.1396484375,
215
+ "learning_rate": 0.00019922483967595893,
216
+ "loss": 0.0322,
217
+ "step": 250
218
+ },
219
+ {
220
+ "epoch": 1.7619047619047619,
221
+ "eval_loss": 0.03154641017317772,
222
+ "eval_runtime": 6.0077,
223
+ "eval_samples_per_second": 42.113,
224
+ "eval_steps_per_second": 10.653,
225
+ "step": 250
226
+ }
227
+ ],
228
+ "logging_steps": 10,
229
+ "max_steps": 2840,
230
+ "num_input_tokens_seen": 0,
231
+ "num_train_epochs": 20,
232
+ "save_steps": 50,
233
+ "stateful_callbacks": {
234
+ "EarlyStoppingCallback": {
235
+ "args": {
236
+ "early_stopping_patience": 5,
237
+ "early_stopping_threshold": 0.0
238
+ },
239
+ "attributes": {
240
+ "early_stopping_patience_counter": 0
241
+ }
242
+ },
243
+ "TrainerControl": {
244
+ "args": {
245
+ "should_epoch_stop": false,
246
+ "should_evaluate": false,
247
+ "should_log": false,
248
+ "should_save": true,
249
+ "should_training_stop": false
250
+ },
251
+ "attributes": {}
252
+ }
253
+ },
254
+ "total_flos": 5.77903606608937e+16,
255
+ "train_batch_size": 4,
256
+ "trial_name": null,
257
+ "trial_params": null
258
+ }