sinem02 commited on
Commit
a2b62cf
·
verified ·
1 Parent(s): 63f37e7

Upload folder using huggingface_hub

Browse files
logs/test_checkpoint_results.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "checkpoint": "qwen25_coder_15b_l4/checkpoint-100",
4
+ "step": 100,
5
+ "test_loss": 0.8084650635719299
6
+ },
7
+ {
8
+ "checkpoint": "qwen25_coder_15b_l4/checkpoint-200",
9
+ "step": 200,
10
+ "test_loss": 0.6265722513198853
11
+ },
12
+ {
13
+ "checkpoint": "qwen25_coder_15b_l4/checkpoint-300",
14
+ "step": 300,
15
+ "test_loss": 0.44794780015945435
16
+ },
17
+ {
18
+ "checkpoint": "qwen25_coder_15b_l4/checkpoint-400",
19
+ "step": 400,
20
+ "test_loss": 0.3127075731754303
21
+ },
22
+ {
23
+ "checkpoint": "qwen25_coder_15b_l4/checkpoint-500",
24
+ "step": 500,
25
+ "test_loss": 0.21881423890590668
26
+ },
27
+ {
28
+ "checkpoint": "qwen25_coder_15b_l4/checkpoint-600",
29
+ "step": 600,
30
+ "test_loss": 0.1875201165676117
31
+ },
32
+ {
33
+ "checkpoint": "qwen25_coder_15b_l4/checkpoint-700",
34
+ "step": 700,
35
+ "test_loss": 0.1734929233789444
36
+ },
37
+ {
38
+ "checkpoint": "qwen25_coder_15b_l4/checkpoint-750",
39
+ "step": 750,
40
+ "test_loss": 0.17274102568626404
41
+ }
42
+ ]
logs/training_validation_logs.json ADDED
@@ -0,0 +1,317 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "epoch": 0.08,
4
+ "grad_norm": 0.28229808807373047,
5
+ "learning_rate": 0.0003304347826086957,
6
+ "loss": 1.0758,
7
+ "step": 20
8
+ },
9
+ {
10
+ "epoch": 0.16,
11
+ "grad_norm": 0.3910837173461914,
12
+ "learning_rate": 0.0003995221430894122,
13
+ "loss": 0.9148,
14
+ "step": 40
15
+ },
16
+ {
17
+ "epoch": 0.24,
18
+ "grad_norm": 0.37663960456848145,
19
+ "learning_rate": 0.00039758476229578745,
20
+ "loss": 0.8888,
21
+ "step": 60
22
+ },
23
+ {
24
+ "epoch": 0.32,
25
+ "grad_norm": 0.2441498339176178,
26
+ "learning_rate": 0.0003941724426452488,
27
+ "loss": 0.8392,
28
+ "step": 80
29
+ },
30
+ {
31
+ "epoch": 0.4,
32
+ "grad_norm": 0.32805338501930237,
33
+ "learning_rate": 0.0003893106565618147,
34
+ "loss": 0.8178,
35
+ "step": 100
36
+ },
37
+ {
38
+ "epoch": 0.4,
39
+ "eval_loss": 0.8086594939231873,
40
+ "eval_runtime": 19.7643,
41
+ "eval_samples_per_second": 25.298,
42
+ "eval_steps_per_second": 3.188,
43
+ "step": 100
44
+ },
45
+ {
46
+ "epoch": 0.48,
47
+ "grad_norm": 0.32320863008499146,
48
+ "learning_rate": 0.0003830356965061241,
49
+ "loss": 0.7901,
50
+ "step": 120
51
+ },
52
+ {
53
+ "epoch": 0.56,
54
+ "grad_norm": 0.35026517510414124,
55
+ "learning_rate": 0.0003753944040579839,
56
+ "loss": 0.7661,
57
+ "step": 140
58
+ },
59
+ {
60
+ "epoch": 0.64,
61
+ "grad_norm": 0.3722197115421295,
62
+ "learning_rate": 0.00036644382025141837,
63
+ "loss": 0.7126,
64
+ "step": 160
65
+ },
66
+ {
67
+ "epoch": 0.72,
68
+ "grad_norm": 0.4616721570491791,
69
+ "learning_rate": 0.0003562507597724135,
70
+ "loss": 0.6517,
71
+ "step": 180
72
+ },
73
+ {
74
+ "epoch": 0.8,
75
+ "grad_norm": 0.40086525678634644,
76
+ "learning_rate": 0.0003448913121979015,
77
+ "loss": 0.6215,
78
+ "step": 200
79
+ },
80
+ {
81
+ "epoch": 0.8,
82
+ "eval_loss": 0.6392109394073486,
83
+ "eval_runtime": 19.766,
84
+ "eval_samples_per_second": 25.296,
85
+ "eval_steps_per_second": 3.187,
86
+ "step": 200
87
+ },
88
+ {
89
+ "epoch": 0.88,
90
+ "grad_norm": 0.4292043447494507,
91
+ "learning_rate": 0.00033245027399915895,
92
+ "loss": 0.5898,
93
+ "step": 220
94
+ },
95
+ {
96
+ "epoch": 0.96,
97
+ "grad_norm": 0.5536438226699829,
98
+ "learning_rate": 0.0003190205155496219,
99
+ "loss": 0.5709,
100
+ "step": 240
101
+ },
102
+ {
103
+ "epoch": 1.04,
104
+ "grad_norm": 0.6616698503494263,
105
+ "learning_rate": 0.00030470228786230405,
106
+ "loss": 0.4551,
107
+ "step": 260
108
+ },
109
+ {
110
+ "epoch": 1.12,
111
+ "grad_norm": 0.5763731598854065,
112
+ "learning_rate": 0.0002896024742319127,
113
+ "loss": 0.3672,
114
+ "step": 280
115
+ },
116
+ {
117
+ "epoch": 1.2,
118
+ "grad_norm": 0.5821401476860046,
119
+ "learning_rate": 0.0002738337923680367,
120
+ "loss": 0.3648,
121
+ "step": 300
122
+ },
123
+ {
124
+ "epoch": 1.2,
125
+ "eval_loss": 0.4687094986438751,
126
+ "eval_runtime": 19.7698,
127
+ "eval_samples_per_second": 25.291,
128
+ "eval_steps_per_second": 3.187,
129
+ "step": 300
130
+ },
131
+ {
132
+ "epoch": 1.28,
133
+ "grad_norm": 0.47784799337387085,
134
+ "learning_rate": 0.00025751395297535327,
135
+ "loss": 0.3424,
136
+ "step": 320
137
+ },
138
+ {
139
+ "epoch": 1.3599999999999999,
140
+ "grad_norm": 0.5968295931816101,
141
+ "learning_rate": 0.00024076478106192076,
142
+ "loss": 0.3253,
143
+ "step": 340
144
+ },
145
+ {
146
+ "epoch": 1.44,
147
+ "grad_norm": 0.5316683053970337,
148
+ "learning_rate": 0.00022371130653484945,
149
+ "loss": 0.2952,
150
+ "step": 360
151
+ },
152
+ {
153
+ "epoch": 1.52,
154
+ "grad_norm": 0.4502660930156708,
155
+ "learning_rate": 0.0002064808308719107,
156
+ "loss": 0.2728,
157
+ "step": 380
158
+ },
159
+ {
160
+ "epoch": 1.6,
161
+ "grad_norm": 0.512885570526123,
162
+ "learning_rate": 0.00018920197683623203,
163
+ "loss": 0.2615,
164
+ "step": 400
165
+ },
166
+ {
167
+ "epoch": 1.6,
168
+ "eval_loss": 0.3294866383075714,
169
+ "eval_runtime": 19.7741,
170
+ "eval_samples_per_second": 25.286,
171
+ "eval_steps_per_second": 3.186,
172
+ "step": 400
173
+ },
174
+ {
175
+ "epoch": 1.6800000000000002,
176
+ "grad_norm": 0.510636568069458,
177
+ "learning_rate": 0.00017200372832780684,
178
+ "loss": 0.2468,
179
+ "step": 420
180
+ },
181
+ {
182
+ "epoch": 1.76,
183
+ "grad_norm": 0.38415294885635376,
184
+ "learning_rate": 0.00015501446753917467,
185
+ "loss": 0.2153,
186
+ "step": 440
187
+ },
188
+ {
189
+ "epoch": 1.8399999999999999,
190
+ "grad_norm": 0.4481910765171051,
191
+ "learning_rate": 0.00013836101660275217,
192
+ "loss": 0.1996,
193
+ "step": 460
194
+ },
195
+ {
196
+ "epoch": 1.92,
197
+ "grad_norm": 0.516516387462616,
198
+ "learning_rate": 0.000122167690883765,
199
+ "loss": 0.1803,
200
+ "step": 480
201
+ },
202
+ {
203
+ "epoch": 2.0,
204
+ "grad_norm": 0.5795238018035889,
205
+ "learning_rate": 0.00010655537098579868,
206
+ "loss": 0.1915,
207
+ "step": 500
208
+ },
209
+ {
210
+ "epoch": 2.0,
211
+ "eval_loss": 0.2246081531047821,
212
+ "eval_runtime": 19.7628,
213
+ "eval_samples_per_second": 25.3,
214
+ "eval_steps_per_second": 3.188,
215
+ "step": 500
216
+ },
217
+ {
218
+ "epoch": 2.08,
219
+ "grad_norm": 0.46038225293159485,
220
+ "learning_rate": 9.164060039629896e-05,
221
+ "loss": 0.1179,
222
+ "step": 520
223
+ },
224
+ {
225
+ "epoch": 2.16,
226
+ "grad_norm": 0.43895432353019714,
227
+ "learning_rate": 7.753471550795519e-05,
228
+ "loss": 0.1153,
229
+ "step": 540
230
+ },
231
+ {
232
+ "epoch": 2.24,
233
+ "grad_norm": 0.7098507285118103,
234
+ "learning_rate": 6.434301451021892e-05,
235
+ "loss": 0.1261,
236
+ "step": 560
237
+ },
238
+ {
239
+ "epoch": 2.32,
240
+ "grad_norm": 0.3989202082157135,
241
+ "learning_rate": 5.216397135505024e-05,
242
+ "loss": 0.1121,
243
+ "step": 580
244
+ },
245
+ {
246
+ "epoch": 2.4,
247
+ "grad_norm": 0.525729775428772,
248
+ "learning_rate": 4.108850066451255e-05,
249
+ "loss": 0.1186,
250
+ "step": 600
251
+ },
252
+ {
253
+ "epoch": 2.4,
254
+ "eval_loss": 0.19236330687999725,
255
+ "eval_runtime": 19.7599,
256
+ "eval_samples_per_second": 25.304,
257
+ "eval_steps_per_second": 3.188,
258
+ "step": 600
259
+ },
260
+ {
261
+ "epoch": 2.48,
262
+ "grad_norm": 0.33528250455856323,
263
+ "learning_rate": 3.1199279067563706e-05,
264
+ "loss": 0.1075,
265
+ "step": 620
266
+ },
267
+ {
268
+ "epoch": 2.56,
269
+ "grad_norm": 0.35482296347618103,
270
+ "learning_rate": 2.2570128032157568e-05,
271
+ "loss": 0.1106,
272
+ "step": 640
273
+ },
274
+ {
275
+ "epoch": 2.64,
276
+ "grad_norm": 0.2427404522895813,
277
+ "learning_rate": 1.526546279971466e-05,
278
+ "loss": 0.0932,
279
+ "step": 660
280
+ },
281
+ {
282
+ "epoch": 2.7199999999999998,
283
+ "grad_norm": 0.27014562487602234,
284
+ "learning_rate": 9.339811535579768e-06,
285
+ "loss": 0.1005,
286
+ "step": 680
287
+ },
288
+ {
289
+ "epoch": 2.8,
290
+ "grad_norm": 0.3576093018054962,
291
+ "learning_rate": 4.837408284931444e-06,
292
+ "loss": 0.0948,
293
+ "step": 700
294
+ },
295
+ {
296
+ "epoch": 2.8,
297
+ "eval_loss": 0.17951039969921112,
298
+ "eval_runtime": 19.7723,
299
+ "eval_samples_per_second": 25.288,
300
+ "eval_steps_per_second": 3.186,
301
+ "step": 700
302
+ },
303
+ {
304
+ "epoch": 2.88,
305
+ "grad_norm": 0.3262103199958801,
306
+ "learning_rate": 1.7918627726630777e-06,
307
+ "loss": 0.0939,
308
+ "step": 720
309
+ },
310
+ {
311
+ "epoch": 2.96,
312
+ "grad_norm": 0.2655605971813202,
313
+ "learning_rate": 2.259095121265542e-07,
314
+ "loss": 0.0953,
315
+ "step": 740
316
+ }
317
+ ]