kevint00 committed on
Commit
6302233
·
verified ·
1 Parent(s): 984b08b

Upload sweep/baseline_40m/result.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. sweep/baseline_40m/result.json +310 -310
sweep/baseline_40m/result.json CHANGED
@@ -4,11 +4,11 @@
4
  "model": "baseline",
5
  "seed": 42,
6
  "n_params": 59294720,
7
- "final_val_loss": 4.262436828613281,
8
- "final_ppl": 70.9827456585148,
9
  "total_steps": 3000,
10
- "time_hours": 0.241436600751347,
11
- "tokens_per_sec": 452403.1001379035,
12
  "config": {
13
  "n_layers": 8,
14
  "n_heads": 8,
@@ -27,314 +27,314 @@
27
  },
28
  "history": {
29
  "train_loss": [
30
- 10.927788162231446,
31
- 10.82743148803711,
32
- 10.613127279281617,
33
- 10.331812953948974,
34
- 10.11144700050354,
35
- 9.958681631088258,
36
- 9.839975261688233,
37
- 9.718097257614136,
38
- 9.571317529678344,
39
- 9.415836143493653,
40
- 9.226445579528809,
41
- 9.03026885986328,
42
- 8.795363330841065,
43
- 8.579822635650634,
44
- 8.38386902809143,
45
- 8.192279577255249,
46
- 8.020373725891114,
47
- 7.859238481521606,
48
- 7.710979557037353,
49
- 7.563355231285096,
50
- 7.478874373435974,
51
- 7.356767702102661,
52
- 7.285527491569519,
53
- 7.188714480400085,
54
- 7.126113963127136,
55
- 7.059055685997009,
56
- 7.014069247245788,
57
- 6.950145268440247,
58
- 6.877819466590881,
59
- 6.82345552444458,
60
- 6.7744334697723385,
61
- 6.685016441345215,
62
- 6.668521618843078,
63
- 6.605617356300354,
64
- 6.558784747123719,
65
- 6.527754640579223,
66
- 6.50184736251831,
67
- 6.452904272079468,
68
- 6.405883169174194,
69
- 6.38767638206482,
70
- 6.348726224899292,
71
- 6.335273432731628,
72
- 6.292200779914856,
73
- 6.254116201400757,
74
- 6.250170803070068,
75
- 6.21578209400177,
76
- 6.204549193382263,
77
- 6.134960770606995,
78
- 6.1193499088287355,
79
- 6.090740776062011,
80
- 6.069341015815735,
81
- 6.026736211776734,
82
- 6.03068597316742,
83
- 6.00741446018219,
84
- 5.981505489349365,
85
- 5.967508721351623,
86
- 5.93429172039032,
87
- 5.885955548286438,
88
- 5.922497510910034,
89
- 5.874273800849915,
90
- 5.85152416229248,
91
- 5.821193695068359,
92
- 5.8280246496200565,
93
- 5.813751816749573,
94
- 5.791246247291565,
95
- 5.767430520057678,
96
- 5.71628212928772,
97
- 5.706824922561646,
98
- 5.679594397544861,
99
- 5.681227397918701,
100
- 5.659778475761414,
101
- 5.634682846069336,
102
- 5.6370806217193605,
103
- 5.60820951461792,
104
- 5.605899858474731,
105
- 5.558463406562805,
106
- 5.540622115135193,
107
- 5.5469331502914425,
108
- 5.513700318336487,
109
- 5.507756447792053,
110
- 5.51136999130249,
111
- 5.492908000946045,
112
- 5.479893898963928,
113
- 5.476272416114807,
114
- 5.448544383049011,
115
- 5.425948858261108,
116
- 5.427353811264038,
117
- 5.412839388847351,
118
- 5.388048672676087,
119
- 5.377066254615784,
120
- 5.36482572555542,
121
- 5.371043539047241,
122
- 5.368273186683655,
123
- 5.3256947755813595,
124
- 5.307721757888794,
125
- 5.318835425376892,
126
- 5.289116311073303,
127
- 5.272861742973328,
128
- 5.276089310646057,
129
- 5.265490460395813,
130
- 5.2506572008132935,
131
- 5.237578439712524,
132
- 5.215294694900512,
133
- 5.228858590126038,
134
- 5.2053611278533936,
135
- 5.198213219642639,
136
- 5.177952170372009,
137
- 5.157907652854919,
138
- 5.160167956352234,
139
- 5.136298632621765,
140
- 5.134593725204468,
141
- 5.129997110366821,
142
- 5.1140142440795895,
143
- 5.098948001861572,
144
- 5.105801224708557,
145
- 5.086677241325378,
146
- 5.105670213699341,
147
- 5.086080074310303,
148
- 5.063486886024475,
149
- 5.039502429962158,
150
- 5.054127407073975,
151
- 5.0334865093231205,
152
- 5.0221271276473995,
153
- 5.02755913734436,
154
- 5.012960624694824,
155
- 5.003441286087036,
156
- 4.989929986000061,
157
- 4.994864392280578,
158
- 4.983890652656555,
159
- 4.960720038414001,
160
- 4.939655661582947,
161
- 4.9273720502853395,
162
- 4.96423921585083,
163
- 4.92990505695343,
164
- 4.916189646720886,
165
- 4.900088691711426,
166
- 4.9144501209259035,
167
- 4.902526545524597,
168
- 4.887704992294312,
169
- 4.872406768798828,
170
- 4.852422189712525,
171
- 4.864386940002442,
172
- 4.858460855484009,
173
- 4.842334938049317,
174
- 4.843454217910766,
175
- 4.817466378211975,
176
- 4.820430397987366,
177
- 4.801653957366943,
178
- 4.792956805229187,
179
- 4.797436952590942,
180
- 4.781968450546264,
181
- 4.789061427116394,
182
- 4.7693219661712645,
183
- 4.782662010192871,
184
- 4.765616488456726,
185
- 4.753713536262512,
186
- 4.731625652313232,
187
- 4.758085608482361,
188
- 4.733190107345581,
189
- 4.732776117324829,
190
- 4.723483061790466,
191
- 4.718178009986877,
192
- 4.706092762947082,
193
- 4.698446464538574,
194
- 4.701998090744018,
195
- 4.675306391716004,
196
- 4.67594690322876,
197
- 4.673136043548584,
198
- 4.676000332832336,
199
- 4.662511658668518,
200
- 4.663333058357239,
201
- 4.639285588264466,
202
- 4.643643140792847,
203
- 4.642273378372193,
204
- 4.644450044631958,
205
- 4.650630140304566,
206
- 4.6467742919921875,
207
- 4.632501029968262,
208
- 4.620224380493164,
209
- 4.625468230247497,
210
- 4.5897074937820435,
211
- 4.588979768753052,
212
- 4.6058248519897464,
213
- 4.587015867233276,
214
- 4.61704375743866,
215
- 4.579612851142883,
216
- 4.558562755584717,
217
- 4.596074819564819,
218
- 4.5844930648803714,
219
- 4.569266223907471,
220
- 4.577209210395813,
221
- 4.577761220932007,
222
- 4.555995655059815,
223
- 4.552287364006043,
224
- 4.541914534568787,
225
- 4.549658226966858,
226
- 4.562751293182373,
227
- 4.562846899032593,
228
- 4.541490626335144,
229
- 4.528701710700989,
230
- 4.544593930244446,
231
- 4.536577987670898,
232
- 4.520785689353943,
233
- 4.5242571353912355,
234
- 4.540758347511291,
235
- 4.504750514030457,
236
- 4.528533911705017,
237
- 4.5280516147613525,
238
- 4.498441386222839,
239
- 4.514826536178589,
240
- 4.493865728378296,
241
- 4.508604049682617,
242
- 4.51331889629364,
243
- 4.506322932243347,
244
- 4.5069844961166385,
245
- 4.513097023963928,
246
- 4.4849427223205565,
247
- 4.505059719085693,
248
- 4.482535195350647,
249
- 4.463908529281616,
250
- 4.478529906272888,
251
- 4.485102605819702,
252
- 4.464245009422302,
253
- 4.4766833782196045,
254
- 4.48006227016449,
255
- 4.46816234588623,
256
- 4.4690571308135985,
257
- 4.460832238197327,
258
- 4.465590906143189,
259
- 4.469543504714966,
260
- 4.454380369186401,
261
- 4.471775102615356,
262
- 4.463762474060059,
263
- 4.470025753974914,
264
- 4.452491307258606,
265
- 4.4533980131149296,
266
- 4.457679700851441,
267
- 4.454637956619263,
268
- 4.45936803817749,
269
- 4.445066690444946,
270
- 4.45391001701355,
271
- 4.423792219161987,
272
- 4.408037424087524,
273
- 4.426368927955627,
274
- 4.426535439491272,
275
- 4.451635932922363,
276
- 4.461174488067627,
277
- 4.443306803703308,
278
- 4.434109568595886,
279
- 4.415992355346679,
280
- 4.439617848396301,
281
- 4.395599150657654,
282
- 4.4118435144424435,
283
- 4.416483449935913,
284
- 4.42143759727478,
285
- 4.408699345588684,
286
- 4.410844421386718,
287
- 4.423719692230224,
288
- 4.419705867767334,
289
- 4.41556077003479,
290
- 4.413246774673462,
291
- 4.408759260177613,
292
- 4.42314383983612,
293
- 4.424891471862793,
294
- 4.405255389213562,
295
- 4.401962566375732,
296
- 4.404637169837952,
297
- 4.406912827491761,
298
- 4.388018369674683,
299
- 4.404927229881286,
300
- 4.384101796150207,
301
- 4.377739930152893,
302
- 4.3903972864151,
303
- 4.385074853897095,
304
- 4.389943695068359,
305
- 4.400576543807984,
306
- 4.356423425674438,
307
- 4.3956664323806764,
308
- 4.38970718383789,
309
- 4.3759850025177,
310
- 4.383376860618592,
311
- 4.396907472610474,
312
- 4.393727779388428,
313
- 4.383356761932373,
314
- 4.374659609794617,
315
- 4.3866536855697635,
316
- 4.392642092704773,
317
- 4.376036214828491,
318
- 4.369334197044372,
319
- 4.378073072433471,
320
- 4.367864370346069,
321
- 4.383692765235901,
322
- 4.371319842338562,
323
- 4.3599893808364865,
324
- 4.390848541259766,
325
- 4.374786972999573,
326
- 4.381334376335144,
327
- 4.379550957679749,
328
- 4.375093150138855,
329
- 4.391530227661133
330
  ],
331
  "val_loss": [
332
- 6.038729429244995,
333
- 5.1768466663360595,
334
- 4.684602499008179,
335
- 4.418132734298706,
336
- 4.304454736709594,
337
- 4.262436828613281
338
  ],
339
  "steps": [
340
  10,
 
4
  "model": "baseline",
5
  "seed": 42,
6
  "n_params": 59294720,
7
+ "final_val_loss": 4.233367519378662,
8
+ "final_ppl": 68.9490288556975,
9
  "total_steps": 3000,
10
+ "time_hours": 0.5394660167561637,
11
+ "tokens_per_sec": 202471.82078947642,
12
  "config": {
13
  "n_layers": 8,
14
  "n_heads": 8,
 
27
  },
28
  "history": {
29
  "train_loss": [
30
+ 10.927530813217164,
31
+ 10.826293659210204,
32
+ 10.613470816612244,
33
+ 10.331651449203491,
34
+ 10.110949373245239,
35
+ 9.958672904968262,
36
+ 9.839383387565613,
37
+ 9.718096780776978,
38
+ 9.571044754981994,
39
+ 9.415904355049133,
40
+ 9.226257967948914,
41
+ 9.030017983913421,
42
+ 8.794704365730286,
43
+ 8.57885262966156,
44
+ 8.382428240776061,
45
+ 8.189792448282242,
46
+ 8.018463099002838,
47
+ 7.857675814628601,
48
+ 7.709551095962524,
49
+ 7.561921155452728,
50
+ 7.477040851116181,
51
+ 7.355278390645981,
52
+ 7.284095978736877,
53
+ 7.187307608127594,
54
+ 7.125060200691223,
55
+ 7.057865798473358,
56
+ 7.012505376338959,
57
+ 6.948506551980972,
58
+ 6.876198577880859,
59
+ 6.822176653146744,
60
+ 6.773482286930085,
61
+ 6.684595227241516,
62
+ 6.667623269557953,
63
+ 6.6060503959655765,
64
+ 6.558893990516663,
65
+ 6.5269287586212155,
66
+ 6.501987665891647,
67
+ 6.452165180444718,
68
+ 6.404278558492661,
69
+ 6.386325860023499,
70
+ 6.3472913801670074,
71
+ 6.3349658966064455,
72
+ 6.287427937984466,
73
+ 6.252253836393356,
74
+ 6.247970241308212,
75
+ 6.2124589622020725,
76
+ 6.201394194364548,
77
+ 6.132259547710419,
78
+ 6.115846771001816,
79
+ 6.088389253616333,
80
+ 6.065799176692963,
81
+ 6.023272043466568,
82
+ 6.02790796160698,
83
+ 6.003626132011414,
84
+ 5.980417060852051,
85
+ 5.965319687128067,
86
+ 5.9287806928157805,
87
+ 5.881725150346756,
88
+ 5.925188660621643,
89
+ 5.8744639456272125,
90
+ 5.848873859643936,
91
+ 5.818784230947495,
92
+ 5.826754355430603,
93
+ 5.810910212993622,
94
+ 5.788806092739105,
95
+ 5.765624535083771,
96
+ 5.713429546356201,
97
+ 5.705234467983246,
98
+ 5.6773538291454315,
99
+ 5.676201885938644,
100
+ 5.659579473733902,
101
+ 5.630713140964508,
102
+ 5.6361576318740845,
103
+ 5.607533866167069,
104
+ 5.604413938522339,
105
+ 5.555145579576492,
106
+ 5.539613455533981,
107
+ 5.54616471529007,
108
+ 5.511329019069672,
109
+ 5.5064324796199795,
110
+ 5.509959208965301,
111
+ 5.492373234033584,
112
+ 5.47820674777031,
113
+ 5.474586945772171,
114
+ 5.446962082386017,
115
+ 5.425144374370575,
116
+ 5.424832212924957,
117
+ 5.4103613018989565,
118
+ 5.385396325588227,
119
+ 5.3762620985507965,
120
+ 5.363343620300293,
121
+ 5.369414287805557,
122
+ 5.367125082015991,
123
+ 5.325003254413605,
124
+ 5.307112008333206,
125
+ 5.31769849061966,
126
+ 5.286369174718857,
127
+ 5.2711376547813416,
128
+ 5.275247663259506,
129
+ 5.264838606119156,
130
+ 5.249980568885803,
131
+ 5.235380667448044,
132
+ 5.214563733339309,
133
+ 5.229698014259339,
134
+ 5.2045911848545074,
135
+ 5.196132844686508,
136
+ 5.178686946630478,
137
+ 5.156260287761688,
138
+ 5.160557287931442,
139
+ 5.136663693189621,
140
+ 5.134931546449661,
141
+ 5.129724872112274,
142
+ 5.112880122661591,
143
+ 5.099378442764282,
144
+ 5.10523362159729,
145
+ 5.085989451408386,
146
+ 5.104570031166077,
147
+ 5.087291306257248,
148
+ 5.062313461303711,
149
+ 5.039769595861435,
150
+ 5.057129102945328,
151
+ 5.035041064023972,
152
+ 5.021690434217453,
153
+ 5.029307162761688,
154
+ 5.015169143676758,
155
+ 5.004697853326798,
156
+ 4.991213339567184,
157
+ 4.996236568689346,
158
+ 4.985553902387619,
159
+ 4.963201266527176,
160
+ 4.942382895946503,
161
+ 4.930625879764557,
162
+ 4.966241490840912,
163
+ 4.930865317583084,
164
+ 4.919443368911743,
165
+ 4.902375680208206,
166
+ 4.916122484207153,
167
+ 4.906648200750351,
168
+ 4.891059231758118,
169
+ 4.876785057783127,
170
+ 4.856038808822632,
171
+ 4.86856461763382,
172
+ 4.861787569522858,
173
+ 4.847309970855713,
174
+ 4.8470313310623165,
175
+ 4.823130464553833,
176
+ 4.825788927078247,
177
+ 4.807092648744583,
178
+ 4.7993528068065645,
179
+ 4.803900754451751,
180
+ 4.7876395046710964,
181
+ 4.795553350448609,
182
+ 4.773492980003357,
183
+ 4.788048326969147,
184
+ 4.770508575439453,
185
+ 4.76012333035469,
186
+ 4.7385109007358555,
187
+ 4.765730607509613,
188
+ 4.7379465341568,
189
+ 4.738092434406281,
190
+ 4.7284609317779545,
191
+ 4.72404305934906,
192
+ 4.712569177150726,
193
+ 4.7043911755084995,
194
+ 4.708009678125381,
195
+ 4.6813356339931484,
196
+ 4.681916469335556,
197
+ 4.6801539838314055,
198
+ 4.682833117246628,
199
+ 4.668183767795563,
200
+ 4.668356126546859,
201
+ 4.644948256015778,
202
+ 4.649244034290314,
203
+ 4.64717156291008,
204
+ 4.649398684501648,
205
+ 4.656712931394577,
206
+ 4.652565515041351,
207
+ 4.638376301527023,
208
+ 4.625971180200577,
209
+ 4.631635862588882,
210
+ 4.597406750917434,
211
+ 4.594275557994843,
212
+ 4.610821264982223,
213
+ 4.592344808578491,
214
+ 4.621552324295044,
215
+ 4.5853946030139925,
216
+ 4.564627516269684,
217
+ 4.601570558547974,
218
+ 4.590592992305756,
219
+ 4.57503188252449,
220
+ 4.583286285400391,
221
+ 4.5832530677318575,
222
+ 4.5610675573348995,
223
+ 4.558764892816543,
224
+ 4.5478429913520815,
225
+ 4.5562922835350035,
226
+ 4.567357301712036,
227
+ 4.568180549144745,
228
+ 4.547011572122574,
229
+ 4.534507042169571,
230
+ 4.548529040813446,
231
+ 4.541348105669021,
232
+ 4.526279681921006,
233
+ 4.5297070980072025,
234
+ 4.545705795288086,
235
+ 4.509708696603775,
236
+ 4.533126693964005,
237
+ 4.5335634529590605,
238
+ 4.503700315952301,
239
+ 4.519868350028991,
240
+ 4.498268526792526,
241
+ 4.5140829205513,
242
+ 4.517552202939987,
243
+ 4.510715544223785,
244
+ 4.512187397480011,
245
+ 4.517309862375259,
246
+ 4.489432048797608,
247
+ 4.51059992313385,
248
+ 4.48784299492836,
249
+ 4.466718208789826,
250
+ 4.482305586338043,
251
+ 4.488626945018768,
252
+ 4.469105690717697,
253
+ 4.480705058574676,
254
+ 4.484020268917083,
255
+ 4.471919769048691,
256
+ 4.4729400157928465,
257
+ 4.4654629826545715,
258
+ 4.46952211856842,
259
+ 4.473682689666748,
260
+ 4.457893824577331,
261
+ 4.475264972448349,
262
+ 4.4683712244033815,
263
+ 4.472925233840942,
264
+ 4.455631273984909,
265
+ 4.458080977201462,
266
+ 4.461629629135132,
267
+ 4.458289343118667,
268
+ 4.462007248401642,
269
+ 4.448783230781555,
270
+ 4.456719762086868,
271
+ 4.42740181684494,
272
+ 4.4121558427810665,
273
+ 4.429766219854355,
274
+ 4.4310003280639645,
275
+ 4.455491322278976,
276
+ 4.4640020549297335,
277
+ 4.44659206867218,
278
+ 4.437281060218811,
279
+ 4.420365136861801,
280
+ 4.4440817594528195,
281
+ 4.399286568164825,
282
+ 4.4147800505161285,
283
+ 4.420287960767746,
284
+ 4.425147753953934,
285
+ 4.412303626537323,
286
+ 4.414164221286773,
287
+ 4.427308332920075,
288
+ 4.423098802566528,
289
+ 4.419688934087754,
290
+ 4.4166245520114895,
291
+ 4.413107579946518,
292
+ 4.426975536346435,
293
+ 4.429745012521744,
294
+ 4.409277826547623,
295
+ 4.405870348215103,
296
+ 4.408495014905929,
297
+ 4.4112107932567595,
298
+ 4.388924932479858,
299
+ 4.408998346328735,
300
+ 4.388690495491028,
301
+ 4.382031446695327,
302
+ 4.390588700771332,
303
+ 4.389434742927551,
304
+ 4.395962595939636,
305
+ 4.402186918258667,
306
+ 4.359916639328003,
307
+ 4.397599583864212,
308
+ 4.395163881778717,
309
+ 4.379324901103973,
310
+ 4.384779387712479,
311
+ 4.401144474744797,
312
+ 4.39257373213768,
313
+ 4.387924355268479,
314
+ 4.378289061784744,
315
+ 4.389167779684067,
316
+ 4.399120503664017,
317
+ 4.377688091993332,
318
+ 4.369447124004364,
319
+ 4.383059096336365,
320
+ 4.374087977409363,
321
+ 4.382080936431885,
322
+ 4.377086210250854,
323
+ 4.3609716355800625,
324
+ 4.394048166275025,
325
+ 4.378798735141754,
326
+ 4.38559120297432,
327
+ 4.383440864086151,
328
+ 4.377417886257172,
329
+ 4.394392079114914
330
  ],
331
  "val_loss": [
332
+ 6.030185861587524,
333
+ 5.151836605072021,
334
+ 4.662097959518433,
335
+ 4.390134248733521,
336
+ 4.277197365760803,
337
+ 4.233367519378662
338
  ],
339
  "steps": [
340
  10,