| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 4.9638554216867465, |
| "eval_steps": 500, |
| "global_step": 515, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0963855421686747, |
| "grad_norm": 0.9117392307242835, |
| "learning_rate": 9.990699835799469e-05, |
| "loss": 1.9779, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.1927710843373494, |
| "grad_norm": 0.9121432880636753, |
| "learning_rate": 9.96283394041954e-05, |
| "loss": 1.6698, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.2891566265060241, |
| "grad_norm": 0.8113864009102607, |
| "learning_rate": 9.916505976821263e-05, |
| "loss": 1.5706, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.3855421686746988, |
| "grad_norm": 0.7722456373389991, |
| "learning_rate": 9.851888288072053e-05, |
| "loss": 1.5093, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.4819277108433735, |
| "grad_norm": 0.840195041159072, |
| "learning_rate": 9.769221256218164e-05, |
| "loss": 1.463, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.5783132530120482, |
| "grad_norm": 0.9154016698600571, |
| "learning_rate": 9.668812408047679e-05, |
| "loss": 1.4829, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.6746987951807228, |
| "grad_norm": 0.890732828345114, |
| "learning_rate": 9.551035271070664e-05, |
| "loss": 1.4683, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.7710843373493976, |
| "grad_norm": 0.9157645257382339, |
| "learning_rate": 9.416327983972304e-05, |
| "loss": 1.3965, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.8674698795180723, |
| "grad_norm": 0.8897525548859271, |
| "learning_rate": 9.265191666708209e-05, |
| "loss": 1.4475, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.963855421686747, |
| "grad_norm": 0.861081307668928, |
| "learning_rate": 9.098188556305263e-05, |
| "loss": 1.4124, |
| "step": 100 |
| }, |
| { |
| "epoch": 1.0602409638554218, |
| "grad_norm": 0.85116234930626, |
| "learning_rate": 8.915939915302968e-05, |
| "loss": 1.3647, |
| "step": 110 |
| }, |
| { |
| "epoch": 1.1566265060240963, |
| "grad_norm": 0.9740957258545101, |
| "learning_rate": 8.71912372061598e-05, |
| "loss": 1.3204, |
| "step": 120 |
| }, |
| { |
| "epoch": 1.2530120481927711, |
| "grad_norm": 1.0864987018116612, |
| "learning_rate": 8.508472141415467e-05, |
| "loss": 1.335, |
| "step": 130 |
| }, |
| { |
| "epoch": 1.3493975903614457, |
| "grad_norm": 1.0426867861383922, |
| "learning_rate": 8.284768815411692e-05, |
| "loss": 1.2918, |
| "step": 140 |
| }, |
| { |
| "epoch": 1.4457831325301205, |
| "grad_norm": 1.1563010080529215, |
| "learning_rate": 8.048845933670273e-05, |
| "loss": 1.3137, |
| "step": 150 |
| }, |
| { |
| "epoch": 1.5421686746987953, |
| "grad_norm": 1.1357035664877724, |
| "learning_rate": 7.801581144806752e-05, |
| "loss": 1.2938, |
| "step": 160 |
| }, |
| { |
| "epoch": 1.6385542168674698, |
| "grad_norm": 1.1274251128308745, |
| "learning_rate": 7.543894290076103e-05, |
| "loss": 1.3013, |
| "step": 170 |
| }, |
| { |
| "epoch": 1.7349397590361446, |
| "grad_norm": 1.1376429010873366, |
| "learning_rate": 7.276743981502856e-05, |
| "loss": 1.3333, |
| "step": 180 |
| }, |
| { |
| "epoch": 1.8313253012048194, |
| "grad_norm": 1.1848553562459525, |
| "learning_rate": 7.00112403578139e-05, |
| "loss": 1.2946, |
| "step": 190 |
| }, |
| { |
| "epoch": 1.927710843373494, |
| "grad_norm": 1.1037089869727106, |
| "learning_rate": 6.718059777212567e-05, |
| "loss": 1.2859, |
| "step": 200 |
| }, |
| { |
| "epoch": 2.0240963855421685, |
| "grad_norm": 1.1061364608256432, |
| "learning_rate": 6.42860422342998e-05, |
| "loss": 1.2725, |
| "step": 210 |
| }, |
| { |
| "epoch": 2.1204819277108435, |
| "grad_norm": 1.3057975365378993, |
| "learning_rate": 6.133834168105206e-05, |
| "loss": 1.1922, |
| "step": 220 |
| }, |
| { |
| "epoch": 2.216867469879518, |
| "grad_norm": 1.289860777590967, |
| "learning_rate": 5.8348461752046116e-05, |
| "loss": 1.1768, |
| "step": 230 |
| }, |
| { |
| "epoch": 2.3132530120481927, |
| "grad_norm": 1.393963581012029, |
| "learning_rate": 5.532752499699381e-05, |
| "loss": 1.1828, |
| "step": 240 |
| }, |
| { |
| "epoch": 2.4096385542168672, |
| "grad_norm": 1.4444324116034009, |
| "learning_rate": 5.228676949903973e-05, |
| "loss": 1.1874, |
| "step": 250 |
| }, |
| { |
| "epoch": 2.5060240963855422, |
| "grad_norm": 1.4377450110610097, |
| "learning_rate": 4.923750706835371e-05, |
| "loss": 1.1709, |
| "step": 260 |
| }, |
| { |
| "epoch": 2.602409638554217, |
| "grad_norm": 1.4052399247965977, |
| "learning_rate": 4.619108116145411e-05, |
| "loss": 1.1545, |
| "step": 270 |
| }, |
| { |
| "epoch": 2.6987951807228914, |
| "grad_norm": 1.4874281591631962, |
| "learning_rate": 4.31588246828045e-05, |
| "loss": 1.1515, |
| "step": 280 |
| }, |
| { |
| "epoch": 2.7951807228915664, |
| "grad_norm": 1.510345480121206, |
| "learning_rate": 4.015201782566471e-05, |
| "loss": 1.1655, |
| "step": 290 |
| }, |
| { |
| "epoch": 2.891566265060241, |
| "grad_norm": 1.4989598810015492, |
| "learning_rate": 3.7181846109031005e-05, |
| "loss": 1.1565, |
| "step": 300 |
| }, |
| { |
| "epoch": 2.9879518072289155, |
| "grad_norm": 1.499903571294907, |
| "learning_rate": 3.4259358766770766e-05, |
| "loss": 1.189, |
| "step": 310 |
| }, |
| { |
| "epoch": 3.0843373493975905, |
| "grad_norm": 1.5828347380531096, |
| "learning_rate": 3.1395427643746796e-05, |
| "loss": 1.0681, |
| "step": 320 |
| }, |
| { |
| "epoch": 3.180722891566265, |
| "grad_norm": 1.68873547972742, |
| "learning_rate": 2.860070675184036e-05, |
| "loss": 1.0827, |
| "step": 330 |
| }, |
| { |
| "epoch": 3.2771084337349397, |
| "grad_norm": 1.6928065503330352, |
| "learning_rate": 2.588559263632719e-05, |
| "loss": 1.055, |
| "step": 340 |
| }, |
| { |
| "epoch": 3.3734939759036147, |
| "grad_norm": 1.656803450443418, |
| "learning_rate": 2.3260185700046294e-05, |
| "loss": 1.0615, |
| "step": 350 |
| }, |
| { |
| "epoch": 3.4698795180722892, |
| "grad_norm": 1.7763695778702038, |
| "learning_rate": 2.0734252629237894e-05, |
| "loss": 1.054, |
| "step": 360 |
| }, |
| { |
| "epoch": 3.566265060240964, |
| "grad_norm": 1.7692488057265054, |
| "learning_rate": 1.831719006082924e-05, |
| "loss": 1.0659, |
| "step": 370 |
| }, |
| { |
| "epoch": 3.662650602409639, |
| "grad_norm": 1.7554003807515917, |
| "learning_rate": 1.601798962632799e-05, |
| "loss": 1.0694, |
| "step": 380 |
| }, |
| { |
| "epoch": 3.7590361445783134, |
| "grad_norm": 1.822610764821149, |
| "learning_rate": 1.384520450236244e-05, |
| "loss": 1.0566, |
| "step": 390 |
| }, |
| { |
| "epoch": 3.855421686746988, |
| "grad_norm": 1.8319206289975956, |
| "learning_rate": 1.1806917592302762e-05, |
| "loss": 1.0494, |
| "step": 400 |
| }, |
| { |
| "epoch": 3.9518072289156625, |
| "grad_norm": 1.9096676463205011, |
| "learning_rate": 9.91071145732948e-06, |
| "loss": 1.0577, |
| "step": 410 |
| }, |
| { |
| "epoch": 4.048192771084337, |
| "grad_norm": 1.7249401183320767, |
| "learning_rate": 8.163640108807896e-06, |
| "loss": 1.0293, |
| "step": 420 |
| }, |
| { |
| "epoch": 4.144578313253012, |
| "grad_norm": 1.826717977143128, |
| "learning_rate": 6.572202766902569e-06, |
| "loss": 1.0078, |
| "step": 430 |
| }, |
| { |
| "epoch": 4.240963855421687, |
| "grad_norm": 1.8877052786758952, |
| "learning_rate": 5.1423196830513e-06, |
| "loss": 0.9904, |
| "step": 440 |
| }, |
| { |
| "epoch": 4.337349397590361, |
| "grad_norm": 1.8808793283796603, |
| "learning_rate": 3.879310116241042e-06, |
| "loss": 0.9815, |
| "step": 450 |
| }, |
| { |
| "epoch": 4.433734939759036, |
| "grad_norm": 1.882737371988685, |
| "learning_rate": 2.787872545015069e-06, |
| "loss": 0.9995, |
| "step": 460 |
| }, |
| { |
| "epoch": 4.530120481927711, |
| "grad_norm": 1.9125920590825105, |
| "learning_rate": 1.8720671888242059e-06, |
| "loss": 1.0179, |
| "step": 470 |
| }, |
| { |
| "epoch": 4.626506024096385, |
| "grad_norm": 1.8949023884002059, |
| "learning_rate": 1.1353009037437523e-06, |
| "loss": 1.002, |
| "step": 480 |
| }, |
| { |
| "epoch": 4.72289156626506, |
| "grad_norm": 1.8902529975556408, |
| "learning_rate": 5.803145087451945e-07, |
| "loss": 0.9862, |
| "step": 490 |
| }, |
| { |
| "epoch": 4.8192771084337345, |
| "grad_norm": 1.915141448607233, |
| "learning_rate": 2.0917258966953733e-07, |
| "loss": 1.0089, |
| "step": 500 |
| }, |
| { |
| "epoch": 4.9156626506024095, |
| "grad_norm": 1.8921692295680217, |
| "learning_rate": 2.3255818832423894e-08, |
| "loss": 0.9775, |
| "step": 510 |
| }, |
| { |
| "epoch": 4.9638554216867465, |
| "step": 515, |
| "total_flos": 72399436972032.0, |
| "train_loss": 1.2166850728896057, |
| "train_runtime": 4721.4385, |
| "train_samples_per_second": 0.879, |
| "train_steps_per_second": 0.109 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 515, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 5, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 72399436972032.0, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |