{
  "best_global_step": 6000,
  "best_metric": 1.4575997591018677,
  "best_model_checkpoint": "/content/marocAI-finetuned/checkpoint-2821/checkpoint-6000",
  "epoch": 7.0,
  "eval_steps": 500,
  "global_step": 6062,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.4618937644341801,
      "grad_norm": 1.0584869384765625,
      "learning_rate": 1.695852534562212e-05,
      "loss": 2.9443,
      "step": 100
    },
    {
      "epoch": 0.9237875288683602,
      "grad_norm": 1.051153302192688,
      "learning_rate": 1.3886328725038403e-05,
      "loss": 2.3079,
      "step": 200
    },
    {
      "epoch": 1.3833718244803694,
      "grad_norm": 0.9572842121124268,
      "learning_rate": 1.0814132104454686e-05,
      "loss": 2.2004,
      "step": 300
    },
    {
      "epoch": 1.8452655889145495,
      "grad_norm": 0.7911381125450134,
      "learning_rate": 7.741935483870968e-06,
      "loss": 2.0987,
      "step": 400
    },
    {
      "epoch": 2.304849884526559,
      "grad_norm": 1.0212643146514893,
      "learning_rate": 4.669738863287251e-06,
      "loss": 2.0724,
      "step": 500
    },
    {
      "epoch": 2.304849884526559,
      "eval_loss": 1.7146347761154175,
      "eval_runtime": 3.6485,
      "eval_samples_per_second": 50.157,
      "eval_steps_per_second": 12.608,
      "step": 500
    },
    {
      "epoch": 2.766743648960739,
      "grad_norm": 1.0736603736877441,
      "learning_rate": 1.5975422427035332e-06,
      "loss": 2.0376,
      "step": 600
    },
    {
      "epoch": 3.2263279445727484,
      "grad_norm": 0.960295557975769,
      "learning_rate": 9.262672811059909e-06,
      "loss": 2.0108,
      "step": 700
    },
    {
      "epoch": 3.6882217090069283,
      "grad_norm": 0.8508874773979187,
      "learning_rate": 7.726574500768049e-06,
      "loss": 2.0039,
      "step": 800
    },
    {
      "epoch": 4.147806004618937,
      "grad_norm": 0.9244301319122314,
      "learning_rate": 6.1904761904761914e-06,
      "loss": 2.0071,
      "step": 900
    },
    {
      "epoch": 4.609699769053118,
      "grad_norm": 0.8824007511138916,
      "learning_rate": 4.654377880184332e-06,
      "loss": 1.9621,
      "step": 1000
    },
    {
      "epoch": 4.609699769053118,
      "eval_loss": 1.6397035121917725,
      "eval_runtime": 3.6176,
      "eval_samples_per_second": 50.586,
      "eval_steps_per_second": 12.716,
      "step": 1000
    },
    {
      "epoch": 5.069284064665127,
      "grad_norm": 0.8873424530029297,
      "learning_rate": 3.1182795698924735e-06,
      "loss": 1.9262,
      "step": 1100
    },
    {
      "epoch": 5.531177829099307,
      "grad_norm": 0.8406194448471069,
      "learning_rate": 1.5821812596006145e-06,
      "loss": 1.9235,
      "step": 1200
    },
    {
      "epoch": 5.993071593533488,
      "grad_norm": 0.8757996559143066,
      "learning_rate": 4.608294930875576e-08,
      "loss": 1.9402,
      "step": 1300
    },
    {
      "epoch": 6.452655889145497,
      "grad_norm": 0.8397168517112732,
      "learning_rate": 1.0081531371853954e-05,
      "loss": 1.9086,
      "step": 1400
    },
    {
      "epoch": 6.914549653579677,
      "grad_norm": 0.8500417470932007,
      "learning_rate": 9.372562920950018e-06,
      "loss": 1.9123,
      "step": 1500
    },
    {
      "epoch": 6.914549653579677,
      "eval_loss": 1.5952860116958618,
      "eval_runtime": 3.6287,
      "eval_samples_per_second": 50.432,
      "eval_steps_per_second": 12.677,
      "step": 1500
    },
    {
      "epoch": 7.374133949191686,
      "grad_norm": 0.8373268246650696,
      "learning_rate": 8.663594470046084e-06,
      "loss": 1.8666,
      "step": 1600
    },
    {
      "epoch": 7.836027713625866,
      "grad_norm": 0.8652852773666382,
      "learning_rate": 7.954626019142148e-06,
      "loss": 1.8377,
      "step": 1700
    },
    {
      "epoch": 8.295612009237875,
      "grad_norm": 0.8815492391586304,
      "learning_rate": 7.245657568238214e-06,
      "loss": 1.8395,
      "step": 1800
    },
    {
      "epoch": 8.757505773672055,
      "grad_norm": 0.8464174270629883,
      "learning_rate": 6.53668911733428e-06,
      "loss": 1.8285,
      "step": 1900
    },
    {
      "epoch": 9.217090069284065,
      "grad_norm": 0.818134069442749,
      "learning_rate": 5.827720666430344e-06,
      "loss": 1.8244,
      "step": 2000
    },
    {
      "epoch": 9.217090069284065,
      "eval_loss": 1.5474414825439453,
      "eval_runtime": 3.7454,
      "eval_samples_per_second": 48.86,
      "eval_steps_per_second": 12.282,
      "step": 2000
    },
    {
      "epoch": 9.678983833718245,
      "grad_norm": 0.8570533394813538,
      "learning_rate": 5.118752215526409e-06,
      "loss": 1.8044,
      "step": 2100
    },
    {
      "epoch": 10.138568129330254,
      "grad_norm": 0.8370910286903381,
      "learning_rate": 4.409783764622475e-06,
      "loss": 1.7769,
      "step": 2200
    },
    {
      "epoch": 10.600461893764434,
      "grad_norm": 0.8950196504592896,
      "learning_rate": 3.70081531371854e-06,
      "loss": 1.7914,
      "step": 2300
    },
    {
      "epoch": 11.060046189376443,
      "grad_norm": 0.7839242815971375,
      "learning_rate": 2.9918468628146054e-06,
      "loss": 1.7767,
      "step": 2400
    },
    {
      "epoch": 11.521939953810623,
      "grad_norm": 0.8934968709945679,
      "learning_rate": 2.28287841191067e-06,
      "loss": 1.7807,
      "step": 2500
    },
    {
      "epoch": 11.521939953810623,
      "eval_loss": 1.5255564451217651,
      "eval_runtime": 3.6222,
      "eval_samples_per_second": 50.522,
      "eval_steps_per_second": 12.7,
      "step": 2500
    },
    {
      "epoch": 11.983833718244803,
      "grad_norm": 0.9384580254554749,
      "learning_rate": 1.5739099610067355e-06,
      "loss": 1.7571,
      "step": 2600
    },
    {
      "epoch": 12.443418013856814,
      "grad_norm": 0.838097333908081,
      "learning_rate": 8.649415101028006e-07,
      "loss": 1.7684,
      "step": 2700
    },
    {
      "epoch": 12.905311778290994,
      "grad_norm": 0.807694673538208,
      "learning_rate": 1.5597305919886567e-07,
      "loss": 1.7643,
      "step": 2800
    },
    {
      "epoch": 3.348729792147806,
      "grad_norm": 1.560190200805664,
      "learning_rate": 1.0610533378061055e-05,
      "loss": 1.7522,
      "step": 2900
    },
    {
      "epoch": 3.464203233256351,
      "grad_norm": 1.5953682661056519,
      "learning_rate": 1.0275075478027507e-05,
      "loss": 1.7661,
      "step": 3000
    },
    {
      "epoch": 3.464203233256351,
      "eval_loss": 1.5233831405639648,
      "eval_runtime": 3.6827,
      "eval_samples_per_second": 49.692,
      "eval_steps_per_second": 12.491,
      "step": 3000
    },
    {
      "epoch": 3.579676674364896,
      "grad_norm": 1.5370293855667114,
      "learning_rate": 9.939617577993964e-06,
      "loss": 1.7759,
      "step": 3100
    },
    {
      "epoch": 3.695150115473441,
      "grad_norm": 1.470035195350647,
      "learning_rate": 9.604159677960416e-06,
      "loss": 1.7367,
      "step": 3200
    },
    {
      "epoch": 3.8106235565819864,
      "grad_norm": 1.5553827285766602,
      "learning_rate": 9.26870177792687e-06,
      "loss": 1.7614,
      "step": 3300
    },
    {
      "epoch": 3.9260969976905313,
      "grad_norm": 1.7353712320327759,
      "learning_rate": 8.933243877893324e-06,
      "loss": 1.7301,
      "step": 3400
    },
    {
      "epoch": 4.041570438799076,
      "grad_norm": 1.8023557662963867,
      "learning_rate": 8.59778597785978e-06,
      "loss": 1.7797,
      "step": 3500
    },
    {
      "epoch": 4.041570438799076,
      "eval_loss": 1.5006413459777832,
      "eval_runtime": 3.6376,
      "eval_samples_per_second": 50.308,
      "eval_steps_per_second": 12.646,
      "step": 3500
    },
    {
      "epoch": 4.157043879907621,
      "grad_norm": 1.4707646369934082,
      "learning_rate": 8.262328077826235e-06,
      "loss": 1.7436,
      "step": 3600
    },
    {
      "epoch": 4.272517321016166,
      "grad_norm": 1.8790034055709839,
      "learning_rate": 7.926870177792688e-06,
      "loss": 1.7703,
      "step": 3700
    },
    {
      "epoch": 4.387990762124711,
      "grad_norm": 1.7127020359039307,
      "learning_rate": 7.591412277759142e-06,
      "loss": 1.7297,
      "step": 3800
    },
    {
      "epoch": 4.503464203233256,
      "grad_norm": 1.575723648071289,
      "learning_rate": 7.255954377725596e-06,
      "loss": 1.6714,
      "step": 3900
    },
    {
      "epoch": 4.618937644341801,
      "grad_norm": 1.6164535284042358,
      "learning_rate": 6.92049647769205e-06,
      "loss": 1.6929,
      "step": 4000
    },
    {
      "epoch": 4.618937644341801,
      "eval_loss": 1.4842592477798462,
      "eval_runtime": 3.769,
      "eval_samples_per_second": 48.554,
      "eval_steps_per_second": 12.205,
      "step": 4000
    },
    {
      "epoch": 4.734411085450346,
      "grad_norm": 1.4100682735443115,
      "learning_rate": 6.585038577658505e-06,
      "loss": 1.7042,
      "step": 4100
    },
    {
      "epoch": 4.849884526558892,
      "grad_norm": 1.533423900604248,
      "learning_rate": 6.249580677624959e-06,
      "loss": 1.6724,
      "step": 4200
    },
    {
      "epoch": 4.965357967667437,
      "grad_norm": 1.7051324844360352,
      "learning_rate": 5.9141227775914126e-06,
      "loss": 1.6685,
      "step": 4300
    },
    {
      "epoch": 5.080831408775982,
      "grad_norm": 1.5413386821746826,
      "learning_rate": 5.578664877557867e-06,
      "loss": 1.6784,
      "step": 4400
    },
    {
      "epoch": 5.196304849884527,
      "grad_norm": 1.7100459337234497,
      "learning_rate": 5.243206977524321e-06,
      "loss": 1.6237,
      "step": 4500
    },
    {
      "epoch": 5.196304849884527,
      "eval_loss": 1.4744157791137695,
      "eval_runtime": 3.6566,
      "eval_samples_per_second": 50.047,
      "eval_steps_per_second": 12.58,
      "step": 4500
    },
    {
      "epoch": 5.311778290993072,
      "grad_norm": 1.8523712158203125,
      "learning_rate": 4.907749077490776e-06,
      "loss": 1.7114,
      "step": 4600
    },
    {
      "epoch": 5.427251732101617,
      "grad_norm": 1.578623652458191,
      "learning_rate": 4.572291177457229e-06,
      "loss": 1.6396,
      "step": 4700
    },
    {
      "epoch": 5.542725173210162,
      "grad_norm": 1.6069693565368652,
      "learning_rate": 4.2368332774236835e-06,
      "loss": 1.6942,
      "step": 4800
    },
    {
      "epoch": 5.658198614318707,
      "grad_norm": 1.2671387195587158,
      "learning_rate": 3.901375377390138e-06,
      "loss": 1.6999,
      "step": 4900
    },
    {
      "epoch": 5.773672055427252,
      "grad_norm": 1.5692400932312012,
      "learning_rate": 3.5659174773565918e-06,
      "loss": 1.6395,
      "step": 5000
    },
    {
      "epoch": 5.773672055427252,
      "eval_loss": 1.464585542678833,
      "eval_runtime": 3.6703,
      "eval_samples_per_second": 49.859,
      "eval_steps_per_second": 12.533,
      "step": 5000
    },
    {
      "epoch": 5.8891454965357966,
      "grad_norm": 1.6615720987319946,
      "learning_rate": 3.230459577323046e-06,
      "loss": 1.6839,
      "step": 5100
    },
    {
      "epoch": 6.0046189376443415,
      "grad_norm": 1.6811989545822144,
      "learning_rate": 2.8950016772895005e-06,
      "loss": 1.7058,
      "step": 5200
    },
    {
      "epoch": 6.1200923787528865,
      "grad_norm": 1.569676399230957,
      "learning_rate": 2.5595437772559544e-06,
      "loss": 1.6426,
      "step": 5300
    },
    {
      "epoch": 6.235565819861431,
      "grad_norm": 1.753227949142456,
      "learning_rate": 2.2240858772224088e-06,
      "loss": 1.646,
      "step": 5400
    },
    {
      "epoch": 6.351039260969977,
      "grad_norm": 1.3525090217590332,
      "learning_rate": 1.888627977188863e-06,
      "loss": 1.6184,
      "step": 5500
    },
    {
      "epoch": 6.351039260969977,
      "eval_loss": 1.459729790687561,
      "eval_runtime": 3.6443,
      "eval_samples_per_second": 50.216,
      "eval_steps_per_second": 12.623,
      "step": 5500
    },
    {
      "epoch": 6.466512702078522,
      "grad_norm": 1.6190038919448853,
      "learning_rate": 1.553170077155317e-06,
      "loss": 1.6743,
      "step": 5600
    },
    {
      "epoch": 6.581986143187067,
      "grad_norm": 1.5956363677978516,
      "learning_rate": 1.2177121771217714e-06,
      "loss": 1.6474,
      "step": 5700
    },
    {
      "epoch": 6.697459584295612,
      "grad_norm": 1.6586838960647583,
      "learning_rate": 8.822542770882254e-07,
      "loss": 1.7052,
      "step": 5800
    },
    {
      "epoch": 6.812933025404157,
      "grad_norm": 1.3021780252456665,
      "learning_rate": 5.467963770546797e-07,
      "loss": 1.6333,
      "step": 5900
    },
    {
      "epoch": 6.928406466512702,
      "grad_norm": 1.7689718008041382,
      "learning_rate": 2.1133847702113386e-07,
      "loss": 1.6687,
      "step": 6000
    },
    {
      "epoch": 6.928406466512702,
      "eval_loss": 1.4575997591018677,
      "eval_runtime": 3.7418,
      "eval_samples_per_second": 48.907,
      "eval_steps_per_second": 12.294,
      "step": 6000
    }
  ],
  "logging_steps": 100,
  "max_steps": 6062,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 7,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.5149450723328e+16,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}