{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 500,
  "global_step": 4029,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.02482005460412013,
      "grad_norm": 0.1369238644838333,
      "learning_rate": 6.666666666666667e-06,
      "loss": 2.4042,
      "step": 100
    },
    {
      "epoch": 0.04964010920824026,
      "grad_norm": 0.17341101169586182,
      "learning_rate": 1.3333333333333333e-05,
      "loss": 2.3581,
      "step": 200
    },
    {
      "epoch": 0.07446016381236038,
      "grad_norm": 0.30298689007759094,
      "learning_rate": 2e-05,
      "loss": 2.2979,
      "step": 300
    },
    {
      "epoch": 0.09928021841648052,
      "grad_norm": 0.4181392788887024,
      "learning_rate": 1.9964532702725803e-05,
      "loss": 2.2736,
      "step": 400
    },
    {
      "epoch": 0.12410027302060064,
      "grad_norm": 0.4833754301071167,
      "learning_rate": 1.9858382396738395e-05,
      "loss": 2.2352,
      "step": 500
    },
    {
      "epoch": 0.14892032762472077,
      "grad_norm": 0.5508949756622314,
      "learning_rate": 1.9682302054929414e-05,
      "loss": 2.1951,
      "step": 600
    },
    {
      "epoch": 0.17374038222884092,
      "grad_norm": 0.5856565833091736,
      "learning_rate": 1.943754069606428e-05,
      "loss": 2.1662,
      "step": 700
    },
    {
      "epoch": 0.19856043683296104,
      "grad_norm": 0.5611233115196228,
      "learning_rate": 1.9125834524918215e-05,
      "loss": 2.1815,
      "step": 800
    },
    {
      "epoch": 0.22338049143708116,
      "grad_norm": 0.6802138090133667,
      "learning_rate": 1.8749394616578068e-05,
      "loss": 2.1675,
      "step": 900
    },
    {
      "epoch": 0.2482005460412013,
      "grad_norm": 0.6513592004776001,
      "learning_rate": 1.8310891232270827e-05,
      "loss": 2.1402,
      "step": 1000
    },
    {
      "epoch": 0.2730206006453214,
      "grad_norm": 0.6889598369598389,
      "learning_rate": 1.781343487797389e-05,
      "loss": 2.1334,
      "step": 1100
    },
    {
      "epoch": 0.29784065524944153,
      "grad_norm": 0.7928256988525391,
      "learning_rate": 1.7260554240167017e-05,
      "loss": 2.1295,
      "step": 1200
    },
    {
      "epoch": 0.32266070985356166,
      "grad_norm": 0.7162489295005798,
      "learning_rate": 1.665617115523785e-05,
      "loss": 2.1232,
      "step": 1300
    },
    {
      "epoch": 0.34748076445768183,
      "grad_norm": 0.7136086225509644,
      "learning_rate": 1.6004572790094535e-05,
      "loss": 2.1148,
      "step": 1400
    },
    {
      "epoch": 0.37230081906180196,
      "grad_norm": 0.7688263654708862,
      "learning_rate": 1.531038123132105e-05,
      "loss": 2.0873,
      "step": 1500
    },
    {
      "epoch": 0.3971208736659221,
      "grad_norm": 0.772521436214447,
      "learning_rate": 1.4578520698593441e-05,
      "loss": 2.117,
      "step": 1600
    },
    {
      "epoch": 0.4219409282700422,
      "grad_norm": 1.010330080986023,
      "learning_rate": 1.3814182614927217e-05,
      "loss": 2.071,
      "step": 1700
    },
    {
      "epoch": 0.4467609828741623,
      "grad_norm": 0.6752054691314697,
      "learning_rate": 1.3022788781528653e-05,
      "loss": 2.0636,
      "step": 1800
    },
    {
      "epoch": 0.47158103747828245,
      "grad_norm": 0.841232180595398,
      "learning_rate": 1.220995291846777e-05,
      "loss": 2.0532,
      "step": 1900
    },
    {
      "epoch": 0.4964010920824026,
      "grad_norm": 0.7984778881072998,
      "learning_rate": 1.1381440843982634e-05,
      "loss": 2.0438,
      "step": 2000
    },
    {
      "epoch": 0.5212211466865228,
      "grad_norm": 0.8068585395812988,
      "learning_rate": 1.0543129574881446e-05,
      "loss": 2.0687,
      "step": 2100
    },
    {
      "epoch": 0.5460412012906428,
      "grad_norm": 0.8497598767280579,
      "learning_rate": 9.700965638162112e-06,
      "loss": 2.0477,
      "step": 2200
    },
    {
      "epoch": 0.570861255894763,
      "grad_norm": 0.7474705576896667,
      "learning_rate": 8.860922889564078e-06,
      "loss": 2.0429,
      "step": 2300
    },
    {
      "epoch": 0.5956813104988831,
      "grad_norm": 1.0781651735305786,
      "learning_rate": 8.028960138264857e-06,
      "loss": 2.0389,
      "step": 2400
    },
    {
      "epoch": 0.6205013651030032,
      "grad_norm": 0.8750322461128235,
      "learning_rate": 7.21097887830873e-06,
      "loss": 2.046,
      "step": 2500
    },
    {
      "epoch": 0.6453214197071233,
      "grad_norm": 0.9259145855903625,
      "learning_rate": 6.4127814265980095e-06,
      "loss": 2.0243,
      "step": 2600
    },
    {
      "epoch": 0.6701414743112435,
      "grad_norm": 1.1625196933746338,
      "learning_rate": 5.640029764393366e-06,
      "loss": 2.0513,
      "step": 2700
    },
    {
      "epoch": 0.6949615289153637,
      "grad_norm": 0.8271129727363586,
      "learning_rate": 4.8982053742793025e-06,
      "loss": 2.0228,
      "step": 2800
    },
    {
      "epoch": 0.7197815835194837,
      "grad_norm": 0.7196031212806702,
      "learning_rate": 4.1925703574897115e-06,
      "loss": 2.0496,
      "step": 2900
    },
    {
      "epoch": 0.7446016381236039,
      "grad_norm": 0.7880265712738037,
      "learning_rate": 3.528130107406099e-06,
      "loss": 2.0145,
      "step": 3000
    },
    {
      "epoch": 0.769421692727724,
      "grad_norm": 0.909106433391571,
      "learning_rate": 2.909597804002603e-06,
      "loss": 2.0437,
      "step": 3100
    },
    {
      "epoch": 0.7942417473318442,
      "grad_norm": 1.2606161832809448,
      "learning_rate": 2.341360981094921e-06,
      "loss": 2.0443,
      "step": 3200
    },
    {
      "epoch": 0.8190618019359642,
      "grad_norm": 0.795652449131012,
      "learning_rate": 1.8274504035470942e-06,
      "loss": 2.0568,
      "step": 3300
    },
    {
      "epoch": 0.8438818565400844,
      "grad_norm": 0.8904260993003845,
      "learning_rate": 1.3715114752043746e-06,
      "loss": 2.0787,
      "step": 3400
    },
    {
      "epoch": 0.8687019111442045,
      "grad_norm": 1.0925287008285522,
      "learning_rate": 9.767783803688414e-07,
      "loss": 2.045,
      "step": 3500
    },
    {
      "epoch": 0.8935219657483247,
      "grad_norm": 0.799608588218689,
      "learning_rate": 6.460511422441984e-07,
      "loss": 2.0167,
      "step": 3600
    },
    {
      "epoch": 0.9183420203524447,
      "grad_norm": 0.9094216227531433,
      "learning_rate": 3.8167576108468994e-07,
      "loss": 2.057,
      "step": 3700
    },
    {
      "epoch": 0.9431620749565649,
      "grad_norm": 0.8395094871520996,
      "learning_rate": 1.855275729374284e-07,
      "loss": 2.0425,
      "step": 3800
    },
    {
      "epoch": 0.9679821295606851,
      "grad_norm": 0.8606423735618591,
      "learning_rate": 5.89979470221802e-08,
      "loss": 2.0208,
      "step": 3900
    },
    {
      "epoch": 0.9928021841648051,
      "grad_norm": 0.8908767700195312,
      "learning_rate": 2.9844161102077218e-09,
      "loss": 2.0512,
      "step": 4000
    },
    {
      "epoch": 1.0,
      "step": 4029,
      "total_flos": 7.32108351012864e+16,
      "train_loss": 2.105005581936963,
      "train_runtime": 1251.4031,
      "train_samples_per_second": 6.438,
      "train_steps_per_second": 3.22
    }
  ],
  "logging_steps": 100,
  "max_steps": 4029,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 7.32108351012864e+16,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}