| { |
| "best_metric": 1.7200040817260742, |
| "best_model_checkpoint": "./results/cluster2_batch1_prop0.2/checkpoint-1500", |
| "epoch": 0.9999231616422918, |
| "eval_steps": 500, |
| "global_step": 2440, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.04098045744435622, |
| "grad_norm": 0.3399759829044342, |
| "learning_rate": 9.997020702755353e-05, |
| "loss": 1.9745, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.08196091488871245, |
| "grad_norm": 1.2304991483688354, |
| "learning_rate": 9.930186708264901e-05, |
| "loss": 1.7722, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.12294137233306866, |
| "grad_norm": 1.1636056900024414, |
| "learning_rate": 9.776557563346957e-05, |
| "loss": 1.7359, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.1639218297774249, |
| "grad_norm": 1.0206753015518188, |
| "learning_rate": 9.538837884587511e-05, |
| "loss": 1.6943, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.2049022872217811, |
| "grad_norm": 1.0695409774780273, |
| "learning_rate": 9.221212689004862e-05, |
| "loss": 1.6755, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.2049022872217811, |
| "eval_loss": 1.7410061359405518, |
| "eval_runtime": 1240.4448, |
| "eval_samples_per_second": 4.04, |
| "eval_steps_per_second": 2.02, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.24588274466613733, |
| "grad_norm": 1.3881298303604126, |
| "learning_rate": 8.82927371749271e-05, |
| "loss": 1.6707, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.28686320211049354, |
| "grad_norm": 0.9833546876907349, |
| "learning_rate": 8.369920993113824e-05, |
| "loss": 1.6787, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.3278436595548498, |
| "grad_norm": 0.9632484912872314, |
| "learning_rate": 7.851241347294876e-05, |
| "loss": 1.683, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.36882411699920603, |
| "grad_norm": 0.9162977337837219, |
| "learning_rate": 7.28236605244935e-05, |
| "loss": 1.6609, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.4098045744435622, |
| "grad_norm": 1.2075759172439575, |
| "learning_rate": 6.673310067383545e-05, |
| "loss": 1.6527, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.4098045744435622, |
| "eval_loss": 1.7281365394592285, |
| "eval_runtime": 1238.7549, |
| "eval_samples_per_second": 4.045, |
| "eval_steps_per_second": 2.023, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.45078503188791846, |
| "grad_norm": 1.0344264507293701, |
| "learning_rate": 6.034795725544571e-05, |
| "loss": 1.6614, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.49176548933227465, |
| "grad_norm": 1.024488925933838, |
| "learning_rate": 5.378063970050694e-05, |
| "loss": 1.6817, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.5327459467766309, |
| "grad_norm": 1.174185037612915, |
| "learning_rate": 4.7146764586811296e-05, |
| "loss": 1.6607, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.5737264042209871, |
| "grad_norm": 1.417396903038025, |
| "learning_rate": 4.056312022735417e-05, |
| "loss": 1.6499, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.6147068616653434, |
| "grad_norm": 0.985633909702301, |
| "learning_rate": 3.414561063071644e-05, |
| "loss": 1.6598, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.6147068616653434, |
| "eval_loss": 1.7200040817260742, |
| "eval_runtime": 1239.5137, |
| "eval_samples_per_second": 4.043, |
| "eval_steps_per_second": 2.022, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.6556873191096996, |
| "grad_norm": 1.1905289888381958, |
| "learning_rate": 2.8007215029485057e-05, |
| "loss": 1.6755, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.6966677765540558, |
| "grad_norm": 1.6823028326034546, |
| "learning_rate": 2.2255998898888165e-05, |
| "loss": 1.6654, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.7376482339984121, |
| "grad_norm": 1.2739040851593018, |
| "learning_rate": 1.6993211481344824e-05, |
| "loss": 1.6453, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.7786286914427683, |
| "grad_norm": 1.4436434507369995, |
| "learning_rate": 1.2311503309705629e-05, |
| "loss": 1.6359, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.8196091488871244, |
| "grad_norm": 1.336064100265503, |
| "learning_rate": 8.293295109403504e-06, |
| "loss": 1.665, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.8196091488871244, |
| "eval_loss": 1.7243653535842896, |
| "eval_runtime": 1238.6702, |
| "eval_samples_per_second": 4.045, |
| "eval_steps_per_second": 2.023, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.8605896063314806, |
| "grad_norm": 0.9651763439178467, |
| "learning_rate": 5.009326794732072e-06, |
| "loss": 1.6241, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.9015700637758369, |
| "grad_norm": 1.135158896446228, |
| "learning_rate": 2.5174121039404643e-06, |
| "loss": 1.6527, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.9425505212201931, |
| "grad_norm": 1.232653260231018, |
| "learning_rate": 8.614207975952082e-07, |
| "loss": 1.6381, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.9835309786645493, |
| "grad_norm": 0.941377580165863, |
| "learning_rate": 7.050633844443711e-08, |
| "loss": 1.6307, |
| "step": 2400 |
| } |
| ], |
| "logging_steps": 100, |
| "max_steps": 2440, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 5.1491335851408384e+17, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|