| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 500, |
| "global_step": 391, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.02557544757033248, |
| "grad_norm": 1.875, |
| "learning_rate": 2.5e-05, |
| "loss": 0.8604, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.05115089514066496, |
| "grad_norm": 1.03125, |
| "learning_rate": 5e-05, |
| "loss": 0.6219, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.07672634271099744, |
| "grad_norm": 0.80078125, |
| "learning_rate": 7.500000000000001e-05, |
| "loss": 0.5142, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.10230179028132992, |
| "grad_norm": 0.71484375, |
| "learning_rate": 0.0001, |
| "loss": 0.4471, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.1278772378516624, |
| "grad_norm": 0.6640625, |
| "learning_rate": 9.715099715099715e-05, |
| "loss": 0.4005, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.1534526854219949, |
| "grad_norm": 0.58984375, |
| "learning_rate": 9.430199430199431e-05, |
| "loss": 0.3637, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.17902813299232737, |
| "grad_norm": 0.57421875, |
| "learning_rate": 9.145299145299146e-05, |
| "loss": 0.35, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.20460358056265984, |
| "grad_norm": 0.5078125, |
| "learning_rate": 8.860398860398861e-05, |
| "loss": 0.3432, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.23017902813299232, |
| "grad_norm": 0.50390625, |
| "learning_rate": 8.575498575498576e-05, |
| "loss": 0.3338, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.2557544757033248, |
| "grad_norm": 0.58203125, |
| "learning_rate": 8.290598290598292e-05, |
| "loss": 0.3331, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.2813299232736573, |
| "grad_norm": 0.55859375, |
| "learning_rate": 8.005698005698006e-05, |
| "loss": 0.3215, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.3069053708439898, |
| "grad_norm": 0.546875, |
| "learning_rate": 7.720797720797721e-05, |
| "loss": 0.3185, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.33248081841432225, |
| "grad_norm": 0.51953125, |
| "learning_rate": 7.435897435897436e-05, |
| "loss": 0.3186, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.35805626598465473, |
| "grad_norm": 0.46484375, |
| "learning_rate": 7.150997150997152e-05, |
| "loss": 0.3156, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.3836317135549872, |
| "grad_norm": 0.5859375, |
| "learning_rate": 6.866096866096867e-05, |
| "loss": 0.3106, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.4092071611253197, |
| "grad_norm": 0.48046875, |
| "learning_rate": 6.581196581196581e-05, |
| "loss": 0.3146, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.43478260869565216, |
| "grad_norm": 0.53515625, |
| "learning_rate": 6.296296296296296e-05, |
| "loss": 0.3135, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.46035805626598464, |
| "grad_norm": 0.490234375, |
| "learning_rate": 6.011396011396012e-05, |
| "loss": 0.3059, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.4859335038363171, |
| "grad_norm": 0.609375, |
| "learning_rate": 5.726495726495726e-05, |
| "loss": 0.3049, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.5115089514066496, |
| "grad_norm": 0.53125, |
| "learning_rate": 5.441595441595442e-05, |
| "loss": 0.3032, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.5370843989769821, |
| "grad_norm": 0.53125, |
| "learning_rate": 5.156695156695157e-05, |
| "loss": 0.2889, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.5626598465473146, |
| "grad_norm": 0.50390625, |
| "learning_rate": 4.871794871794872e-05, |
| "loss": 0.2971, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.5882352941176471, |
| "grad_norm": 0.546875, |
| "learning_rate": 4.586894586894587e-05, |
| "loss": 0.3088, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.6138107416879796, |
| "grad_norm": 0.57421875, |
| "learning_rate": 4.301994301994302e-05, |
| "loss": 0.2977, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.639386189258312, |
| "grad_norm": 0.578125, |
| "learning_rate": 4.0170940170940174e-05, |
| "loss": 0.2956, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.6649616368286445, |
| "grad_norm": 0.546875, |
| "learning_rate": 3.732193732193732e-05, |
| "loss": 0.2953, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.690537084398977, |
| "grad_norm": 0.49609375, |
| "learning_rate": 3.4472934472934476e-05, |
| "loss": 0.2955, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.7161125319693095, |
| "grad_norm": 0.49609375, |
| "learning_rate": 3.162393162393162e-05, |
| "loss": 0.2892, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.7416879795396419, |
| "grad_norm": 0.486328125, |
| "learning_rate": 2.8774928774928778e-05, |
| "loss": 0.281, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.7672634271099744, |
| "grad_norm": 0.4921875, |
| "learning_rate": 2.5925925925925925e-05, |
| "loss": 0.2911, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.7928388746803069, |
| "grad_norm": 0.61328125, |
| "learning_rate": 2.307692307692308e-05, |
| "loss": 0.2943, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.8184143222506394, |
| "grad_norm": 0.53125, |
| "learning_rate": 2.022792022792023e-05, |
| "loss": 0.293, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.8439897698209718, |
| "grad_norm": 0.4609375, |
| "learning_rate": 1.737891737891738e-05, |
| "loss": 0.2815, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.8695652173913043, |
| "grad_norm": 0.4609375, |
| "learning_rate": 1.4529914529914531e-05, |
| "loss": 0.2871, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.8951406649616368, |
| "grad_norm": 0.55078125, |
| "learning_rate": 1.168091168091168e-05, |
| "loss": 0.2832, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.9207161125319693, |
| "grad_norm": 0.55859375, |
| "learning_rate": 8.831908831908831e-06, |
| "loss": 0.289, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.9462915601023018, |
| "grad_norm": 0.53125, |
| "learning_rate": 5.982905982905984e-06, |
| "loss": 0.2936, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.9718670076726342, |
| "grad_norm": 0.484375, |
| "learning_rate": 3.133903133903134e-06, |
| "loss": 0.2828, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.9974424552429667, |
| "grad_norm": 0.53515625, |
| "learning_rate": 2.8490028490028494e-07, |
| "loss": 0.2916, |
| "step": 390 |
| }, |
| { |
| "epoch": 1.0, |
| "step": 391, |
| "total_flos": 1.382893920190464e+16, |
| "train_loss": 0.3390924896273162, |
| "train_runtime": 504.4341, |
| "train_samples_per_second": 49.56, |
| "train_steps_per_second": 0.775 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 391, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 1000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.382893920190464e+16, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|