{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 500,
  "global_step": 391,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.02557544757033248,
      "grad_norm": 1.875,
      "learning_rate": 2.5e-05,
      "loss": 0.8604,
      "step": 10
    },
    {
      "epoch": 0.05115089514066496,
      "grad_norm": 1.03125,
      "learning_rate": 5e-05,
      "loss": 0.6219,
      "step": 20
    },
    {
      "epoch": 0.07672634271099744,
      "grad_norm": 0.80078125,
      "learning_rate": 7.500000000000001e-05,
      "loss": 0.5142,
      "step": 30
    },
    {
      "epoch": 0.10230179028132992,
      "grad_norm": 0.71484375,
      "learning_rate": 0.0001,
      "loss": 0.4471,
      "step": 40
    },
    {
      "epoch": 0.1278772378516624,
      "grad_norm": 0.6640625,
      "learning_rate": 9.715099715099715e-05,
      "loss": 0.4005,
      "step": 50
    },
    {
      "epoch": 0.1534526854219949,
      "grad_norm": 0.58984375,
      "learning_rate": 9.430199430199431e-05,
      "loss": 0.3637,
      "step": 60
    },
    {
      "epoch": 0.17902813299232737,
      "grad_norm": 0.57421875,
      "learning_rate": 9.145299145299146e-05,
      "loss": 0.35,
      "step": 70
    },
    {
      "epoch": 0.20460358056265984,
      "grad_norm": 0.5078125,
      "learning_rate": 8.860398860398861e-05,
      "loss": 0.3432,
      "step": 80
    },
    {
      "epoch": 0.23017902813299232,
      "grad_norm": 0.50390625,
      "learning_rate": 8.575498575498576e-05,
      "loss": 0.3338,
      "step": 90
    },
    {
      "epoch": 0.2557544757033248,
      "grad_norm": 0.58203125,
      "learning_rate": 8.290598290598292e-05,
      "loss": 0.3331,
      "step": 100
    },
    {
      "epoch": 0.2813299232736573,
      "grad_norm": 0.55859375,
      "learning_rate": 8.005698005698006e-05,
      "loss": 0.3215,
      "step": 110
    },
    {
      "epoch": 0.3069053708439898,
      "grad_norm": 0.546875,
      "learning_rate": 7.720797720797721e-05,
      "loss": 0.3185,
      "step": 120
    },
    {
      "epoch": 0.33248081841432225,
      "grad_norm": 0.51953125,
      "learning_rate": 7.435897435897436e-05,
      "loss": 0.3186,
      "step": 130
    },
    {
      "epoch": 0.35805626598465473,
      "grad_norm": 0.46484375,
      "learning_rate": 7.150997150997152e-05,
      "loss": 0.3156,
      "step": 140
    },
    {
      "epoch": 0.3836317135549872,
      "grad_norm": 0.5859375,
      "learning_rate": 6.866096866096867e-05,
      "loss": 0.3106,
      "step": 150
    },
    {
      "epoch": 0.4092071611253197,
      "grad_norm": 0.48046875,
      "learning_rate": 6.581196581196581e-05,
      "loss": 0.3146,
      "step": 160
    },
    {
      "epoch": 0.43478260869565216,
      "grad_norm": 0.53515625,
      "learning_rate": 6.296296296296296e-05,
      "loss": 0.3135,
      "step": 170
    },
    {
      "epoch": 0.46035805626598464,
      "grad_norm": 0.490234375,
      "learning_rate": 6.011396011396012e-05,
      "loss": 0.3059,
      "step": 180
    },
    {
      "epoch": 0.4859335038363171,
      "grad_norm": 0.609375,
      "learning_rate": 5.726495726495726e-05,
      "loss": 0.3049,
      "step": 190
    },
    {
      "epoch": 0.5115089514066496,
      "grad_norm": 0.53125,
      "learning_rate": 5.441595441595442e-05,
      "loss": 0.3032,
      "step": 200
    },
    {
      "epoch": 0.5370843989769821,
      "grad_norm": 0.53125,
      "learning_rate": 5.156695156695157e-05,
      "loss": 0.2889,
      "step": 210
    },
    {
      "epoch": 0.5626598465473146,
      "grad_norm": 0.50390625,
      "learning_rate": 4.871794871794872e-05,
      "loss": 0.2971,
      "step": 220
    },
    {
      "epoch": 0.5882352941176471,
      "grad_norm": 0.546875,
      "learning_rate": 4.586894586894587e-05,
      "loss": 0.3088,
      "step": 230
    },
    {
      "epoch": 0.6138107416879796,
      "grad_norm": 0.57421875,
      "learning_rate": 4.301994301994302e-05,
      "loss": 0.2977,
      "step": 240
    },
    {
      "epoch": 0.639386189258312,
      "grad_norm": 0.578125,
      "learning_rate": 4.0170940170940174e-05,
      "loss": 0.2956,
      "step": 250
    },
    {
      "epoch": 0.6649616368286445,
      "grad_norm": 0.546875,
      "learning_rate": 3.732193732193732e-05,
      "loss": 0.2953,
      "step": 260
    },
    {
      "epoch": 0.690537084398977,
      "grad_norm": 0.49609375,
      "learning_rate": 3.4472934472934476e-05,
      "loss": 0.2955,
      "step": 270
    },
    {
      "epoch": 0.7161125319693095,
      "grad_norm": 0.49609375,
      "learning_rate": 3.162393162393162e-05,
      "loss": 0.2892,
      "step": 280
    },
    {
      "epoch": 0.7416879795396419,
      "grad_norm": 0.486328125,
      "learning_rate": 2.8774928774928778e-05,
      "loss": 0.281,
      "step": 290
    },
    {
      "epoch": 0.7672634271099744,
      "grad_norm": 0.4921875,
      "learning_rate": 2.5925925925925925e-05,
      "loss": 0.2911,
      "step": 300
    },
    {
      "epoch": 0.7928388746803069,
      "grad_norm": 0.61328125,
      "learning_rate": 2.307692307692308e-05,
      "loss": 0.2943,
      "step": 310
    },
    {
      "epoch": 0.8184143222506394,
      "grad_norm": 0.53125,
      "learning_rate": 2.022792022792023e-05,
      "loss": 0.293,
      "step": 320
    },
    {
      "epoch": 0.8439897698209718,
      "grad_norm": 0.4609375,
      "learning_rate": 1.737891737891738e-05,
      "loss": 0.2815,
      "step": 330
    },
    {
      "epoch": 0.8695652173913043,
      "grad_norm": 0.4609375,
      "learning_rate": 1.4529914529914531e-05,
      "loss": 0.2871,
      "step": 340
    },
    {
      "epoch": 0.8951406649616368,
      "grad_norm": 0.55078125,
      "learning_rate": 1.168091168091168e-05,
      "loss": 0.2832,
      "step": 350
    },
    {
      "epoch": 0.9207161125319693,
      "grad_norm": 0.55859375,
      "learning_rate": 8.831908831908831e-06,
      "loss": 0.289,
      "step": 360
    },
    {
      "epoch": 0.9462915601023018,
      "grad_norm": 0.53125,
      "learning_rate": 5.982905982905984e-06,
      "loss": 0.2936,
      "step": 370
    },
    {
      "epoch": 0.9718670076726342,
      "grad_norm": 0.484375,
      "learning_rate": 3.133903133903134e-06,
      "loss": 0.2828,
      "step": 380
    },
    {
      "epoch": 0.9974424552429667,
      "grad_norm": 0.53515625,
      "learning_rate": 2.8490028490028494e-07,
      "loss": 0.2916,
      "step": 390
    },
    {
      "epoch": 1.0,
      "step": 391,
      "total_flos": 1.382893920190464e+16,
      "train_loss": 0.3390924896273162,
      "train_runtime": 504.4341,
      "train_samples_per_second": 49.56,
      "train_steps_per_second": 0.775
    }
  ],
  "logging_steps": 10,
  "max_steps": 391,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 1000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.382893920190464e+16,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}