| { |
| "best_global_step": 200, |
| "best_metric": 0.2029074728488922, |
| "best_model_checkpoint": "outputs/checkpoint-200", |
| "epoch": 0.7490636704119851, |
| "eval_steps": 100, |
| "global_step": 200, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.03745318352059925, |
| "grad_norm": 2.6471238136291504, |
| "learning_rate": 1.9865168539325844e-05, |
| "loss": 3.9924, |
| "mean_token_accuracy": 0.3569513201713562, |
| "num_tokens": 1110.0, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.0749063670411985, |
| "grad_norm": 2.9193994998931885, |
| "learning_rate": 1.9715355805243446e-05, |
| "loss": 2.5013, |
| "mean_token_accuracy": 0.5000596195459366, |
| "num_tokens": 2220.0, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.11235955056179775, |
| "grad_norm": 1.090408444404602, |
| "learning_rate": 1.956554307116105e-05, |
| "loss": 1.2021, |
| "mean_token_accuracy": 0.7512393116950988, |
| "num_tokens": 3329.0, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.149812734082397, |
| "grad_norm": 1.412244200706482, |
| "learning_rate": 1.9415730337078652e-05, |
| "loss": 0.6237, |
| "mean_token_accuracy": 0.8658290803432465, |
| "num_tokens": 4437.0, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.18726591760299627, |
| "grad_norm": 0.9774134755134583, |
| "learning_rate": 1.9265917602996254e-05, |
| "loss": 0.4264, |
| "mean_token_accuracy": 0.9105254471302032, |
| "num_tokens": 5553.0, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.2247191011235955, |
| "grad_norm": 0.6166325211524963, |
| "learning_rate": 1.9116104868913857e-05, |
| "loss": 0.3806, |
| "mean_token_accuracy": 0.8969066739082336, |
| "num_tokens": 6660.0, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.26217228464419473, |
| "grad_norm": 0.5820680856704712, |
| "learning_rate": 1.8966292134831463e-05, |
| "loss": 0.3484, |
| "mean_token_accuracy": 0.8972096979618073, |
| "num_tokens": 7769.0, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.299625468164794, |
| "grad_norm": 0.31422552466392517, |
| "learning_rate": 1.8816479400749066e-05, |
| "loss": 0.3196, |
| "mean_token_accuracy": 0.898263669013977, |
| "num_tokens": 8880.0, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.33707865168539325, |
| "grad_norm": 0.5825852155685425, |
| "learning_rate": 1.866666666666667e-05, |
| "loss": 0.2965, |
| "mean_token_accuracy": 0.9046498596668243, |
| "num_tokens": 9992.0, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.37453183520599254, |
| "grad_norm": 0.38430944085121155, |
| "learning_rate": 1.851685393258427e-05, |
| "loss": 0.2839, |
| "mean_token_accuracy": 0.9051393151283265, |
| "num_tokens": 11098.0, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.37453183520599254, |
| "eval_loss": 0.2852214574813843, |
| "eval_mean_token_accuracy": 0.9032742083072662, |
| "eval_num_tokens": 11098.0, |
| "eval_runtime": 2.4929, |
| "eval_samples_per_second": 11.633, |
| "eval_steps_per_second": 1.605, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.41198501872659177, |
| "grad_norm": 0.312187522649765, |
| "learning_rate": 1.8367041198501874e-05, |
| "loss": 0.2752, |
| "mean_token_accuracy": 0.9036725044250489, |
| "num_tokens": 12207.0, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.449438202247191, |
| "grad_norm": 0.3875369131565094, |
| "learning_rate": 1.8217228464419477e-05, |
| "loss": 0.2659, |
| "mean_token_accuracy": 0.9044483065605163, |
| "num_tokens": 13316.0, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.4868913857677903, |
| "grad_norm": 0.6050882339477539, |
| "learning_rate": 1.8067415730337083e-05, |
| "loss": 0.258, |
| "mean_token_accuracy": 0.9100114285945893, |
| "num_tokens": 14426.0, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.5243445692883895, |
| "grad_norm": 0.5287177562713623, |
| "learning_rate": 1.7917602996254685e-05, |
| "loss": 0.2455, |
| "mean_token_accuracy": 0.9222747385501862, |
| "num_tokens": 15539.0, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.5617977528089888, |
| "grad_norm": 0.5224889516830444, |
| "learning_rate": 1.7767790262172285e-05, |
| "loss": 0.2368, |
| "mean_token_accuracy": 0.9263923704624176, |
| "num_tokens": 16647.0, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.599250936329588, |
| "grad_norm": 0.4501174986362457, |
| "learning_rate": 1.7617977528089887e-05, |
| "loss": 0.2299, |
| "mean_token_accuracy": 0.9313735246658326, |
| "num_tokens": 17760.0, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.6367041198501873, |
| "grad_norm": 0.43853962421417236, |
| "learning_rate": 1.746816479400749e-05, |
| "loss": 0.2222, |
| "mean_token_accuracy": 0.9402973234653473, |
| "num_tokens": 18869.0, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.6741573033707865, |
| "grad_norm": 0.31908461451530457, |
| "learning_rate": 1.7318352059925093e-05, |
| "loss": 0.2117, |
| "mean_token_accuracy": 0.9458102405071258, |
| "num_tokens": 19977.0, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.7116104868913857, |
| "grad_norm": 0.2825154662132263, |
| "learning_rate": 1.71685393258427e-05, |
| "loss": 0.2094, |
| "mean_token_accuracy": 0.938564246892929, |
| "num_tokens": 21088.0, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.7490636704119851, |
| "grad_norm": 0.2939445674419403, |
| "learning_rate": 1.70187265917603e-05, |
| "loss": 0.2051, |
| "mean_token_accuracy": 0.9392363965511322, |
| "num_tokens": 22195.0, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.7490636704119851, |
| "eval_loss": 0.2029074728488922, |
| "eval_mean_token_accuracy": 0.9482556581497192, |
| "eval_num_tokens": 22195.0, |
| "eval_runtime": 2.4927, |
| "eval_samples_per_second": 11.634, |
| "eval_steps_per_second": 1.605, |
| "step": 200 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 1335, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 5, |
| "save_steps": 100, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1018124504064000.0, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|