| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 3.0, | |
| "eval_steps": 100, | |
| "global_step": 111, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "entropy": 1.418435550481081, | |
| "epoch": 0.27303754266211605, | |
| "grad_norm": 0.056396484375, | |
| "learning_rate": 0.00019959742939952392, | |
| "loss": 1.6161483764648437, | |
| "mean_token_accuracy": 0.6638647213578224, | |
| "num_tokens": 151209.0, | |
| "step": 10 | |
| }, | |
| { | |
| "entropy": 1.097722884826362, | |
| "epoch": 0.5460750853242321, | |
| "grad_norm": 0.033203125, | |
| "learning_rate": 0.00019253043004739968, | |
| "loss": 1.1661317825317383, | |
| "mean_token_accuracy": 0.7458787495270371, | |
| "num_tokens": 320046.0, | |
| "step": 20 | |
| }, | |
| { | |
| "entropy": 1.063631435856223, | |
| "epoch": 0.8191126279863481, | |
| "grad_norm": 0.0247802734375, | |
| "learning_rate": 0.00017724169592245995, | |
| "loss": 1.118991756439209, | |
| "mean_token_accuracy": 0.7535936305299401, | |
| "num_tokens": 492438.0, | |
| "step": 30 | |
| }, | |
| { | |
| "entropy": 0.9953024551852957, | |
| "epoch": 1.0819112627986347, | |
| "grad_norm": 0.0255126953125, | |
| "learning_rate": 0.00015508969814521025, | |
| "loss": 1.0578977584838867, | |
| "mean_token_accuracy": 0.7685656903626082, | |
| "num_tokens": 642939.0, | |
| "step": 40 | |
| }, | |
| { | |
| "entropy": 0.9422929083928466, | |
| "epoch": 1.3549488054607508, | |
| "grad_norm": 0.025390625, | |
| "learning_rate": 0.00012804273893060028, | |
| "loss": 1.0265376091003418, | |
| "mean_token_accuracy": 0.7775206623598934, | |
| "num_tokens": 817432.0, | |
| "step": 50 | |
| }, | |
| { | |
| "entropy": 0.9282504981383681, | |
| "epoch": 1.627986348122867, | |
| "grad_norm": 0.025390625, | |
| "learning_rate": 9.850405929847366e-05, | |
| "loss": 0.9812464714050293, | |
| "mean_token_accuracy": 0.7824548855423927, | |
| "num_tokens": 971956.0, | |
| "step": 60 | |
| }, | |
| { | |
| "entropy": 0.9165813697502017, | |
| "epoch": 1.901023890784983, | |
| "grad_norm": 0.028076171875, | |
| "learning_rate": 6.909830056250527e-05, | |
| "loss": 0.9443504333496093, | |
| "mean_token_accuracy": 0.7839378425851464, | |
| "num_tokens": 1134082.0, | |
| "step": 70 | |
| }, | |
| { | |
| "entropy": 0.946953180354911, | |
| "epoch": 2.1638225255972694, | |
| "grad_norm": 0.02880859375, | |
| "learning_rate": 4.2438293431432665e-05, | |
| "loss": 0.968726921081543, | |
| "mean_token_accuracy": 0.7822197033213331, | |
| "num_tokens": 1290177.0, | |
| "step": 80 | |
| }, | |
| { | |
| "entropy": 0.8450189002789557, | |
| "epoch": 2.4368600682593855, | |
| "grad_norm": 0.031982421875, | |
| "learning_rate": 2.0892896534365904e-05, | |
| "loss": 0.9016420364379882, | |
| "mean_token_accuracy": 0.7995872467756271, | |
| "num_tokens": 1461672.0, | |
| "step": 90 | |
| }, | |
| { | |
| "entropy": 0.8309150729328394, | |
| "epoch": 2.7098976109215016, | |
| "grad_norm": 0.037353515625, | |
| "learning_rate": 6.37651293602628e-06, | |
| "loss": 0.8841998100280761, | |
| "mean_token_accuracy": 0.8054205430671573, | |
| "num_tokens": 1624257.0, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 2.7098976109215016, | |
| "eval_entropy": 0.9007232843926458, | |
| "eval_loss": 0.9238418340682983, | |
| "eval_mean_token_accuracy": 0.7843361054406022, | |
| "eval_num_tokens": 1624257.0, | |
| "eval_runtime": 42.5945, | |
| "eval_samples_per_second": 1.549, | |
| "eval_steps_per_second": 1.549, | |
| "step": 100 | |
| }, | |
| { | |
| "entropy": 0.8297194179147482, | |
| "epoch": 2.9829351535836177, | |
| "grad_norm": 0.0322265625, | |
| "learning_rate": 1.7898702322648453e-07, | |
| "loss": 0.8666117668151856, | |
| "mean_token_accuracy": 0.8052121920511126, | |
| "num_tokens": 1778979.0, | |
| "step": 110 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 111, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 200, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 7.754556700156723e+16, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |