Invalid JSON: Unexpected token 'N', ..."al_loss": NaN,
"... is not valid JSON
| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 3.0, | |
| "eval_steps": 500, | |
| "global_step": 156, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.1927710843373494, | |
| "grad_norm": 95.26730346679688, | |
| "learning_rate": 0.0004180909090909091, | |
| "loss": 22.4465, | |
| "mean_token_accuracy": 0.08969678990542888, | |
| "num_tokens": 1310720.0, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.3855421686746988, | |
| "grad_norm": 0.0, | |
| "learning_rate": 0.0005103526968014265, | |
| "loss": 0.8012, | |
| "mean_token_accuracy": 0.13804710581898688, | |
| "num_tokens": 2621440.0, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.5783132530120482, | |
| "grad_norm": 0.0, | |
| "learning_rate": 0.0005077286477833616, | |
| "loss": 452.9577, | |
| "mean_token_accuracy": 0.05160275483503938, | |
| "num_tokens": 3932160.0, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.7710843373493976, | |
| "grad_norm": 0.0, | |
| "learning_rate": 0.0005031081504278389, | |
| "loss": 470.5136, | |
| "mean_token_accuracy": 0.03822226445190609, | |
| "num_tokens": 5242880.0, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.963855421686747, | |
| "grad_norm": 26.303752899169922, | |
| "learning_rate": 0.0004965277770447238, | |
| "loss": 167.1384, | |
| "mean_token_accuracy": 0.057517293840646744, | |
| "num_tokens": 6553600.0, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_loss": NaN, | |
| "eval_mean_token_accuracy": 0.13677339731378757, | |
| "eval_num_tokens": 6713344.0, | |
| "eval_runtime": 8.9806, | |
| "eval_samples_per_second": 41.089, | |
| "eval_steps_per_second": 5.233, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 1.1542168674698796, | |
| "grad_norm": 28.755094528198242, | |
| "learning_rate": 0.00048803961281790017, | |
| "loss": 27.9726, | |
| "mean_token_accuracy": 0.03075966710531259, | |
| "num_tokens": 7761920.0, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 1.346987951807229, | |
| "grad_norm": 0.9693858027458191, | |
| "learning_rate": 0.000477710843538941, | |
| "loss": 2.2869, | |
| "mean_token_accuracy": 0.10747051909565926, | |
| "num_tokens": 9072640.0, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 1.5397590361445783, | |
| "grad_norm": 0.36548200249671936, | |
| "learning_rate": 0.0004656232238159615, | |
| "loss": 60.0031, | |
| "mean_token_accuracy": 0.10124717205762863, | |
| "num_tokens": 10383360.0, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 1.7325301204819277, | |
| "grad_norm": 0.8749092817306519, | |
| "learning_rate": 0.0004518724299669051, | |
| "loss": 0.8994, | |
| "mean_token_accuracy": 0.1551567144691944, | |
| "num_tokens": 11694080.0, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 1.9253012048192772, | |
| "grad_norm": 0.0, | |
| "learning_rate": 0.0004365673027192623, | |
| "loss": 2.2759, | |
| "mean_token_accuracy": 0.13096993789076805, | |
| "num_tokens": 13004800.0, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_loss": NaN, | |
| "eval_mean_token_accuracy": 0.14378934084100925, | |
| "eval_num_tokens": 13426688.0, | |
| "eval_runtime": 8.9689, | |
| "eval_samples_per_second": 41.142, | |
| "eval_steps_per_second": 5.24, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 2.1156626506024097, | |
| "grad_norm": 0.6895984411239624, | |
| "learning_rate": 0.0004198289857104298, | |
| "loss": 0.2064, | |
| "mean_token_accuracy": 0.08719592305678356, | |
| "num_tokens": 14213120.0, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 2.3084337349397592, | |
| "grad_norm": 0.7038294672966003, | |
| "learning_rate": 0.0004017899666076801, | |
| "loss": 1.3155, | |
| "mean_token_accuracy": 0.1053241491317749, | |
| "num_tokens": 15523840.0, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 2.5012048192771084, | |
| "grad_norm": 27.594745635986328, | |
| "learning_rate": 0.0003825930284374996, | |
| "loss": 0.0836, | |
| "mean_token_accuracy": 0.07201291918754578, | |
| "num_tokens": 16834560.0, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 2.693975903614458, | |
| "grad_norm": 0.0, | |
| "learning_rate": 0.00036239011942476655, | |
| "loss": 1.364, | |
| "mean_token_accuracy": 0.15817394778132438, | |
| "num_tokens": 18145280.0, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 2.886746987951807, | |
| "grad_norm": 0.0, | |
| "learning_rate": 0.00034134115028725524, | |
| "loss": 3.5977, | |
| "mean_token_accuracy": 0.10589548945426941, | |
| "num_tokens": 19456000.0, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_loss": NaN, | |
| "eval_mean_token_accuracy": 0.14411297884393245, | |
| "eval_num_tokens": 20140032.0, | |
| "eval_runtime": 8.9831, | |
| "eval_samples_per_second": 41.077, | |
| "eval_steps_per_second": 5.232, | |
| "step": 156 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 364, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 7, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 8.85125687348822e+17, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |