{ "best_metric": null, "best_model_checkpoint": null, "epoch": 56.0, "eval_steps": 500, "global_step": 140, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.6, "learning_rate": 0.0005714285714285714, "loss": 1.2446, "step": 4 }, { "epoch": 3.2, "learning_rate": 0.0011428571428571427, "loss": 0.9828, "step": 8 }, { "epoch": 4.8, "learning_rate": 0.0017142857142857142, "loss": 0.7103, "step": 12 }, { "epoch": 6.4, "learning_rate": 0.001968253968253968, "loss": 0.4313, "step": 16 }, { "epoch": 8.0, "learning_rate": 0.0019047619047619048, "loss": 0.2152, "step": 20 }, { "epoch": 9.6, "learning_rate": 0.0018412698412698413, "loss": 0.1221, "step": 24 }, { "epoch": 11.2, "learning_rate": 0.0017777777777777776, "loss": 0.0657, "step": 28 }, { "epoch": 12.8, "learning_rate": 0.001746031746031746, "loss": 0.0725, "step": 32 }, { "epoch": 14.4, "learning_rate": 0.0016825396825396826, "loss": 0.0622, "step": 36 }, { "epoch": 16.0, "learning_rate": 0.0016190476190476191, "loss": 0.0362, "step": 40 }, { "epoch": 17.6, "learning_rate": 0.0015555555555555557, "loss": 0.0266, "step": 44 }, { "epoch": 19.2, "learning_rate": 0.001492063492063492, "loss": 0.0241, "step": 48 }, { "epoch": 20.8, "learning_rate": 0.0014285714285714286, "loss": 0.018, "step": 52 }, { "epoch": 22.4, "learning_rate": 0.0013650793650793651, "loss": 0.0187, "step": 56 }, { "epoch": 24.0, "learning_rate": 0.0013015873015873017, "loss": 0.0159, "step": 60 }, { "epoch": 25.6, "learning_rate": 0.0012698412698412698, "loss": 0.0463, "step": 64 }, { "epoch": 27.2, "learning_rate": 0.0012063492063492064, "loss": 0.0279, "step": 68 }, { "epoch": 28.8, "learning_rate": 0.0011428571428571427, "loss": 0.0133, "step": 72 }, { "epoch": 30.4, "learning_rate": 0.0010793650793650793, "loss": 0.0148, "step": 76 }, { "epoch": 32.0, "learning_rate": 0.0010158730158730158, "loss": 0.0115, "step": 80 }, { "epoch": 33.6, "learning_rate": 0.0009523809523809524, "loss": 0.0139, "step": 84 }, { "epoch": 35.2, "learning_rate": 0.0008888888888888888, "loss": 0.0105, "step": 88 }, { "epoch": 36.8, "learning_rate": 0.0008253968253968254, "loss": 0.0096, "step": 92 }, { "epoch": 38.4, "learning_rate": 0.0007619047619047619, "loss": 0.0129, "step": 96 }, { "epoch": 40.0, "learning_rate": 0.0006984126984126984, "loss": 0.0069, "step": 100 }, { "epoch": 41.6, "learning_rate": 0.0006349206349206349, "loss": 0.0063, "step": 104 }, { "epoch": 43.2, "learning_rate": 0.0005714285714285714, "loss": 0.0053, "step": 108 }, { "epoch": 44.8, "learning_rate": 0.0005079365079365079, "loss": 0.0047, "step": 112 }, { "epoch": 46.4, "learning_rate": 0.0004444444444444444, "loss": 0.0055, "step": 116 }, { "epoch": 48.0, "learning_rate": 0.00038095238095238096, "loss": 0.0033, "step": 120 }, { "epoch": 49.6, "learning_rate": 0.00031746031746031746, "loss": 0.0038, "step": 124 }, { "epoch": 51.2, "learning_rate": 0.00025396825396825396, "loss": 0.0036, "step": 128 }, { "epoch": 52.8, "learning_rate": 0.00019047619047619048, "loss": 0.0029, "step": 132 }, { "epoch": 54.4, "learning_rate": 0.00012698412698412698, "loss": 0.0035, "step": 136 }, { "epoch": 56.0, "learning_rate": 6.349206349206349e-05, "loss": 0.0026, "step": 140 } ], "logging_steps": 4, "max_steps": 140, "num_train_epochs": 70, "save_steps": 500, "total_flos": 4.89773450877993e+16, "trial_name": null, "trial_params": null }