{ "best_metric": null, "best_model_checkpoint": null, "epoch": 73.07692307692308, "global_step": 3800, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.92, "learning_rate": 9.940384615384614e-07, "loss": 2.1803, "step": 100 }, { "epoch": 1.92, "eval_loss": 1.6282658576965332, "eval_runtime": 4.7479, "eval_samples_per_second": 4.634, "eval_steps_per_second": 0.632, "step": 100 }, { "epoch": 3.85, "learning_rate": 9.87628205128205e-07, "loss": 1.9556, "step": 200 }, { "epoch": 3.85, "eval_loss": 1.6041302680969238, "eval_runtime": 4.2335, "eval_samples_per_second": 5.197, "eval_steps_per_second": 0.709, "step": 200 }, { "epoch": 5.77, "learning_rate": 9.812179487179486e-07, "loss": 1.8415, "step": 300 }, { "epoch": 5.77, "eval_loss": 1.5072983503341675, "eval_runtime": 4.2682, "eval_samples_per_second": 5.154, "eval_steps_per_second": 0.703, "step": 300 }, { "epoch": 7.69, "learning_rate": 9.748076923076923e-07, "loss": 1.849, "step": 400 }, { "epoch": 7.69, "eval_loss": 1.5139409303665161, "eval_runtime": 4.1966, "eval_samples_per_second": 5.242, "eval_steps_per_second": 0.715, "step": 400 }, { "epoch": 9.62, "learning_rate": 9.683974358974359e-07, "loss": 1.7466, "step": 500 }, { "epoch": 9.62, "eval_loss": 1.4191316366195679, "eval_runtime": 4.1991, "eval_samples_per_second": 5.239, "eval_steps_per_second": 0.714, "step": 500 }, { "epoch": 11.54, "learning_rate": 9.62051282051282e-07, "loss": 1.7443, "step": 600 }, { "epoch": 11.54, "eval_loss": 1.4041415452957153, "eval_runtime": 4.3488, "eval_samples_per_second": 5.059, "eval_steps_per_second": 0.69, "step": 600 }, { "epoch": 13.46, "learning_rate": 9.556410256410255e-07, "loss": 1.6662, "step": 700 }, { "epoch": 13.46, "eval_loss": 1.3958640098571777, "eval_runtime": 4.2134, "eval_samples_per_second": 5.221, "eval_steps_per_second": 0.712, "step": 700 }, { "epoch": 15.38, "learning_rate": 9.492307692307691e-07, "loss": 1.6388, "step": 800 }, { "epoch": 15.38, "eval_loss": 1.364356517791748, "eval_runtime": 4.4313, "eval_samples_per_second": 4.965, "eval_steps_per_second": 0.677, "step": 800 }, { "epoch": 17.31, "learning_rate": 9.428205128205127e-07, "loss": 1.5375, "step": 900 }, { "epoch": 17.31, "eval_loss": 1.3546158075332642, "eval_runtime": 4.2939, "eval_samples_per_second": 5.124, "eval_steps_per_second": 0.699, "step": 900 }, { "epoch": 19.23, "learning_rate": 9.364102564102564e-07, "loss": 1.7777, "step": 1000 }, { "epoch": 19.23, "eval_loss": 1.345603585243225, "eval_runtime": 4.2959, "eval_samples_per_second": 5.121, "eval_steps_per_second": 0.698, "step": 1000 }, { "epoch": 21.15, "learning_rate": 9.3e-07, "loss": 1.562, "step": 1100 }, { "epoch": 21.15, "eval_loss": 1.3052722215652466, "eval_runtime": 4.3299, "eval_samples_per_second": 5.081, "eval_steps_per_second": 0.693, "step": 1100 }, { "epoch": 23.08, "learning_rate": 9.235897435897435e-07, "loss": 1.4714, "step": 1200 }, { "epoch": 23.08, "eval_loss": 1.2963144779205322, "eval_runtime": 4.2895, "eval_samples_per_second": 5.129, "eval_steps_per_second": 0.699, "step": 1200 }, { "epoch": 25.0, "learning_rate": 9.172435897435897e-07, "loss": 1.5037, "step": 1300 }, { "epoch": 25.0, "eval_loss": 1.2800103425979614, "eval_runtime": 4.2403, "eval_samples_per_second": 5.188, "eval_steps_per_second": 0.707, "step": 1300 }, { "epoch": 26.92, "learning_rate": 9.108333333333333e-07, "loss": 1.3721, "step": 1400 }, { "epoch": 26.92, "eval_loss": 1.276384711265564, "eval_runtime": 4.3022, "eval_samples_per_second": 5.114, "eval_steps_per_second": 0.697, "step": 1400 }, { "epoch": 28.85, "learning_rate": 9.044230769230768e-07, "loss": 1.4898, "step": 1500 }, { "epoch": 28.85, "eval_loss": 1.29233980178833, "eval_runtime": 4.2189, "eval_samples_per_second": 5.215, "eval_steps_per_second": 0.711, "step": 1500 }, { "epoch": 30.77, "learning_rate": 8.980128205128205e-07, "loss": 1.4936, "step": 1600 }, { "epoch": 30.77, "eval_loss": 1.2993799448013306, "eval_runtime": 4.1785, "eval_samples_per_second": 5.265, "eval_steps_per_second": 0.718, "step": 1600 }, { "epoch": 32.69, "learning_rate": 8.916025641025641e-07, "loss": 1.4178, "step": 1700 }, { "epoch": 32.69, "eval_loss": 1.253838062286377, "eval_runtime": 4.2708, "eval_samples_per_second": 5.151, "eval_steps_per_second": 0.702, "step": 1700 }, { "epoch": 34.62, "learning_rate": 8.851923076923077e-07, "loss": 1.2665, "step": 1800 }, { "epoch": 34.62, "eval_loss": 1.2696892023086548, "eval_runtime": 4.2383, "eval_samples_per_second": 5.191, "eval_steps_per_second": 0.708, "step": 1800 }, { "epoch": 36.54, "learning_rate": 8.787820512820513e-07, "loss": 1.3008, "step": 1900 }, { "epoch": 36.54, "eval_loss": 1.2657532691955566, "eval_runtime": 4.3241, "eval_samples_per_second": 5.088, "eval_steps_per_second": 0.694, "step": 1900 }, { "epoch": 38.46, "learning_rate": 8.723717948717948e-07, "loss": 1.4281, "step": 2000 }, { "epoch": 38.46, "eval_loss": 1.2740833759307861, "eval_runtime": 4.3835, "eval_samples_per_second": 5.019, "eval_steps_per_second": 0.684, "step": 2000 }, { "epoch": 40.38, "learning_rate": 8.659615384615384e-07, "loss": 1.2315, "step": 2100 }, { "epoch": 40.38, "eval_loss": 1.2776740789413452, "eval_runtime": 4.3172, "eval_samples_per_second": 5.096, "eval_steps_per_second": 0.695, "step": 2100 }, { "epoch": 42.31, "learning_rate": 8.59551282051282e-07, "loss": 1.2442, "step": 2200 }, { "epoch": 42.31, "eval_loss": 1.2726629972457886, "eval_runtime": 4.2859, "eval_samples_per_second": 5.133, "eval_steps_per_second": 0.7, "step": 2200 }, { "epoch": 44.23, "learning_rate": 8.531410256410256e-07, "loss": 1.2882, "step": 2300 }, { "epoch": 44.23, "eval_loss": 1.2753421068191528, "eval_runtime": 4.2108, "eval_samples_per_second": 5.225, "eval_steps_per_second": 0.712, "step": 2300 }, { "epoch": 46.15, "learning_rate": 8.467307692307691e-07, "loss": 1.194, "step": 2400 }, { "epoch": 46.15, "eval_loss": 1.2462925910949707, "eval_runtime": 4.1887, "eval_samples_per_second": 5.252, "eval_steps_per_second": 0.716, "step": 2400 }, { "epoch": 48.08, "learning_rate": 8.403205128205128e-07, "loss": 1.2223, "step": 2500 }, { "epoch": 48.08, "eval_loss": 1.2566279172897339, "eval_runtime": 4.2232, "eval_samples_per_second": 5.209, "eval_steps_per_second": 0.71, "step": 2500 }, { "epoch": 50.0, "learning_rate": 8.339102564102564e-07, "loss": 1.1984, "step": 2600 }, { "epoch": 50.0, "eval_loss": 1.2548397779464722, "eval_runtime": 4.2193, "eval_samples_per_second": 5.214, "eval_steps_per_second": 0.711, "step": 2600 }, { "epoch": 51.92, "learning_rate": 8.275e-07, "loss": 1.1202, "step": 2700 }, { "epoch": 51.92, "eval_loss": 1.2726110219955444, "eval_runtime": 4.197, "eval_samples_per_second": 5.242, "eval_steps_per_second": 0.715, "step": 2700 }, { "epoch": 53.85, "learning_rate": 8.210897435897435e-07, "loss": 1.1128, "step": 2800 }, { "epoch": 53.85, "eval_loss": 1.2732219696044922, "eval_runtime": 4.189, "eval_samples_per_second": 5.252, "eval_steps_per_second": 0.716, "step": 2800 }, { "epoch": 55.77, "learning_rate": 8.146794871794871e-07, "loss": 1.1649, "step": 2900 }, { "epoch": 55.77, "eval_loss": 1.2652595043182373, "eval_runtime": 4.2028, "eval_samples_per_second": 5.235, "eval_steps_per_second": 0.714, "step": 2900 }, { "epoch": 57.69, "learning_rate": 8.082692307692307e-07, "loss": 1.1066, "step": 3000 }, { "epoch": 57.69, "eval_loss": 1.254478931427002, "eval_runtime": 4.2195, "eval_samples_per_second": 5.214, "eval_steps_per_second": 0.711, "step": 3000 }, { "epoch": 59.62, "learning_rate": 8.018589743589743e-07, "loss": 1.0629, "step": 3100 }, { "epoch": 59.62, "eval_loss": 1.2273532152175903, "eval_runtime": 4.2147, "eval_samples_per_second": 5.22, "eval_steps_per_second": 0.712, "step": 3100 }, { "epoch": 61.54, "learning_rate": 7.955128205128204e-07, "loss": 1.093, "step": 3200 }, { "epoch": 61.54, "eval_loss": 1.2593165636062622, "eval_runtime": 4.2295, "eval_samples_per_second": 5.202, "eval_steps_per_second": 0.709, "step": 3200 }, { "epoch": 63.46, "learning_rate": 7.89102564102564e-07, "loss": 0.9813, "step": 3300 }, { "epoch": 63.46, "eval_loss": 1.2721446752548218, "eval_runtime": 4.1692, "eval_samples_per_second": 5.277, "eval_steps_per_second": 0.72, "step": 3300 }, { "epoch": 65.38, "learning_rate": 7.826923076923076e-07, "loss": 1.0538, "step": 3400 }, { "epoch": 65.38, "eval_loss": 1.2700778245925903, "eval_runtime": 4.2208, "eval_samples_per_second": 5.212, "eval_steps_per_second": 0.711, "step": 3400 }, { "epoch": 67.31, "learning_rate": 7.762820512820513e-07, "loss": 0.9368, "step": 3500 }, { "epoch": 67.31, "eval_loss": 1.2771656513214111, "eval_runtime": 4.2547, "eval_samples_per_second": 5.171, "eval_steps_per_second": 0.705, "step": 3500 }, { "epoch": 69.23, "learning_rate": 7.698717948717949e-07, "loss": 1.0294, "step": 3600 }, { "epoch": 69.23, "eval_loss": 1.2606873512268066, "eval_runtime": 4.2609, "eval_samples_per_second": 5.163, "eval_steps_per_second": 0.704, "step": 3600 }, { "epoch": 71.15, "learning_rate": 7.634615384615384e-07, "loss": 0.9596, "step": 3700 }, { "epoch": 71.15, "eval_loss": 1.2618690729141235, "eval_runtime": 4.2669, "eval_samples_per_second": 5.156, "eval_steps_per_second": 0.703, "step": 3700 }, { "epoch": 73.08, "learning_rate": 7.57051282051282e-07, "loss": 0.9926, "step": 3800 }, { "epoch": 73.08, "eval_loss": 1.2698349952697754, "eval_runtime": 4.2523, "eval_samples_per_second": 5.174, "eval_steps_per_second": 0.705, "step": 3800 } ], "max_steps": 15600, "num_train_epochs": 300, "total_flos": 9.161067103204147e+18, "trial_name": null, "trial_params": null }