| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 73.07692307692308, |
| "global_step": 3800, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 1.92, |
| "learning_rate": 9.940384615384614e-07, |
| "loss": 2.1803, |
| "step": 100 |
| }, |
| { |
| "epoch": 1.92, |
| "eval_loss": 1.6282658576965332, |
| "eval_runtime": 4.7479, |
| "eval_samples_per_second": 4.634, |
| "eval_steps_per_second": 0.632, |
| "step": 100 |
| }, |
| { |
| "epoch": 3.85, |
| "learning_rate": 9.87628205128205e-07, |
| "loss": 1.9556, |
| "step": 200 |
| }, |
| { |
| "epoch": 3.85, |
| "eval_loss": 1.6041302680969238, |
| "eval_runtime": 4.2335, |
| "eval_samples_per_second": 5.197, |
| "eval_steps_per_second": 0.709, |
| "step": 200 |
| }, |
| { |
| "epoch": 5.77, |
| "learning_rate": 9.812179487179486e-07, |
| "loss": 1.8415, |
| "step": 300 |
| }, |
| { |
| "epoch": 5.77, |
| "eval_loss": 1.5072983503341675, |
| "eval_runtime": 4.2682, |
| "eval_samples_per_second": 5.154, |
| "eval_steps_per_second": 0.703, |
| "step": 300 |
| }, |
| { |
| "epoch": 7.69, |
| "learning_rate": 9.748076923076923e-07, |
| "loss": 1.849, |
| "step": 400 |
| }, |
| { |
| "epoch": 7.69, |
| "eval_loss": 1.5139409303665161, |
| "eval_runtime": 4.1966, |
| "eval_samples_per_second": 5.242, |
| "eval_steps_per_second": 0.715, |
| "step": 400 |
| }, |
| { |
| "epoch": 9.62, |
| "learning_rate": 9.683974358974359e-07, |
| "loss": 1.7466, |
| "step": 500 |
| }, |
| { |
| "epoch": 9.62, |
| "eval_loss": 1.4191316366195679, |
| "eval_runtime": 4.1991, |
| "eval_samples_per_second": 5.239, |
| "eval_steps_per_second": 0.714, |
| "step": 500 |
| }, |
| { |
| "epoch": 11.54, |
| "learning_rate": 9.62051282051282e-07, |
| "loss": 1.7443, |
| "step": 600 |
| }, |
| { |
| "epoch": 11.54, |
| "eval_loss": 1.4041415452957153, |
| "eval_runtime": 4.3488, |
| "eval_samples_per_second": 5.059, |
| "eval_steps_per_second": 0.69, |
| "step": 600 |
| }, |
| { |
| "epoch": 13.46, |
| "learning_rate": 9.556410256410255e-07, |
| "loss": 1.6662, |
| "step": 700 |
| }, |
| { |
| "epoch": 13.46, |
| "eval_loss": 1.3958640098571777, |
| "eval_runtime": 4.2134, |
| "eval_samples_per_second": 5.221, |
| "eval_steps_per_second": 0.712, |
| "step": 700 |
| }, |
| { |
| "epoch": 15.38, |
| "learning_rate": 9.492307692307691e-07, |
| "loss": 1.6388, |
| "step": 800 |
| }, |
| { |
| "epoch": 15.38, |
| "eval_loss": 1.364356517791748, |
| "eval_runtime": 4.4313, |
| "eval_samples_per_second": 4.965, |
| "eval_steps_per_second": 0.677, |
| "step": 800 |
| }, |
| { |
| "epoch": 17.31, |
| "learning_rate": 9.428205128205127e-07, |
| "loss": 1.5375, |
| "step": 900 |
| }, |
| { |
| "epoch": 17.31, |
| "eval_loss": 1.3546158075332642, |
| "eval_runtime": 4.2939, |
| "eval_samples_per_second": 5.124, |
| "eval_steps_per_second": 0.699, |
| "step": 900 |
| }, |
| { |
| "epoch": 19.23, |
| "learning_rate": 9.364102564102564e-07, |
| "loss": 1.7777, |
| "step": 1000 |
| }, |
| { |
| "epoch": 19.23, |
| "eval_loss": 1.345603585243225, |
| "eval_runtime": 4.2959, |
| "eval_samples_per_second": 5.121, |
| "eval_steps_per_second": 0.698, |
| "step": 1000 |
| }, |
| { |
| "epoch": 21.15, |
| "learning_rate": 9.3e-07, |
| "loss": 1.562, |
| "step": 1100 |
| }, |
| { |
| "epoch": 21.15, |
| "eval_loss": 1.3052722215652466, |
| "eval_runtime": 4.3299, |
| "eval_samples_per_second": 5.081, |
| "eval_steps_per_second": 0.693, |
| "step": 1100 |
| }, |
| { |
| "epoch": 23.08, |
| "learning_rate": 9.235897435897435e-07, |
| "loss": 1.4714, |
| "step": 1200 |
| }, |
| { |
| "epoch": 23.08, |
| "eval_loss": 1.2963144779205322, |
| "eval_runtime": 4.2895, |
| "eval_samples_per_second": 5.129, |
| "eval_steps_per_second": 0.699, |
| "step": 1200 |
| }, |
| { |
| "epoch": 25.0, |
| "learning_rate": 9.172435897435897e-07, |
| "loss": 1.5037, |
| "step": 1300 |
| }, |
| { |
| "epoch": 25.0, |
| "eval_loss": 1.2800103425979614, |
| "eval_runtime": 4.2403, |
| "eval_samples_per_second": 5.188, |
| "eval_steps_per_second": 0.707, |
| "step": 1300 |
| }, |
| { |
| "epoch": 26.92, |
| "learning_rate": 9.108333333333333e-07, |
| "loss": 1.3721, |
| "step": 1400 |
| }, |
| { |
| "epoch": 26.92, |
| "eval_loss": 1.276384711265564, |
| "eval_runtime": 4.3022, |
| "eval_samples_per_second": 5.114, |
| "eval_steps_per_second": 0.697, |
| "step": 1400 |
| }, |
| { |
| "epoch": 28.85, |
| "learning_rate": 9.044230769230768e-07, |
| "loss": 1.4898, |
| "step": 1500 |
| }, |
| { |
| "epoch": 28.85, |
| "eval_loss": 1.29233980178833, |
| "eval_runtime": 4.2189, |
| "eval_samples_per_second": 5.215, |
| "eval_steps_per_second": 0.711, |
| "step": 1500 |
| }, |
| { |
| "epoch": 30.77, |
| "learning_rate": 8.980128205128205e-07, |
| "loss": 1.4936, |
| "step": 1600 |
| }, |
| { |
| "epoch": 30.77, |
| "eval_loss": 1.2993799448013306, |
| "eval_runtime": 4.1785, |
| "eval_samples_per_second": 5.265, |
| "eval_steps_per_second": 0.718, |
| "step": 1600 |
| }, |
| { |
| "epoch": 32.69, |
| "learning_rate": 8.916025641025641e-07, |
| "loss": 1.4178, |
| "step": 1700 |
| }, |
| { |
| "epoch": 32.69, |
| "eval_loss": 1.253838062286377, |
| "eval_runtime": 4.2708, |
| "eval_samples_per_second": 5.151, |
| "eval_steps_per_second": 0.702, |
| "step": 1700 |
| }, |
| { |
| "epoch": 34.62, |
| "learning_rate": 8.851923076923077e-07, |
| "loss": 1.2665, |
| "step": 1800 |
| }, |
| { |
| "epoch": 34.62, |
| "eval_loss": 1.2696892023086548, |
| "eval_runtime": 4.2383, |
| "eval_samples_per_second": 5.191, |
| "eval_steps_per_second": 0.708, |
| "step": 1800 |
| }, |
| { |
| "epoch": 36.54, |
| "learning_rate": 8.787820512820513e-07, |
| "loss": 1.3008, |
| "step": 1900 |
| }, |
| { |
| "epoch": 36.54, |
| "eval_loss": 1.2657532691955566, |
| "eval_runtime": 4.3241, |
| "eval_samples_per_second": 5.088, |
| "eval_steps_per_second": 0.694, |
| "step": 1900 |
| }, |
| { |
| "epoch": 38.46, |
| "learning_rate": 8.723717948717948e-07, |
| "loss": 1.4281, |
| "step": 2000 |
| }, |
| { |
| "epoch": 38.46, |
| "eval_loss": 1.2740833759307861, |
| "eval_runtime": 4.3835, |
| "eval_samples_per_second": 5.019, |
| "eval_steps_per_second": 0.684, |
| "step": 2000 |
| }, |
| { |
| "epoch": 40.38, |
| "learning_rate": 8.659615384615384e-07, |
| "loss": 1.2315, |
| "step": 2100 |
| }, |
| { |
| "epoch": 40.38, |
| "eval_loss": 1.2776740789413452, |
| "eval_runtime": 4.3172, |
| "eval_samples_per_second": 5.096, |
| "eval_steps_per_second": 0.695, |
| "step": 2100 |
| }, |
| { |
| "epoch": 42.31, |
| "learning_rate": 8.59551282051282e-07, |
| "loss": 1.2442, |
| "step": 2200 |
| }, |
| { |
| "epoch": 42.31, |
| "eval_loss": 1.2726629972457886, |
| "eval_runtime": 4.2859, |
| "eval_samples_per_second": 5.133, |
| "eval_steps_per_second": 0.7, |
| "step": 2200 |
| }, |
| { |
| "epoch": 44.23, |
| "learning_rate": 8.531410256410256e-07, |
| "loss": 1.2882, |
| "step": 2300 |
| }, |
| { |
| "epoch": 44.23, |
| "eval_loss": 1.2753421068191528, |
| "eval_runtime": 4.2108, |
| "eval_samples_per_second": 5.225, |
| "eval_steps_per_second": 0.712, |
| "step": 2300 |
| }, |
| { |
| "epoch": 46.15, |
| "learning_rate": 8.467307692307691e-07, |
| "loss": 1.194, |
| "step": 2400 |
| }, |
| { |
| "epoch": 46.15, |
| "eval_loss": 1.2462925910949707, |
| "eval_runtime": 4.1887, |
| "eval_samples_per_second": 5.252, |
| "eval_steps_per_second": 0.716, |
| "step": 2400 |
| }, |
| { |
| "epoch": 48.08, |
| "learning_rate": 8.403205128205128e-07, |
| "loss": 1.2223, |
| "step": 2500 |
| }, |
| { |
| "epoch": 48.08, |
| "eval_loss": 1.2566279172897339, |
| "eval_runtime": 4.2232, |
| "eval_samples_per_second": 5.209, |
| "eval_steps_per_second": 0.71, |
| "step": 2500 |
| }, |
| { |
| "epoch": 50.0, |
| "learning_rate": 8.339102564102564e-07, |
| "loss": 1.1984, |
| "step": 2600 |
| }, |
| { |
| "epoch": 50.0, |
| "eval_loss": 1.2548397779464722, |
| "eval_runtime": 4.2193, |
| "eval_samples_per_second": 5.214, |
| "eval_steps_per_second": 0.711, |
| "step": 2600 |
| }, |
| { |
| "epoch": 51.92, |
| "learning_rate": 8.275e-07, |
| "loss": 1.1202, |
| "step": 2700 |
| }, |
| { |
| "epoch": 51.92, |
| "eval_loss": 1.2726110219955444, |
| "eval_runtime": 4.197, |
| "eval_samples_per_second": 5.242, |
| "eval_steps_per_second": 0.715, |
| "step": 2700 |
| }, |
| { |
| "epoch": 53.85, |
| "learning_rate": 8.210897435897435e-07, |
| "loss": 1.1128, |
| "step": 2800 |
| }, |
| { |
| "epoch": 53.85, |
| "eval_loss": 1.2732219696044922, |
| "eval_runtime": 4.189, |
| "eval_samples_per_second": 5.252, |
| "eval_steps_per_second": 0.716, |
| "step": 2800 |
| }, |
| { |
| "epoch": 55.77, |
| "learning_rate": 8.146794871794871e-07, |
| "loss": 1.1649, |
| "step": 2900 |
| }, |
| { |
| "epoch": 55.77, |
| "eval_loss": 1.2652595043182373, |
| "eval_runtime": 4.2028, |
| "eval_samples_per_second": 5.235, |
| "eval_steps_per_second": 0.714, |
| "step": 2900 |
| }, |
| { |
| "epoch": 57.69, |
| "learning_rate": 8.082692307692307e-07, |
| "loss": 1.1066, |
| "step": 3000 |
| }, |
| { |
| "epoch": 57.69, |
| "eval_loss": 1.254478931427002, |
| "eval_runtime": 4.2195, |
| "eval_samples_per_second": 5.214, |
| "eval_steps_per_second": 0.711, |
| "step": 3000 |
| }, |
| { |
| "epoch": 59.62, |
| "learning_rate": 8.018589743589743e-07, |
| "loss": 1.0629, |
| "step": 3100 |
| }, |
| { |
| "epoch": 59.62, |
| "eval_loss": 1.2273532152175903, |
| "eval_runtime": 4.2147, |
| "eval_samples_per_second": 5.22, |
| "eval_steps_per_second": 0.712, |
| "step": 3100 |
| }, |
| { |
| "epoch": 61.54, |
| "learning_rate": 7.955128205128204e-07, |
| "loss": 1.093, |
| "step": 3200 |
| }, |
| { |
| "epoch": 61.54, |
| "eval_loss": 1.2593165636062622, |
| "eval_runtime": 4.2295, |
| "eval_samples_per_second": 5.202, |
| "eval_steps_per_second": 0.709, |
| "step": 3200 |
| }, |
| { |
| "epoch": 63.46, |
| "learning_rate": 7.89102564102564e-07, |
| "loss": 0.9813, |
| "step": 3300 |
| }, |
| { |
| "epoch": 63.46, |
| "eval_loss": 1.2721446752548218, |
| "eval_runtime": 4.1692, |
| "eval_samples_per_second": 5.277, |
| "eval_steps_per_second": 0.72, |
| "step": 3300 |
| }, |
| { |
| "epoch": 65.38, |
| "learning_rate": 7.826923076923076e-07, |
| "loss": 1.0538, |
| "step": 3400 |
| }, |
| { |
| "epoch": 65.38, |
| "eval_loss": 1.2700778245925903, |
| "eval_runtime": 4.2208, |
| "eval_samples_per_second": 5.212, |
| "eval_steps_per_second": 0.711, |
| "step": 3400 |
| }, |
| { |
| "epoch": 67.31, |
| "learning_rate": 7.762820512820513e-07, |
| "loss": 0.9368, |
| "step": 3500 |
| }, |
| { |
| "epoch": 67.31, |
| "eval_loss": 1.2771656513214111, |
| "eval_runtime": 4.2547, |
| "eval_samples_per_second": 5.171, |
| "eval_steps_per_second": 0.705, |
| "step": 3500 |
| }, |
| { |
| "epoch": 69.23, |
| "learning_rate": 7.698717948717949e-07, |
| "loss": 1.0294, |
| "step": 3600 |
| }, |
| { |
| "epoch": 69.23, |
| "eval_loss": 1.2606873512268066, |
| "eval_runtime": 4.2609, |
| "eval_samples_per_second": 5.163, |
| "eval_steps_per_second": 0.704, |
| "step": 3600 |
| }, |
| { |
| "epoch": 71.15, |
| "learning_rate": 7.634615384615384e-07, |
| "loss": 0.9596, |
| "step": 3700 |
| }, |
| { |
| "epoch": 71.15, |
| "eval_loss": 1.2618690729141235, |
| "eval_runtime": 4.2669, |
| "eval_samples_per_second": 5.156, |
| "eval_steps_per_second": 0.703, |
| "step": 3700 |
| }, |
| { |
| "epoch": 73.08, |
| "learning_rate": 7.57051282051282e-07, |
| "loss": 0.9926, |
| "step": 3800 |
| }, |
| { |
| "epoch": 73.08, |
| "eval_loss": 1.2698349952697754, |
| "eval_runtime": 4.2523, |
| "eval_samples_per_second": 5.174, |
| "eval_steps_per_second": 0.705, |
| "step": 3800 |
| } |
| ], |
| "max_steps": 15600, |
| "num_train_epochs": 300, |
| "total_flos": 9.161067103204147e+18, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|