{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 37.0,
  "global_step": 999,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.0,
      "learning_rate": 1.2262943855309169e-05,
      "loss": 3.1653,
      "step": 27
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.547526790579403,
      "eval_loss": 2.819357395172119,
      "eval_runtime": 8.7539,
      "eval_samples_per_second": 50.72,
      "eval_steps_per_second": 0.228,
      "step": 27
    },
    {
      "epoch": 2.0,
      "learning_rate": 1.4841962570206113e-05,
      "loss": 2.7725,
      "step": 54
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.5574515806383172,
      "eval_loss": 2.6864047050476074,
      "eval_runtime": 7.4551,
      "eval_samples_per_second": 59.556,
      "eval_steps_per_second": 0.268,
      "step": 54
    },
    {
      "epoch": 3.0,
      "learning_rate": 1.6350591807078892e-05,
      "loss": 2.6256,
      "step": 81
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.5682560989844753,
      "eval_loss": 2.602532148361206,
      "eval_runtime": 7.5503,
      "eval_samples_per_second": 58.806,
      "eval_steps_per_second": 0.265,
      "step": 81
    },
    {
      "epoch": 4.0,
      "learning_rate": 1.7420981285103056e-05,
      "loss": 2.5044,
      "step": 108
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.601172152498224,
      "eval_loss": 2.341265916824341,
      "eval_runtime": 7.5499,
      "eval_samples_per_second": 58.809,
      "eval_steps_per_second": 0.265,
      "step": 108
    },
    {
      "epoch": 5.0,
      "learning_rate": 1.825123986666868e-05,
      "loss": 2.4348,
      "step": 135
    },
    {
      "epoch": 5.0,
      "eval_accuracy": 0.6008952865794787,
      "eval_loss": 2.3422300815582275,
      "eval_runtime": 7.5767,
      "eval_samples_per_second": 58.601,
      "eval_steps_per_second": 0.264,
      "step": 135
    },
    {
      "epoch": 6.0,
      "learning_rate": 1.892961052197583e-05,
      "loss": 2.42,
      "step": 162
    },
    {
      "epoch": 6.0,
      "eval_accuracy": 0.5931487564523698,
      "eval_loss": 2.3707046508789062,
      "eval_runtime": 7.5549,
      "eval_samples_per_second": 58.77,
      "eval_steps_per_second": 0.265,
      "step": 162
    },
    {
      "epoch": 7.0,
      "learning_rate": 1.9503164738653782e-05,
      "loss": 2.334,
      "step": 189
    },
    {
      "epoch": 7.0,
      "eval_accuracy": 0.6161689510150978,
      "eval_loss": 2.23409366607666,
      "eval_runtime": 7.5922,
      "eval_samples_per_second": 58.481,
      "eval_steps_per_second": 0.263,
      "step": 189
    },
    {
      "epoch": 8.0,
      "learning_rate": 1.9999999999999998e-05,
      "loss": 2.288,
      "step": 216
    },
    {
      "epoch": 8.0,
      "eval_accuracy": 0.6175385999100584,
      "eval_loss": 2.2085084915161133,
      "eval_runtime": 7.5018,
      "eval_samples_per_second": 59.186,
      "eval_steps_per_second": 0.267,
      "step": 216
    },
    {
      "epoch": 9.0,
      "learning_rate": 2e-05,
      "loss": 2.2954,
      "step": 243
    },
    {
      "epoch": 9.0,
      "eval_accuracy": 0.6139404159640247,
      "eval_loss": 2.2152740955352783,
      "eval_runtime": 7.565,
      "eval_samples_per_second": 58.691,
      "eval_steps_per_second": 0.264,
      "step": 243
    },
    {
      "epoch": 10.0,
      "learning_rate": 2e-05,
      "loss": 2.2934,
      "step": 270
    },
    {
      "epoch": 10.0,
      "eval_accuracy": 0.6153211306628938,
      "eval_loss": 2.232224225997925,
      "eval_runtime": 7.5858,
      "eval_samples_per_second": 58.531,
      "eval_steps_per_second": 0.264,
      "step": 270
    },
    {
      "epoch": 11.0,
      "learning_rate": 2e-05,
      "loss": 2.2283,
      "step": 297
    },
    {
      "epoch": 11.0,
      "eval_accuracy": 0.6139165373025295,
      "eval_loss": 2.231621503829956,
      "eval_runtime": 6.8069,
      "eval_samples_per_second": 65.228,
      "eval_steps_per_second": 0.294,
      "step": 297
    },
    {
      "epoch": 12.0,
      "learning_rate": 2e-05,
      "loss": 2.2228,
      "step": 324
    },
    {
      "epoch": 12.0,
      "eval_accuracy": 0.6216490772616965,
      "eval_loss": 2.1547019481658936,
      "eval_runtime": 7.5498,
      "eval_samples_per_second": 58.809,
      "eval_steps_per_second": 0.265,
      "step": 324
    },
    {
      "epoch": 13.0,
      "learning_rate": 2e-05,
      "loss": 2.2178,
      "step": 351
    },
    {
      "epoch": 13.0,
      "eval_accuracy": 0.6255061354433901,
      "eval_loss": 2.1324307918548584,
      "eval_runtime": 7.037,
      "eval_samples_per_second": 63.095,
      "eval_steps_per_second": 0.284,
      "step": 351
    },
    {
      "epoch": 14.0,
      "learning_rate": 2e-05,
      "loss": 2.1743,
      "step": 378
    },
    {
      "epoch": 14.0,
      "eval_accuracy": 0.6387313908007641,
      "eval_loss": 2.018895387649536,
      "eval_runtime": 7.5158,
      "eval_samples_per_second": 59.076,
      "eval_steps_per_second": 0.266,
      "step": 378
    },
    {
      "epoch": 15.0,
      "learning_rate": 2e-05,
      "loss": 2.0488,
      "step": 405
    },
    {
      "epoch": 15.0,
      "eval_accuracy": 0.6706725076959508,
      "eval_loss": 1.7761304378509521,
      "eval_runtime": 6.6483,
      "eval_samples_per_second": 66.784,
      "eval_steps_per_second": 0.301,
      "step": 405
    },
    {
      "epoch": 16.0,
      "learning_rate": 2e-05,
      "loss": 1.9293,
      "step": 432
    },
    {
      "epoch": 16.0,
      "eval_accuracy": 0.6976124177972629,
      "eval_loss": 1.5710580348968506,
      "eval_runtime": 7.0351,
      "eval_samples_per_second": 63.112,
      "eval_steps_per_second": 0.284,
      "step": 432
    },
    {
      "epoch": 17.0,
      "learning_rate": 2e-05,
      "loss": 1.824,
      "step": 459
    },
    {
      "epoch": 17.0,
      "eval_accuracy": 0.7190793783123921,
      "eval_loss": 1.4179129600524902,
      "eval_runtime": 7.5577,
      "eval_samples_per_second": 58.748,
      "eval_steps_per_second": 0.265,
      "step": 459
    },
    {
      "epoch": 18.0,
      "learning_rate": 2e-05,
      "loss": 1.677,
      "step": 486
    },
    {
      "epoch": 18.0,
      "eval_accuracy": 0.7287676129762578,
      "eval_loss": 1.3340463638305664,
      "eval_runtime": 7.6139,
      "eval_samples_per_second": 58.315,
      "eval_steps_per_second": 0.263,
      "step": 486
    },
    {
      "epoch": 19.0,
      "learning_rate": 2e-05,
      "loss": 1.5681,
      "step": 513
    },
    {
      "epoch": 19.0,
      "eval_accuracy": 0.7393688419248894,
      "eval_loss": 1.262519121170044,
      "eval_runtime": 7.6652,
      "eval_samples_per_second": 57.924,
      "eval_steps_per_second": 0.261,
      "step": 513
    },
    {
      "epoch": 20.0,
      "learning_rate": 2e-05,
      "loss": 1.4292,
      "step": 540
    },
    {
      "epoch": 20.0,
      "eval_accuracy": 0.7635153050702649,
      "eval_loss": 1.123979091644287,
      "eval_runtime": 7.5316,
      "eval_samples_per_second": 58.951,
      "eval_steps_per_second": 0.266,
      "step": 540
    },
    {
      "epoch": 21.0,
      "learning_rate": 2e-05,
      "loss": 1.3347,
      "step": 567
    },
    {
      "epoch": 21.0,
      "eval_accuracy": 0.7732615942452775,
      "eval_loss": 1.0541319847106934,
      "eval_runtime": 7.5273,
      "eval_samples_per_second": 58.985,
      "eval_steps_per_second": 0.266,
      "step": 567
    },
    {
      "epoch": 22.0,
      "learning_rate": 2e-05,
      "loss": 1.2435,
      "step": 594
    },
    {
      "epoch": 22.0,
      "eval_accuracy": 0.7811072373166627,
      "eval_loss": 1.0116688013076782,
      "eval_runtime": 7.6371,
      "eval_samples_per_second": 58.137,
      "eval_steps_per_second": 0.262,
      "step": 594
    },
    {
      "epoch": 23.0,
      "learning_rate": 2e-05,
      "loss": 1.1747,
      "step": 621
    },
    {
      "epoch": 23.0,
      "eval_accuracy": 0.7842111556166677,
      "eval_loss": 0.9821351766586304,
      "eval_runtime": 7.6429,
      "eval_samples_per_second": 58.093,
      "eval_steps_per_second": 0.262,
      "step": 621
    },
    {
      "epoch": 24.0,
      "learning_rate": 2e-05,
      "loss": 1.1536,
      "step": 648
    },
    {
      "epoch": 24.0,
      "eval_accuracy": 0.7867322173128384,
      "eval_loss": 0.964832603931427,
      "eval_runtime": 6.912,
      "eval_samples_per_second": 64.236,
      "eval_steps_per_second": 0.289,
      "step": 648
    },
    {
      "epoch": 25.0,
      "learning_rate": 2e-05,
      "loss": 1.0971,
      "step": 675
    },
    {
      "epoch": 25.0,
      "eval_accuracy": 0.7866705302810779,
      "eval_loss": 0.9783045053482056,
      "eval_runtime": 7.5744,
      "eval_samples_per_second": 58.618,
      "eval_steps_per_second": 0.264,
      "step": 675
    },
    {
      "epoch": 26.0,
      "learning_rate": 2e-05,
      "loss": 1.0809,
      "step": 702
    },
    {
      "epoch": 26.0,
      "eval_accuracy": 0.7918671517427532,
      "eval_loss": 0.9400666356086731,
      "eval_runtime": 7.1416,
      "eval_samples_per_second": 62.171,
      "eval_steps_per_second": 0.28,
      "step": 702
    },
    {
      "epoch": 27.0,
      "learning_rate": 2e-05,
      "loss": 1.0653,
      "step": 729
    },
    {
      "epoch": 27.0,
      "eval_accuracy": 0.7834480395236599,
      "eval_loss": 0.9835608005523682,
      "eval_runtime": 7.5584,
      "eval_samples_per_second": 58.743,
      "eval_steps_per_second": 0.265,
      "step": 729
    },
    {
      "epoch": 28.0,
      "learning_rate": 2e-05,
      "loss": 1.0341,
      "step": 756
    },
    {
      "epoch": 28.0,
      "eval_accuracy": 0.7897628339837887,
      "eval_loss": 0.9648067951202393,
      "eval_runtime": 7.5415,
      "eval_samples_per_second": 58.874,
      "eval_steps_per_second": 0.265,
      "step": 756
    },
    {
      "epoch": 29.0,
      "learning_rate": 2e-05,
      "loss": 1.0056,
      "step": 783
    },
    {
      "epoch": 29.0,
      "eval_accuracy": 0.7985819252651742,
      "eval_loss": 0.9186079502105713,
      "eval_runtime": 7.5907,
      "eval_samples_per_second": 58.492,
      "eval_steps_per_second": 0.263,
      "step": 783
    },
    {
      "epoch": 30.0,
      "learning_rate": 2e-05,
      "loss": 0.9993,
      "step": 810
    },
    {
      "epoch": 30.0,
      "eval_accuracy": 0.7981126511353583,
      "eval_loss": 0.9101386666297913,
      "eval_runtime": 7.191,
      "eval_samples_per_second": 61.744,
      "eval_steps_per_second": 0.278,
      "step": 810
    },
    {
      "epoch": 31.0,
      "learning_rate": 2e-05,
      "loss": 0.9849,
      "step": 837
    },
    {
      "epoch": 31.0,
      "eval_accuracy": 0.7893089053803339,
      "eval_loss": 0.9685505628585815,
      "eval_runtime": 7.5619,
      "eval_samples_per_second": 58.716,
      "eval_steps_per_second": 0.264,
      "step": 837
    },
    {
      "epoch": 32.0,
      "learning_rate": 2e-05,
      "loss": 0.9799,
      "step": 864
    },
    {
      "epoch": 32.0,
      "eval_accuracy": 0.7988627229152401,
      "eval_loss": 0.9036659002304077,
      "eval_runtime": 7.5514,
      "eval_samples_per_second": 58.797,
      "eval_steps_per_second": 0.265,
      "step": 864
    },
    {
      "epoch": 33.0,
      "learning_rate": 2e-05,
      "loss": 0.957,
      "step": 891
    },
    {
      "epoch": 33.0,
      "eval_accuracy": 0.7937915742793792,
      "eval_loss": 0.9258220195770264,
      "eval_runtime": 7.548,
      "eval_samples_per_second": 58.824,
      "eval_steps_per_second": 0.265,
      "step": 891
    },
    {
      "epoch": 34.0,
      "learning_rate": 2e-05,
      "loss": 0.9535,
      "step": 918
    },
    {
      "epoch": 34.0,
      "eval_accuracy": 0.7996154854496198,
      "eval_loss": 0.8886759877204895,
      "eval_runtime": 7.5277,
      "eval_samples_per_second": 58.982,
      "eval_steps_per_second": 0.266,
      "step": 918
    },
    {
      "epoch": 35.0,
      "learning_rate": 2e-05,
      "loss": 0.9343,
      "step": 945
    },
    {
      "epoch": 35.0,
      "eval_accuracy": 0.8085179621980223,
      "eval_loss": 0.8577904105186462,
      "eval_runtime": 6.7518,
      "eval_samples_per_second": 65.76,
      "eval_steps_per_second": 0.296,
      "step": 945
    },
    {
      "epoch": 36.0,
      "learning_rate": 2e-05,
      "loss": 0.9346,
      "step": 972
    },
    {
      "epoch": 36.0,
      "eval_accuracy": 0.8009545281527245,
      "eval_loss": 0.8876005411148071,
      "eval_runtime": 6.6328,
      "eval_samples_per_second": 66.94,
      "eval_steps_per_second": 0.302,
      "step": 972
    },
    {
      "epoch": 37.0,
      "learning_rate": 2e-05,
      "loss": 0.9187,
      "step": 999
    },
    {
      "epoch": 37.0,
      "eval_accuracy": 0.8115537848605577,
      "eval_loss": 0.8389037251472473,
      "eval_runtime": 7.5847,
      "eval_samples_per_second": 58.539,
      "eval_steps_per_second": 0.264,
      "step": 999
    }
  ],
  "max_steps": 1080,
  "num_train_epochs": 40,
  "total_flos": 311085194280960.0,
  "trial_name": null,
  "trial_params": null
}