| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.9691629955947136, |
| "global_step": 220, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.04, |
| "learning_rate": 9.559471365638767e-05, |
| "loss": 1.5082, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.04, |
| "eval_accuracy": 0.3876652121543884, |
| "eval_loss": 1.4889150857925415, |
| "eval_runtime": 331.6444, |
| "eval_samples_per_second": 1.369, |
| "eval_steps_per_second": 0.344, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 9.118942731277533e-05, |
| "loss": 1.4193, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.09, |
| "eval_accuracy": 0.47356829047203064, |
| "eval_loss": 1.3213800191879272, |
| "eval_runtime": 340.6201, |
| "eval_samples_per_second": 1.333, |
| "eval_steps_per_second": 0.335, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 8.6784140969163e-05, |
| "loss": 1.2624, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.13, |
| "eval_accuracy": 0.5044052600860596, |
| "eval_loss": 1.2342218160629272, |
| "eval_runtime": 350.6765, |
| "eval_samples_per_second": 1.295, |
| "eval_steps_per_second": 0.325, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 8.237885462555066e-05, |
| "loss": 1.3081, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.18, |
| "eval_accuracy": 0.592510998249054, |
| "eval_loss": 1.0834957361221313, |
| "eval_runtime": 348.0747, |
| "eval_samples_per_second": 1.304, |
| "eval_steps_per_second": 0.328, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 7.797356828193832e-05, |
| "loss": 1.1592, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.22, |
| "eval_accuracy": 0.5110132098197937, |
| "eval_loss": 1.0756179094314575, |
| "eval_runtime": 352.6671, |
| "eval_samples_per_second": 1.287, |
| "eval_steps_per_second": 0.323, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 7.3568281938326e-05, |
| "loss": 0.8424, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.26, |
| "eval_accuracy": 0.634361207485199, |
| "eval_loss": 0.965646505355835, |
| "eval_runtime": 341.7366, |
| "eval_samples_per_second": 1.329, |
| "eval_steps_per_second": 0.334, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 6.916299559471366e-05, |
| "loss": 0.7428, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.31, |
| "eval_accuracy": 0.6189427375793457, |
| "eval_loss": 0.8488078713417053, |
| "eval_runtime": 340.5032, |
| "eval_samples_per_second": 1.333, |
| "eval_steps_per_second": 0.335, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 6.475770925110133e-05, |
| "loss": 0.857, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.35, |
| "eval_accuracy": 0.6806167364120483, |
| "eval_loss": 0.7956423163414001, |
| "eval_runtime": 340.5035, |
| "eval_samples_per_second": 1.333, |
| "eval_steps_per_second": 0.335, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 6.035242290748899e-05, |
| "loss": 1.0037, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.4, |
| "eval_accuracy": 0.7731277346611023, |
| "eval_loss": 0.6489915251731873, |
| "eval_runtime": 342.3782, |
| "eval_samples_per_second": 1.326, |
| "eval_steps_per_second": 0.333, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 5.5947136563876653e-05, |
| "loss": 0.8157, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.44, |
| "eval_accuracy": 0.6762114763259888, |
| "eval_loss": 0.6552606225013733, |
| "eval_runtime": 334.1663, |
| "eval_samples_per_second": 1.359, |
| "eval_steps_per_second": 0.341, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 5.154185022026432e-05, |
| "loss": 0.5338, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.48, |
| "eval_accuracy": 0.8039647340774536, |
| "eval_loss": 0.517014741897583, |
| "eval_runtime": 340.6693, |
| "eval_samples_per_second": 1.333, |
| "eval_steps_per_second": 0.335, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 4.7136563876651986e-05, |
| "loss": 0.509, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.53, |
| "eval_accuracy": 0.8259912133216858, |
| "eval_loss": 0.48134031891822815, |
| "eval_runtime": 335.5911, |
| "eval_samples_per_second": 1.353, |
| "eval_steps_per_second": 0.34, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 4.273127753303965e-05, |
| "loss": 0.5301, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.57, |
| "eval_accuracy": 0.8017621040344238, |
| "eval_loss": 0.4793573021888733, |
| "eval_runtime": 330.9864, |
| "eval_samples_per_second": 1.372, |
| "eval_steps_per_second": 0.344, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 3.832599118942731e-05, |
| "loss": 0.6642, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.62, |
| "eval_accuracy": 0.8215858936309814, |
| "eval_loss": 0.42870593070983887, |
| "eval_runtime": 340.6662, |
| "eval_samples_per_second": 1.333, |
| "eval_steps_per_second": 0.335, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 3.392070484581498e-05, |
| "loss": 0.3661, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.66, |
| "eval_accuracy": 0.8590308427810669, |
| "eval_loss": 0.38554972410202026, |
| "eval_runtime": 338.1541, |
| "eval_samples_per_second": 1.343, |
| "eval_steps_per_second": 0.337, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 2.9515418502202647e-05, |
| "loss": 0.2914, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.7, |
| "eval_accuracy": 0.8237885236740112, |
| "eval_loss": 0.4844018816947937, |
| "eval_runtime": 337.4013, |
| "eval_samples_per_second": 1.346, |
| "eval_steps_per_second": 0.338, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 2.511013215859031e-05, |
| "loss": 0.3867, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.75, |
| "eval_accuracy": 0.7775330543518066, |
| "eval_loss": 0.6406939029693604, |
| "eval_runtime": 341.5663, |
| "eval_samples_per_second": 1.329, |
| "eval_steps_per_second": 0.334, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 2.0704845814977973e-05, |
| "loss": 0.5115, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.79, |
| "eval_accuracy": 0.8766520023345947, |
| "eval_loss": 0.33483564853668213, |
| "eval_runtime": 340.3465, |
| "eval_samples_per_second": 1.334, |
| "eval_steps_per_second": 0.335, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 1.629955947136564e-05, |
| "loss": 0.3104, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.84, |
| "eval_accuracy": 0.8634361028671265, |
| "eval_loss": 0.34648624062538147, |
| "eval_runtime": 337.7392, |
| "eval_samples_per_second": 1.344, |
| "eval_steps_per_second": 0.338, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 1.1894273127753304e-05, |
| "loss": 0.3787, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.88, |
| "eval_accuracy": 0.865638792514801, |
| "eval_loss": 0.34313932061195374, |
| "eval_runtime": 337.3724, |
| "eval_samples_per_second": 1.346, |
| "eval_steps_per_second": 0.338, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 7.488986784140969e-06, |
| "loss": 0.2639, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.93, |
| "eval_accuracy": 0.8568282127380371, |
| "eval_loss": 0.3529247045516968, |
| "eval_runtime": 342.9636, |
| "eval_samples_per_second": 1.324, |
| "eval_steps_per_second": 0.332, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 3.0837004405286347e-06, |
| "loss": 0.2893, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.97, |
| "eval_accuracy": 0.865638792514801, |
| "eval_loss": 0.34023645520210266, |
| "eval_runtime": 336.1969, |
| "eval_samples_per_second": 1.35, |
| "eval_steps_per_second": 0.339, |
| "step": 220 |
| } |
| ], |
| "max_steps": 227, |
| "num_train_epochs": 1, |
| "total_flos": 5.371779943296e+16, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|