| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 30.0, | |
| "eval_steps": 500, | |
| "global_step": 19710, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.76103500761035, | |
| "grad_norm": 1.1888039112091064, | |
| "learning_rate": 0.0009746321664129883, | |
| "loss": 5.3071, | |
| "max_memory_allocated (GB)": 5.75, | |
| "memory_allocated (GB)": 3.2, | |
| "step": 500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_accuracy": 0.08006198116451355, | |
| "eval_loss": 6.240572929382324, | |
| "eval_runtime": 1138.1454, | |
| "eval_samples_per_second": 590.273, | |
| "eval_steps_per_second": 0.577, | |
| "max_memory_allocated (GB)": 60.21, | |
| "memory_allocated (GB)": 3.2, | |
| "step": 657, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 1.5220700152207, | |
| "grad_norm": 1.0501078367233276, | |
| "learning_rate": 0.0009492643328259766, | |
| "loss": 3.1366, | |
| "max_memory_allocated (GB)": 60.21, | |
| "memory_allocated (GB)": 3.2, | |
| "step": 1000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_accuracy": 0.10465945339281382, | |
| "eval_loss": 5.848066329956055, | |
| "eval_runtime": 1134.9424, | |
| "eval_samples_per_second": 591.939, | |
| "eval_steps_per_second": 0.579, | |
| "max_memory_allocated (GB)": 60.21, | |
| "memory_allocated (GB)": 3.2, | |
| "step": 1314, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 2.2831050228310503, | |
| "grad_norm": 0.972637414932251, | |
| "learning_rate": 0.0009238964992389651, | |
| "loss": 2.6048, | |
| "max_memory_allocated (GB)": 60.21, | |
| "memory_allocated (GB)": 3.2, | |
| "step": 1500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_accuracy": 0.12382538697294054, | |
| "eval_loss": 5.552162170410156, | |
| "eval_runtime": 1132.6347, | |
| "eval_samples_per_second": 593.145, | |
| "eval_steps_per_second": 0.58, | |
| "max_memory_allocated (GB)": 60.21, | |
| "memory_allocated (GB)": 3.2, | |
| "step": 1971, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 3.0441400304414, | |
| "grad_norm": 0.8711762428283691, | |
| "learning_rate": 0.0008985286656519534, | |
| "loss": 2.3103, | |
| "max_memory_allocated (GB)": 60.21, | |
| "memory_allocated (GB)": 3.2, | |
| "step": 2000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 3.8051750380517504, | |
| "grad_norm": 0.859586775302887, | |
| "learning_rate": 0.0008731608320649417, | |
| "loss": 1.9918, | |
| "max_memory_allocated (GB)": 60.21, | |
| "memory_allocated (GB)": 3.2, | |
| "step": 2500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_accuracy": 0.1300875089496098, | |
| "eval_loss": 5.555095672607422, | |
| "eval_runtime": 1133.2064, | |
| "eval_samples_per_second": 592.846, | |
| "eval_steps_per_second": 0.58, | |
| "max_memory_allocated (GB)": 60.21, | |
| "memory_allocated (GB)": 3.2, | |
| "step": 2628, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 4.566210045662101, | |
| "grad_norm": 0.8556590676307678, | |
| "learning_rate": 0.00084779299847793, | |
| "loss": 1.8353, | |
| "max_memory_allocated (GB)": 60.21, | |
| "memory_allocated (GB)": 3.2, | |
| "step": 3000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_accuracy": 0.141504308464954, | |
| "eval_loss": 5.414204120635986, | |
| "eval_runtime": 1132.3487, | |
| "eval_samples_per_second": 593.295, | |
| "eval_steps_per_second": 0.58, | |
| "max_memory_allocated (GB)": 60.21, | |
| "memory_allocated (GB)": 3.2, | |
| "step": 3285, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 5.327245053272451, | |
| "grad_norm": 0.8324838280677795, | |
| "learning_rate": 0.0008224251648909183, | |
| "loss": 1.7262, | |
| "max_memory_allocated (GB)": 60.21, | |
| "memory_allocated (GB)": 3.2, | |
| "step": 3500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_accuracy": 0.14951988413511416, | |
| "eval_loss": 5.40610933303833, | |
| "eval_runtime": 1133.7065, | |
| "eval_samples_per_second": 592.585, | |
| "eval_steps_per_second": 0.58, | |
| "max_memory_allocated (GB)": 60.21, | |
| "memory_allocated (GB)": 3.2, | |
| "step": 3942, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 6.0882800608828, | |
| "grad_norm": 0.7686742544174194, | |
| "learning_rate": 0.0007970573313039067, | |
| "loss": 1.6381, | |
| "max_memory_allocated (GB)": 60.21, | |
| "memory_allocated (GB)": 3.2, | |
| "step": 4000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 6.8493150684931505, | |
| "grad_norm": 0.7999989986419678, | |
| "learning_rate": 0.000771689497716895, | |
| "loss": 1.5135, | |
| "max_memory_allocated (GB)": 60.21, | |
| "memory_allocated (GB)": 3.2, | |
| "step": 4500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_accuracy": 0.14675722704248328, | |
| "eval_loss": 5.426120758056641, | |
| "eval_runtime": 1131.9083, | |
| "eval_samples_per_second": 593.526, | |
| "eval_steps_per_second": 0.58, | |
| "max_memory_allocated (GB)": 60.21, | |
| "memory_allocated (GB)": 3.2, | |
| "step": 4599, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 7.610350076103501, | |
| "grad_norm": 0.8491269946098328, | |
| "learning_rate": 0.0007463216641298833, | |
| "loss": 1.4225, | |
| "max_memory_allocated (GB)": 60.21, | |
| "memory_allocated (GB)": 3.2, | |
| "step": 5000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_accuracy": 0.15733153522462218, | |
| "eval_loss": 5.333346843719482, | |
| "eval_runtime": 1130.7533, | |
| "eval_samples_per_second": 594.132, | |
| "eval_steps_per_second": 0.581, | |
| "max_memory_allocated (GB)": 60.21, | |
| "memory_allocated (GB)": 3.2, | |
| "step": 5256, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 8.37138508371385, | |
| "grad_norm": 0.7948514819145203, | |
| "learning_rate": 0.0007209538305428717, | |
| "loss": 1.354, | |
| "max_memory_allocated (GB)": 60.21, | |
| "memory_allocated (GB)": 3.2, | |
| "step": 5500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_accuracy": 0.16383479429666115, | |
| "eval_loss": 5.220494747161865, | |
| "eval_runtime": 1131.3928, | |
| "eval_samples_per_second": 593.796, | |
| "eval_steps_per_second": 0.581, | |
| "max_memory_allocated (GB)": 60.21, | |
| "memory_allocated (GB)": 3.2, | |
| "step": 5913, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 9.132420091324201, | |
| "grad_norm": 0.7878388166427612, | |
| "learning_rate": 0.00069558599695586, | |
| "loss": 1.3172, | |
| "max_memory_allocated (GB)": 60.21, | |
| "memory_allocated (GB)": 3.2, | |
| "step": 6000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 9.89345509893455, | |
| "grad_norm": 0.7506768703460693, | |
| "learning_rate": 0.0006702181633688484, | |
| "loss": 1.2511, | |
| "max_memory_allocated (GB)": 60.21, | |
| "memory_allocated (GB)": 3.2, | |
| "step": 6500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_accuracy": 0.17084414356885877, | |
| "eval_loss": 5.212928295135498, | |
| "eval_runtime": 1133.5622, | |
| "eval_samples_per_second": 592.66, | |
| "eval_steps_per_second": 0.58, | |
| "max_memory_allocated (GB)": 60.21, | |
| "memory_allocated (GB)": 3.2, | |
| "step": 6570, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 10.654490106544902, | |
| "grad_norm": 0.7633622288703918, | |
| "learning_rate": 0.0006448503297818367, | |
| "loss": 1.1742, | |
| "max_memory_allocated (GB)": 60.21, | |
| "memory_allocated (GB)": 3.2, | |
| "step": 7000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "eval_accuracy": 0.17239664968287494, | |
| "eval_loss": 5.200212001800537, | |
| "eval_runtime": 1129.7037, | |
| "eval_samples_per_second": 594.684, | |
| "eval_steps_per_second": 0.582, | |
| "max_memory_allocated (GB)": 60.21, | |
| "memory_allocated (GB)": 3.2, | |
| "step": 7227, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 11.415525114155251, | |
| "grad_norm": 0.7618717551231384, | |
| "learning_rate": 0.000619482496194825, | |
| "loss": 1.1342, | |
| "max_memory_allocated (GB)": 60.21, | |
| "memory_allocated (GB)": 3.2, | |
| "step": 7500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "eval_accuracy": 0.17819584797645788, | |
| "eval_loss": 5.163547039031982, | |
| "eval_runtime": 1128.6949, | |
| "eval_samples_per_second": 595.216, | |
| "eval_steps_per_second": 0.582, | |
| "max_memory_allocated (GB)": 60.21, | |
| "memory_allocated (GB)": 3.2, | |
| "step": 7884, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 12.1765601217656, | |
| "grad_norm": 0.7090550661087036, | |
| "learning_rate": 0.0005941146626078133, | |
| "loss": 1.1111, | |
| "max_memory_allocated (GB)": 60.21, | |
| "memory_allocated (GB)": 3.2, | |
| "step": 8000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 12.937595129375952, | |
| "grad_norm": 0.7710525393486023, | |
| "learning_rate": 0.0005687468290208016, | |
| "loss": 1.0711, | |
| "max_memory_allocated (GB)": 60.21, | |
| "memory_allocated (GB)": 3.2, | |
| "step": 8500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "eval_accuracy": 0.17787879735106435, | |
| "eval_loss": 5.143550872802734, | |
| "eval_runtime": 1133.3031, | |
| "eval_samples_per_second": 592.796, | |
| "eval_steps_per_second": 0.58, | |
| "max_memory_allocated (GB)": 60.21, | |
| "memory_allocated (GB)": 3.2, | |
| "step": 8541, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 13.698630136986301, | |
| "grad_norm": 0.7961007952690125, | |
| "learning_rate": 0.00054337899543379, | |
| "loss": 0.9971, | |
| "max_memory_allocated (GB)": 60.21, | |
| "memory_allocated (GB)": 3.2, | |
| "step": 9000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "eval_accuracy": 0.18167893935402052, | |
| "eval_loss": 5.107571125030518, | |
| "eval_runtime": 1132.5925, | |
| "eval_samples_per_second": 593.167, | |
| "eval_steps_per_second": 0.58, | |
| "max_memory_allocated (GB)": 60.21, | |
| "memory_allocated (GB)": 3.2, | |
| "step": 9198, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 14.459665144596652, | |
| "grad_norm": 0.7081454992294312, | |
| "learning_rate": 0.0005180111618467784, | |
| "loss": 0.9774, | |
| "max_memory_allocated (GB)": 60.21, | |
| "memory_allocated (GB)": 3.2, | |
| "step": 9500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "eval_accuracy": 0.19349168002595946, | |
| "eval_loss": 4.9076433181762695, | |
| "eval_runtime": 1135.4783, | |
| "eval_samples_per_second": 591.66, | |
| "eval_steps_per_second": 0.579, | |
| "max_memory_allocated (GB)": 60.21, | |
| "memory_allocated (GB)": 3.2, | |
| "step": 9855, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 15.220700152207002, | |
| "grad_norm": 0.706643283367157, | |
| "learning_rate": 0.0004926433282597666, | |
| "loss": 0.9457, | |
| "max_memory_allocated (GB)": 60.21, | |
| "memory_allocated (GB)": 3.2, | |
| "step": 10000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 15.981735159817351, | |
| "grad_norm": 0.7469919323921204, | |
| "learning_rate": 0.0004672754946727549, | |
| "loss": 0.9174, | |
| "max_memory_allocated (GB)": 60.21, | |
| "memory_allocated (GB)": 3.2, | |
| "step": 10500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "eval_accuracy": 0.18904255176632923, | |
| "eval_loss": 5.03179407119751, | |
| "eval_runtime": 1127.5619, | |
| "eval_samples_per_second": 595.814, | |
| "eval_steps_per_second": 0.583, | |
| "max_memory_allocated (GB)": 60.21, | |
| "memory_allocated (GB)": 3.2, | |
| "step": 10512, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 16.7427701674277, | |
| "grad_norm": 0.7092038989067078, | |
| "learning_rate": 0.0004419076610857433, | |
| "loss": 0.8675, | |
| "max_memory_allocated (GB)": 60.21, | |
| "memory_allocated (GB)": 3.2, | |
| "step": 11000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "eval_accuracy": 0.19512456517176552, | |
| "eval_loss": 5.039154052734375, | |
| "eval_runtime": 1129.1255, | |
| "eval_samples_per_second": 594.989, | |
| "eval_steps_per_second": 0.582, | |
| "max_memory_allocated (GB)": 60.21, | |
| "memory_allocated (GB)": 3.2, | |
| "step": 11169, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 17.503805175038053, | |
| "grad_norm": 0.722985029220581, | |
| "learning_rate": 0.0004165398274987316, | |
| "loss": 0.8499, | |
| "max_memory_allocated (GB)": 60.21, | |
| "memory_allocated (GB)": 3.2, | |
| "step": 11500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "eval_accuracy": 0.19776069971435672, | |
| "eval_loss": 5.024279594421387, | |
| "eval_runtime": 1134.2545, | |
| "eval_samples_per_second": 592.298, | |
| "eval_steps_per_second": 0.579, | |
| "max_memory_allocated (GB)": 60.21, | |
| "memory_allocated (GB)": 3.2, | |
| "step": 11826, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 18.264840182648403, | |
| "grad_norm": 0.703146755695343, | |
| "learning_rate": 0.0003911719939117199, | |
| "loss": 0.8262, | |
| "max_memory_allocated (GB)": 60.21, | |
| "memory_allocated (GB)": 3.2, | |
| "step": 12000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 19.0, | |
| "eval_accuracy": 0.19716678797946466, | |
| "eval_loss": 5.084349632263184, | |
| "eval_runtime": 1134.1402, | |
| "eval_samples_per_second": 592.358, | |
| "eval_steps_per_second": 0.579, | |
| "max_memory_allocated (GB)": 60.21, | |
| "memory_allocated (GB)": 3.2, | |
| "step": 12483, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 19.025875190258752, | |
| "grad_norm": 0.6394225358963013, | |
| "learning_rate": 0.00036580416032470827, | |
| "loss": 0.8039, | |
| "max_memory_allocated (GB)": 60.21, | |
| "memory_allocated (GB)": 3.2, | |
| "step": 12500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 19.7869101978691, | |
| "grad_norm": 0.6850036978721619, | |
| "learning_rate": 0.0003404363267376966, | |
| "loss": 0.7623, | |
| "max_memory_allocated (GB)": 60.21, | |
| "memory_allocated (GB)": 3.2, | |
| "step": 13000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "eval_accuracy": 0.20482214650715894, | |
| "eval_loss": 5.0004353523254395, | |
| "eval_runtime": 1132.7333, | |
| "eval_samples_per_second": 593.094, | |
| "eval_steps_per_second": 0.58, | |
| "max_memory_allocated (GB)": 60.21, | |
| "memory_allocated (GB)": 3.2, | |
| "step": 13140, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 20.54794520547945, | |
| "grad_norm": 0.7249587178230286, | |
| "learning_rate": 0.00031506849315068495, | |
| "loss": 0.7481, | |
| "max_memory_allocated (GB)": 60.21, | |
| "memory_allocated (GB)": 3.2, | |
| "step": 13500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 21.0, | |
| "eval_accuracy": 0.21318007731272057, | |
| "eval_loss": 4.842759609222412, | |
| "eval_runtime": 1132.229, | |
| "eval_samples_per_second": 593.358, | |
| "eval_steps_per_second": 0.58, | |
| "max_memory_allocated (GB)": 60.24, | |
| "memory_allocated (GB)": 3.2, | |
| "step": 13797, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 21.308980213089804, | |
| "grad_norm": 0.651644766330719, | |
| "learning_rate": 0.00028970065956367326, | |
| "loss": 0.7284, | |
| "max_memory_allocated (GB)": 60.24, | |
| "memory_allocated (GB)": 3.2, | |
| "step": 14000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 22.0, | |
| "eval_accuracy": 0.2148576174761877, | |
| "eval_loss": 4.846081733703613, | |
| "eval_runtime": 1134.6589, | |
| "eval_samples_per_second": 592.087, | |
| "eval_steps_per_second": 0.579, | |
| "max_memory_allocated (GB)": 60.24, | |
| "memory_allocated (GB)": 3.2, | |
| "step": 14454, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 22.070015220700153, | |
| "grad_norm": 0.6403504610061646, | |
| "learning_rate": 0.00026433282597666157, | |
| "loss": 0.706, | |
| "max_memory_allocated (GB)": 60.24, | |
| "memory_allocated (GB)": 3.2, | |
| "step": 14500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 22.831050228310502, | |
| "grad_norm": 0.6770262718200684, | |
| "learning_rate": 0.0002389649923896499, | |
| "loss": 0.6834, | |
| "max_memory_allocated (GB)": 60.24, | |
| "memory_allocated (GB)": 3.2, | |
| "step": 15000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 23.0, | |
| "eval_accuracy": 0.2159159413947548, | |
| "eval_loss": 4.8741374015808105, | |
| "eval_runtime": 1130.2975, | |
| "eval_samples_per_second": 594.372, | |
| "eval_steps_per_second": 0.581, | |
| "max_memory_allocated (GB)": 60.24, | |
| "memory_allocated (GB)": 3.2, | |
| "step": 15111, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 23.59208523592085, | |
| "grad_norm": 0.6229885816574097, | |
| "learning_rate": 0.00021359715880263824, | |
| "loss": 0.6591, | |
| "max_memory_allocated (GB)": 60.24, | |
| "memory_allocated (GB)": 3.2, | |
| "step": 15500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 24.0, | |
| "eval_accuracy": 0.2186681789832648, | |
| "eval_loss": 4.89931058883667, | |
| "eval_runtime": 1133.9373, | |
| "eval_samples_per_second": 592.464, | |
| "eval_steps_per_second": 0.579, | |
| "max_memory_allocated (GB)": 60.24, | |
| "memory_allocated (GB)": 3.2, | |
| "step": 15768, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 24.3531202435312, | |
| "grad_norm": 0.6464186310768127, | |
| "learning_rate": 0.00018822932521562658, | |
| "loss": 0.6447, | |
| "max_memory_allocated (GB)": 60.24, | |
| "memory_allocated (GB)": 3.2, | |
| "step": 16000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 25.0, | |
| "eval_accuracy": 0.21962528486180016, | |
| "eval_loss": 4.8415398597717285, | |
| "eval_runtime": 1126.1392, | |
| "eval_samples_per_second": 596.567, | |
| "eval_steps_per_second": 0.583, | |
| "max_memory_allocated (GB)": 60.24, | |
| "memory_allocated (GB)": 3.2, | |
| "step": 16425, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 25.114155251141554, | |
| "grad_norm": 0.695124626159668, | |
| "learning_rate": 0.00016286149162861492, | |
| "loss": 0.6323, | |
| "max_memory_allocated (GB)": 60.24, | |
| "memory_allocated (GB)": 3.2, | |
| "step": 16500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 25.875190258751903, | |
| "grad_norm": 0.7219062447547913, | |
| "learning_rate": 0.00013749365804160323, | |
| "loss": 0.6107, | |
| "max_memory_allocated (GB)": 60.24, | |
| "memory_allocated (GB)": 3.2, | |
| "step": 17000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 26.0, | |
| "eval_accuracy": 0.22164369166008005, | |
| "eval_loss": 4.859982967376709, | |
| "eval_runtime": 1131.8158, | |
| "eval_samples_per_second": 593.574, | |
| "eval_steps_per_second": 0.58, | |
| "max_memory_allocated (GB)": 60.24, | |
| "memory_allocated (GB)": 3.2, | |
| "step": 17082, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 26.636225266362253, | |
| "grad_norm": 0.6680580377578735, | |
| "learning_rate": 0.00011212582445459158, | |
| "loss": 0.5958, | |
| "max_memory_allocated (GB)": 60.24, | |
| "memory_allocated (GB)": 3.2, | |
| "step": 17500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 27.0, | |
| "eval_accuracy": 0.22447184277861382, | |
| "eval_loss": 4.839137554168701, | |
| "eval_runtime": 1135.3766, | |
| "eval_samples_per_second": 591.713, | |
| "eval_steps_per_second": 0.579, | |
| "max_memory_allocated (GB)": 60.24, | |
| "memory_allocated (GB)": 3.2, | |
| "step": 17739, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 27.397260273972602, | |
| "grad_norm": 0.6511676907539368, | |
| "learning_rate": 8.67579908675799e-05, | |
| "loss": 0.5836, | |
| "max_memory_allocated (GB)": 60.24, | |
| "memory_allocated (GB)": 3.2, | |
| "step": 18000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 28.0, | |
| "eval_accuracy": 0.22654234709749826, | |
| "eval_loss": 4.856111526489258, | |
| "eval_runtime": 1131.6209, | |
| "eval_samples_per_second": 593.677, | |
| "eval_steps_per_second": 0.581, | |
| "max_memory_allocated (GB)": 60.24, | |
| "memory_allocated (GB)": 3.2, | |
| "step": 18396, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 28.15829528158295, | |
| "grad_norm": 0.6694862842559814, | |
| "learning_rate": 6.139015728056824e-05, | |
| "loss": 0.5713, | |
| "max_memory_allocated (GB)": 60.24, | |
| "memory_allocated (GB)": 3.2, | |
| "step": 18500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 28.919330289193304, | |
| "grad_norm": 0.6698545813560486, | |
| "learning_rate": 3.6022323693556566e-05, | |
| "loss": 0.5547, | |
| "max_memory_allocated (GB)": 60.24, | |
| "memory_allocated (GB)": 3.2, | |
| "step": 19000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 29.0, | |
| "eval_accuracy": 0.2294940437648943, | |
| "eval_loss": 4.793288230895996, | |
| "eval_runtime": 1134.2517, | |
| "eval_samples_per_second": 592.3, | |
| "eval_steps_per_second": 0.579, | |
| "max_memory_allocated (GB)": 60.24, | |
| "memory_allocated (GB)": 3.2, | |
| "step": 19053, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 29.680365296803654, | |
| "grad_norm": 0.5940834879875183, | |
| "learning_rate": 1.06544901065449e-05, | |
| "loss": 0.547, | |
| "max_memory_allocated (GB)": 60.24, | |
| "memory_allocated (GB)": 3.2, | |
| "step": 19500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 30.0, | |
| "eval_accuracy": 0.22931691219483877, | |
| "eval_loss": 4.809002876281738, | |
| "eval_runtime": 1130.6604, | |
| "eval_samples_per_second": 594.181, | |
| "eval_steps_per_second": 0.581, | |
| "max_memory_allocated (GB)": 60.24, | |
| "memory_allocated (GB)": 3.2, | |
| "step": 19710, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 30.0, | |
| "max_memory_allocated (GB)": 1.42, | |
| "memory_allocated (GB)": 1.42, | |
| "step": 19710, | |
| "total_flos": 2.9333313524800244e+21, | |
| "total_memory_available (GB)": 94.62, | |
| "train_loss": 0.0, | |
| "train_runtime": 0.2168, | |
| "train_samples_per_second": 92964947.027, | |
| "train_steps_per_second": 90914.595 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 19710, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 30, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 2.9333313524800244e+21, | |
| "train_batch_size": 128, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |