{ "best_metric": null, "best_model_checkpoint": null, "epoch": 30.0, "eval_steps": 500, "global_step": 19710, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.76103500761035, "grad_norm": 1.1888039112091064, "learning_rate": 0.0009746321664129883, "loss": 5.3071, "max_memory_allocated (GB)": 5.75, "memory_allocated (GB)": 3.2, "step": 500, "total_memory_available (GB)": 94.62 }, { "epoch": 1.0, "eval_accuracy": 0.08006198116451355, "eval_loss": 6.240572929382324, "eval_runtime": 1138.1454, "eval_samples_per_second": 590.273, "eval_steps_per_second": 0.577, "max_memory_allocated (GB)": 60.21, "memory_allocated (GB)": 3.2, "step": 657, "total_memory_available (GB)": 94.62 }, { "epoch": 1.5220700152207, "grad_norm": 1.0501078367233276, "learning_rate": 0.0009492643328259766, "loss": 3.1366, "max_memory_allocated (GB)": 60.21, "memory_allocated (GB)": 3.2, "step": 1000, "total_memory_available (GB)": 94.62 }, { "epoch": 2.0, "eval_accuracy": 0.10465945339281382, "eval_loss": 5.848066329956055, "eval_runtime": 1134.9424, "eval_samples_per_second": 591.939, "eval_steps_per_second": 0.579, "max_memory_allocated (GB)": 60.21, "memory_allocated (GB)": 3.2, "step": 1314, "total_memory_available (GB)": 94.62 }, { "epoch": 2.2831050228310503, "grad_norm": 0.972637414932251, "learning_rate": 0.0009238964992389651, "loss": 2.6048, "max_memory_allocated (GB)": 60.21, "memory_allocated (GB)": 3.2, "step": 1500, "total_memory_available (GB)": 94.62 }, { "epoch": 3.0, "eval_accuracy": 0.12382538697294054, "eval_loss": 5.552162170410156, "eval_runtime": 1132.6347, "eval_samples_per_second": 593.145, "eval_steps_per_second": 0.58, "max_memory_allocated (GB)": 60.21, "memory_allocated (GB)": 3.2, "step": 1971, "total_memory_available (GB)": 94.62 }, { "epoch": 3.0441400304414, "grad_norm": 0.8711762428283691, "learning_rate": 0.0008985286656519534, "loss": 2.3103, "max_memory_allocated (GB)": 60.21, "memory_allocated (GB)": 3.2, "step": 2000, "total_memory_available (GB)": 94.62 }, { "epoch": 3.8051750380517504, "grad_norm": 0.859586775302887, "learning_rate": 0.0008731608320649417, "loss": 1.9918, "max_memory_allocated (GB)": 60.21, "memory_allocated (GB)": 3.2, "step": 2500, "total_memory_available (GB)": 94.62 }, { "epoch": 4.0, "eval_accuracy": 0.1300875089496098, "eval_loss": 5.555095672607422, "eval_runtime": 1133.2064, "eval_samples_per_second": 592.846, "eval_steps_per_second": 0.58, "max_memory_allocated (GB)": 60.21, "memory_allocated (GB)": 3.2, "step": 2628, "total_memory_available (GB)": 94.62 }, { "epoch": 4.566210045662101, "grad_norm": 0.8556590676307678, "learning_rate": 0.00084779299847793, "loss": 1.8353, "max_memory_allocated (GB)": 60.21, "memory_allocated (GB)": 3.2, "step": 3000, "total_memory_available (GB)": 94.62 }, { "epoch": 5.0, "eval_accuracy": 0.141504308464954, "eval_loss": 5.414204120635986, "eval_runtime": 1132.3487, "eval_samples_per_second": 593.295, "eval_steps_per_second": 0.58, "max_memory_allocated (GB)": 60.21, "memory_allocated (GB)": 3.2, "step": 3285, "total_memory_available (GB)": 94.62 }, { "epoch": 5.327245053272451, "grad_norm": 0.8324838280677795, "learning_rate": 0.0008224251648909183, "loss": 1.7262, "max_memory_allocated (GB)": 60.21, "memory_allocated (GB)": 3.2, "step": 3500, "total_memory_available (GB)": 94.62 }, { "epoch": 6.0, "eval_accuracy": 0.14951988413511416, "eval_loss": 5.40610933303833, "eval_runtime": 1133.7065, "eval_samples_per_second": 592.585, "eval_steps_per_second": 0.58, "max_memory_allocated (GB)": 60.21, "memory_allocated (GB)": 3.2, "step": 3942, "total_memory_available (GB)": 94.62 }, { "epoch": 6.0882800608828, "grad_norm": 0.7686742544174194, "learning_rate": 0.0007970573313039067, "loss": 1.6381, "max_memory_allocated (GB)": 60.21, "memory_allocated (GB)": 3.2, "step": 4000, "total_memory_available (GB)": 94.62 }, { "epoch": 6.8493150684931505, "grad_norm": 0.7999989986419678, "learning_rate": 0.000771689497716895, "loss": 1.5135, "max_memory_allocated (GB)": 60.21, "memory_allocated (GB)": 3.2, "step": 4500, "total_memory_available (GB)": 94.62 }, { "epoch": 7.0, "eval_accuracy": 0.14675722704248328, "eval_loss": 5.426120758056641, "eval_runtime": 1131.9083, "eval_samples_per_second": 593.526, "eval_steps_per_second": 0.58, "max_memory_allocated (GB)": 60.21, "memory_allocated (GB)": 3.2, "step": 4599, "total_memory_available (GB)": 94.62 }, { "epoch": 7.610350076103501, "grad_norm": 0.8491269946098328, "learning_rate": 0.0007463216641298833, "loss": 1.4225, "max_memory_allocated (GB)": 60.21, "memory_allocated (GB)": 3.2, "step": 5000, "total_memory_available (GB)": 94.62 }, { "epoch": 8.0, "eval_accuracy": 0.15733153522462218, "eval_loss": 5.333346843719482, "eval_runtime": 1130.7533, "eval_samples_per_second": 594.132, "eval_steps_per_second": 0.581, "max_memory_allocated (GB)": 60.21, "memory_allocated (GB)": 3.2, "step": 5256, "total_memory_available (GB)": 94.62 }, { "epoch": 8.37138508371385, "grad_norm": 0.7948514819145203, "learning_rate": 0.0007209538305428717, "loss": 1.354, "max_memory_allocated (GB)": 60.21, "memory_allocated (GB)": 3.2, "step": 5500, "total_memory_available (GB)": 94.62 }, { "epoch": 9.0, "eval_accuracy": 0.16383479429666115, "eval_loss": 5.220494747161865, "eval_runtime": 1131.3928, "eval_samples_per_second": 593.796, "eval_steps_per_second": 0.581, "max_memory_allocated (GB)": 60.21, "memory_allocated (GB)": 3.2, "step": 5913, "total_memory_available (GB)": 94.62 }, { "epoch": 9.132420091324201, "grad_norm": 0.7878388166427612, "learning_rate": 0.00069558599695586, "loss": 1.3172, "max_memory_allocated (GB)": 60.21, "memory_allocated (GB)": 3.2, "step": 6000, "total_memory_available (GB)": 94.62 }, { "epoch": 9.89345509893455, "grad_norm": 0.7506768703460693, "learning_rate": 0.0006702181633688484, "loss": 1.2511, "max_memory_allocated (GB)": 60.21, "memory_allocated (GB)": 3.2, "step": 6500, "total_memory_available (GB)": 94.62 }, { "epoch": 10.0, "eval_accuracy": 0.17084414356885877, "eval_loss": 5.212928295135498, "eval_runtime": 1133.5622, "eval_samples_per_second": 592.66, "eval_steps_per_second": 0.58, "max_memory_allocated (GB)": 60.21, "memory_allocated (GB)": 3.2, "step": 6570, "total_memory_available (GB)": 94.62 }, { "epoch": 10.654490106544902, "grad_norm": 0.7633622288703918, "learning_rate": 0.0006448503297818367, "loss": 1.1742, "max_memory_allocated (GB)": 60.21, "memory_allocated (GB)": 3.2, "step": 7000, "total_memory_available (GB)": 94.62 }, { "epoch": 11.0, "eval_accuracy": 0.17239664968287494, "eval_loss": 5.200212001800537, "eval_runtime": 1129.7037, "eval_samples_per_second": 594.684, "eval_steps_per_second": 0.582, "max_memory_allocated (GB)": 60.21, "memory_allocated (GB)": 3.2, "step": 7227, "total_memory_available (GB)": 94.62 }, { "epoch": 11.415525114155251, "grad_norm": 0.7618717551231384, "learning_rate": 0.000619482496194825, "loss": 1.1342, "max_memory_allocated (GB)": 60.21, "memory_allocated (GB)": 3.2, "step": 7500, "total_memory_available (GB)": 94.62 }, { "epoch": 12.0, "eval_accuracy": 0.17819584797645788, "eval_loss": 5.163547039031982, "eval_runtime": 1128.6949, "eval_samples_per_second": 595.216, "eval_steps_per_second": 0.582, "max_memory_allocated (GB)": 60.21, "memory_allocated (GB)": 3.2, "step": 7884, "total_memory_available (GB)": 94.62 }, { "epoch": 12.1765601217656, "grad_norm": 0.7090550661087036, "learning_rate": 0.0005941146626078133, "loss": 1.1111, "max_memory_allocated (GB)": 60.21, "memory_allocated (GB)": 3.2, "step": 8000, "total_memory_available (GB)": 94.62 }, { "epoch": 12.937595129375952, "grad_norm": 0.7710525393486023, "learning_rate": 0.0005687468290208016, "loss": 1.0711, "max_memory_allocated (GB)": 60.21, "memory_allocated (GB)": 3.2, "step": 8500, "total_memory_available (GB)": 94.62 }, { "epoch": 13.0, "eval_accuracy": 0.17787879735106435, "eval_loss": 5.143550872802734, "eval_runtime": 1133.3031, "eval_samples_per_second": 592.796, "eval_steps_per_second": 0.58, "max_memory_allocated (GB)": 60.21, "memory_allocated (GB)": 3.2, "step": 8541, "total_memory_available (GB)": 94.62 }, { "epoch": 13.698630136986301, "grad_norm": 0.7961007952690125, "learning_rate": 0.00054337899543379, "loss": 0.9971, "max_memory_allocated (GB)": 60.21, "memory_allocated (GB)": 3.2, "step": 9000, "total_memory_available (GB)": 94.62 }, { "epoch": 14.0, "eval_accuracy": 0.18167893935402052, "eval_loss": 5.107571125030518, "eval_runtime": 1132.5925, "eval_samples_per_second": 593.167, "eval_steps_per_second": 0.58, "max_memory_allocated (GB)": 60.21, "memory_allocated (GB)": 3.2, "step": 9198, "total_memory_available (GB)": 94.62 }, { "epoch": 14.459665144596652, "grad_norm": 0.7081454992294312, "learning_rate": 0.0005180111618467784, "loss": 0.9774, "max_memory_allocated (GB)": 60.21, "memory_allocated (GB)": 3.2, "step": 9500, "total_memory_available (GB)": 94.62 }, { "epoch": 15.0, "eval_accuracy": 0.19349168002595946, "eval_loss": 4.9076433181762695, "eval_runtime": 1135.4783, "eval_samples_per_second": 591.66, "eval_steps_per_second": 0.579, "max_memory_allocated (GB)": 60.21, "memory_allocated (GB)": 3.2, "step": 9855, "total_memory_available (GB)": 94.62 }, { "epoch": 15.220700152207002, "grad_norm": 0.706643283367157, "learning_rate": 0.0004926433282597666, "loss": 0.9457, "max_memory_allocated (GB)": 60.21, "memory_allocated (GB)": 3.2, "step": 10000, "total_memory_available (GB)": 94.62 }, { "epoch": 15.981735159817351, "grad_norm": 0.7469919323921204, "learning_rate": 0.0004672754946727549, "loss": 0.9174, "max_memory_allocated (GB)": 60.21, "memory_allocated (GB)": 3.2, "step": 10500, "total_memory_available (GB)": 94.62 }, { "epoch": 16.0, "eval_accuracy": 0.18904255176632923, "eval_loss": 5.03179407119751, "eval_runtime": 1127.5619, "eval_samples_per_second": 595.814, "eval_steps_per_second": 0.583, "max_memory_allocated (GB)": 60.21, "memory_allocated (GB)": 3.2, "step": 10512, "total_memory_available (GB)": 94.62 }, { "epoch": 16.7427701674277, "grad_norm": 0.7092038989067078, "learning_rate": 0.0004419076610857433, "loss": 0.8675, "max_memory_allocated (GB)": 60.21, "memory_allocated (GB)": 3.2, "step": 11000, "total_memory_available (GB)": 94.62 }, { "epoch": 17.0, "eval_accuracy": 0.19512456517176552, "eval_loss": 5.039154052734375, "eval_runtime": 1129.1255, "eval_samples_per_second": 594.989, "eval_steps_per_second": 0.582, "max_memory_allocated (GB)": 60.21, "memory_allocated (GB)": 3.2, "step": 11169, "total_memory_available (GB)": 94.62 }, { "epoch": 17.503805175038053, "grad_norm": 0.722985029220581, "learning_rate": 0.0004165398274987316, "loss": 0.8499, "max_memory_allocated (GB)": 60.21, "memory_allocated (GB)": 3.2, "step": 11500, "total_memory_available (GB)": 94.62 }, { "epoch": 18.0, "eval_accuracy": 0.19776069971435672, "eval_loss": 5.024279594421387, "eval_runtime": 1134.2545, "eval_samples_per_second": 592.298, "eval_steps_per_second": 0.579, "max_memory_allocated (GB)": 60.21, "memory_allocated (GB)": 3.2, "step": 11826, "total_memory_available (GB)": 94.62 }, { "epoch": 18.264840182648403, "grad_norm": 0.703146755695343, "learning_rate": 0.0003911719939117199, "loss": 0.8262, "max_memory_allocated (GB)": 60.21, "memory_allocated (GB)": 3.2, "step": 12000, "total_memory_available (GB)": 94.62 }, { "epoch": 19.0, "eval_accuracy": 0.19716678797946466, "eval_loss": 5.084349632263184, "eval_runtime": 1134.1402, "eval_samples_per_second": 592.358, "eval_steps_per_second": 0.579, "max_memory_allocated (GB)": 60.21, "memory_allocated (GB)": 3.2, "step": 12483, "total_memory_available (GB)": 94.62 }, { "epoch": 19.025875190258752, "grad_norm": 0.6394225358963013, "learning_rate": 0.00036580416032470827, "loss": 0.8039, "max_memory_allocated (GB)": 60.21, "memory_allocated (GB)": 3.2, "step": 12500, "total_memory_available (GB)": 94.62 }, { "epoch": 19.7869101978691, "grad_norm": 0.6850036978721619, "learning_rate": 0.0003404363267376966, "loss": 0.7623, "max_memory_allocated (GB)": 60.21, "memory_allocated (GB)": 3.2, "step": 13000, "total_memory_available (GB)": 94.62 }, { "epoch": 20.0, "eval_accuracy": 0.20482214650715894, "eval_loss": 5.0004353523254395, "eval_runtime": 1132.7333, "eval_samples_per_second": 593.094, "eval_steps_per_second": 0.58, "max_memory_allocated (GB)": 60.21, "memory_allocated (GB)": 3.2, "step": 13140, "total_memory_available (GB)": 94.62 }, { "epoch": 20.54794520547945, "grad_norm": 0.7249587178230286, "learning_rate": 0.00031506849315068495, "loss": 0.7481, "max_memory_allocated (GB)": 60.21, "memory_allocated (GB)": 3.2, "step": 13500, "total_memory_available (GB)": 94.62 }, { "epoch": 21.0, "eval_accuracy": 0.21318007731272057, "eval_loss": 4.842759609222412, "eval_runtime": 1132.229, "eval_samples_per_second": 593.358, "eval_steps_per_second": 0.58, "max_memory_allocated (GB)": 60.24, "memory_allocated (GB)": 3.2, "step": 13797, "total_memory_available (GB)": 94.62 }, { "epoch": 21.308980213089804, "grad_norm": 0.651644766330719, "learning_rate": 0.00028970065956367326, "loss": 0.7284, "max_memory_allocated (GB)": 60.24, "memory_allocated (GB)": 3.2, "step": 14000, "total_memory_available (GB)": 94.62 }, { "epoch": 22.0, "eval_accuracy": 0.2148576174761877, "eval_loss": 4.846081733703613, "eval_runtime": 1134.6589, "eval_samples_per_second": 592.087, "eval_steps_per_second": 0.579, "max_memory_allocated (GB)": 60.24, "memory_allocated (GB)": 3.2, "step": 14454, "total_memory_available (GB)": 94.62 }, { "epoch": 22.070015220700153, "grad_norm": 0.6403504610061646, "learning_rate": 0.00026433282597666157, "loss": 0.706, "max_memory_allocated (GB)": 60.24, "memory_allocated (GB)": 3.2, "step": 14500, "total_memory_available (GB)": 94.62 }, { "epoch": 22.831050228310502, "grad_norm": 0.6770262718200684, "learning_rate": 0.0002389649923896499, "loss": 0.6834, "max_memory_allocated (GB)": 60.24, "memory_allocated (GB)": 3.2, "step": 15000, "total_memory_available (GB)": 94.62 }, { "epoch": 23.0, "eval_accuracy": 0.2159159413947548, "eval_loss": 4.8741374015808105, "eval_runtime": 1130.2975, "eval_samples_per_second": 594.372, "eval_steps_per_second": 0.581, "max_memory_allocated (GB)": 60.24, "memory_allocated (GB)": 3.2, "step": 15111, "total_memory_available (GB)": 94.62 }, { "epoch": 23.59208523592085, "grad_norm": 0.6229885816574097, "learning_rate": 0.00021359715880263824, "loss": 0.6591, "max_memory_allocated (GB)": 60.24, "memory_allocated (GB)": 3.2, "step": 15500, "total_memory_available (GB)": 94.62 }, { "epoch": 24.0, "eval_accuracy": 0.2186681789832648, "eval_loss": 4.89931058883667, "eval_runtime": 1133.9373, "eval_samples_per_second": 592.464, "eval_steps_per_second": 0.579, "max_memory_allocated (GB)": 60.24, "memory_allocated (GB)": 3.2, "step": 15768, "total_memory_available (GB)": 94.62 }, { "epoch": 24.3531202435312, "grad_norm": 0.6464186310768127, "learning_rate": 0.00018822932521562658, "loss": 0.6447, "max_memory_allocated (GB)": 60.24, "memory_allocated (GB)": 3.2, "step": 16000, "total_memory_available (GB)": 94.62 }, { "epoch": 25.0, "eval_accuracy": 0.21962528486180016, "eval_loss": 4.8415398597717285, "eval_runtime": 1126.1392, "eval_samples_per_second": 596.567, "eval_steps_per_second": 0.583, "max_memory_allocated (GB)": 60.24, "memory_allocated (GB)": 3.2, "step": 16425, "total_memory_available (GB)": 94.62 }, { "epoch": 25.114155251141554, "grad_norm": 0.695124626159668, "learning_rate": 0.00016286149162861492, "loss": 0.6323, "max_memory_allocated (GB)": 60.24, "memory_allocated (GB)": 3.2, "step": 16500, "total_memory_available (GB)": 94.62 }, { "epoch": 25.875190258751903, "grad_norm": 0.7219062447547913, "learning_rate": 0.00013749365804160323, "loss": 0.6107, "max_memory_allocated (GB)": 60.24, "memory_allocated (GB)": 3.2, "step": 17000, "total_memory_available (GB)": 94.62 }, { "epoch": 26.0, "eval_accuracy": 0.22164369166008005, "eval_loss": 4.859982967376709, "eval_runtime": 1131.8158, "eval_samples_per_second": 593.574, "eval_steps_per_second": 0.58, "max_memory_allocated (GB)": 60.24, "memory_allocated (GB)": 3.2, "step": 17082, "total_memory_available (GB)": 94.62 }, { "epoch": 26.636225266362253, "grad_norm": 0.6680580377578735, "learning_rate": 0.00011212582445459158, "loss": 0.5958, "max_memory_allocated (GB)": 60.24, "memory_allocated (GB)": 3.2, "step": 17500, "total_memory_available (GB)": 94.62 }, { "epoch": 27.0, "eval_accuracy": 0.22447184277861382, "eval_loss": 4.839137554168701, "eval_runtime": 1135.3766, "eval_samples_per_second": 591.713, "eval_steps_per_second": 0.579, "max_memory_allocated (GB)": 60.24, "memory_allocated (GB)": 3.2, "step": 17739, "total_memory_available (GB)": 94.62 }, { "epoch": 27.397260273972602, "grad_norm": 0.6511676907539368, "learning_rate": 8.67579908675799e-05, "loss": 0.5836, "max_memory_allocated (GB)": 60.24, "memory_allocated (GB)": 3.2, "step": 18000, "total_memory_available (GB)": 94.62 }, { "epoch": 28.0, "eval_accuracy": 0.22654234709749826, "eval_loss": 4.856111526489258, "eval_runtime": 1131.6209, "eval_samples_per_second": 593.677, "eval_steps_per_second": 0.581, "max_memory_allocated (GB)": 60.24, "memory_allocated (GB)": 3.2, "step": 18396, "total_memory_available (GB)": 94.62 }, { "epoch": 28.15829528158295, "grad_norm": 0.6694862842559814, "learning_rate": 6.139015728056824e-05, "loss": 0.5713, "max_memory_allocated (GB)": 60.24, "memory_allocated (GB)": 3.2, "step": 18500, "total_memory_available (GB)": 94.62 }, { "epoch": 28.919330289193304, "grad_norm": 0.6698545813560486, "learning_rate": 3.6022323693556566e-05, "loss": 0.5547, "max_memory_allocated (GB)": 60.24, "memory_allocated (GB)": 3.2, "step": 19000, "total_memory_available (GB)": 94.62 }, { "epoch": 29.0, "eval_accuracy": 0.2294940437648943, "eval_loss": 4.793288230895996, "eval_runtime": 1134.2517, "eval_samples_per_second": 592.3, "eval_steps_per_second": 0.579, "max_memory_allocated (GB)": 60.24, "memory_allocated (GB)": 3.2, "step": 19053, "total_memory_available (GB)": 94.62 }, { "epoch": 29.680365296803654, "grad_norm": 0.5940834879875183, "learning_rate": 1.06544901065449e-05, "loss": 0.547, "max_memory_allocated (GB)": 60.24, "memory_allocated (GB)": 3.2, "step": 19500, "total_memory_available (GB)": 94.62 }, { "epoch": 30.0, "eval_accuracy": 0.22931691219483877, "eval_loss": 4.809002876281738, "eval_runtime": 1130.6604, "eval_samples_per_second": 594.181, "eval_steps_per_second": 0.581, "max_memory_allocated (GB)": 60.24, "memory_allocated (GB)": 3.2, "step": 19710, "total_memory_available (GB)": 94.62 }, { "epoch": 30.0, "max_memory_allocated (GB)": 1.42, "memory_allocated (GB)": 1.42, "step": 19710, "total_flos": 2.9333313524800244e+21, "total_memory_available (GB)": 94.62, "train_loss": 0.0, "train_runtime": 0.2168, "train_samples_per_second": 92964947.027, "train_steps_per_second": 90914.595 } ], "logging_steps": 500, "max_steps": 19710, "num_input_tokens_seen": 0, "num_train_epochs": 30, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.9333313524800244e+21, "train_batch_size": 128, "trial_name": null, "trial_params": null }