| { |
| "best_metric": 0.10069680213928223, |
| "best_model_checkpoint": "/home/bly/GitHub/results/AKK-T5Small/train_3/checkpoint-35736", |
| "epoch": 4.0, |
| "eval_steps": 500, |
| "global_step": 35736, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.05596597268860533, |
| "grad_norm": 0.10176054388284683, |
| "learning_rate": 7.43700322782826e-05, |
| "loss": 0.1, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.11193194537721066, |
| "grad_norm": 0.10266093909740448, |
| "learning_rate": 0.0001487400645565652, |
| "loss": 0.0879, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.16789791806581597, |
| "grad_norm": 0.10228092223405838, |
| "learning_rate": 0.00022311009683484775, |
| "loss": 0.0828, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.2238638907544213, |
| "grad_norm": 0.09052737057209015, |
| "learning_rate": 0.0002974801291131304, |
| "loss": 0.0806, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.2798298634430266, |
| "grad_norm": 0.09598143398761749, |
| "learning_rate": 0.00037185016139141294, |
| "loss": 0.0789, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.33579583613163194, |
| "grad_norm": 0.1152188628911972, |
| "learning_rate": 0.0004462201936696955, |
| "loss": 0.0809, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.3917618088202373, |
| "grad_norm": 0.09766815602779388, |
| "learning_rate": 0.00046992900838227654, |
| "loss": 0.0833, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.4477277815088426, |
| "grad_norm": 0.1150614470243454, |
| "learning_rate": 0.00046979751363928165, |
| "loss": 0.0834, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.503693754197448, |
| "grad_norm": 0.0978974923491478, |
| "learning_rate": 0.00046966575537976777, |
| "loss": 0.0829, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.5596597268860533, |
| "grad_norm": 0.08220936357975006, |
| "learning_rate": 0.0004695339971202539, |
| "loss": 0.0828, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.6156256995746586, |
| "grad_norm": 0.1285913735628128, |
| "learning_rate": 0.000469402502377259, |
| "loss": 0.084, |
| "step": 5500 |
| }, |
| { |
| "epoch": 0.6715916722632639, |
| "grad_norm": 0.11280138045549393, |
| "learning_rate": 0.0004692707441177451, |
| "loss": 0.0841, |
| "step": 6000 |
| }, |
| { |
| "epoch": 0.7275576449518693, |
| "grad_norm": 0.1057014912366867, |
| "learning_rate": 0.00046913898585823117, |
| "loss": 0.0837, |
| "step": 6500 |
| }, |
| { |
| "epoch": 0.7835236176404746, |
| "grad_norm": 0.12952521443367004, |
| "learning_rate": 0.0004690072275987173, |
| "loss": 0.0834, |
| "step": 7000 |
| }, |
| { |
| "epoch": 0.8394895903290799, |
| "grad_norm": 0.08023126423358917, |
| "learning_rate": 0.0004688754693392034, |
| "loss": 0.084, |
| "step": 7500 |
| }, |
| { |
| "epoch": 0.8954555630176853, |
| "grad_norm": 0.1279716193675995, |
| "learning_rate": 0.0004687437110796895, |
| "loss": 0.0851, |
| "step": 8000 |
| }, |
| { |
| "epoch": 0.9514215357062906, |
| "grad_norm": 0.09486804157495499, |
| "learning_rate": 0.0004686119528201756, |
| "loss": 0.0837, |
| "step": 8500 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_loss": 0.10559933632612228, |
| "eval_runtime": 55.647, |
| "eval_samples_per_second": 3415.456, |
| "eval_steps_per_second": 13.352, |
| "step": 8934 |
| }, |
| { |
| "epoch": 1.007387508394896, |
| "grad_norm": 0.10948459804058075, |
| "learning_rate": 0.0004684801945606617, |
| "loss": 0.0835, |
| "step": 9000 |
| }, |
| { |
| "epoch": 1.0633534810835013, |
| "grad_norm": 0.1183663085103035, |
| "learning_rate": 0.0004683486998176668, |
| "loss": 0.0808, |
| "step": 9500 |
| }, |
| { |
| "epoch": 1.1193194537721065, |
| "grad_norm": 0.10495132952928543, |
| "learning_rate": 0.00046821694155815293, |
| "loss": 0.0823, |
| "step": 10000 |
| }, |
| { |
| "epoch": 1.1752854264607118, |
| "grad_norm": 0.11127595603466034, |
| "learning_rate": 0.000468085183298639, |
| "loss": 0.0821, |
| "step": 10500 |
| }, |
| { |
| "epoch": 1.2312513991493172, |
| "grad_norm": 0.11221757531166077, |
| "learning_rate": 0.0004679534250391251, |
| "loss": 0.0827, |
| "step": 11000 |
| }, |
| { |
| "epoch": 1.2872173718379225, |
| "grad_norm": 0.10758490115404129, |
| "learning_rate": 0.0004678216667796112, |
| "loss": 0.0832, |
| "step": 11500 |
| }, |
| { |
| "epoch": 1.343183344526528, |
| "grad_norm": 0.11715873330831528, |
| "learning_rate": 0.00046768990852009734, |
| "loss": 0.0832, |
| "step": 12000 |
| }, |
| { |
| "epoch": 1.3991493172151332, |
| "grad_norm": 0.1374848484992981, |
| "learning_rate": 0.0004675581502605834, |
| "loss": 0.0828, |
| "step": 12500 |
| }, |
| { |
| "epoch": 1.4551152899037385, |
| "grad_norm": 0.0902022272348404, |
| "learning_rate": 0.0004674263920010695, |
| "loss": 0.0831, |
| "step": 13000 |
| }, |
| { |
| "epoch": 1.5110812625923438, |
| "grad_norm": 0.09007969498634338, |
| "learning_rate": 0.0004672946337415556, |
| "loss": 0.0828, |
| "step": 13500 |
| }, |
| { |
| "epoch": 1.567047235280949, |
| "grad_norm": 0.11341474205255508, |
| "learning_rate": 0.0004671628754820417, |
| "loss": 0.0829, |
| "step": 14000 |
| }, |
| { |
| "epoch": 1.6230132079695545, |
| "grad_norm": 0.10352347791194916, |
| "learning_rate": 0.0004670311172225278, |
| "loss": 0.083, |
| "step": 14500 |
| }, |
| { |
| "epoch": 1.67897918065816, |
| "grad_norm": 0.09623808413743973, |
| "learning_rate": 0.00046689935896301394, |
| "loss": 0.0831, |
| "step": 15000 |
| }, |
| { |
| "epoch": 1.7349451533467652, |
| "grad_norm": 0.1119527593255043, |
| "learning_rate": 0.0004667676007035, |
| "loss": 0.0834, |
| "step": 15500 |
| }, |
| { |
| "epoch": 1.7909111260353705, |
| "grad_norm": 0.09531886875629425, |
| "learning_rate": 0.0004666358424439861, |
| "loss": 0.0828, |
| "step": 16000 |
| }, |
| { |
| "epoch": 1.8468770987239758, |
| "grad_norm": 0.11060699075460434, |
| "learning_rate": 0.0004665043477009912, |
| "loss": 0.0838, |
| "step": 16500 |
| }, |
| { |
| "epoch": 1.902843071412581, |
| "grad_norm": 0.09451009333133698, |
| "learning_rate": 0.0004663728529579964, |
| "loss": 0.0827, |
| "step": 17000 |
| }, |
| { |
| "epoch": 1.9588090441011865, |
| "grad_norm": 0.09581800550222397, |
| "learning_rate": 0.00046624109469848245, |
| "loss": 0.0839, |
| "step": 17500 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_loss": 0.10421828925609589, |
| "eval_runtime": 55.9875, |
| "eval_samples_per_second": 3394.685, |
| "eval_steps_per_second": 13.271, |
| "step": 17868 |
| }, |
| { |
| "epoch": 2.014775016789792, |
| "grad_norm": 0.0865124985575676, |
| "learning_rate": 0.00046610933643896857, |
| "loss": 0.0849, |
| "step": 18000 |
| }, |
| { |
| "epoch": 2.0707409894783972, |
| "grad_norm": 0.1099601686000824, |
| "learning_rate": 0.0004659778416959737, |
| "loss": 0.0856, |
| "step": 18500 |
| }, |
| { |
| "epoch": 2.1267069621670025, |
| "grad_norm": 0.10624418407678604, |
| "learning_rate": 0.0004658460834364598, |
| "loss": 0.0876, |
| "step": 19000 |
| }, |
| { |
| "epoch": 2.1826729348556078, |
| "grad_norm": 0.11239995807409286, |
| "learning_rate": 0.00046571432517694586, |
| "loss": 0.0884, |
| "step": 19500 |
| }, |
| { |
| "epoch": 2.238638907544213, |
| "grad_norm": 0.10761316865682602, |
| "learning_rate": 0.000465582566917432, |
| "loss": 0.0881, |
| "step": 20000 |
| }, |
| { |
| "epoch": 2.2946048802328183, |
| "grad_norm": 0.13719366490840912, |
| "learning_rate": 0.0004654508086579181, |
| "loss": 0.088, |
| "step": 20500 |
| }, |
| { |
| "epoch": 2.3505708529214235, |
| "grad_norm": 0.12646299600601196, |
| "learning_rate": 0.0004653190503984042, |
| "loss": 0.0883, |
| "step": 21000 |
| }, |
| { |
| "epoch": 2.4065368256100292, |
| "grad_norm": 0.10855542868375778, |
| "learning_rate": 0.00046518729213889027, |
| "loss": 0.0894, |
| "step": 21500 |
| }, |
| { |
| "epoch": 2.4625027982986345, |
| "grad_norm": 0.1014239639043808, |
| "learning_rate": 0.0004650555338793764, |
| "loss": 0.089, |
| "step": 22000 |
| }, |
| { |
| "epoch": 2.5184687709872398, |
| "grad_norm": 0.09801892936229706, |
| "learning_rate": 0.00046492377561986245, |
| "loss": 0.0892, |
| "step": 22500 |
| }, |
| { |
| "epoch": 2.574434743675845, |
| "grad_norm": 0.11079917848110199, |
| "learning_rate": 0.0004647920173603486, |
| "loss": 0.0889, |
| "step": 23000 |
| }, |
| { |
| "epoch": 2.6304007163644503, |
| "grad_norm": 0.10639207065105438, |
| "learning_rate": 0.0004646605226173537, |
| "loss": 0.089, |
| "step": 23500 |
| }, |
| { |
| "epoch": 2.686366689053056, |
| "grad_norm": 0.10807494074106216, |
| "learning_rate": 0.00046452876435783985, |
| "loss": 0.0884, |
| "step": 24000 |
| }, |
| { |
| "epoch": 2.742332661741661, |
| "grad_norm": 0.11594618856906891, |
| "learning_rate": 0.0004643970060983259, |
| "loss": 0.089, |
| "step": 24500 |
| }, |
| { |
| "epoch": 2.7982986344302665, |
| "grad_norm": 0.12219995260238647, |
| "learning_rate": 0.00046426524783881203, |
| "loss": 0.0885, |
| "step": 25000 |
| }, |
| { |
| "epoch": 2.8542646071188718, |
| "grad_norm": 0.10122818499803543, |
| "learning_rate": 0.0004641334895792981, |
| "loss": 0.0896, |
| "step": 25500 |
| }, |
| { |
| "epoch": 2.910230579807477, |
| "grad_norm": 0.10384261608123779, |
| "learning_rate": 0.0004640017313197842, |
| "loss": 0.0891, |
| "step": 26000 |
| }, |
| { |
| "epoch": 2.9661965524960823, |
| "grad_norm": 0.11421239376068115, |
| "learning_rate": 0.0004638702365767893, |
| "loss": 0.0882, |
| "step": 26500 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_loss": 0.10217838734388351, |
| "eval_runtime": 55.7151, |
| "eval_samples_per_second": 3411.283, |
| "eval_steps_per_second": 13.336, |
| "step": 26802 |
| }, |
| { |
| "epoch": 3.0221625251846875, |
| "grad_norm": 0.10075175762176514, |
| "learning_rate": 0.00046373847831727543, |
| "loss": 0.0873, |
| "step": 27000 |
| }, |
| { |
| "epoch": 3.0781284978732932, |
| "grad_norm": 0.10860750824213028, |
| "learning_rate": 0.0004636067200577615, |
| "loss": 0.0848, |
| "step": 27500 |
| }, |
| { |
| "epoch": 3.1340944705618985, |
| "grad_norm": 0.08900290727615356, |
| "learning_rate": 0.00046347496179824767, |
| "loss": 0.0849, |
| "step": 28000 |
| }, |
| { |
| "epoch": 3.1900604432505038, |
| "grad_norm": 0.09614147245883942, |
| "learning_rate": 0.00046334320353873373, |
| "loss": 0.085, |
| "step": 28500 |
| }, |
| { |
| "epoch": 3.246026415939109, |
| "grad_norm": 0.10400953888893127, |
| "learning_rate": 0.00046321144527921985, |
| "loss": 0.086, |
| "step": 29000 |
| }, |
| { |
| "epoch": 3.3019923886277143, |
| "grad_norm": 0.09544303268194199, |
| "learning_rate": 0.0004630796870197059, |
| "loss": 0.0859, |
| "step": 29500 |
| }, |
| { |
| "epoch": 3.3579583613163195, |
| "grad_norm": 0.10353045165538788, |
| "learning_rate": 0.00046294792876019203, |
| "loss": 0.0863, |
| "step": 30000 |
| }, |
| { |
| "epoch": 3.413924334004925, |
| "grad_norm": 0.10539606213569641, |
| "learning_rate": 0.00046281669753371623, |
| "loss": 0.0863, |
| "step": 30500 |
| }, |
| { |
| "epoch": 3.4698903066935305, |
| "grad_norm": 0.09320353716611862, |
| "learning_rate": 0.0004626849392742023, |
| "loss": 0.0868, |
| "step": 31000 |
| }, |
| { |
| "epoch": 3.5258562793821357, |
| "grad_norm": 0.09406547993421555, |
| "learning_rate": 0.0004625531810146884, |
| "loss": 0.0871, |
| "step": 31500 |
| }, |
| { |
| "epoch": 3.581822252070741, |
| "grad_norm": 0.11765696853399277, |
| "learning_rate": 0.0004624214227551745, |
| "loss": 0.0859, |
| "step": 32000 |
| }, |
| { |
| "epoch": 3.6377882247593463, |
| "grad_norm": 0.10832593590021133, |
| "learning_rate": 0.0004622896644956606, |
| "loss": 0.0869, |
| "step": 32500 |
| }, |
| { |
| "epoch": 3.6937541974479515, |
| "grad_norm": 0.0849902331829071, |
| "learning_rate": 0.0004621579062361467, |
| "loss": 0.0865, |
| "step": 33000 |
| }, |
| { |
| "epoch": 3.7497201701365572, |
| "grad_norm": 0.11597160995006561, |
| "learning_rate": 0.00046202667500967086, |
| "loss": 0.0869, |
| "step": 33500 |
| }, |
| { |
| "epoch": 3.805686142825162, |
| "grad_norm": 0.09491798281669617, |
| "learning_rate": 0.0004618949167501569, |
| "loss": 0.0876, |
| "step": 34000 |
| }, |
| { |
| "epoch": 3.8616521155137677, |
| "grad_norm": 0.10581225156784058, |
| "learning_rate": 0.00046176315849064304, |
| "loss": 0.087, |
| "step": 34500 |
| }, |
| { |
| "epoch": 3.917618088202373, |
| "grad_norm": 0.08706603944301605, |
| "learning_rate": 0.00046163140023112916, |
| "loss": 0.0865, |
| "step": 35000 |
| }, |
| { |
| "epoch": 3.9735840608909783, |
| "grad_norm": 0.09158705174922943, |
| "learning_rate": 0.0004614996419716153, |
| "loss": 0.0863, |
| "step": 35500 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_loss": 0.10069680213928223, |
| "eval_runtime": 55.9026, |
| "eval_samples_per_second": 3399.842, |
| "eval_steps_per_second": 13.291, |
| "step": 35736 |
| } |
| ], |
| "logging_steps": 500, |
| "max_steps": 1786800, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 200, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "EarlyStoppingCallback": { |
| "args": { |
| "early_stopping_patience": 15, |
| "early_stopping_threshold": 0.0 |
| }, |
| "attributes": { |
| "early_stopping_patience_counter": 0 |
| } |
| }, |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.5477038775690854e+17, |
| "train_batch_size": 128, |
| "trial_name": null, |
| "trial_params": null |
| } |