{
  "best_metric": 0.88762098828324,
  "best_model_checkpoint": "test-glue/checkpoint-98176",
  "epoch": 4.0,
  "global_step": 98176,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.020371577574967405,
      "learning_rate": 1.9898142112125166e-05,
      "loss": 0.659133544921875,
      "step": 500
    },
    {
      "epoch": 0.04074315514993481,
      "learning_rate": 1.9796284224250328e-05,
      "loss": 0.5110537109375,
      "step": 1000
    },
    {
      "epoch": 0.06111473272490222,
      "learning_rate": 1.969442633637549e-05,
      "loss": 0.485730224609375,
      "step": 1500
    },
    {
      "epoch": 0.08148631029986962,
      "learning_rate": 1.9592568448500654e-05,
      "loss": 0.48240771484375,
      "step": 2000
    },
    {
      "epoch": 0.10185788787483703,
      "learning_rate": 1.9490710560625816e-05,
      "loss": 0.46434228515625,
      "step": 2500
    },
    {
      "epoch": 0.12222946544980444,
      "learning_rate": 1.938885267275098e-05,
      "loss": 0.449291259765625,
      "step": 3000
    },
    {
      "epoch": 0.14260104302477183,
      "learning_rate": 1.9286994784876142e-05,
      "loss": 0.4585693359375,
      "step": 3500
    },
    {
      "epoch": 0.16297262059973924,
      "learning_rate": 1.9185136897001307e-05,
      "loss": 0.44105908203125,
      "step": 4000
    },
    {
      "epoch": 0.18334419817470665,
      "learning_rate": 1.9083279009126468e-05,
      "loss": 0.444558837890625,
      "step": 4500
    },
    {
      "epoch": 0.20371577574967406,
      "learning_rate": 1.898142112125163e-05,
      "loss": 0.45100146484375,
      "step": 5000
    },
    {
      "epoch": 0.22408735332464147,
      "learning_rate": 1.8879563233376795e-05,
      "loss": 0.448494140625,
      "step": 5500
    },
    {
      "epoch": 0.24445893089960888,
      "learning_rate": 1.8777705345501956e-05,
      "loss": 0.4301044921875,
      "step": 6000
    },
    {
      "epoch": 0.2648305084745763,
      "learning_rate": 1.867584745762712e-05,
      "loss": 0.42643359375,
      "step": 6500
    },
    {
      "epoch": 0.28520208604954367,
      "learning_rate": 1.8573989569752282e-05,
      "loss": 0.41756689453125,
      "step": 7000
    },
    {
      "epoch": 0.3055736636245111,
      "learning_rate": 1.8472131681877447e-05,
      "loss": 0.4308828125,
      "step": 7500
    },
    {
      "epoch": 0.3259452411994785,
      "learning_rate": 1.837027379400261e-05,
      "loss": 0.41683154296875,
      "step": 8000
    },
    {
      "epoch": 0.34631681877444587,
      "learning_rate": 1.8268415906127773e-05,
      "loss": 0.413720703125,
      "step": 8500
    },
    {
      "epoch": 0.3666883963494133,
      "learning_rate": 1.8166558018252935e-05,
      "loss": 0.425869140625,
      "step": 9000
    },
    {
      "epoch": 0.3870599739243807,
      "learning_rate": 1.80647001303781e-05,
      "loss": 0.42243359375,
      "step": 9500
    },
    {
      "epoch": 0.4074315514993481,
      "learning_rate": 1.796284224250326e-05,
      "loss": 0.415326171875,
      "step": 10000
    },
    {
      "epoch": 0.4278031290743155,
      "learning_rate": 1.7860984354628423e-05,
      "loss": 0.4072890625,
      "step": 10500
    },
    {
      "epoch": 0.44817470664928294,
      "learning_rate": 1.7759126466753587e-05,
      "loss": 0.4062841796875,
      "step": 11000
    },
    {
      "epoch": 0.4685462842242503,
      "learning_rate": 1.765726857887875e-05,
      "loss": 0.3890205078125,
      "step": 11500
    },
    {
      "epoch": 0.48891786179921776,
      "learning_rate": 1.7555410691003914e-05,
      "loss": 0.3969296875,
      "step": 12000
    },
    {
      "epoch": 0.5092894393741851,
      "learning_rate": 1.7453552803129075e-05,
      "loss": 0.3952880859375,
      "step": 12500
    },
    {
      "epoch": 0.5296610169491526,
      "learning_rate": 1.7351694915254237e-05,
      "loss": 0.41177734375,
      "step": 13000
    },
    {
      "epoch": 0.5500325945241199,
      "learning_rate": 1.72498370273794e-05,
      "loss": 0.3807109375,
      "step": 13500
    },
    {
      "epoch": 0.5704041720990873,
      "learning_rate": 1.7147979139504566e-05,
      "loss": 0.403853515625,
      "step": 14000
    },
    {
      "epoch": 0.5907757496740548,
      "learning_rate": 1.7046121251629728e-05,
      "loss": 0.411607421875,
      "step": 14500
    },
    {
      "epoch": 0.6111473272490222,
      "learning_rate": 1.6944263363754893e-05,
      "loss": 0.3996162109375,
      "step": 15000
    },
    {
      "epoch": 0.6315189048239895,
      "learning_rate": 1.6842405475880054e-05,
      "loss": 0.3948984375,
      "step": 15500
    },
    {
      "epoch": 0.651890482398957,
      "learning_rate": 1.6740547588005215e-05,
      "loss": 0.389115234375,
      "step": 16000
    },
    {
      "epoch": 0.6722620599739244,
      "learning_rate": 1.663868970013038e-05,
      "loss": 0.403466796875,
      "step": 16500
    },
    {
      "epoch": 0.6926336375488917,
      "learning_rate": 1.6536831812255542e-05,
      "loss": 0.4059140625,
      "step": 17000
    },
    {
      "epoch": 0.7130052151238592,
      "learning_rate": 1.6434973924380707e-05,
      "loss": 0.3873095703125,
      "step": 17500
    },
    {
      "epoch": 0.7333767926988266,
      "learning_rate": 1.6333116036505868e-05,
      "loss": 0.3887783203125,
      "step": 18000
    },
    {
      "epoch": 0.753748370273794,
      "learning_rate": 1.623125814863103e-05,
      "loss": 0.395041015625,
      "step": 18500
    },
    {
      "epoch": 0.7741199478487614,
      "learning_rate": 1.6129400260756194e-05,
      "loss": 0.39855078125,
      "step": 19000
    },
    {
      "epoch": 0.7944915254237288,
      "learning_rate": 1.6027542372881356e-05,
      "loss": 0.390134765625,
      "step": 19500
    },
    {
      "epoch": 0.8148631029986962,
      "learning_rate": 1.592568448500652e-05,
      "loss": 0.3717578125,
      "step": 20000
    },
    {
      "epoch": 0.8352346805736637,
      "learning_rate": 1.5823826597131685e-05,
      "loss": 0.387556640625,
      "step": 20500
    },
    {
      "epoch": 0.855606258148631,
      "learning_rate": 1.5721968709256844e-05,
      "loss": 0.389892578125,
      "step": 21000
    },
    {
      "epoch": 0.8759778357235984,
      "learning_rate": 1.562011082138201e-05,
      "loss": 0.38671875,
      "step": 21500
    },
    {
      "epoch": 0.8963494132985659,
      "learning_rate": 1.5518252933507173e-05,
      "loss": 0.36665234375,
      "step": 22000
    },
    {
      "epoch": 0.9167209908735332,
      "learning_rate": 1.5416395045632335e-05,
      "loss": 0.3940859375,
      "step": 22500
    },
    {
      "epoch": 0.9370925684485006,
      "learning_rate": 1.53145371577575e-05,
      "loss": 0.382552734375,
      "step": 23000
    },
    {
      "epoch": 0.9574641460234681,
      "learning_rate": 1.5212679269882663e-05,
      "loss": 0.38312109375,
      "step": 23500
    },
    {
      "epoch": 0.9778357235984355,
      "learning_rate": 1.5110821382007822e-05,
      "loss": 0.3804765625,
      "step": 24000
    },
    {
      "epoch": 0.9982073011734028,
      "learning_rate": 1.5008963494132987e-05,
      "loss": 0.383294921875,
      "step": 24500
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.8807947019867549,
      "eval_loss": 0.3417690396308899,
      "step": 24544
    },
    {
      "epoch": 1.0185788787483703,
      "learning_rate": 1.490710560625815e-05,
      "loss": 0.293595703125,
      "step": 25000
    },
    {
      "epoch": 1.0389504563233376,
      "learning_rate": 1.4805247718383314e-05,
      "loss": 0.29225,
      "step": 25500
    },
    {
      "epoch": 1.0593220338983051,
      "learning_rate": 1.4703389830508477e-05,
      "loss": 0.294130859375,
      "step": 26000
    },
    {
      "epoch": 1.0796936114732725,
      "learning_rate": 1.4601531942633638e-05,
      "loss": 0.28494921875,
      "step": 26500
    },
    {
      "epoch": 1.1000651890482398,
      "learning_rate": 1.4499674054758801e-05,
      "loss": 0.28199609375,
      "step": 27000
    },
    {
      "epoch": 1.1204367666232073,
      "learning_rate": 1.4397816166883964e-05,
      "loss": 0.28830078125,
      "step": 27500
    },
    {
      "epoch": 1.1408083441981747,
      "learning_rate": 1.4295958279009128e-05,
      "loss": 0.283265625,
      "step": 28000
    },
    {
      "epoch": 1.161179921773142,
      "learning_rate": 1.419410039113429e-05,
      "loss": 0.284984375,
      "step": 28500
    },
    {
      "epoch": 1.1815514993481095,
      "learning_rate": 1.4092242503259454e-05,
      "loss": 0.2741875,
      "step": 29000
    },
    {
      "epoch": 1.2019230769230769,
      "learning_rate": 1.3990384615384615e-05,
      "loss": 0.293828125,
      "step": 29500
    },
    {
      "epoch": 1.2222946544980444,
      "learning_rate": 1.3888526727509778e-05,
      "loss": 0.291970703125,
      "step": 30000
    },
    {
      "epoch": 1.2426662320730117,
      "learning_rate": 1.3786668839634942e-05,
      "loss": 0.2823515625,
      "step": 30500
    },
    {
      "epoch": 1.263037809647979,
      "learning_rate": 1.3684810951760106e-05,
      "loss": 0.2898671875,
      "step": 31000
    },
    {
      "epoch": 1.2834093872229466,
      "learning_rate": 1.358295306388527e-05,
      "loss": 0.291859375,
      "step": 31500
    },
    {
      "epoch": 1.303780964797914,
      "learning_rate": 1.3481095176010431e-05,
      "loss": 0.292462890625,
      "step": 32000
    },
    {
      "epoch": 1.3241525423728815,
      "learning_rate": 1.3379237288135594e-05,
      "loss": 0.29555859375,
      "step": 32500
    },
    {
      "epoch": 1.3445241199478488,
      "learning_rate": 1.3277379400260757e-05,
      "loss": 0.288212890625,
      "step": 33000
    },
    {
      "epoch": 1.3648956975228161,
      "learning_rate": 1.317552151238592e-05,
      "loss": 0.294849609375,
      "step": 33500
    },
    {
      "epoch": 1.3852672750977835,
      "learning_rate": 1.3073663624511084e-05,
      "loss": 0.289576171875,
      "step": 34000
    },
    {
      "epoch": 1.405638852672751,
      "learning_rate": 1.2971805736636247e-05,
      "loss": 0.289185546875,
      "step": 34500
    },
    {
      "epoch": 1.4260104302477183,
      "learning_rate": 1.2869947848761408e-05,
      "loss": 0.283068359375,
      "step": 35000
    },
    {
      "epoch": 1.4463820078226859,
      "learning_rate": 1.2768089960886571e-05,
      "loss": 0.28625390625,
      "step": 35500
    },
    {
      "epoch": 1.4667535853976532,
      "learning_rate": 1.2666232073011735e-05,
      "loss": 0.282048828125,
      "step": 36000
    },
    {
      "epoch": 1.4871251629726205,
      "learning_rate": 1.2564374185136898e-05,
      "loss": 0.28896484375,
      "step": 36500
    },
    {
      "epoch": 1.5074967405475879,
      "learning_rate": 1.2462516297262063e-05,
      "loss": 0.276443359375,
      "step": 37000
    },
    {
      "epoch": 1.5278683181225554,
      "learning_rate": 1.2360658409387226e-05,
      "loss": 0.310044921875,
      "step": 37500
    },
    {
      "epoch": 1.548239895697523,
      "learning_rate": 1.2258800521512385e-05,
      "loss": 0.285341796875,
      "step": 38000
    },
    {
      "epoch": 1.5686114732724903,
      "learning_rate": 1.215694263363755e-05,
      "loss": 0.282486328125,
      "step": 38500
    },
    {
      "epoch": 1.5889830508474576,
      "learning_rate": 1.2055084745762713e-05,
      "loss": 0.295353515625,
      "step": 39000
    },
    {
      "epoch": 1.609354628422425,
      "learning_rate": 1.1953226857887877e-05,
      "loss": 0.284724609375,
      "step": 39500
    },
    {
      "epoch": 1.6297262059973925,
      "learning_rate": 1.185136897001304e-05,
      "loss": 0.286890625,
      "step": 40000
    },
    {
      "epoch": 1.6500977835723598,
      "learning_rate": 1.1749511082138201e-05,
      "loss": 0.278595703125,
      "step": 40500
    },
    {
      "epoch": 1.6704693611473274,
      "learning_rate": 1.1647653194263364e-05,
      "loss": 0.30148046875,
      "step": 41000
    },
    {
      "epoch": 1.6908409387222947,
      "learning_rate": 1.1545795306388527e-05,
      "loss": 0.274353515625,
      "step": 41500
    },
    {
      "epoch": 1.711212516297262,
      "learning_rate": 1.144393741851369e-05,
      "loss": 0.29472265625,
      "step": 42000
    },
    {
      "epoch": 1.7315840938722293,
      "learning_rate": 1.1342079530638854e-05,
      "loss": 0.2853359375,
      "step": 42500
    },
    {
      "epoch": 1.7519556714471969,
      "learning_rate": 1.1240221642764017e-05,
      "loss": 0.286759765625,
      "step": 43000
    },
    {
      "epoch": 1.7723272490221644,
      "learning_rate": 1.1138363754889178e-05,
      "loss": 0.28123046875,
      "step": 43500
    },
    {
      "epoch": 1.7926988265971318,
      "learning_rate": 1.1036505867014341e-05,
      "loss": 0.2833515625,
      "step": 44000
    },
    {
      "epoch": 1.813070404172099,
      "learning_rate": 1.0934647979139506e-05,
      "loss": 0.281861328125,
      "step": 44500
    },
    {
      "epoch": 1.8334419817470664,
      "learning_rate": 1.083279009126467e-05,
      "loss": 0.2866171875,
      "step": 45000
    },
    {
      "epoch": 1.8538135593220337,
      "learning_rate": 1.0730932203389833e-05,
      "loss": 0.28151953125,
      "step": 45500
    },
    {
      "epoch": 1.8741851368970013,
      "learning_rate": 1.0629074315514994e-05,
      "loss": 0.285787109375,
      "step": 46000
    },
    {
      "epoch": 1.8945567144719688,
      "learning_rate": 1.0527216427640157e-05,
      "loss": 0.28494921875,
      "step": 46500
    },
    {
      "epoch": 1.9149282920469362,
      "learning_rate": 1.042535853976532e-05,
      "loss": 0.281390625,
      "step": 47000
    },
    {
      "epoch": 1.9352998696219035,
      "learning_rate": 1.0323500651890483e-05,
      "loss": 0.2822734375,
      "step": 47500
    },
    {
      "epoch": 1.9556714471968708,
      "learning_rate": 1.0221642764015647e-05,
      "loss": 0.2885546875,
      "step": 48000
    },
    {
      "epoch": 1.9760430247718384,
      "learning_rate": 1.011978487614081e-05,
      "loss": 0.2759296875,
      "step": 48500
    },
    {
      "epoch": 1.996414602346806,
      "learning_rate": 1.0017926988265971e-05,
      "loss": 0.27166796875,
      "step": 49000
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.8865002547121752,
      "eval_loss": 0.34813082218170166,
      "step": 49088
    },
    {
      "epoch": 2.0167861799217732,
      "learning_rate": 9.916069100391134e-06,
      "loss": 0.2067265625,
      "step": 49500
    },
    {
      "epoch": 2.0371577574967406,
      "learning_rate": 9.814211212516298e-06,
      "loss": 0.18627734375,
      "step": 50000
    },
    {
      "epoch": 2.057529335071708,
      "learning_rate": 9.71235332464146e-06,
      "loss": 0.20591796875,
      "step": 50500
    },
    {
      "epoch": 2.077900912646675,
      "learning_rate": 9.610495436766624e-06,
      "loss": 0.207765625,
      "step": 51000
    },
    {
      "epoch": 2.098272490221643,
      "learning_rate": 9.508637548891787e-06,
      "loss": 0.200046875,
      "step": 51500
    },
    {
      "epoch": 2.1186440677966103,
      "learning_rate": 9.40677966101695e-06,
      "loss": 0.20955078125,
      "step": 52000
    },
    {
      "epoch": 2.1390156453715776,
      "learning_rate": 9.304921773142113e-06,
      "loss": 0.20408203125,
      "step": 52500
    },
    {
      "epoch": 2.159387222946545,
      "learning_rate": 9.203063885267276e-06,
      "loss": 0.1967578125,
      "step": 53000
    },
    {
      "epoch": 2.1797588005215123,
      "learning_rate": 9.101205997392438e-06,
      "loss": 0.2086171875,
      "step": 53500
    },
    {
      "epoch": 2.2001303780964796,
      "learning_rate": 8.999348109517601e-06,
      "loss": 0.19387890625,
      "step": 54000
    },
    {
      "epoch": 2.2205019556714474,
      "learning_rate": 8.897490221642766e-06,
      "loss": 0.2149140625,
      "step": 54500
    },
    {
      "epoch": 2.2408735332464147,
      "learning_rate": 8.795632333767927e-06,
      "loss": 0.20816015625,
      "step": 55000
    },
    {
      "epoch": 2.261245110821382,
      "learning_rate": 8.69377444589309e-06,
      "loss": 0.1981796875,
      "step": 55500
    },
    {
      "epoch": 2.2816166883963493,
      "learning_rate": 8.591916558018254e-06,
      "loss": 0.20524609375,
      "step": 56000
    },
    {
      "epoch": 2.3019882659713167,
      "learning_rate": 8.490058670143417e-06,
      "loss": 0.20031640625,
      "step": 56500
    },
    {
      "epoch": 2.322359843546284,
      "learning_rate": 8.38820078226858e-06,
      "loss": 0.2170234375,
      "step": 57000
    },
    {
      "epoch": 2.3427314211212518,
      "learning_rate": 8.286342894393743e-06,
      "loss": 0.202625,
      "step": 57500
    },
    {
      "epoch": 2.363102998696219,
      "learning_rate": 8.184485006518904e-06,
      "loss": 0.21112890625,
      "step": 58000
    },
    {
      "epoch": 2.3834745762711864,
      "learning_rate": 8.08262711864407e-06,
      "loss": 0.19984375,
      "step": 58500
    },
    {
      "epoch": 2.4038461538461537,
      "learning_rate": 7.980769230769232e-06,
      "loss": 0.2016015625,
      "step": 59000
    },
    {
      "epoch": 2.424217731421121,
      "learning_rate": 7.878911342894394e-06,
      "loss": 0.20296875,
      "step": 59500
    },
    {
      "epoch": 2.444589308996089,
      "learning_rate": 7.777053455019557e-06,
      "loss": 0.20494140625,
      "step": 60000
    },
    {
      "epoch": 2.464960886571056,
      "learning_rate": 7.67519556714472e-06,
      "loss": 0.19887890625,
      "step": 60500
    },
    {
      "epoch": 2.4853324641460235,
      "learning_rate": 7.573337679269883e-06,
      "loss": 0.2058984375,
      "step": 61000
    },
    {
      "epoch": 2.505704041720991,
      "learning_rate": 7.4714797913950464e-06,
      "loss": 0.2057265625,
      "step": 61500
    },
    {
      "epoch": 2.526075619295958,
      "learning_rate": 7.369621903520209e-06,
      "loss": 0.1991484375,
      "step": 62000
    },
    {
      "epoch": 2.5464471968709255,
      "learning_rate": 7.267764015645372e-06,
      "loss": 0.2016328125,
      "step": 62500
    },
    {
      "epoch": 2.5668187744458932,
      "learning_rate": 7.165906127770536e-06,
      "loss": 0.2006640625,
      "step": 63000
    },
    {
      "epoch": 2.5871903520208606,
      "learning_rate": 7.064048239895698e-06,
      "loss": 0.20136328125,
      "step": 63500
    },
    {
      "epoch": 2.607561929595828,
      "learning_rate": 6.962190352020861e-06,
      "loss": 0.210953125,
      "step": 64000
    },
    {
      "epoch": 2.627933507170795,
      "learning_rate": 6.8603324641460245e-06,
      "loss": 0.20728515625,
      "step": 64500
    },
    {
      "epoch": 2.648305084745763,
      "learning_rate": 6.758474576271187e-06,
      "loss": 0.19809765625,
      "step": 65000
    },
    {
      "epoch": 2.6686766623207303,
      "learning_rate": 6.65661668839635e-06,
      "loss": 0.20791796875,
      "step": 65500
    },
    {
      "epoch": 2.6890482398956976,
      "learning_rate": 6.554758800521513e-06,
      "loss": 0.1968359375,
      "step": 66000
    },
    {
      "epoch": 2.709419817470665,
      "learning_rate": 6.452900912646675e-06,
      "loss": 0.19569140625,
      "step": 66500
    },
    {
      "epoch": 2.7297913950456323,
      "learning_rate": 6.351043024771839e-06,
      "loss": 0.21024609375,
      "step": 67000
    },
    {
      "epoch": 2.7501629726205996,
      "learning_rate": 6.249185136897002e-06,
      "loss": 0.20680859375,
      "step": 67500
    },
    {
      "epoch": 2.770534550195567,
      "learning_rate": 6.147327249022165e-06,
      "loss": 0.206203125,
      "step": 68000
    },
    {
      "epoch": 2.7909061277705347,
      "learning_rate": 6.045469361147328e-06,
      "loss": 0.1981015625,
      "step": 68500
    },
    {
      "epoch": 2.811277705345502,
      "learning_rate": 5.94361147327249e-06,
      "loss": 0.20408984375,
      "step": 69000
    },
    {
      "epoch": 2.8316492829204694,
      "learning_rate": 5.841753585397653e-06,
      "loss": 0.20057421875,
      "step": 69500
    },
    {
      "epoch": 2.8520208604954367,
      "learning_rate": 5.739895697522817e-06,
      "loss": 0.19898046875,
      "step": 70000
    },
    {
      "epoch": 2.872392438070404,
      "learning_rate": 5.63803780964798e-06,
      "loss": 0.19495703125,
      "step": 70500
    },
    {
      "epoch": 2.8927640156453718,
      "learning_rate": 5.536179921773143e-06,
      "loss": 0.1974296875,
      "step": 71000
    },
    {
      "epoch": 2.913135593220339,
      "learning_rate": 5.434322033898306e-06,
      "loss": 0.20246484375,
      "step": 71500
    },
    {
      "epoch": 2.9335071707953064,
      "learning_rate": 5.332464146023468e-06,
      "loss": 0.19712109375,
      "step": 72000
    },
    {
      "epoch": 2.9538787483702738,
      "learning_rate": 5.230606258148631e-06,
      "loss": 0.202109375,
      "step": 72500
    },
    {
      "epoch": 2.974250325945241,
      "learning_rate": 5.128748370273794e-06,
      "loss": 0.2054140625,
      "step": 73000
    },
    {
      "epoch": 2.9946219035202084,
      "learning_rate": 5.026890482398958e-06,
      "loss": 0.202515625,
      "step": 73500
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.884666327050433,
      "eval_loss": 0.4447501301765442,
      "step": 73632
    },
    {
      "epoch": 3.014993481095176,
      "learning_rate": 4.92503259452412e-06,
      "loss": 0.13929296875,
      "step": 74000
    },
    {
      "epoch": 3.0353650586701435,
      "learning_rate": 4.823174706649283e-06,
      "loss": 0.14209765625,
      "step": 74500
    },
    {
      "epoch": 3.055736636245111,
      "learning_rate": 4.721316818774446e-06,
      "loss": 0.14396875,
      "step": 75000
    },
    {
      "epoch": 3.076108213820078,
      "learning_rate": 4.6194589308996094e-06,
      "loss": 0.13436328125,
      "step": 75500
    },
    {
      "epoch": 3.0964797913950455,
      "learning_rate": 4.5176010430247726e-06,
      "loss": 0.13274609375,
      "step": 76000
    },
    {
      "epoch": 3.1168513689700132,
      "learning_rate": 4.415743155149935e-06,
      "loss": 0.1425546875,
      "step": 76500
    },
    {
      "epoch": 3.1372229465449806,
      "learning_rate": 4.313885267275098e-06,
      "loss": 0.1465,
      "step": 77000
    },
    {
      "epoch": 3.157594524119948,
      "learning_rate": 4.212027379400261e-06,
      "loss": 0.13299609375,
      "step": 77500
    },
    {
      "epoch": 3.1779661016949152,
      "learning_rate": 4.110169491525424e-06,
      "loss": 0.146578125,
      "step": 78000
    },
    {
      "epoch": 3.1983376792698825,
      "learning_rate": 4.0083116036505874e-06,
      "loss": 0.1382421875,
      "step": 78500
    },
    {
      "epoch": 3.21870925684485,
      "learning_rate": 3.90645371577575e-06,
      "loss": 0.14358984375,
      "step": 79000
    },
    {
      "epoch": 3.2390808344198176,
      "learning_rate": 3.8045958279009133e-06,
      "loss": 0.1374140625,
      "step": 79500
    },
    {
      "epoch": 3.259452411994785,
      "learning_rate": 3.702737940026076e-06,
      "loss": 0.14011328125,
      "step": 80000
    },
    {
      "epoch": 3.2798239895697523,
      "learning_rate": 3.6008800521512388e-06,
      "loss": 0.14218359375,
      "step": 80500
    },
    {
      "epoch": 3.3001955671447196,
      "learning_rate": 3.4990221642764015e-06,
      "loss": 0.14026953125,
      "step": 81000
    },
    {
      "epoch": 3.320567144719687,
      "learning_rate": 3.397164276401565e-06,
      "loss": 0.1326171875,
      "step": 81500
    },
    {
      "epoch": 3.3409387222946547,
      "learning_rate": 3.2953063885267278e-06,
      "loss": 0.124484375,
      "step": 82000
    },
    {
      "epoch": 3.361310299869622,
      "learning_rate": 3.1934485006518905e-06,
      "loss": 0.13862890625,
      "step": 82500
    },
    {
      "epoch": 3.3816818774445894,
      "learning_rate": 3.091590612777054e-06,
      "loss": 0.14094140625,
      "step": 83000
    },
    {
      "epoch": 3.4020534550195567,
      "learning_rate": 2.9897327249022168e-06,
      "loss": 0.13262890625,
      "step": 83500
    },
    {
      "epoch": 3.422425032594524,
      "learning_rate": 2.8878748370273795e-06,
      "loss": 0.14966015625,
      "step": 84000
    },
    {
      "epoch": 3.4427966101694913,
      "learning_rate": 2.7860169491525422e-06,
      "loss": 0.136,
      "step": 84500
    },
    {
      "epoch": 3.463168187744459,
      "learning_rate": 2.684159061277706e-06,
      "loss": 0.15666796875,
      "step": 85000
    },
    {
      "epoch": 3.4835397653194264,
      "learning_rate": 2.5823011734028685e-06,
      "loss": 0.12829296875,
      "step": 85500
    },
    {
      "epoch": 3.5039113428943938,
      "learning_rate": 2.4804432855280312e-06,
      "loss": 0.134625,
      "step": 86000
    },
    {
      "epoch": 3.524282920469361,
      "learning_rate": 2.3785853976531944e-06,
      "loss": 0.1383125,
      "step": 86500
    },
    {
      "epoch": 3.5446544980443284,
      "learning_rate": 2.2767275097783575e-06,
      "loss": 0.145578125,
      "step": 87000
    },
    {
      "epoch": 3.565026075619296,
      "learning_rate": 2.1748696219035202e-06,
      "loss": 0.13809765625,
      "step": 87500
    },
    {
      "epoch": 3.5853976531942635,
      "learning_rate": 2.0730117340286834e-06,
      "loss": 0.131578125,
      "step": 88000
    },
    {
      "epoch": 3.605769230769231,
      "learning_rate": 1.971153846153846e-06,
      "loss": 0.128625,
      "step": 88500
    },
    {
      "epoch": 3.626140808344198,
      "learning_rate": 1.8692959582790093e-06,
      "loss": 0.1566328125,
      "step": 89000
    },
    {
      "epoch": 3.6465123859191655,
      "learning_rate": 1.7674380704041722e-06,
      "loss": 0.13289453125,
      "step": 89500
    },
    {
      "epoch": 3.666883963494133,
      "learning_rate": 1.6655801825293353e-06,
      "loss": 0.13489453125,
      "step": 90000
    },
    {
      "epoch": 3.6872555410691,
      "learning_rate": 1.563722294654498e-06,
      "loss": 0.1342890625,
      "step": 90500
    },
    {
      "epoch": 3.707627118644068,
      "learning_rate": 1.4618644067796612e-06,
      "loss": 0.13596875,
      "step": 91000
    },
    {
      "epoch": 3.7279986962190352,
      "learning_rate": 1.3600065189048241e-06,
      "loss": 0.12205859375,
      "step": 91500
    },
    {
      "epoch": 3.7483702737940026,
      "learning_rate": 1.258148631029987e-06,
      "loss": 0.136265625,
      "step": 92000
    },
    {
      "epoch": 3.76874185136897,
      "learning_rate": 1.15629074315515e-06,
      "loss": 0.1251875,
      "step": 92500
    },
    {
      "epoch": 3.7891134289439377,
      "learning_rate": 1.0544328552803131e-06,
      "loss": 0.1440859375,
      "step": 93000
    },
    {
      "epoch": 3.809485006518905,
      "learning_rate": 9.52574967405476e-07,
      "loss": 0.14747265625,
      "step": 93500
    },
    {
      "epoch": 3.8298565840938723,
      "learning_rate": 8.507170795306389e-07,
      "loss": 0.12144921875,
      "step": 94000
    },
    {
      "epoch": 3.8502281616688396,
      "learning_rate": 7.488591916558019e-07,
      "loss": 0.138546875,
      "step": 94500
    },
    {
      "epoch": 3.870599739243807,
      "learning_rate": 6.470013037809649e-07,
      "loss": 0.1290390625,
      "step": 95000
    },
    {
      "epoch": 3.8909713168187743,
      "learning_rate": 5.451434159061278e-07,
      "loss": 0.13651953125,
      "step": 95500
    },
    {
      "epoch": 3.9113428943937416,
      "learning_rate": 4.432855280312908e-07,
      "loss": 0.14065234375,
      "step": 96000
    },
    {
      "epoch": 3.9317144719687094,
      "learning_rate": 3.4142764015645373e-07,
      "loss": 0.129953125,
      "step": 96500
    },
    {
      "epoch": 3.9520860495436767,
      "learning_rate": 2.395697522816167e-07,
      "loss": 0.127765625,
      "step": 97000
    },
    {
      "epoch": 3.972457627118644,
      "learning_rate": 1.3771186440677968e-07,
      "loss": 0.12146875,
      "step": 97500
    },
    {
      "epoch": 3.9928292046936114,
      "learning_rate": 3.585397653194264e-08,
      "loss": 0.132578125,
      "step": 98000
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.88762098828324,
      "eval_loss": 0.5551679134368896,
      "step": 98176
    }
  ],
  "max_steps": 98176,
  "num_train_epochs": 4,
  "total_flos": 124136967570323400,
  "trial_name": null,
  "trial_params": null
}