| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 500, |
| "global_step": 657, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.015225045199352935, |
| "grad_norm": 0.8015036989610748, |
| "learning_rate": 6.818181818181818e-07, |
| "loss": 1.2434, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.03045009039870587, |
| "grad_norm": 0.5371339027245265, |
| "learning_rate": 1.4393939393939396e-06, |
| "loss": 1.3254, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.04567513559805881, |
| "grad_norm": 0.3873488090138409, |
| "learning_rate": 2.196969696969697e-06, |
| "loss": 1.2972, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.06090018079741174, |
| "grad_norm": 0.3521791179594526, |
| "learning_rate": 2.954545454545455e-06, |
| "loss": 1.3127, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.07612522599676468, |
| "grad_norm": 0.30676127981755036, |
| "learning_rate": 3.7121212121212124e-06, |
| "loss": 1.2638, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.09135027119611762, |
| "grad_norm": 0.4101414956476006, |
| "learning_rate": 4.46969696969697e-06, |
| "loss": 1.3305, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.10657531639547055, |
| "grad_norm": 0.32298711751246617, |
| "learning_rate": 4.999682116415026e-06, |
| "loss": 1.2712, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.12180036159482348, |
| "grad_norm": 0.317442988654654, |
| "learning_rate": 4.9940331012821616e-06, |
| "loss": 1.273, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.1370254067941764, |
| "grad_norm": 0.30534590571797454, |
| "learning_rate": 4.981338376708957e-06, |
| "loss": 1.2204, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.15225045199352935, |
| "grad_norm": 0.33409391048066833, |
| "learning_rate": 4.961633805627912e-06, |
| "loss": 1.2558, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.1674754971928823, |
| "grad_norm": 0.33936819417642766, |
| "learning_rate": 4.934975053973217e-06, |
| "loss": 1.247, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.18270054239223524, |
| "grad_norm": 0.3159856537332395, |
| "learning_rate": 4.901437433423016e-06, |
| "loss": 1.2884, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.19792558759158815, |
| "grad_norm": 0.321908331706447, |
| "learning_rate": 4.861115688641921e-06, |
| "loss": 1.2543, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.2131506327909411, |
| "grad_norm": 0.328440211023217, |
| "learning_rate": 4.814123729624837e-06, |
| "loss": 1.2735, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.22837567799029404, |
| "grad_norm": 0.35087717949475955, |
| "learning_rate": 4.7605943098982075e-06, |
| "loss": 1.2938, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.24360072318964696, |
| "grad_norm": 0.32756221423558446, |
| "learning_rate": 4.7006786514878e-06, |
| "loss": 1.1977, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.2588257683889999, |
| "grad_norm": 0.31899258273232245, |
| "learning_rate": 4.6345460177124816e-06, |
| "loss": 1.2292, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.2740508135883528, |
| "grad_norm": 0.3291697633439208, |
| "learning_rate": 4.5623832350108674e-06, |
| "loss": 1.2431, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.2892758587877058, |
| "grad_norm": 0.31915754548001085, |
| "learning_rate": 4.4843941651517e-06, |
| "loss": 1.2183, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.3045009039870587, |
| "grad_norm": 0.3104063063524443, |
| "learning_rate": 4.400799129318952e-06, |
| "loss": 1.2598, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.3197259491864117, |
| "grad_norm": 0.3277653309557523, |
| "learning_rate": 4.31183428569867e-06, |
| "loss": 1.2197, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.3349509943857646, |
| "grad_norm": 0.3565586762710126, |
| "learning_rate": 4.217750962325845e-06, |
| "loss": 1.2978, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.3501760395851175, |
| "grad_norm": 0.3124052373761214, |
| "learning_rate": 4.11881494707608e-06, |
| "loss": 1.2376, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.3654010847844705, |
| "grad_norm": 0.321888222544292, |
| "learning_rate": 4.015305736807806e-06, |
| "loss": 1.2338, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.3806261299838234, |
| "grad_norm": 0.36420812224810134, |
| "learning_rate": 3.907515747776275e-06, |
| "loss": 1.2556, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.3958511751831763, |
| "grad_norm": 0.2989183187303492, |
| "learning_rate": 3.795749489549904e-06, |
| "loss": 1.2527, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.4110762203825293, |
| "grad_norm": 0.30715884439307567, |
| "learning_rate": 3.680322704762701e-06, |
| "loss": 1.2467, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.4263012655818822, |
| "grad_norm": 0.41263512309014105, |
| "learning_rate": 3.561561477132971e-06, |
| "loss": 1.2592, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.4415263107812351, |
| "grad_norm": 0.2990764181830289, |
| "learning_rate": 3.4398013102681956e-06, |
| "loss": 1.2942, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.4567513559805881, |
| "grad_norm": 0.30817207269780955, |
| "learning_rate": 3.3153861798584696e-06, |
| "loss": 1.2278, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.471976401179941, |
| "grad_norm": 0.3072887178239842, |
| "learning_rate": 3.1886675619360883e-06, |
| "loss": 1.2753, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.4872014463792939, |
| "grad_norm": 0.38789533497763495, |
| "learning_rate": 3.060003439946462e-06, |
| "loss": 1.2951, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.5024264915786468, |
| "grad_norm": 0.28331258754735067, |
| "learning_rate": 2.929757293435419e-06, |
| "loss": 1.2851, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.5176515367779998, |
| "grad_norm": 0.2962487342346661, |
| "learning_rate": 2.7982970712098795e-06, |
| "loss": 1.2702, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.5328765819773528, |
| "grad_norm": 0.3039773975171447, |
| "learning_rate": 2.665994151872755e-06, |
| "loss": 1.2127, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.5481016271767056, |
| "grad_norm": 0.4131494842990153, |
| "learning_rate": 2.5332222946685707e-06, |
| "loss": 1.2551, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.5633266723760586, |
| "grad_norm": 0.3888599336298796, |
| "learning_rate": 2.4003565836037245e-06, |
| "loss": 1.2354, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.5785517175754116, |
| "grad_norm": 0.2793746233865594, |
| "learning_rate": 2.267772367824249e-06, |
| "loss": 1.2819, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.5937767627747644, |
| "grad_norm": 0.2899583112911672, |
| "learning_rate": 2.135844201244556e-06, |
| "loss": 1.2523, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.6090018079741174, |
| "grad_norm": 0.2923000214972851, |
| "learning_rate": 2.0049447844227265e-06, |
| "loss": 1.2759, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.6242268531734704, |
| "grad_norm": 0.2919228747330785, |
| "learning_rate": 1.875443911671579e-06, |
| "loss": 1.2044, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.6394518983728233, |
| "grad_norm": 0.5366405058825244, |
| "learning_rate": 1.7477074263799632e-06, |
| "loss": 1.2527, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.6546769435721762, |
| "grad_norm": 0.29907607930356067, |
| "learning_rate": 1.6220961874955136e-06, |
| "loss": 1.2442, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.6699019887715292, |
| "grad_norm": 0.30050211246658853, |
| "learning_rate": 1.4989650500885838e-06, |
| "loss": 1.2406, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.6851270339708821, |
| "grad_norm": 0.2834575399227386, |
| "learning_rate": 1.3786618628772938e-06, |
| "loss": 1.2197, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.700352079170235, |
| "grad_norm": 0.28730355579070144, |
| "learning_rate": 1.2615264855457037e-06, |
| "loss": 1.26, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.715577124369588, |
| "grad_norm": 0.5094507361581523, |
| "learning_rate": 1.1478898286312231e-06, |
| "loss": 1.3205, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.730802169568941, |
| "grad_norm": 0.2896864936273596, |
| "learning_rate": 1.038072918693596e-06, |
| "loss": 1.2423, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.7460272147682938, |
| "grad_norm": 0.29550104589134707, |
| "learning_rate": 9.323859914063815e-07, |
| "loss": 1.2507, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.7612522599676468, |
| "grad_norm": 0.2721570960991744, |
| "learning_rate": 8.311276151329775e-07, |
| "loss": 1.2683, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.7764773051669998, |
| "grad_norm": 0.2852630721545509, |
| "learning_rate": 7.345838474630993e-07, |
| "loss": 1.2815, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.7917023503663526, |
| "grad_norm": 0.2909871526763653, |
| "learning_rate": 6.430274270925271e-07, |
| "loss": 1.2503, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.8069273955657056, |
| "grad_norm": 0.30499754932838724, |
| "learning_rate": 5.56717003329082e-07, |
| "loss": 1.2364, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.8221524407650586, |
| "grad_norm": 0.2776051695513212, |
| "learning_rate": 4.758964054014931e-07, |
| "loss": 1.2012, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.8373774859644114, |
| "grad_norm": 0.2905227549359444, |
| "learning_rate": 4.0079395363538056e-07, |
| "loss": 1.3095, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.8526025311637644, |
| "grad_norm": 0.2810906800968861, |
| "learning_rate": 3.3162181444230056e-07, |
| "loss": 1.2799, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.8678275763631174, |
| "grad_norm": 0.2687773919781078, |
| "learning_rate": 2.6857540094402365e-07, |
| "loss": 1.2415, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.8830526215624702, |
| "grad_norm": 0.30078684187223376, |
| "learning_rate": 2.1183282092530067e-07, |
| "loss": 1.2361, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.8982776667618232, |
| "grad_norm": 0.27536926671011003, |
| "learning_rate": 1.6155437367466277e-07, |
| "loss": 1.2392, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.9135027119611762, |
| "grad_norm": 0.2753306105776222, |
| "learning_rate": 1.1788209713469195e-07, |
| "loss": 1.2716, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.928727757160529, |
| "grad_norm": 0.274286963333408, |
| "learning_rate": 8.093936664108071e-08, |
| "loss": 1.2109, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.943952802359882, |
| "grad_norm": 0.29590574898063726, |
| "learning_rate": 5.083054638404722e-08, |
| "loss": 1.2449, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.959177847559235, |
| "grad_norm": 0.27873264107936796, |
| "learning_rate": 2.7640694576737125e-08, |
| "loss": 1.2194, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.9744028927585878, |
| "grad_norm": 0.2757612055037619, |
| "learning_rate": 1.1435323163525026e-08, |
| "loss": 1.2431, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.9896279379579408, |
| "grad_norm": 0.2715956982953583, |
| "learning_rate": 2.2602127470383593e-09, |
| "loss": 1.2422, |
| "step": 650 |
| }, |
| { |
| "epoch": 1.0, |
| "step": 657, |
| "total_flos": 718053792808960.0, |
| "train_loss": 1.2574238450559851, |
| "train_runtime": 37309.1102, |
| "train_samples_per_second": 9.013, |
| "train_steps_per_second": 0.018 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 657, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": false, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 718053792808960.0, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|