| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 9.979036827195468, |
| "eval_steps": 500, |
| "global_step": 4410, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.11331444759206799, |
| "grad_norm": 0.19077692313024183, |
| "learning_rate": 1.977324263038549e-05, |
| "loss": 0.4576, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.22662889518413598, |
| "grad_norm": 0.10054329784988243, |
| "learning_rate": 1.9546485260770977e-05, |
| "loss": 0.2232, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.33994334277620397, |
| "grad_norm": 0.13335758989667854, |
| "learning_rate": 1.9319727891156463e-05, |
| "loss": 0.2207, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.45325779036827196, |
| "grad_norm": 0.10967673214545687, |
| "learning_rate": 1.9092970521541953e-05, |
| "loss": 0.2201, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.56657223796034, |
| "grad_norm": 0.06223139763910794, |
| "learning_rate": 1.886621315192744e-05, |
| "loss": 0.2198, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.6798866855524079, |
| "grad_norm": 0.05759853004285378, |
| "learning_rate": 1.863945578231293e-05, |
| "loss": 0.2189, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.7932011331444759, |
| "grad_norm": 0.07977821109657812, |
| "learning_rate": 1.8412698412698415e-05, |
| "loss": 0.2186, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.9065155807365439, |
| "grad_norm": 0.06262492170582389, |
| "learning_rate": 1.81859410430839e-05, |
| "loss": 0.2184, |
| "step": 400 |
| }, |
| { |
| "epoch": 1.0181303116147309, |
| "grad_norm": 0.06127223989000747, |
| "learning_rate": 1.795918367346939e-05, |
| "loss": 0.2149, |
| "step": 450 |
| }, |
| { |
| "epoch": 1.1314447592067989, |
| "grad_norm": 0.07785843870579129, |
| "learning_rate": 1.7732426303854877e-05, |
| "loss": 0.2181, |
| "step": 500 |
| }, |
| { |
| "epoch": 1.2447592067988669, |
| "grad_norm": 0.07760327110935973, |
| "learning_rate": 1.7505668934240366e-05, |
| "loss": 0.218, |
| "step": 550 |
| }, |
| { |
| "epoch": 1.3580736543909349, |
| "grad_norm": 0.0470083826860542, |
| "learning_rate": 1.7278911564625852e-05, |
| "loss": 0.2179, |
| "step": 600 |
| }, |
| { |
| "epoch": 1.4713881019830028, |
| "grad_norm": 0.06026004336659209, |
| "learning_rate": 1.705215419501134e-05, |
| "loss": 0.2178, |
| "step": 650 |
| }, |
| { |
| "epoch": 1.5847025495750708, |
| "grad_norm": 0.05471808585590988, |
| "learning_rate": 1.6825396825396828e-05, |
| "loss": 0.2182, |
| "step": 700 |
| }, |
| { |
| "epoch": 1.6980169971671388, |
| "grad_norm": 1.5328581672632824, |
| "learning_rate": 1.6598639455782314e-05, |
| "loss": 0.235, |
| "step": 750 |
| }, |
| { |
| "epoch": 1.8113314447592068, |
| "grad_norm": 0.05133721712099911, |
| "learning_rate": 1.63718820861678e-05, |
| "loss": 0.2183, |
| "step": 800 |
| }, |
| { |
| "epoch": 1.9246458923512748, |
| "grad_norm": 0.05383262507774164, |
| "learning_rate": 1.614512471655329e-05, |
| "loss": 0.2178, |
| "step": 850 |
| }, |
| { |
| "epoch": 2.0362606232294618, |
| "grad_norm": 0.06160640640527019, |
| "learning_rate": 1.5918367346938776e-05, |
| "loss": 0.2143, |
| "step": 900 |
| }, |
| { |
| "epoch": 2.1495750708215295, |
| "grad_norm": 0.06624501350294373, |
| "learning_rate": 1.5691609977324265e-05, |
| "loss": 0.2175, |
| "step": 950 |
| }, |
| { |
| "epoch": 2.2628895184135978, |
| "grad_norm": 0.0811471743466332, |
| "learning_rate": 1.546485260770975e-05, |
| "loss": 0.2176, |
| "step": 1000 |
| }, |
| { |
| "epoch": 2.376203966005666, |
| "grad_norm": 0.057146830785920345, |
| "learning_rate": 1.523809523809524e-05, |
| "loss": 0.2174, |
| "step": 1050 |
| }, |
| { |
| "epoch": 2.4895184135977337, |
| "grad_norm": 0.06486204088579503, |
| "learning_rate": 1.5011337868480727e-05, |
| "loss": 0.2174, |
| "step": 1100 |
| }, |
| { |
| "epoch": 2.6028328611898015, |
| "grad_norm": 0.04691063994533604, |
| "learning_rate": 1.4784580498866215e-05, |
| "loss": 0.2175, |
| "step": 1150 |
| }, |
| { |
| "epoch": 2.7161473087818697, |
| "grad_norm": 0.04804575617599843, |
| "learning_rate": 1.4557823129251703e-05, |
| "loss": 0.2173, |
| "step": 1200 |
| }, |
| { |
| "epoch": 2.829461756373938, |
| "grad_norm": 0.04015710381391272, |
| "learning_rate": 1.433106575963719e-05, |
| "loss": 0.2172, |
| "step": 1250 |
| }, |
| { |
| "epoch": 2.9427762039660057, |
| "grad_norm": 0.04270305052811581, |
| "learning_rate": 1.4104308390022677e-05, |
| "loss": 0.2172, |
| "step": 1300 |
| }, |
| { |
| "epoch": 3.0543909348441924, |
| "grad_norm": 0.06472103175267448, |
| "learning_rate": 1.3877551020408165e-05, |
| "loss": 0.2139, |
| "step": 1350 |
| }, |
| { |
| "epoch": 3.1677053824362607, |
| "grad_norm": 0.06154235619681001, |
| "learning_rate": 1.3650793650793652e-05, |
| "loss": 0.2169, |
| "step": 1400 |
| }, |
| { |
| "epoch": 3.2810198300283284, |
| "grad_norm": 0.03889455279515394, |
| "learning_rate": 1.342403628117914e-05, |
| "loss": 0.217, |
| "step": 1450 |
| }, |
| { |
| "epoch": 3.3943342776203966, |
| "grad_norm": 0.04485600346516124, |
| "learning_rate": 1.3197278911564626e-05, |
| "loss": 0.217, |
| "step": 1500 |
| }, |
| { |
| "epoch": 3.507648725212465, |
| "grad_norm": 0.056364414807305335, |
| "learning_rate": 1.2970521541950114e-05, |
| "loss": 0.2169, |
| "step": 1550 |
| }, |
| { |
| "epoch": 3.6209631728045326, |
| "grad_norm": 0.046168847658336246, |
| "learning_rate": 1.2743764172335602e-05, |
| "loss": 0.2169, |
| "step": 1600 |
| }, |
| { |
| "epoch": 3.7342776203966004, |
| "grad_norm": 0.05350341466041888, |
| "learning_rate": 1.251700680272109e-05, |
| "loss": 0.2168, |
| "step": 1650 |
| }, |
| { |
| "epoch": 3.8475920679886686, |
| "grad_norm": 0.05485192425623718, |
| "learning_rate": 1.2290249433106578e-05, |
| "loss": 0.2167, |
| "step": 1700 |
| }, |
| { |
| "epoch": 3.960906515580737, |
| "grad_norm": 0.04473072936045142, |
| "learning_rate": 1.2063492063492064e-05, |
| "loss": 0.2167, |
| "step": 1750 |
| }, |
| { |
| "epoch": 4.0725212464589235, |
| "grad_norm": 0.05195597672403638, |
| "learning_rate": 1.1836734693877552e-05, |
| "loss": 0.2132, |
| "step": 1800 |
| }, |
| { |
| "epoch": 4.185835694050992, |
| "grad_norm": 0.0419816337852566, |
| "learning_rate": 1.160997732426304e-05, |
| "loss": 0.2164, |
| "step": 1850 |
| }, |
| { |
| "epoch": 4.299150141643059, |
| "grad_norm": 0.05899298928895464, |
| "learning_rate": 1.1383219954648527e-05, |
| "loss": 0.2214, |
| "step": 1900 |
| }, |
| { |
| "epoch": 4.412464589235127, |
| "grad_norm": 0.04346981151532487, |
| "learning_rate": 1.1156462585034013e-05, |
| "loss": 0.2167, |
| "step": 1950 |
| }, |
| { |
| "epoch": 4.5257790368271955, |
| "grad_norm": 0.044189574868086536, |
| "learning_rate": 1.0929705215419501e-05, |
| "loss": 0.2165, |
| "step": 2000 |
| }, |
| { |
| "epoch": 4.639093484419264, |
| "grad_norm": 0.040702976751947055, |
| "learning_rate": 1.0702947845804989e-05, |
| "loss": 0.2164, |
| "step": 2050 |
| }, |
| { |
| "epoch": 4.752407932011332, |
| "grad_norm": 0.050221751939795126, |
| "learning_rate": 1.0476190476190477e-05, |
| "loss": 0.2163, |
| "step": 2100 |
| }, |
| { |
| "epoch": 4.865722379603399, |
| "grad_norm": 0.04269835448208638, |
| "learning_rate": 1.0249433106575966e-05, |
| "loss": 0.2161, |
| "step": 2150 |
| }, |
| { |
| "epoch": 4.9790368271954675, |
| "grad_norm": 0.04264509161022873, |
| "learning_rate": 1.0022675736961451e-05, |
| "loss": 0.2161, |
| "step": 2200 |
| }, |
| { |
| "epoch": 5.090651558073654, |
| "grad_norm": 0.041448351480457786, |
| "learning_rate": 9.795918367346939e-06, |
| "loss": 0.2127, |
| "step": 2250 |
| }, |
| { |
| "epoch": 5.203966005665722, |
| "grad_norm": 0.06135031255383477, |
| "learning_rate": 9.569160997732427e-06, |
| "loss": 0.2159, |
| "step": 2300 |
| }, |
| { |
| "epoch": 5.317280453257791, |
| "grad_norm": 0.04121678258573565, |
| "learning_rate": 9.342403628117914e-06, |
| "loss": 0.2158, |
| "step": 2350 |
| }, |
| { |
| "epoch": 5.430594900849858, |
| "grad_norm": 0.039055759397903865, |
| "learning_rate": 9.115646258503402e-06, |
| "loss": 0.2157, |
| "step": 2400 |
| }, |
| { |
| "epoch": 5.543909348441926, |
| "grad_norm": 0.042614942354305005, |
| "learning_rate": 8.888888888888888e-06, |
| "loss": 0.2156, |
| "step": 2450 |
| }, |
| { |
| "epoch": 5.657223796033994, |
| "grad_norm": 0.03246320825773879, |
| "learning_rate": 8.662131519274378e-06, |
| "loss": 0.2156, |
| "step": 2500 |
| }, |
| { |
| "epoch": 5.770538243626063, |
| "grad_norm": 0.04088525061763399, |
| "learning_rate": 8.435374149659866e-06, |
| "loss": 0.2155, |
| "step": 2550 |
| }, |
| { |
| "epoch": 5.88385269121813, |
| "grad_norm": 0.052784890825823244, |
| "learning_rate": 8.208616780045352e-06, |
| "loss": 0.2154, |
| "step": 2600 |
| }, |
| { |
| "epoch": 5.997167138810198, |
| "grad_norm": 0.029628123447202566, |
| "learning_rate": 7.98185941043084e-06, |
| "loss": 0.2155, |
| "step": 2650 |
| }, |
| { |
| "epoch": 6.108781869688385, |
| "grad_norm": 0.04456012439176859, |
| "learning_rate": 7.755102040816327e-06, |
| "loss": 0.2119, |
| "step": 2700 |
| }, |
| { |
| "epoch": 6.222096317280453, |
| "grad_norm": 0.03833949366557874, |
| "learning_rate": 7.528344671201815e-06, |
| "loss": 0.2152, |
| "step": 2750 |
| }, |
| { |
| "epoch": 6.335410764872521, |
| "grad_norm": 0.04119576615729903, |
| "learning_rate": 7.301587301587301e-06, |
| "loss": 0.2152, |
| "step": 2800 |
| }, |
| { |
| "epoch": 6.4487252124645895, |
| "grad_norm": 0.049472584582330295, |
| "learning_rate": 7.07482993197279e-06, |
| "loss": 0.215, |
| "step": 2850 |
| }, |
| { |
| "epoch": 6.562039660056657, |
| "grad_norm": 0.0400805462450423, |
| "learning_rate": 6.848072562358277e-06, |
| "loss": 0.2149, |
| "step": 2900 |
| }, |
| { |
| "epoch": 6.675354107648725, |
| "grad_norm": 0.05692803452817405, |
| "learning_rate": 6.621315192743765e-06, |
| "loss": 0.2149, |
| "step": 2950 |
| }, |
| { |
| "epoch": 6.788668555240793, |
| "grad_norm": 0.03887092292804971, |
| "learning_rate": 6.394557823129253e-06, |
| "loss": 0.2148, |
| "step": 3000 |
| }, |
| { |
| "epoch": 6.9019830028328615, |
| "grad_norm": 0.036224122110626866, |
| "learning_rate": 6.16780045351474e-06, |
| "loss": 0.2147, |
| "step": 3050 |
| }, |
| { |
| "epoch": 7.013597733711048, |
| "grad_norm": 0.047158172903523164, |
| "learning_rate": 5.9410430839002275e-06, |
| "loss": 0.2116, |
| "step": 3100 |
| }, |
| { |
| "epoch": 7.126912181303116, |
| "grad_norm": 0.03461108864643461, |
| "learning_rate": 5.7142857142857145e-06, |
| "loss": 0.2146, |
| "step": 3150 |
| }, |
| { |
| "epoch": 7.240226628895184, |
| "grad_norm": 0.0370582507441231, |
| "learning_rate": 5.487528344671202e-06, |
| "loss": 0.2145, |
| "step": 3200 |
| }, |
| { |
| "epoch": 7.353541076487252, |
| "grad_norm": 0.04204430021320609, |
| "learning_rate": 5.260770975056689e-06, |
| "loss": 0.2144, |
| "step": 3250 |
| }, |
| { |
| "epoch": 7.46685552407932, |
| "grad_norm": 0.03959334724172543, |
| "learning_rate": 5.034013605442177e-06, |
| "loss": 0.2144, |
| "step": 3300 |
| }, |
| { |
| "epoch": 7.580169971671388, |
| "grad_norm": 0.05397598403564438, |
| "learning_rate": 4.807256235827665e-06, |
| "loss": 0.2143, |
| "step": 3350 |
| }, |
| { |
| "epoch": 7.693484419263456, |
| "grad_norm": 0.03475866061919734, |
| "learning_rate": 4.580498866213152e-06, |
| "loss": 0.2143, |
| "step": 3400 |
| }, |
| { |
| "epoch": 7.806798866855524, |
| "grad_norm": 0.03605731459444405, |
| "learning_rate": 4.35374149659864e-06, |
| "loss": 0.2143, |
| "step": 3450 |
| }, |
| { |
| "epoch": 7.920113314447592, |
| "grad_norm": 0.049256646844772954, |
| "learning_rate": 4.126984126984127e-06, |
| "loss": 0.2143, |
| "step": 3500 |
| }, |
| { |
| "epoch": 8.03172804532578, |
| "grad_norm": 0.043722508869200226, |
| "learning_rate": 3.9002267573696154e-06, |
| "loss": 0.2109, |
| "step": 3550 |
| }, |
| { |
| "epoch": 8.145042492917847, |
| "grad_norm": 0.05138428044227004, |
| "learning_rate": 3.6734693877551024e-06, |
| "loss": 0.214, |
| "step": 3600 |
| }, |
| { |
| "epoch": 8.258356940509914, |
| "grad_norm": 0.043851337146329404, |
| "learning_rate": 3.44671201814059e-06, |
| "loss": 0.2138, |
| "step": 3650 |
| }, |
| { |
| "epoch": 8.371671388101984, |
| "grad_norm": 0.044279311539890204, |
| "learning_rate": 3.2199546485260772e-06, |
| "loss": 0.2139, |
| "step": 3700 |
| }, |
| { |
| "epoch": 8.48498583569405, |
| "grad_norm": 0.03446260373952044, |
| "learning_rate": 2.993197278911565e-06, |
| "loss": 0.2139, |
| "step": 3750 |
| }, |
| { |
| "epoch": 8.598300283286118, |
| "grad_norm": 0.0398317932843614, |
| "learning_rate": 2.7664399092970525e-06, |
| "loss": 0.2137, |
| "step": 3800 |
| }, |
| { |
| "epoch": 8.711614730878187, |
| "grad_norm": 0.03767500988119292, |
| "learning_rate": 2.53968253968254e-06, |
| "loss": 0.2136, |
| "step": 3850 |
| }, |
| { |
| "epoch": 8.824929178470255, |
| "grad_norm": 0.042385639923809684, |
| "learning_rate": 2.3129251700680273e-06, |
| "loss": 0.2137, |
| "step": 3900 |
| }, |
| { |
| "epoch": 8.938243626062324, |
| "grad_norm": 0.04600384237355533, |
| "learning_rate": 2.086167800453515e-06, |
| "loss": 0.2136, |
| "step": 3950 |
| }, |
| { |
| "epoch": 9.04985835694051, |
| "grad_norm": 0.04025841171568849, |
| "learning_rate": 1.8594104308390023e-06, |
| "loss": 0.2103, |
| "step": 4000 |
| }, |
| { |
| "epoch": 9.163172804532579, |
| "grad_norm": 0.04544163454051775, |
| "learning_rate": 1.6326530612244897e-06, |
| "loss": 0.2132, |
| "step": 4050 |
| }, |
| { |
| "epoch": 9.276487252124646, |
| "grad_norm": 0.04725026924810951, |
| "learning_rate": 1.4058956916099775e-06, |
| "loss": 0.2132, |
| "step": 4100 |
| }, |
| { |
| "epoch": 9.389801699716713, |
| "grad_norm": 0.04900241251858758, |
| "learning_rate": 1.179138321995465e-06, |
| "loss": 0.2132, |
| "step": 4150 |
| }, |
| { |
| "epoch": 9.503116147308782, |
| "grad_norm": 0.040273641750525384, |
| "learning_rate": 9.523809523809525e-07, |
| "loss": 0.2131, |
| "step": 4200 |
| }, |
| { |
| "epoch": 9.61643059490085, |
| "grad_norm": 0.045928550994560016, |
| "learning_rate": 7.2562358276644e-07, |
| "loss": 0.2131, |
| "step": 4250 |
| }, |
| { |
| "epoch": 9.729745042492917, |
| "grad_norm": 0.04288769118414568, |
| "learning_rate": 4.988662131519275e-07, |
| "loss": 0.213, |
| "step": 4300 |
| }, |
| { |
| "epoch": 9.843059490084986, |
| "grad_norm": 0.04348109814106462, |
| "learning_rate": 2.72108843537415e-07, |
| "loss": 0.2129, |
| "step": 4350 |
| }, |
| { |
| "epoch": 9.956373937677053, |
| "grad_norm": 0.03750811718272137, |
| "learning_rate": 4.53514739229025e-08, |
| "loss": 0.2128, |
| "step": 4400 |
| } |
| ], |
| "logging_steps": 50, |
| "max_steps": 4410, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 10, |
| "save_steps": 1000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.1136582509658112e+16, |
| "train_batch_size": 8, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|