| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 3.0, | |
| "eval_steps": 500, | |
| "global_step": 15276, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.009819324430479183, | |
| "grad_norm": 0.24714912474155426, | |
| "learning_rate": 4.983961770096884e-05, | |
| "loss": 0.3447, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.019638648860958365, | |
| "grad_norm": 0.12986360490322113, | |
| "learning_rate": 4.967596229379419e-05, | |
| "loss": 0.0057, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.02945797329143755, | |
| "grad_norm": 0.13787369430065155, | |
| "learning_rate": 4.9512306886619534e-05, | |
| "loss": 0.0041, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.03927729772191673, | |
| "grad_norm": 0.032512035220861435, | |
| "learning_rate": 4.9348651479444883e-05, | |
| "loss": 0.0031, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.049096622152395915, | |
| "grad_norm": 0.04660605266690254, | |
| "learning_rate": 4.9184996072270227e-05, | |
| "loss": 0.0017, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.0589159465828751, | |
| "grad_norm": 0.12520131468772888, | |
| "learning_rate": 4.9021340665095576e-05, | |
| "loss": 0.0023, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.06873527101335428, | |
| "grad_norm": 0.03921537473797798, | |
| "learning_rate": 4.8857685257920926e-05, | |
| "loss": 0.0016, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.07855459544383346, | |
| "grad_norm": 0.019508639350533485, | |
| "learning_rate": 4.869402985074627e-05, | |
| "loss": 0.0015, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.08837391987431265, | |
| "grad_norm": 0.03215921297669411, | |
| "learning_rate": 4.853037444357162e-05, | |
| "loss": 0.0018, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.09819324430479183, | |
| "grad_norm": 0.01678670570254326, | |
| "learning_rate": 4.836671903639696e-05, | |
| "loss": 0.0012, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.10801256873527101, | |
| "grad_norm": 0.04076138511300087, | |
| "learning_rate": 4.820306362922231e-05, | |
| "loss": 0.0013, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.1178318931657502, | |
| "grad_norm": 0.028516946360468864, | |
| "learning_rate": 4.803940822204766e-05, | |
| "loss": 0.001, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.12765121759622938, | |
| "grad_norm": 0.0024934338871389627, | |
| "learning_rate": 4.7875752814873006e-05, | |
| "loss": 0.0013, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.13747054202670855, | |
| "grad_norm": 0.009505635127425194, | |
| "learning_rate": 4.7712097407698356e-05, | |
| "loss": 0.0006, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.14728986645718775, | |
| "grad_norm": 0.004126217681914568, | |
| "learning_rate": 4.75484420005237e-05, | |
| "loss": 0.0009, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.15710919088766692, | |
| "grad_norm": 0.001864357735030353, | |
| "learning_rate": 4.738478659334905e-05, | |
| "loss": 0.0006, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.16692851531814612, | |
| "grad_norm": 0.05715826526284218, | |
| "learning_rate": 4.722113118617439e-05, | |
| "loss": 0.0009, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.1767478397486253, | |
| "grad_norm": 0.01866191253066063, | |
| "learning_rate": 4.7057475778999735e-05, | |
| "loss": 0.0008, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.1865671641791045, | |
| "grad_norm": 0.02309831976890564, | |
| "learning_rate": 4.6893820371825085e-05, | |
| "loss": 0.0009, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.19638648860958366, | |
| "grad_norm": 0.008644412271678448, | |
| "learning_rate": 4.6730164964650435e-05, | |
| "loss": 0.0008, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.20620581304006286, | |
| "grad_norm": 0.0034850463271141052, | |
| "learning_rate": 4.656650955747578e-05, | |
| "loss": 0.0008, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.21602513747054203, | |
| "grad_norm": 0.015649326145648956, | |
| "learning_rate": 4.640285415030113e-05, | |
| "loss": 0.0005, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.2258444619010212, | |
| "grad_norm": 0.017577216029167175, | |
| "learning_rate": 4.623919874312648e-05, | |
| "loss": 0.0005, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.2356637863315004, | |
| "grad_norm": 0.001943176961503923, | |
| "learning_rate": 4.607554333595182e-05, | |
| "loss": 0.0006, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.24548311076197957, | |
| "grad_norm": 0.013552217744290829, | |
| "learning_rate": 4.591188792877717e-05, | |
| "loss": 0.0007, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.25530243519245877, | |
| "grad_norm": 0.0009095704299397767, | |
| "learning_rate": 4.574823252160252e-05, | |
| "loss": 0.0006, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.26512175962293794, | |
| "grad_norm": 0.12323999404907227, | |
| "learning_rate": 4.5584577114427864e-05, | |
| "loss": 0.001, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.2749410840534171, | |
| "grad_norm": 0.0046529932878911495, | |
| "learning_rate": 4.5420921707253214e-05, | |
| "loss": 0.0006, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.28476040848389633, | |
| "grad_norm": 0.0006078369333408773, | |
| "learning_rate": 4.525726630007856e-05, | |
| "loss": 0.0005, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.2945797329143755, | |
| "grad_norm": 0.0011770805576816201, | |
| "learning_rate": 4.509361089290391e-05, | |
| "loss": 0.0003, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.3043990573448547, | |
| "grad_norm": 0.004274972248822451, | |
| "learning_rate": 4.492995548572925e-05, | |
| "loss": 0.0005, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.31421838177533384, | |
| "grad_norm": 0.0007647090242244303, | |
| "learning_rate": 4.476630007855459e-05, | |
| "loss": 0.0007, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.324037706205813, | |
| "grad_norm": 0.013916688971221447, | |
| "learning_rate": 4.460264467137994e-05, | |
| "loss": 0.0005, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.33385703063629224, | |
| "grad_norm": 0.001363221788778901, | |
| "learning_rate": 4.443898926420529e-05, | |
| "loss": 0.0004, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.3436763550667714, | |
| "grad_norm": 0.00033090286888182163, | |
| "learning_rate": 4.4275333857030636e-05, | |
| "loss": 0.0004, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.3534956794972506, | |
| "grad_norm": 0.000668744498398155, | |
| "learning_rate": 4.4111678449855986e-05, | |
| "loss": 0.0006, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.36331500392772975, | |
| "grad_norm": 0.0539139024913311, | |
| "learning_rate": 4.394802304268133e-05, | |
| "loss": 0.0005, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 0.373134328358209, | |
| "grad_norm": 0.0008487588493153453, | |
| "learning_rate": 4.378436763550668e-05, | |
| "loss": 0.0004, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.38295365278868815, | |
| "grad_norm": 0.0008443322731181979, | |
| "learning_rate": 4.362071222833203e-05, | |
| "loss": 0.0006, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 0.3927729772191673, | |
| "grad_norm": 0.00504659116268158, | |
| "learning_rate": 4.345705682115737e-05, | |
| "loss": 0.0006, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.4025923016496465, | |
| "grad_norm": 0.19237877428531647, | |
| "learning_rate": 4.329340141398272e-05, | |
| "loss": 0.0003, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 0.4124116260801257, | |
| "grad_norm": 0.052810050547122955, | |
| "learning_rate": 4.312974600680807e-05, | |
| "loss": 0.0007, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.4222309505106049, | |
| "grad_norm": 0.0010182256810367107, | |
| "learning_rate": 4.2966090599633415e-05, | |
| "loss": 0.0006, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 0.43205027494108406, | |
| "grad_norm": 0.008011899888515472, | |
| "learning_rate": 4.2802435192458765e-05, | |
| "loss": 0.0004, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.4418695993715632, | |
| "grad_norm": 0.004001118242740631, | |
| "learning_rate": 4.263877978528411e-05, | |
| "loss": 0.0003, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 0.4516889238020424, | |
| "grad_norm": 0.0005824828986078501, | |
| "learning_rate": 4.247512437810945e-05, | |
| "loss": 0.0004, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.4615082482325216, | |
| "grad_norm": 0.00035399169428274035, | |
| "learning_rate": 4.23114689709348e-05, | |
| "loss": 0.0006, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 0.4713275726630008, | |
| "grad_norm": 0.0008132366347126663, | |
| "learning_rate": 4.2147813563760144e-05, | |
| "loss": 0.0004, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.48114689709347996, | |
| "grad_norm": 0.00029086056747473776, | |
| "learning_rate": 4.1984158156585494e-05, | |
| "loss": 0.0004, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 0.49096622152395913, | |
| "grad_norm": 0.009674192406237125, | |
| "learning_rate": 4.1820502749410844e-05, | |
| "loss": 0.0003, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.5007855459544384, | |
| "grad_norm": 0.0002215866988990456, | |
| "learning_rate": 4.165684734223619e-05, | |
| "loss": 0.0003, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 0.5106048703849175, | |
| "grad_norm": 0.0003756559453904629, | |
| "learning_rate": 4.149319193506154e-05, | |
| "loss": 0.0003, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.5204241948153967, | |
| "grad_norm": 3.785878652706742e-05, | |
| "learning_rate": 4.132953652788688e-05, | |
| "loss": 0.0002, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 0.5302435192458759, | |
| "grad_norm": 0.007607210893183947, | |
| "learning_rate": 4.116588112071223e-05, | |
| "loss": 0.0003, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 0.540062843676355, | |
| "grad_norm": 0.02198871783912182, | |
| "learning_rate": 4.100222571353758e-05, | |
| "loss": 0.0002, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 0.5498821681068342, | |
| "grad_norm": 0.00041103907278738916, | |
| "learning_rate": 4.0838570306362923e-05, | |
| "loss": 0.0002, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.5597014925373134, | |
| "grad_norm": 0.01565481722354889, | |
| "learning_rate": 4.067491489918827e-05, | |
| "loss": 0.0003, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 0.5695208169677927, | |
| "grad_norm": 0.0117128761485219, | |
| "learning_rate": 4.051125949201362e-05, | |
| "loss": 0.0004, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 0.5793401413982718, | |
| "grad_norm": 0.004789320752024651, | |
| "learning_rate": 4.0347604084838966e-05, | |
| "loss": 0.0004, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 0.589159465828751, | |
| "grad_norm": 0.0011219610460102558, | |
| "learning_rate": 4.018394867766431e-05, | |
| "loss": 0.0004, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.5989787902592302, | |
| "grad_norm": 0.0518529899418354, | |
| "learning_rate": 4.002029327048966e-05, | |
| "loss": 0.0004, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 0.6087981146897093, | |
| "grad_norm": 0.01609092392027378, | |
| "learning_rate": 3.9856637863315e-05, | |
| "loss": 0.0003, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 0.6186174391201885, | |
| "grad_norm": 0.002343968488276005, | |
| "learning_rate": 3.969298245614035e-05, | |
| "loss": 0.0003, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 0.6284367635506677, | |
| "grad_norm": 0.010258428752422333, | |
| "learning_rate": 3.9529327048965696e-05, | |
| "loss": 0.0003, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 0.6382560879811469, | |
| "grad_norm": 0.0007253732765093446, | |
| "learning_rate": 3.9365671641791046e-05, | |
| "loss": 0.0004, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 0.648075412411626, | |
| "grad_norm": 0.01221112348139286, | |
| "learning_rate": 3.9202016234616395e-05, | |
| "loss": 0.0003, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 0.6578947368421053, | |
| "grad_norm": 0.007547037675976753, | |
| "learning_rate": 3.903836082744174e-05, | |
| "loss": 0.0004, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 0.6677140612725845, | |
| "grad_norm": 0.0037480357568711042, | |
| "learning_rate": 3.887470542026709e-05, | |
| "loss": 0.0003, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 0.6775333857030637, | |
| "grad_norm": 9.382007556268945e-05, | |
| "learning_rate": 3.871105001309244e-05, | |
| "loss": 0.0005, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 0.6873527101335428, | |
| "grad_norm": 0.0019377778517082334, | |
| "learning_rate": 3.854739460591778e-05, | |
| "loss": 0.0003, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.697172034564022, | |
| "grad_norm": 0.00031467623193748295, | |
| "learning_rate": 3.838373919874313e-05, | |
| "loss": 0.0003, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 0.7069913589945012, | |
| "grad_norm": 0.012330332770943642, | |
| "learning_rate": 3.8220083791568475e-05, | |
| "loss": 0.0004, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 0.7168106834249803, | |
| "grad_norm": 0.43893539905548096, | |
| "learning_rate": 3.8056428384393825e-05, | |
| "loss": 0.002, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 0.7266300078554595, | |
| "grad_norm": 0.010738670825958252, | |
| "learning_rate": 3.789277297721917e-05, | |
| "loss": 0.0008, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 0.7364493322859387, | |
| "grad_norm": 0.0009258920326828957, | |
| "learning_rate": 3.772911757004452e-05, | |
| "loss": 0.0005, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 0.746268656716418, | |
| "grad_norm": 0.002003765432164073, | |
| "learning_rate": 3.756546216286986e-05, | |
| "loss": 0.0004, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 0.7560879811468971, | |
| "grad_norm": 0.0003309960593469441, | |
| "learning_rate": 3.740180675569521e-05, | |
| "loss": 0.0004, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 0.7659073055773763, | |
| "grad_norm": 0.014759145677089691, | |
| "learning_rate": 3.7238151348520554e-05, | |
| "loss": 0.0004, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 0.7757266300078555, | |
| "grad_norm": 0.00703453179448843, | |
| "learning_rate": 3.7074495941345904e-05, | |
| "loss": 0.0005, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 0.7855459544383346, | |
| "grad_norm": 0.0093814292922616, | |
| "learning_rate": 3.691084053417125e-05, | |
| "loss": 0.0004, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.7953652788688138, | |
| "grad_norm": 0.00033789846929721534, | |
| "learning_rate": 3.67471851269966e-05, | |
| "loss": 0.0003, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 0.805184603299293, | |
| "grad_norm": 0.004072342533618212, | |
| "learning_rate": 3.658352971982195e-05, | |
| "loss": 0.0004, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 0.8150039277297721, | |
| "grad_norm": 0.005905472207814455, | |
| "learning_rate": 3.641987431264729e-05, | |
| "loss": 0.0004, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 0.8248232521602514, | |
| "grad_norm": 0.008759435266256332, | |
| "learning_rate": 3.625621890547264e-05, | |
| "loss": 0.0004, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 0.8346425765907306, | |
| "grad_norm": 0.01638154312968254, | |
| "learning_rate": 3.609256349829799e-05, | |
| "loss": 0.0003, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 0.8444619010212098, | |
| "grad_norm": 0.003001204691827297, | |
| "learning_rate": 3.592890809112333e-05, | |
| "loss": 0.0004, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 0.8542812254516889, | |
| "grad_norm": 0.0006073117256164551, | |
| "learning_rate": 3.576525268394868e-05, | |
| "loss": 0.0003, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 0.8641005498821681, | |
| "grad_norm": 0.003654340049251914, | |
| "learning_rate": 3.5601597276774026e-05, | |
| "loss": 0.0005, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 0.8739198743126473, | |
| "grad_norm": 0.2316829115152359, | |
| "learning_rate": 3.5437941869599376e-05, | |
| "loss": 0.0005, | |
| "step": 4450 | |
| }, | |
| { | |
| "epoch": 0.8837391987431265, | |
| "grad_norm": 0.027139848098158836, | |
| "learning_rate": 3.527428646242472e-05, | |
| "loss": 0.0005, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.8935585231736056, | |
| "grad_norm": 0.004498090595006943, | |
| "learning_rate": 3.511063105525006e-05, | |
| "loss": 0.0004, | |
| "step": 4550 | |
| }, | |
| { | |
| "epoch": 0.9033778476040848, | |
| "grad_norm": 0.010397707112133503, | |
| "learning_rate": 3.494697564807541e-05, | |
| "loss": 0.0003, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 0.9131971720345641, | |
| "grad_norm": 0.00014255594578571618, | |
| "learning_rate": 3.478332024090076e-05, | |
| "loss": 0.0003, | |
| "step": 4650 | |
| }, | |
| { | |
| "epoch": 0.9230164964650432, | |
| "grad_norm": 0.0028459234163165092, | |
| "learning_rate": 3.4619664833726105e-05, | |
| "loss": 0.0003, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 0.9328358208955224, | |
| "grad_norm": 0.0030736555345356464, | |
| "learning_rate": 3.4456009426551455e-05, | |
| "loss": 0.0002, | |
| "step": 4750 | |
| }, | |
| { | |
| "epoch": 0.9426551453260016, | |
| "grad_norm": 0.0026709907688200474, | |
| "learning_rate": 3.4292354019376805e-05, | |
| "loss": 0.0003, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 0.9524744697564808, | |
| "grad_norm": 0.00022516479657497257, | |
| "learning_rate": 3.412869861220215e-05, | |
| "loss": 0.0003, | |
| "step": 4850 | |
| }, | |
| { | |
| "epoch": 0.9622937941869599, | |
| "grad_norm": 2.6079122108058073e-05, | |
| "learning_rate": 3.39650432050275e-05, | |
| "loss": 0.0002, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 0.9721131186174391, | |
| "grad_norm": 0.0010724954772740602, | |
| "learning_rate": 3.380138779785284e-05, | |
| "loss": 0.0003, | |
| "step": 4950 | |
| }, | |
| { | |
| "epoch": 0.9819324430479183, | |
| "grad_norm": 0.00028140624635852873, | |
| "learning_rate": 3.363773239067819e-05, | |
| "loss": 0.0004, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.9917517674783974, | |
| "grad_norm": 0.002564377384260297, | |
| "learning_rate": 3.347407698350354e-05, | |
| "loss": 0.0003, | |
| "step": 5050 | |
| }, | |
| { | |
| "epoch": 1.0015710919088767, | |
| "grad_norm": 0.0027151070535182953, | |
| "learning_rate": 3.3310421576328884e-05, | |
| "loss": 0.0002, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 1.011390416339356, | |
| "grad_norm": 0.0002080993290292099, | |
| "learning_rate": 3.3146766169154234e-05, | |
| "loss": 0.0003, | |
| "step": 5150 | |
| }, | |
| { | |
| "epoch": 1.021209740769835, | |
| "grad_norm": 3.149580516037531e-05, | |
| "learning_rate": 3.298311076197958e-05, | |
| "loss": 0.0003, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 1.0310290652003142, | |
| "grad_norm": 0.005241520702838898, | |
| "learning_rate": 3.281945535480492e-05, | |
| "loss": 0.0003, | |
| "step": 5250 | |
| }, | |
| { | |
| "epoch": 1.0408483896307934, | |
| "grad_norm": 0.005873743910342455, | |
| "learning_rate": 3.265579994763027e-05, | |
| "loss": 0.0002, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 1.0506677140612726, | |
| "grad_norm": 0.006296052597463131, | |
| "learning_rate": 3.2492144540455613e-05, | |
| "loss": 0.0003, | |
| "step": 5350 | |
| }, | |
| { | |
| "epoch": 1.0604870384917517, | |
| "grad_norm": 0.00039794211625121534, | |
| "learning_rate": 3.232848913328096e-05, | |
| "loss": 0.0004, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 1.070306362922231, | |
| "grad_norm": 0.0039017247036099434, | |
| "learning_rate": 3.216483372610631e-05, | |
| "loss": 0.0003, | |
| "step": 5450 | |
| }, | |
| { | |
| "epoch": 1.08012568735271, | |
| "grad_norm": 0.002064199186861515, | |
| "learning_rate": 3.2001178318931656e-05, | |
| "loss": 0.0003, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 1.0899450117831893, | |
| "grad_norm": 0.0002229887613793835, | |
| "learning_rate": 3.1837522911757006e-05, | |
| "loss": 0.0003, | |
| "step": 5550 | |
| }, | |
| { | |
| "epoch": 1.0997643362136684, | |
| "grad_norm": 3.4759577829390764e-05, | |
| "learning_rate": 3.1673867504582356e-05, | |
| "loss": 0.0004, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 1.1095836606441476, | |
| "grad_norm": 2.991587643919047e-05, | |
| "learning_rate": 3.15102120974077e-05, | |
| "loss": 0.0002, | |
| "step": 5650 | |
| }, | |
| { | |
| "epoch": 1.1194029850746268, | |
| "grad_norm": 0.0017788242548704147, | |
| "learning_rate": 3.134655669023305e-05, | |
| "loss": 0.0003, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 1.129222309505106, | |
| "grad_norm": 0.008592754602432251, | |
| "learning_rate": 3.118290128305839e-05, | |
| "loss": 0.0003, | |
| "step": 5750 | |
| }, | |
| { | |
| "epoch": 1.139041633935585, | |
| "grad_norm": 0.0017567313043400645, | |
| "learning_rate": 3.101924587588374e-05, | |
| "loss": 0.0003, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 1.1488609583660645, | |
| "grad_norm": 8.339332271134481e-05, | |
| "learning_rate": 3.085559046870909e-05, | |
| "loss": 0.0002, | |
| "step": 5850 | |
| }, | |
| { | |
| "epoch": 1.1586802827965437, | |
| "grad_norm": 0.00681919464841485, | |
| "learning_rate": 3.0691935061534435e-05, | |
| "loss": 0.0003, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 1.1684996072270228, | |
| "grad_norm": 1.7358417608193122e-05, | |
| "learning_rate": 3.052827965435978e-05, | |
| "loss": 0.0002, | |
| "step": 5950 | |
| }, | |
| { | |
| "epoch": 1.178318931657502, | |
| "grad_norm": 0.005276073236018419, | |
| "learning_rate": 3.0364624247185132e-05, | |
| "loss": 0.0003, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 1.1881382560879812, | |
| "grad_norm": 1.6649060853524134e-05, | |
| "learning_rate": 3.0200968840010475e-05, | |
| "loss": 0.0003, | |
| "step": 6050 | |
| }, | |
| { | |
| "epoch": 1.1979575805184604, | |
| "grad_norm": 1.6310365026583895e-05, | |
| "learning_rate": 3.003731343283582e-05, | |
| "loss": 0.0001, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 1.2077769049489395, | |
| "grad_norm": 8.225607598433271e-05, | |
| "learning_rate": 2.987365802566117e-05, | |
| "loss": 0.0003, | |
| "step": 6150 | |
| }, | |
| { | |
| "epoch": 1.2175962293794187, | |
| "grad_norm": 0.06942308694124222, | |
| "learning_rate": 2.9710002618486515e-05, | |
| "loss": 0.0006, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 1.2274155538098979, | |
| "grad_norm": 0.0003915784473065287, | |
| "learning_rate": 2.9546347211311865e-05, | |
| "loss": 0.0008, | |
| "step": 6250 | |
| }, | |
| { | |
| "epoch": 1.237234878240377, | |
| "grad_norm": 5.1170645747333765e-05, | |
| "learning_rate": 2.9382691804137208e-05, | |
| "loss": 0.0004, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 1.2470542026708562, | |
| "grad_norm": 0.005075642839074135, | |
| "learning_rate": 2.9219036396962558e-05, | |
| "loss": 0.0006, | |
| "step": 6350 | |
| }, | |
| { | |
| "epoch": 1.2568735271013354, | |
| "grad_norm": 0.013923396356403828, | |
| "learning_rate": 2.9055380989787908e-05, | |
| "loss": 0.0003, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 1.2666928515318145, | |
| "grad_norm": 0.010688439942896366, | |
| "learning_rate": 2.889172558261325e-05, | |
| "loss": 0.0004, | |
| "step": 6450 | |
| }, | |
| { | |
| "epoch": 1.2765121759622937, | |
| "grad_norm": 0.005004653707146645, | |
| "learning_rate": 2.8728070175438597e-05, | |
| "loss": 0.0002, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 1.286331500392773, | |
| "grad_norm": 0.015412525273859501, | |
| "learning_rate": 2.8564414768263947e-05, | |
| "loss": 0.0003, | |
| "step": 6550 | |
| }, | |
| { | |
| "epoch": 1.2961508248232523, | |
| "grad_norm": 0.002696032403036952, | |
| "learning_rate": 2.840075936108929e-05, | |
| "loss": 0.0002, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 1.3059701492537314, | |
| "grad_norm": 0.0005274811992421746, | |
| "learning_rate": 2.823710395391464e-05, | |
| "loss": 0.0003, | |
| "step": 6650 | |
| }, | |
| { | |
| "epoch": 1.3157894736842106, | |
| "grad_norm": 2.0785410015378147e-05, | |
| "learning_rate": 2.8073448546739983e-05, | |
| "loss": 0.0003, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 1.3256087981146898, | |
| "grad_norm": 0.0011626353953033686, | |
| "learning_rate": 2.7909793139565333e-05, | |
| "loss": 0.0001, | |
| "step": 6750 | |
| }, | |
| { | |
| "epoch": 1.335428122545169, | |
| "grad_norm": 0.0003097439184784889, | |
| "learning_rate": 2.774613773239068e-05, | |
| "loss": 0.0002, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 1.3452474469756481, | |
| "grad_norm": 5.218560909270309e-05, | |
| "learning_rate": 2.7582482325216026e-05, | |
| "loss": 0.0002, | |
| "step": 6850 | |
| }, | |
| { | |
| "epoch": 1.3550667714061273, | |
| "grad_norm": 4.878486288362183e-05, | |
| "learning_rate": 2.7418826918041373e-05, | |
| "loss": 0.0002, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 1.3648860958366065, | |
| "grad_norm": 1.3592688446806278e-05, | |
| "learning_rate": 2.7255171510866723e-05, | |
| "loss": 0.0003, | |
| "step": 6950 | |
| }, | |
| { | |
| "epoch": 1.3747054202670856, | |
| "grad_norm": 0.0010299599962309003, | |
| "learning_rate": 2.7091516103692066e-05, | |
| "loss": 0.0003, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 1.3845247446975648, | |
| "grad_norm": 0.005047979764640331, | |
| "learning_rate": 2.6927860696517416e-05, | |
| "loss": 0.0003, | |
| "step": 7050 | |
| }, | |
| { | |
| "epoch": 1.394344069128044, | |
| "grad_norm": 0.00903361290693283, | |
| "learning_rate": 2.676420528934276e-05, | |
| "loss": 0.0005, | |
| "step": 7100 | |
| }, | |
| { | |
| "epoch": 1.4041633935585232, | |
| "grad_norm": 0.00216244556941092, | |
| "learning_rate": 2.660054988216811e-05, | |
| "loss": 0.0004, | |
| "step": 7150 | |
| }, | |
| { | |
| "epoch": 1.4139827179890023, | |
| "grad_norm": 0.008215115405619144, | |
| "learning_rate": 2.6436894474993455e-05, | |
| "loss": 0.0003, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 1.4238020424194815, | |
| "grad_norm": 0.00024291670706588775, | |
| "learning_rate": 2.62732390678188e-05, | |
| "loss": 0.0003, | |
| "step": 7250 | |
| }, | |
| { | |
| "epoch": 1.4336213668499607, | |
| "grad_norm": 0.004875461105257273, | |
| "learning_rate": 2.610958366064415e-05, | |
| "loss": 0.0003, | |
| "step": 7300 | |
| }, | |
| { | |
| "epoch": 1.4434406912804398, | |
| "grad_norm": 0.000312354473862797, | |
| "learning_rate": 2.59459282534695e-05, | |
| "loss": 0.0003, | |
| "step": 7350 | |
| }, | |
| { | |
| "epoch": 1.453260015710919, | |
| "grad_norm": 8.923052519094199e-05, | |
| "learning_rate": 2.578227284629484e-05, | |
| "loss": 0.0004, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 1.4630793401413982, | |
| "grad_norm": 0.008472305722534657, | |
| "learning_rate": 2.561861743912019e-05, | |
| "loss": 0.0003, | |
| "step": 7450 | |
| }, | |
| { | |
| "epoch": 1.4728986645718773, | |
| "grad_norm": 4.937009362038225e-05, | |
| "learning_rate": 2.5454962031945535e-05, | |
| "loss": 0.0002, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 1.4827179890023565, | |
| "grad_norm": 0.0038520190864801407, | |
| "learning_rate": 2.5291306624770885e-05, | |
| "loss": 0.0003, | |
| "step": 7550 | |
| }, | |
| { | |
| "epoch": 1.4925373134328357, | |
| "grad_norm": 0.002998525742441416, | |
| "learning_rate": 2.512765121759623e-05, | |
| "loss": 0.0003, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 1.5023566378633149, | |
| "grad_norm": 0.0005952705978415906, | |
| "learning_rate": 2.4963995810421578e-05, | |
| "loss": 0.0003, | |
| "step": 7650 | |
| }, | |
| { | |
| "epoch": 1.512175962293794, | |
| "grad_norm": 2.007077455346007e-05, | |
| "learning_rate": 2.4800340403246924e-05, | |
| "loss": 0.0002, | |
| "step": 7700 | |
| }, | |
| { | |
| "epoch": 1.5219952867242734, | |
| "grad_norm": 0.002962449798360467, | |
| "learning_rate": 2.463668499607227e-05, | |
| "loss": 0.0003, | |
| "step": 7750 | |
| }, | |
| { | |
| "epoch": 1.5318146111547526, | |
| "grad_norm": 3.4512224374338984e-05, | |
| "learning_rate": 2.447302958889762e-05, | |
| "loss": 0.0002, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 1.5416339355852318, | |
| "grad_norm": 5.3643165301764384e-05, | |
| "learning_rate": 2.4309374181722967e-05, | |
| "loss": 0.0002, | |
| "step": 7850 | |
| }, | |
| { | |
| "epoch": 1.551453260015711, | |
| "grad_norm": 4.536865890258923e-05, | |
| "learning_rate": 2.4145718774548314e-05, | |
| "loss": 0.0003, | |
| "step": 7900 | |
| }, | |
| { | |
| "epoch": 1.56127258444619, | |
| "grad_norm": 0.0017064092680811882, | |
| "learning_rate": 2.3982063367373657e-05, | |
| "loss": 0.0003, | |
| "step": 7950 | |
| }, | |
| { | |
| "epoch": 1.5710919088766693, | |
| "grad_norm": 0.0028170356526970863, | |
| "learning_rate": 2.3818407960199007e-05, | |
| "loss": 0.0003, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 1.5809112333071484, | |
| "grad_norm": 6.762581324437633e-05, | |
| "learning_rate": 2.3654752553024353e-05, | |
| "loss": 0.0003, | |
| "step": 8050 | |
| }, | |
| { | |
| "epoch": 1.5907305577376276, | |
| "grad_norm": 5.572327063418925e-05, | |
| "learning_rate": 2.34910971458497e-05, | |
| "loss": 0.0001, | |
| "step": 8100 | |
| }, | |
| { | |
| "epoch": 1.6005498821681068, | |
| "grad_norm": 4.8497397074243054e-05, | |
| "learning_rate": 2.3327441738675046e-05, | |
| "loss": 0.0003, | |
| "step": 8150 | |
| }, | |
| { | |
| "epoch": 1.6103692065985862, | |
| "grad_norm": 0.000499077548738569, | |
| "learning_rate": 2.3163786331500396e-05, | |
| "loss": 0.0003, | |
| "step": 8200 | |
| }, | |
| { | |
| "epoch": 1.6201885310290653, | |
| "grad_norm": 0.0007215180085040629, | |
| "learning_rate": 2.3000130924325743e-05, | |
| "loss": 0.0003, | |
| "step": 8250 | |
| }, | |
| { | |
| "epoch": 1.6300078554595445, | |
| "grad_norm": 0.00015634812007192522, | |
| "learning_rate": 2.2836475517151086e-05, | |
| "loss": 0.0002, | |
| "step": 8300 | |
| }, | |
| { | |
| "epoch": 1.6398271798900237, | |
| "grad_norm": 0.010850101709365845, | |
| "learning_rate": 2.2672820109976432e-05, | |
| "loss": 0.0002, | |
| "step": 8350 | |
| }, | |
| { | |
| "epoch": 1.6496465043205029, | |
| "grad_norm": 0.0009706264827400446, | |
| "learning_rate": 2.2509164702801782e-05, | |
| "loss": 0.0004, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 1.659465828750982, | |
| "grad_norm": 3.204784661647864e-05, | |
| "learning_rate": 2.234550929562713e-05, | |
| "loss": 0.0003, | |
| "step": 8450 | |
| }, | |
| { | |
| "epoch": 1.6692851531814612, | |
| "grad_norm": 0.009143730625510216, | |
| "learning_rate": 2.2181853888452475e-05, | |
| "loss": 0.0003, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 1.6791044776119404, | |
| "grad_norm": 0.005089063663035631, | |
| "learning_rate": 2.2018198481277822e-05, | |
| "loss": 0.0004, | |
| "step": 8550 | |
| }, | |
| { | |
| "epoch": 1.6889238020424195, | |
| "grad_norm": 0.0249613169580698, | |
| "learning_rate": 2.1854543074103172e-05, | |
| "loss": 0.0003, | |
| "step": 8600 | |
| }, | |
| { | |
| "epoch": 1.6987431264728987, | |
| "grad_norm": 0.000527512573171407, | |
| "learning_rate": 2.1690887666928515e-05, | |
| "loss": 0.0003, | |
| "step": 8650 | |
| }, | |
| { | |
| "epoch": 1.7085624509033779, | |
| "grad_norm": 0.0011643558973446488, | |
| "learning_rate": 2.152723225975386e-05, | |
| "loss": 0.0001, | |
| "step": 8700 | |
| }, | |
| { | |
| "epoch": 1.718381775333857, | |
| "grad_norm": 0.0030941637232899666, | |
| "learning_rate": 2.1363576852579208e-05, | |
| "loss": 0.0002, | |
| "step": 8750 | |
| }, | |
| { | |
| "epoch": 1.7282010997643362, | |
| "grad_norm": 2.4613122150185518e-05, | |
| "learning_rate": 2.1199921445404558e-05, | |
| "loss": 0.0002, | |
| "step": 8800 | |
| }, | |
| { | |
| "epoch": 1.7380204241948154, | |
| "grad_norm": 0.009969648905098438, | |
| "learning_rate": 2.1036266038229905e-05, | |
| "loss": 0.0003, | |
| "step": 8850 | |
| }, | |
| { | |
| "epoch": 1.7478397486252946, | |
| "grad_norm": 0.0028629146981984377, | |
| "learning_rate": 2.087261063105525e-05, | |
| "loss": 0.0003, | |
| "step": 8900 | |
| }, | |
| { | |
| "epoch": 1.7576590730557737, | |
| "grad_norm": 0.005532170180231333, | |
| "learning_rate": 2.0708955223880598e-05, | |
| "loss": 0.0002, | |
| "step": 8950 | |
| }, | |
| { | |
| "epoch": 1.767478397486253, | |
| "grad_norm": 0.0001023332224576734, | |
| "learning_rate": 2.0545299816705944e-05, | |
| "loss": 0.0004, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 1.777297721916732, | |
| "grad_norm": 0.0016099640633910894, | |
| "learning_rate": 2.038164440953129e-05, | |
| "loss": 0.0002, | |
| "step": 9050 | |
| }, | |
| { | |
| "epoch": 1.7871170463472112, | |
| "grad_norm": 0.00755878584459424, | |
| "learning_rate": 2.0217989002356637e-05, | |
| "loss": 0.0003, | |
| "step": 9100 | |
| }, | |
| { | |
| "epoch": 1.7969363707776904, | |
| "grad_norm": 0.00131377880461514, | |
| "learning_rate": 2.0054333595181984e-05, | |
| "loss": 0.0003, | |
| "step": 9150 | |
| }, | |
| { | |
| "epoch": 1.8067556952081696, | |
| "grad_norm": 0.0066925594583153725, | |
| "learning_rate": 1.9890678188007334e-05, | |
| "loss": 0.0003, | |
| "step": 9200 | |
| }, | |
| { | |
| "epoch": 1.8165750196386488, | |
| "grad_norm": 0.0016435191500931978, | |
| "learning_rate": 1.972702278083268e-05, | |
| "loss": 0.0004, | |
| "step": 9250 | |
| }, | |
| { | |
| "epoch": 1.826394344069128, | |
| "grad_norm": 0.0049284519627690315, | |
| "learning_rate": 1.9563367373658027e-05, | |
| "loss": 0.0003, | |
| "step": 9300 | |
| }, | |
| { | |
| "epoch": 1.836213668499607, | |
| "grad_norm": 0.007083178497850895, | |
| "learning_rate": 1.9399711966483373e-05, | |
| "loss": 0.0002, | |
| "step": 9350 | |
| }, | |
| { | |
| "epoch": 1.8460329929300863, | |
| "grad_norm": 0.003799445927143097, | |
| "learning_rate": 1.923605655930872e-05, | |
| "loss": 0.0002, | |
| "step": 9400 | |
| }, | |
| { | |
| "epoch": 1.8558523173605654, | |
| "grad_norm": 8.943451575760264e-06, | |
| "learning_rate": 1.9072401152134066e-05, | |
| "loss": 0.0002, | |
| "step": 9450 | |
| }, | |
| { | |
| "epoch": 1.8656716417910446, | |
| "grad_norm": 4.004701168014435e-06, | |
| "learning_rate": 1.8908745744959413e-05, | |
| "loss": 0.0001, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 1.875490966221524, | |
| "grad_norm": 0.0016747256740927696, | |
| "learning_rate": 1.8745090337784763e-05, | |
| "loss": 0.0003, | |
| "step": 9550 | |
| }, | |
| { | |
| "epoch": 1.8853102906520032, | |
| "grad_norm": 0.00582944555208087, | |
| "learning_rate": 1.858143493061011e-05, | |
| "loss": 0.0003, | |
| "step": 9600 | |
| }, | |
| { | |
| "epoch": 1.8951296150824823, | |
| "grad_norm": 5.8452515077078715e-06, | |
| "learning_rate": 1.8417779523435456e-05, | |
| "loss": 0.0002, | |
| "step": 9650 | |
| }, | |
| { | |
| "epoch": 1.9049489395129615, | |
| "grad_norm": 2.22074459088617e-06, | |
| "learning_rate": 1.8254124116260802e-05, | |
| "loss": 0.0003, | |
| "step": 9700 | |
| }, | |
| { | |
| "epoch": 1.9147682639434407, | |
| "grad_norm": 0.0001783396874088794, | |
| "learning_rate": 1.809046870908615e-05, | |
| "loss": 0.0002, | |
| "step": 9750 | |
| }, | |
| { | |
| "epoch": 1.9245875883739199, | |
| "grad_norm": 9.459259308641776e-05, | |
| "learning_rate": 1.7926813301911495e-05, | |
| "loss": 0.0003, | |
| "step": 9800 | |
| }, | |
| { | |
| "epoch": 1.934406912804399, | |
| "grad_norm": 0.001501628546975553, | |
| "learning_rate": 1.7763157894736842e-05, | |
| "loss": 0.0002, | |
| "step": 9850 | |
| }, | |
| { | |
| "epoch": 1.9442262372348782, | |
| "grad_norm": 0.008209704421460629, | |
| "learning_rate": 1.759950248756219e-05, | |
| "loss": 0.0002, | |
| "step": 9900 | |
| }, | |
| { | |
| "epoch": 1.9540455616653576, | |
| "grad_norm": 2.8594949981197715e-05, | |
| "learning_rate": 1.743584708038754e-05, | |
| "loss": 0.0003, | |
| "step": 9950 | |
| }, | |
| { | |
| "epoch": 1.9638648860958368, | |
| "grad_norm": 0.0010227253660559654, | |
| "learning_rate": 1.7272191673212885e-05, | |
| "loss": 0.0002, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 1.973684210526316, | |
| "grad_norm": 1.06588067865232e-05, | |
| "learning_rate": 1.710853626603823e-05, | |
| "loss": 0.0003, | |
| "step": 10050 | |
| }, | |
| { | |
| "epoch": 1.983503534956795, | |
| "grad_norm": 0.0035111424513161182, | |
| "learning_rate": 1.6944880858863578e-05, | |
| "loss": 0.0003, | |
| "step": 10100 | |
| }, | |
| { | |
| "epoch": 1.9933228593872743, | |
| "grad_norm": 7.919372001197189e-05, | |
| "learning_rate": 1.6781225451688925e-05, | |
| "loss": 0.0002, | |
| "step": 10150 | |
| }, | |
| { | |
| "epoch": 2.0031421838177534, | |
| "grad_norm": 0.00608315784484148, | |
| "learning_rate": 1.661757004451427e-05, | |
| "loss": 0.0002, | |
| "step": 10200 | |
| }, | |
| { | |
| "epoch": 2.0129615082482326, | |
| "grad_norm": 6.566229330928763e-06, | |
| "learning_rate": 1.6453914637339618e-05, | |
| "loss": 0.0002, | |
| "step": 10250 | |
| }, | |
| { | |
| "epoch": 2.022780832678712, | |
| "grad_norm": 2.852589432222885e-06, | |
| "learning_rate": 1.6290259230164964e-05, | |
| "loss": 0.0001, | |
| "step": 10300 | |
| }, | |
| { | |
| "epoch": 2.032600157109191, | |
| "grad_norm": 2.3466156562790275e-05, | |
| "learning_rate": 1.6126603822990314e-05, | |
| "loss": 0.0002, | |
| "step": 10350 | |
| }, | |
| { | |
| "epoch": 2.04241948153967, | |
| "grad_norm": 0.006443875841796398, | |
| "learning_rate": 1.596294841581566e-05, | |
| "loss": 0.0004, | |
| "step": 10400 | |
| }, | |
| { | |
| "epoch": 2.0522388059701493, | |
| "grad_norm": 0.0006049483199603856, | |
| "learning_rate": 1.5799293008641007e-05, | |
| "loss": 0.0002, | |
| "step": 10450 | |
| }, | |
| { | |
| "epoch": 2.0620581304006285, | |
| "grad_norm": 0.0022505486849695444, | |
| "learning_rate": 1.563563760146635e-05, | |
| "loss": 0.0002, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 2.0718774548311076, | |
| "grad_norm": 4.880329652223736e-05, | |
| "learning_rate": 1.54719821942917e-05, | |
| "loss": 0.0002, | |
| "step": 10550 | |
| }, | |
| { | |
| "epoch": 2.081696779261587, | |
| "grad_norm": 0.006252319552004337, | |
| "learning_rate": 1.5308326787117047e-05, | |
| "loss": 0.0003, | |
| "step": 10600 | |
| }, | |
| { | |
| "epoch": 2.091516103692066, | |
| "grad_norm": 0.0014218160649761558, | |
| "learning_rate": 1.5144671379942393e-05, | |
| "loss": 0.0004, | |
| "step": 10650 | |
| }, | |
| { | |
| "epoch": 2.101335428122545, | |
| "grad_norm": 1.5717498172307387e-05, | |
| "learning_rate": 1.498101597276774e-05, | |
| "loss": 0.0003, | |
| "step": 10700 | |
| }, | |
| { | |
| "epoch": 2.1111547525530243, | |
| "grad_norm": 2.7057717488787603e-06, | |
| "learning_rate": 1.4817360565593088e-05, | |
| "loss": 0.0003, | |
| "step": 10750 | |
| }, | |
| { | |
| "epoch": 2.1209740769835035, | |
| "grad_norm": 0.003399658016860485, | |
| "learning_rate": 1.4653705158418435e-05, | |
| "loss": 0.0002, | |
| "step": 10800 | |
| }, | |
| { | |
| "epoch": 2.1307934014139827, | |
| "grad_norm": 0.0001567010476719588, | |
| "learning_rate": 1.4490049751243781e-05, | |
| "loss": 0.0002, | |
| "step": 10850 | |
| }, | |
| { | |
| "epoch": 2.140612725844462, | |
| "grad_norm": 1.0206712431681808e-05, | |
| "learning_rate": 1.432639434406913e-05, | |
| "loss": 0.0002, | |
| "step": 10900 | |
| }, | |
| { | |
| "epoch": 2.150432050274941, | |
| "grad_norm": 0.0023415617179125547, | |
| "learning_rate": 1.4162738936894476e-05, | |
| "loss": 0.0003, | |
| "step": 10950 | |
| }, | |
| { | |
| "epoch": 2.16025137470542, | |
| "grad_norm": 9.877283446257934e-05, | |
| "learning_rate": 1.3999083529719822e-05, | |
| "loss": 0.0002, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 2.1700706991358993, | |
| "grad_norm": 0.0009852441726252437, | |
| "learning_rate": 1.3835428122545169e-05, | |
| "loss": 0.0001, | |
| "step": 11050 | |
| }, | |
| { | |
| "epoch": 2.1798900235663785, | |
| "grad_norm": 2.5751623979886062e-05, | |
| "learning_rate": 1.3671772715370517e-05, | |
| "loss": 0.0002, | |
| "step": 11100 | |
| }, | |
| { | |
| "epoch": 2.1897093479968577, | |
| "grad_norm": 0.00046911800745874643, | |
| "learning_rate": 1.3508117308195864e-05, | |
| "loss": 0.0003, | |
| "step": 11150 | |
| }, | |
| { | |
| "epoch": 2.199528672427337, | |
| "grad_norm": 2.548624252085574e-05, | |
| "learning_rate": 1.334446190102121e-05, | |
| "loss": 0.0003, | |
| "step": 11200 | |
| }, | |
| { | |
| "epoch": 2.209347996857816, | |
| "grad_norm": 0.00012927035277243704, | |
| "learning_rate": 1.3180806493846557e-05, | |
| "loss": 0.0002, | |
| "step": 11250 | |
| }, | |
| { | |
| "epoch": 2.219167321288295, | |
| "grad_norm": 5.643380973197054e-06, | |
| "learning_rate": 1.3017151086671905e-05, | |
| "loss": 0.0002, | |
| "step": 11300 | |
| }, | |
| { | |
| "epoch": 2.2289866457187744, | |
| "grad_norm": 0.00517466152086854, | |
| "learning_rate": 1.2853495679497251e-05, | |
| "loss": 0.0003, | |
| "step": 11350 | |
| }, | |
| { | |
| "epoch": 2.2388059701492535, | |
| "grad_norm": 0.0007839313475415111, | |
| "learning_rate": 1.2689840272322598e-05, | |
| "loss": 0.0002, | |
| "step": 11400 | |
| }, | |
| { | |
| "epoch": 2.2486252945797327, | |
| "grad_norm": 0.003385524032637477, | |
| "learning_rate": 1.2526184865147945e-05, | |
| "loss": 0.0002, | |
| "step": 11450 | |
| }, | |
| { | |
| "epoch": 2.258444619010212, | |
| "grad_norm": 0.006902114022523165, | |
| "learning_rate": 1.2362529457973291e-05, | |
| "loss": 0.0002, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 2.268263943440691, | |
| "grad_norm": 0.002237598644569516, | |
| "learning_rate": 1.219887405079864e-05, | |
| "loss": 0.0002, | |
| "step": 11550 | |
| }, | |
| { | |
| "epoch": 2.27808326787117, | |
| "grad_norm": 2.9005691430938896e-06, | |
| "learning_rate": 1.2035218643623986e-05, | |
| "loss": 0.0002, | |
| "step": 11600 | |
| }, | |
| { | |
| "epoch": 2.28790259230165, | |
| "grad_norm": 0.0008452658075839281, | |
| "learning_rate": 1.1871563236449332e-05, | |
| "loss": 0.0002, | |
| "step": 11650 | |
| }, | |
| { | |
| "epoch": 2.297721916732129, | |
| "grad_norm": 0.0014033624902367592, | |
| "learning_rate": 1.1707907829274679e-05, | |
| "loss": 0.0002, | |
| "step": 11700 | |
| }, | |
| { | |
| "epoch": 2.307541241162608, | |
| "grad_norm": 0.001146289287135005, | |
| "learning_rate": 1.1544252422100027e-05, | |
| "loss": 0.0002, | |
| "step": 11750 | |
| }, | |
| { | |
| "epoch": 2.3173605655930873, | |
| "grad_norm": 6.133544957265258e-06, | |
| "learning_rate": 1.1380597014925374e-05, | |
| "loss": 0.0001, | |
| "step": 11800 | |
| }, | |
| { | |
| "epoch": 2.3271798900235665, | |
| "grad_norm": 0.0015589894028380513, | |
| "learning_rate": 1.121694160775072e-05, | |
| "loss": 0.0004, | |
| "step": 11850 | |
| }, | |
| { | |
| "epoch": 2.3369992144540457, | |
| "grad_norm": 0.00035342929186299443, | |
| "learning_rate": 1.1053286200576067e-05, | |
| "loss": 0.0002, | |
| "step": 11900 | |
| }, | |
| { | |
| "epoch": 2.346818538884525, | |
| "grad_norm": 1.368164703308139e-05, | |
| "learning_rate": 1.0889630793401415e-05, | |
| "loss": 0.0003, | |
| "step": 11950 | |
| }, | |
| { | |
| "epoch": 2.356637863315004, | |
| "grad_norm": 0.0021006593015044928, | |
| "learning_rate": 1.0725975386226761e-05, | |
| "loss": 0.0002, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 2.366457187745483, | |
| "grad_norm": 7.3638998401293065e-06, | |
| "learning_rate": 1.0562319979052108e-05, | |
| "loss": 0.0002, | |
| "step": 12050 | |
| }, | |
| { | |
| "epoch": 2.3762765121759624, | |
| "grad_norm": 0.006797213107347488, | |
| "learning_rate": 1.0398664571877455e-05, | |
| "loss": 0.0002, | |
| "step": 12100 | |
| }, | |
| { | |
| "epoch": 2.3860958366064415, | |
| "grad_norm": 0.002575602615252137, | |
| "learning_rate": 1.0235009164702803e-05, | |
| "loss": 0.0004, | |
| "step": 12150 | |
| }, | |
| { | |
| "epoch": 2.3959151610369207, | |
| "grad_norm": 0.0026493787299841642, | |
| "learning_rate": 1.007135375752815e-05, | |
| "loss": 0.0003, | |
| "step": 12200 | |
| }, | |
| { | |
| "epoch": 2.4057344854674, | |
| "grad_norm": 5.164716185390716e-06, | |
| "learning_rate": 9.907698350353496e-06, | |
| "loss": 0.0003, | |
| "step": 12250 | |
| }, | |
| { | |
| "epoch": 2.415553809897879, | |
| "grad_norm": 0.0033771705348044634, | |
| "learning_rate": 9.744042943178842e-06, | |
| "loss": 0.0003, | |
| "step": 12300 | |
| }, | |
| { | |
| "epoch": 2.425373134328358, | |
| "grad_norm": 3.062731593672652e-06, | |
| "learning_rate": 9.58038753600419e-06, | |
| "loss": 0.0003, | |
| "step": 12350 | |
| }, | |
| { | |
| "epoch": 2.4351924587588374, | |
| "grad_norm": 0.002900635125115514, | |
| "learning_rate": 9.416732128829537e-06, | |
| "loss": 0.0002, | |
| "step": 12400 | |
| }, | |
| { | |
| "epoch": 2.4450117831893166, | |
| "grad_norm": 0.005432957783341408, | |
| "learning_rate": 9.253076721654884e-06, | |
| "loss": 0.0003, | |
| "step": 12450 | |
| }, | |
| { | |
| "epoch": 2.4548311076197957, | |
| "grad_norm": 0.0060834819450974464, | |
| "learning_rate": 9.089421314480232e-06, | |
| "loss": 0.0002, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 2.464650432050275, | |
| "grad_norm": 0.0003888920182362199, | |
| "learning_rate": 8.925765907305578e-06, | |
| "loss": 0.0002, | |
| "step": 12550 | |
| }, | |
| { | |
| "epoch": 2.474469756480754, | |
| "grad_norm": 0.0008291418780572712, | |
| "learning_rate": 8.762110500130925e-06, | |
| "loss": 0.0002, | |
| "step": 12600 | |
| }, | |
| { | |
| "epoch": 2.4842890809112332, | |
| "grad_norm": 4.13081716033048e-06, | |
| "learning_rate": 8.598455092956271e-06, | |
| "loss": 0.0002, | |
| "step": 12650 | |
| }, | |
| { | |
| "epoch": 2.4941084053417124, | |
| "grad_norm": 0.0027352613396942616, | |
| "learning_rate": 8.43479968578162e-06, | |
| "loss": 0.0002, | |
| "step": 12700 | |
| }, | |
| { | |
| "epoch": 2.5039277297721916, | |
| "grad_norm": 0.004523648414760828, | |
| "learning_rate": 8.271144278606966e-06, | |
| "loss": 0.0003, | |
| "step": 12750 | |
| }, | |
| { | |
| "epoch": 2.5137470542026707, | |
| "grad_norm": 0.0014511903282254934, | |
| "learning_rate": 8.107488871432313e-06, | |
| "loss": 0.0002, | |
| "step": 12800 | |
| }, | |
| { | |
| "epoch": 2.52356637863315, | |
| "grad_norm": 0.001870101667009294, | |
| "learning_rate": 7.94383346425766e-06, | |
| "loss": 0.0003, | |
| "step": 12850 | |
| }, | |
| { | |
| "epoch": 2.533385703063629, | |
| "grad_norm": 1.8049751133730751e-06, | |
| "learning_rate": 7.780178057083008e-06, | |
| "loss": 0.0002, | |
| "step": 12900 | |
| }, | |
| { | |
| "epoch": 2.5432050274941083, | |
| "grad_norm": 2.639750891830772e-05, | |
| "learning_rate": 7.616522649908353e-06, | |
| "loss": 0.0002, | |
| "step": 12950 | |
| }, | |
| { | |
| "epoch": 2.5530243519245874, | |
| "grad_norm": 0.004915285389870405, | |
| "learning_rate": 7.452867242733701e-06, | |
| "loss": 0.0003, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 2.562843676355067, | |
| "grad_norm": 0.0013427536468952894, | |
| "learning_rate": 7.289211835559047e-06, | |
| "loss": 0.0002, | |
| "step": 13050 | |
| }, | |
| { | |
| "epoch": 2.572663000785546, | |
| "grad_norm": 0.006909032352268696, | |
| "learning_rate": 7.1255564283843945e-06, | |
| "loss": 0.0002, | |
| "step": 13100 | |
| }, | |
| { | |
| "epoch": 2.5824823252160254, | |
| "grad_norm": 0.004452712833881378, | |
| "learning_rate": 6.961901021209741e-06, | |
| "loss": 0.0002, | |
| "step": 13150 | |
| }, | |
| { | |
| "epoch": 2.5923016496465046, | |
| "grad_norm": 1.5004067108748131e-06, | |
| "learning_rate": 6.798245614035088e-06, | |
| "loss": 0.0002, | |
| "step": 13200 | |
| }, | |
| { | |
| "epoch": 2.6021209740769837, | |
| "grad_norm": 9.272382158087566e-06, | |
| "learning_rate": 6.634590206860435e-06, | |
| "loss": 0.0002, | |
| "step": 13250 | |
| }, | |
| { | |
| "epoch": 2.611940298507463, | |
| "grad_norm": 0.004868045449256897, | |
| "learning_rate": 6.470934799685782e-06, | |
| "loss": 0.0002, | |
| "step": 13300 | |
| }, | |
| { | |
| "epoch": 2.621759622937942, | |
| "grad_norm": 6.646020665357355e-06, | |
| "learning_rate": 6.307279392511129e-06, | |
| "loss": 0.0002, | |
| "step": 13350 | |
| }, | |
| { | |
| "epoch": 2.6315789473684212, | |
| "grad_norm": 0.0015008870977908373, | |
| "learning_rate": 6.143623985336476e-06, | |
| "loss": 0.0003, | |
| "step": 13400 | |
| }, | |
| { | |
| "epoch": 2.6413982717989004, | |
| "grad_norm": 0.0031040972098708153, | |
| "learning_rate": 5.979968578161823e-06, | |
| "loss": 0.0003, | |
| "step": 13450 | |
| }, | |
| { | |
| "epoch": 2.6512175962293796, | |
| "grad_norm": 0.0015553946141153574, | |
| "learning_rate": 5.81631317098717e-06, | |
| "loss": 0.0003, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 2.6610369206598588, | |
| "grad_norm": 0.0015114744892343879, | |
| "learning_rate": 5.652657763812517e-06, | |
| "loss": 0.0002, | |
| "step": 13550 | |
| }, | |
| { | |
| "epoch": 2.670856245090338, | |
| "grad_norm": 0.006511743646115065, | |
| "learning_rate": 5.489002356637864e-06, | |
| "loss": 0.0002, | |
| "step": 13600 | |
| }, | |
| { | |
| "epoch": 2.680675569520817, | |
| "grad_norm": 3.6607066249416675e-06, | |
| "learning_rate": 5.3253469494632106e-06, | |
| "loss": 0.0002, | |
| "step": 13650 | |
| }, | |
| { | |
| "epoch": 2.6904948939512963, | |
| "grad_norm": 0.0008585217874497175, | |
| "learning_rate": 5.161691542288558e-06, | |
| "loss": 0.0003, | |
| "step": 13700 | |
| }, | |
| { | |
| "epoch": 2.7003142183817754, | |
| "grad_norm": 0.0005489959730766714, | |
| "learning_rate": 4.9980361351139045e-06, | |
| "loss": 0.0003, | |
| "step": 13750 | |
| }, | |
| { | |
| "epoch": 2.7101335428122546, | |
| "grad_norm": 3.3158432870550314e-06, | |
| "learning_rate": 4.834380727939251e-06, | |
| "loss": 0.0002, | |
| "step": 13800 | |
| }, | |
| { | |
| "epoch": 2.7199528672427338, | |
| "grad_norm": 0.0030354801565408707, | |
| "learning_rate": 4.670725320764598e-06, | |
| "loss": 0.0003, | |
| "step": 13850 | |
| }, | |
| { | |
| "epoch": 2.729772191673213, | |
| "grad_norm": 3.2985217330860905e-06, | |
| "learning_rate": 4.507069913589945e-06, | |
| "loss": 0.0002, | |
| "step": 13900 | |
| }, | |
| { | |
| "epoch": 2.739591516103692, | |
| "grad_norm": 7.288464257726446e-05, | |
| "learning_rate": 4.343414506415292e-06, | |
| "loss": 0.0002, | |
| "step": 13950 | |
| }, | |
| { | |
| "epoch": 2.7494108405341713, | |
| "grad_norm": 0.0043890466913580894, | |
| "learning_rate": 4.179759099240639e-06, | |
| "loss": 0.0003, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 2.7592301649646505, | |
| "grad_norm": 0.0019504046067595482, | |
| "learning_rate": 4.016103692065986e-06, | |
| "loss": 0.0002, | |
| "step": 14050 | |
| }, | |
| { | |
| "epoch": 2.7690494893951296, | |
| "grad_norm": 0.0015608868561685085, | |
| "learning_rate": 3.852448284891333e-06, | |
| "loss": 0.0003, | |
| "step": 14100 | |
| }, | |
| { | |
| "epoch": 2.778868813825609, | |
| "grad_norm": 0.002886673668399453, | |
| "learning_rate": 3.6887928777166797e-06, | |
| "loss": 0.0002, | |
| "step": 14150 | |
| }, | |
| { | |
| "epoch": 2.788688138256088, | |
| "grad_norm": 0.0009562448249198496, | |
| "learning_rate": 3.5251374705420267e-06, | |
| "loss": 0.0003, | |
| "step": 14200 | |
| }, | |
| { | |
| "epoch": 2.798507462686567, | |
| "grad_norm": 2.2277029074757593e-06, | |
| "learning_rate": 3.3614820633673736e-06, | |
| "loss": 0.0003, | |
| "step": 14250 | |
| }, | |
| { | |
| "epoch": 2.8083267871170463, | |
| "grad_norm": 0.0023437589406967163, | |
| "learning_rate": 3.1978266561927206e-06, | |
| "loss": 0.0002, | |
| "step": 14300 | |
| }, | |
| { | |
| "epoch": 2.8181461115475255, | |
| "grad_norm": 1.8503330920793815e-06, | |
| "learning_rate": 3.034171249018068e-06, | |
| "loss": 0.0002, | |
| "step": 14350 | |
| }, | |
| { | |
| "epoch": 2.8279654359780046, | |
| "grad_norm": 4.966601863998221e-06, | |
| "learning_rate": 2.870515841843415e-06, | |
| "loss": 0.0002, | |
| "step": 14400 | |
| }, | |
| { | |
| "epoch": 2.837784760408484, | |
| "grad_norm": 1.3409814982878743e-06, | |
| "learning_rate": 2.706860434668762e-06, | |
| "loss": 0.0002, | |
| "step": 14450 | |
| }, | |
| { | |
| "epoch": 2.847604084838963, | |
| "grad_norm": 6.203641532920301e-05, | |
| "learning_rate": 2.543205027494109e-06, | |
| "loss": 0.0002, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 2.857423409269442, | |
| "grad_norm": 0.0002616413403302431, | |
| "learning_rate": 2.3795496203194553e-06, | |
| "loss": 0.0003, | |
| "step": 14550 | |
| }, | |
| { | |
| "epoch": 2.8672427336999213, | |
| "grad_norm": 6.303464488155441e-07, | |
| "learning_rate": 2.2158942131448023e-06, | |
| "loss": 0.0002, | |
| "step": 14600 | |
| }, | |
| { | |
| "epoch": 2.8770620581304005, | |
| "grad_norm": 0.0017262320034205914, | |
| "learning_rate": 2.0522388059701493e-06, | |
| "loss": 0.0003, | |
| "step": 14650 | |
| }, | |
| { | |
| "epoch": 2.8868813825608797, | |
| "grad_norm": 0.0033360267989337444, | |
| "learning_rate": 1.8885833987954962e-06, | |
| "loss": 0.0003, | |
| "step": 14700 | |
| }, | |
| { | |
| "epoch": 2.896700706991359, | |
| "grad_norm": 2.7147841592523037e-06, | |
| "learning_rate": 1.7249279916208432e-06, | |
| "loss": 0.0002, | |
| "step": 14750 | |
| }, | |
| { | |
| "epoch": 2.906520031421838, | |
| "grad_norm": 0.0019037205493077636, | |
| "learning_rate": 1.5612725844461901e-06, | |
| "loss": 0.0002, | |
| "step": 14800 | |
| }, | |
| { | |
| "epoch": 2.916339355852317, | |
| "grad_norm": 0.00883456040173769, | |
| "learning_rate": 1.397617177271537e-06, | |
| "loss": 0.0003, | |
| "step": 14850 | |
| }, | |
| { | |
| "epoch": 2.9261586802827964, | |
| "grad_norm": 0.000769551086705178, | |
| "learning_rate": 1.233961770096884e-06, | |
| "loss": 0.0003, | |
| "step": 14900 | |
| }, | |
| { | |
| "epoch": 2.9359780047132755, | |
| "grad_norm": 1.074038755177753e-05, | |
| "learning_rate": 1.070306362922231e-06, | |
| "loss": 0.0002, | |
| "step": 14950 | |
| }, | |
| { | |
| "epoch": 2.9457973291437547, | |
| "grad_norm": 0.0046028513461351395, | |
| "learning_rate": 9.066509557475779e-07, | |
| "loss": 0.0003, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 2.955616653574234, | |
| "grad_norm": 0.0024120802991092205, | |
| "learning_rate": 7.429955485729249e-07, | |
| "loss": 0.0002, | |
| "step": 15050 | |
| }, | |
| { | |
| "epoch": 2.965435978004713, | |
| "grad_norm": 0.0019912375137209892, | |
| "learning_rate": 5.793401413982719e-07, | |
| "loss": 0.0003, | |
| "step": 15100 | |
| }, | |
| { | |
| "epoch": 2.975255302435192, | |
| "grad_norm": 0.0013518768828362226, | |
| "learning_rate": 4.1568473422361876e-07, | |
| "loss": 0.0003, | |
| "step": 15150 | |
| }, | |
| { | |
| "epoch": 2.9850746268656714, | |
| "grad_norm": 5.804280954180285e-05, | |
| "learning_rate": 2.520293270489657e-07, | |
| "loss": 0.0003, | |
| "step": 15200 | |
| }, | |
| { | |
| "epoch": 2.994893951296151, | |
| "grad_norm": 0.0008266063523478806, | |
| "learning_rate": 8.837391987431265e-08, | |
| "loss": 0.0002, | |
| "step": 15250 | |
| } | |
| ], | |
| "logging_steps": 50, | |
| "max_steps": 15276, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 3.72042823237632e+16, | |
| "train_batch_size": 4, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |