| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.954887218045113, |
| "eval_steps": 500, |
| "global_step": 260, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.015037593984962405, |
| "grad_norm": 0.4758685231208801, |
| "learning_rate": 7.407407407407407e-07, |
| "loss": 1.9719613790512085, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.03007518796992481, |
| "grad_norm": 1.2873609066009521, |
| "learning_rate": 2.222222222222222e-06, |
| "loss": 2.2840397357940674, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.045112781954887216, |
| "grad_norm": 0.4226410984992981, |
| "learning_rate": 3.7037037037037037e-06, |
| "loss": 2.0337564945220947, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.06015037593984962, |
| "grad_norm": 0.20389820635318756, |
| "learning_rate": 5.185185185185185e-06, |
| "loss": 1.938320517539978, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.07518796992481203, |
| "grad_norm": 1.1121097803115845, |
| "learning_rate": 6.666666666666667e-06, |
| "loss": 1.9521509408950806, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.09022556390977443, |
| "grad_norm": 1.0452852249145508, |
| "learning_rate": 8.148148148148148e-06, |
| "loss": 2.3038036823272705, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.10526315789473684, |
| "grad_norm": 0.17127464711666107, |
| "learning_rate": 9.62962962962963e-06, |
| "loss": 1.9031141996383667, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.12030075187969924, |
| "grad_norm": 1.0378482341766357, |
| "learning_rate": 1.1111111111111113e-05, |
| "loss": 2.545740842819214, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.13533834586466165, |
| "grad_norm": 1.3770403861999512, |
| "learning_rate": 1.2592592592592593e-05, |
| "loss": 3.2497336864471436, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.15037593984962405, |
| "grad_norm": 0.6125630140304565, |
| "learning_rate": 1.4074074074074075e-05, |
| "loss": 1.7776570320129395, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.16541353383458646, |
| "grad_norm": 0.6343645453453064, |
| "learning_rate": 1.555555555555556e-05, |
| "loss": 1.9716706275939941, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.18045112781954886, |
| "grad_norm": 0.6397086977958679, |
| "learning_rate": 1.7037037037037038e-05, |
| "loss": 1.9381436109542847, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.19548872180451127, |
| "grad_norm": 1.0100289583206177, |
| "learning_rate": 1.851851851851852e-05, |
| "loss": 1.9333921670913696, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.21052631578947367, |
| "grad_norm": 0.6244422793388367, |
| "learning_rate": 2e-05, |
| "loss": 1.7134058475494385, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.22556390977443608, |
| "grad_norm": 1.4532567262649536, |
| "learning_rate": 1.9994079505294254e-05, |
| "loss": 2.0251219272613525, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.24060150375939848, |
| "grad_norm": 0.5191428661346436, |
| "learning_rate": 1.9976326268767035e-05, |
| "loss": 1.578896164894104, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.2556390977443609, |
| "grad_norm": 0.2822546064853668, |
| "learning_rate": 1.994676502169901e-05, |
| "loss": 1.5274699926376343, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.2706766917293233, |
| "grad_norm": 0.26378244161605835, |
| "learning_rate": 1.9905436944609424e-05, |
| "loss": 1.510546326637268, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.2857142857142857, |
| "grad_norm": 2.3758437633514404, |
| "learning_rate": 1.9852399609889242e-05, |
| "loss": 2.110567331314087, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.3007518796992481, |
| "grad_norm": 0.419758677482605, |
| "learning_rate": 1.9787726901599502e-05, |
| "loss": 1.1862285137176514, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.3157894736842105, |
| "grad_norm": 0.24496647715568542, |
| "learning_rate": 1.9711508912546566e-05, |
| "loss": 1.440342903137207, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.3308270676691729, |
| "grad_norm": 0.3002743721008301, |
| "learning_rate": 1.9623851818777652e-05, |
| "loss": 1.0962785482406616, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.3458646616541353, |
| "grad_norm": 0.11880263686180115, |
| "learning_rate": 1.9524877731671482e-05, |
| "loss": 1.493391513824463, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.3609022556390977, |
| "grad_norm": 1.7871476411819458, |
| "learning_rate": 1.941472452783011e-05, |
| "loss": 1.2433573007583618, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.37593984962406013, |
| "grad_norm": 0.2921432852745056, |
| "learning_rate": 1.9293545657008865e-05, |
| "loss": 1.1390293836593628, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.39097744360902253, |
| "grad_norm": 0.12673601508140564, |
| "learning_rate": 1.9161509928352017e-05, |
| "loss": 1.0903499126434326, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.40601503759398494, |
| "grad_norm": 0.1648157387971878, |
| "learning_rate": 1.901880127523192e-05, |
| "loss": 1.1544872522354126, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.42105263157894735, |
| "grad_norm": 0.08962542563676834, |
| "learning_rate": 1.886561849901922e-05, |
| "loss": 1.0822100639343262, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.43609022556390975, |
| "grad_norm": 0.724937379360199, |
| "learning_rate": 1.870217499214111e-05, |
| "loss": 1.0416043996810913, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.45112781954887216, |
| "grad_norm": 0.19841401278972626, |
| "learning_rate": 1.8528698440813397e-05, |
| "loss": 1.1815505027770996, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.46616541353383456, |
| "grad_norm": 0.2616511285305023, |
| "learning_rate": 1.8345430507860478e-05, |
| "loss": 0.9047210812568665, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.48120300751879697, |
| "grad_norm": 0.22353343665599823, |
| "learning_rate": 1.8152626496065128e-05, |
| "loss": 1.201892614364624, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.49624060150375937, |
| "grad_norm": 0.6634237766265869, |
| "learning_rate": 1.7950554992517014e-05, |
| "loss": 1.1795772314071655, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.5112781954887218, |
| "grad_norm": 0.13962750136852264, |
| "learning_rate": 1.7739497494455412e-05, |
| "loss": 1.3534270524978638, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.5263157894736842, |
| "grad_norm": 0.665572464466095, |
| "learning_rate": 1.7519748017127354e-05, |
| "loss": 1.125345230102539, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.5413533834586466, |
| "grad_norm": 0.179820716381073, |
| "learning_rate": 1.729161268420746e-05, |
| "loss": 0.8090606331825256, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.556390977443609, |
| "grad_norm": 0.11726067215204239, |
| "learning_rate": 1.7055409301350013e-05, |
| "loss": 0.9213717579841614, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.5714285714285714, |
| "grad_norm": 0.1627010852098465, |
| "learning_rate": 1.681146691346742e-05, |
| "loss": 1.0821505784988403, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.5864661654135338, |
| "grad_norm": 0.1170893982052803, |
| "learning_rate": 1.6560125346351663e-05, |
| "loss": 1.300316333770752, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.6015037593984962, |
| "grad_norm": 0.5334263443946838, |
| "learning_rate": 1.6301734733277442e-05, |
| "loss": 0.6484270691871643, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.6165413533834586, |
| "grad_norm": 0.101778045296669, |
| "learning_rate": 1.603665502724633e-05, |
| "loss": 1.2577356100082397, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.631578947368421, |
| "grad_norm": 0.1670590043067932, |
| "learning_rate": 1.576525549955156e-05, |
| "loss": 1.3543009757995605, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.6466165413533834, |
| "grad_norm": 0.20087628066539764, |
| "learning_rate": 1.548791422536178e-05, |
| "loss": 0.9327285885810852, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.6616541353383458, |
| "grad_norm": 0.23011328279972076, |
| "learning_rate": 1.5205017557040656e-05, |
| "loss": 1.1237722635269165, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.6766917293233082, |
| "grad_norm": 0.159201517701149, |
| "learning_rate": 1.4916959585935732e-05, |
| "loss": 1.1964070796966553, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.6917293233082706, |
| "grad_norm": 0.44444212317466736, |
| "learning_rate": 1.4624141593386507e-05, |
| "loss": 1.0308165550231934, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.706766917293233, |
| "grad_norm": 0.22338902950286865, |
| "learning_rate": 1.4326971491716427e-05, |
| "loss": 0.9982426762580872, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.7218045112781954, |
| "grad_norm": 0.2096806913614273, |
| "learning_rate": 1.402586325598752e-05, |
| "loss": 1.3940727710723877, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.7368421052631579, |
| "grad_norm": 0.21466873586177826, |
| "learning_rate": 1.3721236347309314e-05, |
| "loss": 1.1801196336746216, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.7518796992481203, |
| "grad_norm": 0.30590999126434326, |
| "learning_rate": 1.3413515128505363e-05, |
| "loss": 0.6430416703224182, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.7669172932330827, |
| "grad_norm": 0.13401509821414948, |
| "learning_rate": 1.3103128272951363e-05, |
| "loss": 1.3783133029937744, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.7819548872180451, |
| "grad_norm": 0.12506185472011566, |
| "learning_rate": 1.2790508167408509e-05, |
| "loss": 0.9889219403266907, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.7969924812030075, |
| "grad_norm": 0.22044184803962708, |
| "learning_rate": 1.2476090309683804e-05, |
| "loss": 0.6871194243431091, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.8120300751879699, |
| "grad_norm": 0.18642327189445496, |
| "learning_rate": 1.2160312701956553e-05, |
| "loss": 1.0068978071212769, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.8270676691729323, |
| "grad_norm": 0.5397107601165771, |
| "learning_rate": 1.1843615240616111e-05, |
| "loss": 0.8988245725631714, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.8421052631578947, |
| "grad_norm": 0.23860491812229156, |
| "learning_rate": 1.1526439103460874e-05, |
| "loss": 0.7688661813735962, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.8571428571428571, |
| "grad_norm": 0.1502920240163803, |
| "learning_rate": 1.120922613511221e-05, |
| "loss": 1.0024021863937378, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.8721804511278195, |
| "grad_norm": 0.16608496010303497, |
| "learning_rate": 1.0892418231499461e-05, |
| "loss": 1.4191375970840454, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.8872180451127819, |
| "grad_norm": 0.17546993494033813, |
| "learning_rate": 1.057645672427347e-05, |
| "loss": 1.073761224746704, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.9022556390977443, |
| "grad_norm": 5.0469841957092285, |
| "learning_rate": 1.0261781766006174e-05, |
| "loss": 0.9056495428085327, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.9172932330827067, |
| "grad_norm": 0.36213457584381104, |
| "learning_rate": 9.948831717032738e-06, |
| "loss": 0.8672894835472107, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.9323308270676691, |
| "grad_norm": 0.4218904972076416, |
| "learning_rate": 9.638042534790373e-06, |
| "loss": 0.827739417552948, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.9473684210526315, |
| "grad_norm": 0.0983240008354187, |
| "learning_rate": 9.329847166504497e-06, |
| "loss": 0.7799423933029175, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.9624060150375939, |
| "grad_norm": 0.14850644767284393, |
| "learning_rate": 9.024674946068357e-06, |
| "loss": 1.0653791427612305, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.9774436090225563, |
| "grad_norm": 0.27504855394363403, |
| "learning_rate": 8.722950995956172e-06, |
| "loss": 0.9135006666183472, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.9924812030075187, |
| "grad_norm": 0.27222368121147156, |
| "learning_rate": 8.425095635003053e-06, |
| "loss": 1.0402815341949463, |
| "step": 132 |
| }, |
| { |
| "epoch": 1.0075187969924813, |
| "grad_norm": 0.1408149003982544, |
| "learning_rate": 8.13152379287667e-06, |
| "loss": 0.7629735469818115, |
| "step": 134 |
| }, |
| { |
| "epoch": 1.0225563909774436, |
| "grad_norm": 0.15156933665275574, |
| "learning_rate": 7.842644432056336e-06, |
| "loss": 0.9513287544250488, |
| "step": 136 |
| }, |
| { |
| "epoch": 1.037593984962406, |
| "grad_norm": 0.9048015475273132, |
| "learning_rate": 7.55885997812472e-06, |
| "loss": 0.6946667432785034, |
| "step": 138 |
| }, |
| { |
| "epoch": 1.0526315789473684, |
| "grad_norm": 0.24624674022197723, |
| "learning_rate": 7.280565759165833e-06, |
| "loss": 0.7876754403114319, |
| "step": 140 |
| }, |
| { |
| "epoch": 1.0676691729323309, |
| "grad_norm": 0.09362401068210602, |
| "learning_rate": 7.008149455050264e-06, |
| "loss": 0.8406305909156799, |
| "step": 142 |
| }, |
| { |
| "epoch": 1.0827067669172932, |
| "grad_norm": 0.17641493678092957, |
| "learning_rate": 6.741990557374784e-06, |
| "loss": 1.0424906015396118, |
| "step": 144 |
| }, |
| { |
| "epoch": 1.0977443609022557, |
| "grad_norm": 0.12022742629051208, |
| "learning_rate": 6.4824598408087015e-06, |
| "loss": 1.0722497701644897, |
| "step": 146 |
| }, |
| { |
| "epoch": 1.112781954887218, |
| "grad_norm": 0.08780567348003387, |
| "learning_rate": 6.229918846583414e-06, |
| "loss": 1.0312786102294922, |
| "step": 148 |
| }, |
| { |
| "epoch": 1.1278195488721805, |
| "grad_norm": 0.3304075598716736, |
| "learning_rate": 5.984719378844628e-06, |
| "loss": 1.0075746774673462, |
| "step": 150 |
| }, |
| { |
| "epoch": 1.1428571428571428, |
| "grad_norm": 0.10900267213582993, |
| "learning_rate": 5.7472030145689604e-06, |
| "loss": 1.0427347421646118, |
| "step": 152 |
| }, |
| { |
| "epoch": 1.1578947368421053, |
| "grad_norm": 0.09693319350481033, |
| "learning_rate": 5.51770062772752e-06, |
| "loss": 1.0927042961120605, |
| "step": 154 |
| }, |
| { |
| "epoch": 1.1729323308270676, |
| "grad_norm": 0.12883000075817108, |
| "learning_rate": 5.296531928359431e-06, |
| "loss": 0.9705473780632019, |
| "step": 156 |
| }, |
| { |
| "epoch": 1.1879699248120301, |
| "grad_norm": 0.11416517943143845, |
| "learning_rate": 5.084005017197318e-06, |
| "loss": 1.0172467231750488, |
| "step": 158 |
| }, |
| { |
| "epoch": 1.2030075187969924, |
| "grad_norm": 0.10797861963510513, |
| "learning_rate": 4.8804159564652665e-06, |
| "loss": 0.5409541726112366, |
| "step": 160 |
| }, |
| { |
| "epoch": 1.218045112781955, |
| "grad_norm": 0.24861538410186768, |
| "learning_rate": 4.686048357447095e-06, |
| "loss": 0.9430153965950012, |
| "step": 162 |
| }, |
| { |
| "epoch": 1.2330827067669172, |
| "grad_norm": 0.14524255692958832, |
| "learning_rate": 4.501172985399498e-06, |
| "loss": 1.178081750869751, |
| "step": 164 |
| }, |
| { |
| "epoch": 1.2481203007518797, |
| "grad_norm": 0.2940176725387573, |
| "learning_rate": 4.326047382360457e-06, |
| "loss": 0.8844167590141296, |
| "step": 166 |
| }, |
| { |
| "epoch": 1.263157894736842, |
| "grad_norm": 0.1306038349866867, |
| "learning_rate": 4.160915508378359e-06, |
| "loss": 0.7063813209533691, |
| "step": 168 |
| }, |
| { |
| "epoch": 1.2781954887218046, |
| "grad_norm": 0.15677547454833984, |
| "learning_rate": 4.006007401661596e-06, |
| "loss": 0.7762787938117981, |
| "step": 170 |
| }, |
| { |
| "epoch": 1.2932330827067668, |
| "grad_norm": 0.14480236172676086, |
| "learning_rate": 3.861538858122092e-06, |
| "loss": 0.937335193157196, |
| "step": 172 |
| }, |
| { |
| "epoch": 1.3082706766917294, |
| "grad_norm": 0.15142542123794556, |
| "learning_rate": 3.727711130759182e-06, |
| "loss": 0.9747655987739563, |
| "step": 174 |
| }, |
| { |
| "epoch": 1.3233082706766917, |
| "grad_norm": 0.18726477026939392, |
| "learning_rate": 3.6047106493025923e-06, |
| "loss": 0.7746855020523071, |
| "step": 176 |
| }, |
| { |
| "epoch": 1.3383458646616542, |
| "grad_norm": 2.189209461212158, |
| "learning_rate": 3.492708760505093e-06, |
| "loss": 0.6926825642585754, |
| "step": 178 |
| }, |
| { |
| "epoch": 1.3533834586466165, |
| "grad_norm": 0.12323262542486191, |
| "learning_rate": 3.3918614894466045e-06, |
| "loss": 1.2502151727676392, |
| "step": 180 |
| }, |
| { |
| "epoch": 1.368421052631579, |
| "grad_norm": 0.1458672434091568, |
| "learning_rate": 3.3023093221822746e-06, |
| "loss": 0.9960780143737793, |
| "step": 182 |
| }, |
| { |
| "epoch": 1.3834586466165413, |
| "grad_norm": 0.1072113886475563, |
| "learning_rate": 3.224177010037323e-06, |
| "loss": 0.8326720595359802, |
| "step": 184 |
| }, |
| { |
| "epoch": 1.3984962406015038, |
| "grad_norm": 0.09054487943649292, |
| "learning_rate": 3.1575733958212563e-06, |
| "loss": 1.1920455694198608, |
| "step": 186 |
| }, |
| { |
| "epoch": 1.413533834586466, |
| "grad_norm": 0.13850678503513336, |
| "learning_rate": 3.1025912622035687e-06, |
| "loss": 0.5979896783828735, |
| "step": 188 |
| }, |
| { |
| "epoch": 1.4285714285714286, |
| "grad_norm": 0.2317809760570526, |
| "learning_rate": 3.0593072024621396e-06, |
| "loss": 0.969947099685669, |
| "step": 190 |
| }, |
| { |
| "epoch": 1.443609022556391, |
| "grad_norm": 0.29049012064933777, |
| "learning_rate": 3.0277815137843917e-06, |
| "loss": 0.7010709643363953, |
| "step": 192 |
| }, |
| { |
| "epoch": 1.4586466165413534, |
| "grad_norm": 0.17280922830104828, |
| "learning_rate": 3.008058113269836e-06, |
| "loss": 0.7660905718803406, |
| "step": 194 |
| }, |
| { |
| "epoch": 1.4736842105263157, |
| "grad_norm": 0.12446308135986328, |
| "learning_rate": 3.0001644767510154e-06, |
| "loss": 0.9958880543708801, |
| "step": 196 |
| }, |
| { |
| "epoch": 1.4887218045112782, |
| "grad_norm": 0.1736506223678589, |
| "learning_rate": 3.0041116005181016e-06, |
| "loss": 0.9368724226951599, |
| "step": 198 |
| }, |
| { |
| "epoch": 1.5037593984962405, |
| "grad_norm": 0.15870751440525055, |
| "learning_rate": 3.0198939860004202e-06, |
| "loss": 0.9826479554176331, |
| "step": 200 |
| }, |
| { |
| "epoch": 1.518796992481203, |
| "grad_norm": 0.15492001175880432, |
| "learning_rate": 3.0474896474262772e-06, |
| "loss": 1.3954254388809204, |
| "step": 202 |
| }, |
| { |
| "epoch": 1.5338345864661656, |
| "grad_norm": 0.1334741860628128, |
| "learning_rate": 3.08686014245041e-06, |
| "loss": 1.0462982654571533, |
| "step": 204 |
| }, |
| { |
| "epoch": 1.5488721804511278, |
| "grad_norm": 0.26873454451560974, |
| "learning_rate": 3.1379506257063825e-06, |
| "loss": 0.8277729153633118, |
| "step": 206 |
| }, |
| { |
| "epoch": 1.5639097744360901, |
| "grad_norm": 0.16168057918548584, |
| "learning_rate": 3.20068992520934e-06, |
| "loss": 0.985795259475708, |
| "step": 208 |
| }, |
| { |
| "epoch": 1.5789473684210527, |
| "grad_norm": 0.1431337147951126, |
| "learning_rate": 3.274990641502683e-06, |
| "loss": 0.908364474773407, |
| "step": 210 |
| }, |
| { |
| "epoch": 1.5939849624060152, |
| "grad_norm": 0.2742830514907837, |
| "learning_rate": 3.3607492694105405e-06, |
| "loss": 0.6370347142219543, |
| "step": 212 |
| }, |
| { |
| "epoch": 1.6090225563909775, |
| "grad_norm": 0.14003214240074158, |
| "learning_rate": 3.457846342226442e-06, |
| "loss": 0.9584491848945618, |
| "step": 214 |
| }, |
| { |
| "epoch": 1.6240601503759398, |
| "grad_norm": 0.29483214020729065, |
| "learning_rate": 3.5661465981373183e-06, |
| "loss": 1.0774601697921753, |
| "step": 216 |
| }, |
| { |
| "epoch": 1.6390977443609023, |
| "grad_norm": 0.18227490782737732, |
| "learning_rate": 3.6854991686509906e-06, |
| "loss": 0.7987947463989258, |
| "step": 218 |
| }, |
| { |
| "epoch": 1.6541353383458648, |
| "grad_norm": 0.20908379554748535, |
| "learning_rate": 3.815737788764674e-06, |
| "loss": 0.8292507529258728, |
| "step": 220 |
| }, |
| { |
| "epoch": 1.669172932330827, |
| "grad_norm": 0.23883360624313354, |
| "learning_rate": 3.956681028581693e-06, |
| "loss": 0.9586336612701416, |
| "step": 222 |
| }, |
| { |
| "epoch": 1.6842105263157894, |
| "grad_norm": 0.3358386158943176, |
| "learning_rate": 4.108132546053779e-06, |
| "loss": 0.8150299191474915, |
| "step": 224 |
| }, |
| { |
| "epoch": 1.699248120300752, |
| "grad_norm": 0.298909068107605, |
| "learning_rate": 4.269881360496842e-06, |
| "loss": 1.0946331024169922, |
| "step": 226 |
| }, |
| { |
| "epoch": 1.7142857142857144, |
| "grad_norm": 0.24479779601097107, |
| "learning_rate": 4.441702146499222e-06, |
| "loss": 0.9061790108680725, |
| "step": 228 |
| }, |
| { |
| "epoch": 1.7293233082706767, |
| "grad_norm": 0.2154252976179123, |
| "learning_rate": 4.623355547812946e-06, |
| "loss": 1.0011441707611084, |
| "step": 230 |
| }, |
| { |
| "epoch": 1.744360902255639, |
| "grad_norm": 0.1481187492609024, |
| "learning_rate": 4.814588510790782e-06, |
| "loss": 1.1533119678497314, |
| "step": 232 |
| }, |
| { |
| "epoch": 1.7593984962406015, |
| "grad_norm": 0.10200405865907669, |
| "learning_rate": 5.01513463690452e-06, |
| "loss": 1.1073795557022095, |
| "step": 234 |
| }, |
| { |
| "epoch": 1.774436090225564, |
| "grad_norm": 0.19221612811088562, |
| "learning_rate": 5.224714553853478e-06, |
| "loss": 1.148139476776123, |
| "step": 236 |
| }, |
| { |
| "epoch": 1.7894736842105263, |
| "grad_norm": 0.1456657350063324, |
| "learning_rate": 5.443036304746191e-06, |
| "loss": 0.6271846294403076, |
| "step": 238 |
| }, |
| { |
| "epoch": 1.8045112781954886, |
| "grad_norm": 0.3602541983127594, |
| "learning_rate": 5.66979575481317e-06, |
| "loss": 1.0237879753112793, |
| "step": 240 |
| }, |
| { |
| "epoch": 1.8195488721804511, |
| "grad_norm": 0.14290253818035126, |
| "learning_rate": 5.904677015084159e-06, |
| "loss": 0.7585715055465698, |
| "step": 242 |
| }, |
| { |
| "epoch": 1.8345864661654137, |
| "grad_norm": 0.15334200859069824, |
| "learning_rate": 6.147352882439652e-06, |
| "loss": 1.1932705640792847, |
| "step": 244 |
| }, |
| { |
| "epoch": 1.849624060150376, |
| "grad_norm": 0.09177304059267044, |
| "learning_rate": 6.397485295423669e-06, |
| "loss": 1.1568275690078735, |
| "step": 246 |
| }, |
| { |
| "epoch": 1.8646616541353382, |
| "grad_norm": 0.08284302055835724, |
| "learning_rate": 6.6547258051828426e-06, |
| "loss": 0.9782670736312866, |
| "step": 248 |
| }, |
| { |
| "epoch": 1.8796992481203008, |
| "grad_norm": 0.12089983373880386, |
| "learning_rate": 6.918716060875743e-06, |
| "loss": 1.2261128425598145, |
| "step": 250 |
| }, |
| { |
| "epoch": 1.8947368421052633, |
| "grad_norm": 0.1200575977563858, |
| "learning_rate": 7.1890883088761885e-06, |
| "loss": 1.0725328922271729, |
| "step": 252 |
| }, |
| { |
| "epoch": 1.9097744360902256, |
| "grad_norm": 0.2572805881500244, |
| "learning_rate": 7.4654659050752845e-06, |
| "loss": 1.2271314859390259, |
| "step": 254 |
| }, |
| { |
| "epoch": 1.9248120300751879, |
| "grad_norm": 0.2713569104671478, |
| "learning_rate": 7.747463839568292e-06, |
| "loss": 0.8813698291778564, |
| "step": 256 |
| }, |
| { |
| "epoch": 1.9398496240601504, |
| "grad_norm": 0.5769055485725403, |
| "learning_rate": 8.034689272995649e-06, |
| "loss": 0.5630529522895813, |
| "step": 258 |
| }, |
| { |
| "epoch": 1.954887218045113, |
| "grad_norm": 0.13255445659160614, |
| "learning_rate": 8.32674208379076e-06, |
| "loss": 1.0115076303482056, |
| "step": 260 |
| } |
| ], |
| "logging_steps": 2, |
| "max_steps": 532, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 4, |
| "save_steps": 260, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.8566094440628224e+18, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|