| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.9963379544860058, |
| "eval_steps": 500, |
| "global_step": 954, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.020925974365681402, |
| "grad_norm": 0.5565130669449413, |
| "learning_rate": 2.0833333333333334e-06, |
| "loss": 0.0288, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.041851948731362804, |
| "grad_norm": 0.18606951178093903, |
| "learning_rate": 4.166666666666667e-06, |
| "loss": 0.0118, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.0627779230970442, |
| "grad_norm": 0.03950245765434584, |
| "learning_rate": 6.25e-06, |
| "loss": 0.0039, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.08370389746272561, |
| "grad_norm": 0.03819057800914349, |
| "learning_rate": 8.333333333333334e-06, |
| "loss": 0.0029, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.10462987182840701, |
| "grad_norm": 0.0544544775809417, |
| "learning_rate": 1.0416666666666668e-05, |
| "loss": 0.0028, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.1255558461940884, |
| "grad_norm": 0.017750069891728056, |
| "learning_rate": 1.25e-05, |
| "loss": 0.0024, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.14648182055976983, |
| "grad_norm": 0.025688827192495198, |
| "learning_rate": 1.4583333333333333e-05, |
| "loss": 0.0021, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.16740779492545121, |
| "grad_norm": 0.009892162090487124, |
| "learning_rate": 1.6666666666666667e-05, |
| "loss": 0.0013, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.18833376929113263, |
| "grad_norm": 0.013992833360284824, |
| "learning_rate": 1.8750000000000002e-05, |
| "loss": 0.0006, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.20925974365681402, |
| "grad_norm": 0.009570194364057852, |
| "learning_rate": 1.9998927475076107e-05, |
| "loss": 0.0029, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.23018571802249543, |
| "grad_norm": 0.011885486768552911, |
| "learning_rate": 1.998686421164407e-05, |
| "loss": 0.0019, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.2511116923881768, |
| "grad_norm": 0.00535124159298834, |
| "learning_rate": 1.9961413253717214e-05, |
| "loss": 0.0007, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.2720376667538582, |
| "grad_norm": 0.014087761849564343, |
| "learning_rate": 1.9922608719076874e-05, |
| "loss": 0.002, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.29296364111953965, |
| "grad_norm": 0.014243222134916782, |
| "learning_rate": 1.9870502626379127e-05, |
| "loss": 0.0011, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.31388961548522104, |
| "grad_norm": 0.02081790679071402, |
| "learning_rate": 1.980516482542224e-05, |
| "loss": 0.0019, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.33481558985090243, |
| "grad_norm": 0.017403596614001808, |
| "learning_rate": 1.972668290351084e-05, |
| "loss": 0.0022, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.3557415642165838, |
| "grad_norm": 0.014181950638291016, |
| "learning_rate": 1.9635162068042547e-05, |
| "loss": 0.0013, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.37666753858226526, |
| "grad_norm": 0.004266630945852344, |
| "learning_rate": 1.9530725005474195e-05, |
| "loss": 0.0016, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.39759351294794665, |
| "grad_norm": 0.003584919486125673, |
| "learning_rate": 1.9413511716856973e-05, |
| "loss": 0.0017, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.41851948731362804, |
| "grad_norm": 0.010407187744097775, |
| "learning_rate": 1.9283679330160726e-05, |
| "loss": 0.0006, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.4394454616793094, |
| "grad_norm": 0.011994286058888401, |
| "learning_rate": 1.9141401889639167e-05, |
| "loss": 0.004, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.46037143604499087, |
| "grad_norm": 0.002559284420598525, |
| "learning_rate": 1.898687012251826e-05, |
| "loss": 0.0014, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.48129741041067226, |
| "grad_norm": 0.01635207542112704, |
| "learning_rate": 1.8820291183320602e-05, |
| "loss": 0.0035, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.5022233847763536, |
| "grad_norm": 0.019845090979002694, |
| "learning_rate": 1.8641888376168483e-05, |
| "loss": 0.0004, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.5231493591420351, |
| "grad_norm": 0.037240037127226344, |
| "learning_rate": 1.845190085543795e-05, |
| "loss": 0.0032, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.5440753335077164, |
| "grad_norm": 0.006653507161649646, |
| "learning_rate": 1.8250583305165098e-05, |
| "loss": 0.0009, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.5650013078733979, |
| "grad_norm": 0.6020150741013875, |
| "learning_rate": 1.8038205597634392e-05, |
| "loss": 0.0011, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.5859272822390793, |
| "grad_norm": 0.05961677548525192, |
| "learning_rate": 1.7815052431606702e-05, |
| "loss": 0.0042, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.6068532566047606, |
| "grad_norm": 0.03667937697835753, |
| "learning_rate": 1.7581422950671942e-05, |
| "loss": 0.0029, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.6277792309704421, |
| "grad_norm": 0.02203193747208106, |
| "learning_rate": 1.733763034223804e-05, |
| "loss": 0.0017, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.6487052053361234, |
| "grad_norm": 0.03047813653712161, |
| "learning_rate": 1.7084001417693702e-05, |
| "loss": 0.0013, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.6696311797018049, |
| "grad_norm": 0.02403172913701344, |
| "learning_rate": 1.682087617430782e-05, |
| "loss": 0.001, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.6905571540674863, |
| "grad_norm": 0.020885112433327038, |
| "learning_rate": 1.6548607339452853e-05, |
| "loss": 0.0009, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.7114831284331676, |
| "grad_norm": 0.02143325180735118, |
| "learning_rate": 1.626755989776303e-05, |
| "loss": 0.0007, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.7324091027988491, |
| "grad_norm": 0.028502523632724423, |
| "learning_rate": 1.5978110601861408e-05, |
| "loss": 0.0016, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.7533350771645305, |
| "grad_norm": 0.00832352415812105, |
| "learning_rate": 1.568064746731156e-05, |
| "loss": 0.0009, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.7742610515302119, |
| "grad_norm": 0.009527918206519135, |
| "learning_rate": 1.5375569252470897e-05, |
| "loss": 0.0018, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.7951870258958933, |
| "grad_norm": 0.025970668054977514, |
| "learning_rate": 1.506328492394303e-05, |
| "loss": 0.0006, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.8161130002615746, |
| "grad_norm": 0.026371283237352123, |
| "learning_rate": 1.4744213108345605e-05, |
| "loss": 0.0023, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.8370389746272561, |
| "grad_norm": 0.030148449665476192, |
| "learning_rate": 1.4418781531128636e-05, |
| "loss": 0.0021, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.8579649489929375, |
| "grad_norm": 0.009123367946997817, |
| "learning_rate": 1.4087426443195549e-05, |
| "loss": 0.0009, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.8788909233586188, |
| "grad_norm": 0.033901495227343266, |
| "learning_rate": 1.375059203609562e-05, |
| "loss": 0.0023, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.8998168977243003, |
| "grad_norm": 0.007725818014371622, |
| "learning_rate": 1.3408729846571716e-05, |
| "loss": 0.0006, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.9207428720899817, |
| "grad_norm": 0.01076304174801131, |
| "learning_rate": 1.3062298151261592e-05, |
| "loss": 0.0009, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.9416688464556631, |
| "grad_norm": 0.08792335870716929, |
| "learning_rate": 1.2711761352364172e-05, |
| "loss": 0.0016, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.9625948208213445, |
| "grad_norm": 0.019193508970073957, |
| "learning_rate": 1.2357589355094275e-05, |
| "loss": 0.0005, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.9835207951870258, |
| "grad_norm": 0.008676099200769616, |
| "learning_rate": 1.2000256937760446e-05, |
| "loss": 0.0019, |
| "step": 470 |
| }, |
| { |
| "epoch": 1.0044467695527073, |
| "grad_norm": 0.0014044382775048332, |
| "learning_rate": 1.1640243115310219e-05, |
| "loss": 0.0013, |
| "step": 480 |
| }, |
| { |
| "epoch": 1.0253727439183886, |
| "grad_norm": 0.0047799061066043395, |
| "learning_rate": 1.127803049719605e-05, |
| "loss": 0.0008, |
| "step": 490 |
| }, |
| { |
| "epoch": 1.0462987182840702, |
| "grad_norm": 0.006651240484213586, |
| "learning_rate": 1.091410464042268e-05, |
| "loss": 0.0004, |
| "step": 500 |
| }, |
| { |
| "epoch": 1.0672246926497515, |
| "grad_norm": 0.007370492651937019, |
| "learning_rate": 1.0548953398643276e-05, |
| "loss": 0.0002, |
| "step": 510 |
| }, |
| { |
| "epoch": 1.0881506670154328, |
| "grad_norm": 0.004999091099852188, |
| "learning_rate": 1.0183066268176775e-05, |
| "loss": 0.0013, |
| "step": 520 |
| }, |
| { |
| "epoch": 1.1090766413811144, |
| "grad_norm": 0.0067648135423557494, |
| "learning_rate": 9.81693373182323e-06, |
| "loss": 0.0004, |
| "step": 530 |
| }, |
| { |
| "epoch": 1.1300026157467957, |
| "grad_norm": 0.006355010008632378, |
| "learning_rate": 9.451046601356725e-06, |
| "loss": 0.0005, |
| "step": 540 |
| }, |
| { |
| "epoch": 1.150928590112477, |
| "grad_norm": 0.0019993701626922662, |
| "learning_rate": 9.085895359577324e-06, |
| "loss": 0.0012, |
| "step": 550 |
| }, |
| { |
| "epoch": 1.1718545644781586, |
| "grad_norm": 0.07841604107285226, |
| "learning_rate": 8.721969502803954e-06, |
| "loss": 0.0007, |
| "step": 560 |
| }, |
| { |
| "epoch": 1.19278053884384, |
| "grad_norm": 0.008878084655677647, |
| "learning_rate": 8.359756884689785e-06, |
| "loss": 0.0006, |
| "step": 570 |
| }, |
| { |
| "epoch": 1.2137065132095213, |
| "grad_norm": 0.022433859996456522, |
| "learning_rate": 7.999743062239557e-06, |
| "loss": 0.001, |
| "step": 580 |
| }, |
| { |
| "epoch": 1.2346324875752028, |
| "grad_norm": 0.007321094493119974, |
| "learning_rate": 7.642410644905726e-06, |
| "loss": 0.0002, |
| "step": 590 |
| }, |
| { |
| "epoch": 1.2555584619408842, |
| "grad_norm": 0.004130666592411099, |
| "learning_rate": 7.2882386476358304e-06, |
| "loss": 0.0008, |
| "step": 600 |
| }, |
| { |
| "epoch": 1.2764844363065655, |
| "grad_norm": 0.009136384794392362, |
| "learning_rate": 6.937701848738407e-06, |
| "loss": 0.0005, |
| "step": 610 |
| }, |
| { |
| "epoch": 1.297410410672247, |
| "grad_norm": 0.014936959834984399, |
| "learning_rate": 6.591270153428288e-06, |
| "loss": 0.0015, |
| "step": 620 |
| }, |
| { |
| "epoch": 1.3183363850379284, |
| "grad_norm": 0.00995968283183629, |
| "learning_rate": 6.249407963904381e-06, |
| "loss": 0.0003, |
| "step": 630 |
| }, |
| { |
| "epoch": 1.3392623594036097, |
| "grad_norm": 0.007922264005744792, |
| "learning_rate": 5.912573556804453e-06, |
| "loss": 0.002, |
| "step": 640 |
| }, |
| { |
| "epoch": 1.3601883337692913, |
| "grad_norm": 0.009732474218704226, |
| "learning_rate": 5.581218468871365e-06, |
| "loss": 0.0015, |
| "step": 650 |
| }, |
| { |
| "epoch": 1.3811143081349726, |
| "grad_norm": 0.029322482465715245, |
| "learning_rate": 5.2557868916543996e-06, |
| "loss": 0.0004, |
| "step": 660 |
| }, |
| { |
| "epoch": 1.402040282500654, |
| "grad_norm": 0.002219147933917833, |
| "learning_rate": 4.9367150760569746e-06, |
| "loss": 0.0013, |
| "step": 670 |
| }, |
| { |
| "epoch": 1.4229662568663353, |
| "grad_norm": 0.016601760824032873, |
| "learning_rate": 4.6244307475291025e-06, |
| "loss": 0.0007, |
| "step": 680 |
| }, |
| { |
| "epoch": 1.4438922312320168, |
| "grad_norm": 0.03618373662395389, |
| "learning_rate": 4.319352532688444e-06, |
| "loss": 0.0016, |
| "step": 690 |
| }, |
| { |
| "epoch": 1.4648182055976982, |
| "grad_norm": 0.008705175714719602, |
| "learning_rate": 4.0218893981385935e-06, |
| "loss": 0.0009, |
| "step": 700 |
| }, |
| { |
| "epoch": 1.4857441799633795, |
| "grad_norm": 0.0029313903115019873, |
| "learning_rate": 3.732440102236975e-06, |
| "loss": 0.0002, |
| "step": 710 |
| }, |
| { |
| "epoch": 1.5066701543290608, |
| "grad_norm": 0.020710402409599028, |
| "learning_rate": 3.4513926605471504e-06, |
| "loss": 0.0007, |
| "step": 720 |
| }, |
| { |
| "epoch": 1.5275961286947424, |
| "grad_norm": 0.03507089834475249, |
| "learning_rate": 3.1791238256921785e-06, |
| "loss": 0.0021, |
| "step": 730 |
| }, |
| { |
| "epoch": 1.5485221030604237, |
| "grad_norm": 0.07040984119700786, |
| "learning_rate": 2.9159985823062997e-06, |
| "loss": 0.0012, |
| "step": 740 |
| }, |
| { |
| "epoch": 1.569448077426105, |
| "grad_norm": 0.0046045007369338465, |
| "learning_rate": 2.662369657761963e-06, |
| "loss": 0.0005, |
| "step": 750 |
| }, |
| { |
| "epoch": 1.5903740517917866, |
| "grad_norm": 0.044927829447691095, |
| "learning_rate": 2.418577049328058e-06, |
| "loss": 0.001, |
| "step": 760 |
| }, |
| { |
| "epoch": 1.611300026157468, |
| "grad_norm": 0.005367586480852439, |
| "learning_rate": 2.1849475683932996e-06, |
| "loss": 0.0005, |
| "step": 770 |
| }, |
| { |
| "epoch": 1.6322260005231493, |
| "grad_norm": 0.014755038810438978, |
| "learning_rate": 1.961794402365611e-06, |
| "loss": 0.0012, |
| "step": 780 |
| }, |
| { |
| "epoch": 1.6531519748888308, |
| "grad_norm": 0.007586271120637286, |
| "learning_rate": 1.7494166948349057e-06, |
| "loss": 0.0023, |
| "step": 790 |
| }, |
| { |
| "epoch": 1.6740779492545121, |
| "grad_norm": 0.01450560934843513, |
| "learning_rate": 1.5480991445620541e-06, |
| "loss": 0.001, |
| "step": 800 |
| }, |
| { |
| "epoch": 1.6950039236201935, |
| "grad_norm": 0.007197310242295822, |
| "learning_rate": 1.3581116238315194e-06, |
| "loss": 0.0004, |
| "step": 810 |
| }, |
| { |
| "epoch": 1.715929897985875, |
| "grad_norm": 0.011429665649141593, |
| "learning_rate": 1.1797088166794002e-06, |
| "loss": 0.0009, |
| "step": 820 |
| }, |
| { |
| "epoch": 1.7368558723515564, |
| "grad_norm": 0.005479589436376736, |
| "learning_rate": 1.013129877481741e-06, |
| "loss": 0.0015, |
| "step": 830 |
| }, |
| { |
| "epoch": 1.7577818467172377, |
| "grad_norm": 0.010284806366489312, |
| "learning_rate": 8.585981103608343e-07, |
| "loss": 0.0003, |
| "step": 840 |
| }, |
| { |
| "epoch": 1.7787078210829192, |
| "grad_norm": 0.004805391506128972, |
| "learning_rate": 7.163206698392744e-07, |
| "loss": 0.0007, |
| "step": 850 |
| }, |
| { |
| "epoch": 1.7996337954486006, |
| "grad_norm": 0.00832006380541767, |
| "learning_rate": 5.864882831430274e-07, |
| "loss": 0.0013, |
| "step": 860 |
| }, |
| { |
| "epoch": 1.820559769814282, |
| "grad_norm": 0.0016488023765299498, |
| "learning_rate": 4.6927499452580574e-07, |
| "loss": 0.0004, |
| "step": 870 |
| }, |
| { |
| "epoch": 1.8414857441799635, |
| "grad_norm": 0.010828047107950923, |
| "learning_rate": 3.6483793195745686e-07, |
| "loss": 0.002, |
| "step": 880 |
| }, |
| { |
| "epoch": 1.8624117185456448, |
| "grad_norm": 0.00029272379323776047, |
| "learning_rate": 2.733170964891607e-07, |
| "loss": 0.0008, |
| "step": 890 |
| }, |
| { |
| "epoch": 1.8833376929113261, |
| "grad_norm": 0.011245028659871065, |
| "learning_rate": 1.9483517457776436e-07, |
| "loss": 0.0013, |
| "step": 900 |
| }, |
| { |
| "epoch": 1.9042636672770077, |
| "grad_norm": 0.005356139523384608, |
| "learning_rate": 1.2949737362087156e-07, |
| "loss": 0.0009, |
| "step": 910 |
| }, |
| { |
| "epoch": 1.925189641642689, |
| "grad_norm": 0.006490951580098314, |
| "learning_rate": 7.73912809231292e-08, |
| "loss": 0.0005, |
| "step": 920 |
| }, |
| { |
| "epoch": 1.9461156160083704, |
| "grad_norm": 0.003383501070683301, |
| "learning_rate": 3.858674628278825e-08, |
| "loss": 0.003, |
| "step": 930 |
| }, |
| { |
| "epoch": 1.967041590374052, |
| "grad_norm": 0.012526402652665877, |
| "learning_rate": 1.3135788355934652e-08, |
| "loss": 0.0004, |
| "step": 940 |
| }, |
| { |
| "epoch": 1.9879675647397332, |
| "grad_norm": 0.016656680718419487, |
| "learning_rate": 1.0725249238940916e-09, |
| "loss": 0.0006, |
| "step": 950 |
| }, |
| { |
| "epoch": 1.9963379544860058, |
| "step": 954, |
| "total_flos": 841298254757888.0, |
| "train_loss": 0.001730952451793395, |
| "train_runtime": 49706.0079, |
| "train_samples_per_second": 2.461, |
| "train_steps_per_second": 0.019 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 954, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 841298254757888.0, |
| "train_batch_size": 8, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|